Commit 7db2a9e8 by zhiwei

分享链接消失并失效,解析改为有验证码的链接

parent 7ad96e77
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId> <artifactId>wechat</artifactId>
<version>1.1.7-SNAPSHOT</version> <version>1.1.8-SNAPSHOT</version>
<description> <description>
知微微信采集程序,包含 知微微信采集程序,包含
1.微信历史文章采集 1.微信历史文章采集
2.搜狗微信接口关键词采集 2.搜狗微信接口关键词采集
3.点赞阅读更新接口 3.点赞阅读更新接口
4.根据关键词或微信id查询帐号信息 4.根据关键词或微信id查询帐号信息
5.根据文章链接采集评论列表及评论数 5.根据文章链接采集评论列表及评论数
</description> </description>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties> </properties>
<developers> <developers>
<developer> <developer>
<id>Bewilder</id> <id>Bewilder</id>
<name>zhiwei zhang</name> <name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email> <email>zhangzhiwei@zhiweidata.com</email>
</developer> </developer>
</developers> </developers>
<!-- 打包管理 --> <!-- 打包管理 -->
<build> <build>
<plugins> <plugins>
<!-- 发布源码 --> <!-- 发布源码 -->
<plugin> <plugin>
<artifactId>maven-source-plugin</artifactId> <artifactId>maven-source-plugin</artifactId>
<version>2.4</version> <version>2.4</version>
<configuration> <configuration>
<attach>true</attach> <attach>true</attach>
</configuration> </configuration>
<executions> <executions>
<execution> <execution>
<phase>compile</phase> <phase>compile</phase>
<goals> <goals>
<goal>jar</goal> <goal>jar</goal>
</goals> </goals>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version> <version>2.10.4</version>
</plugin> </plugin>
<!-- 解决maven test命令时console出现中文乱码 --> <!-- 解决maven test命令时console出现中文乱码 -->
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId> <artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version> <version>2.19.1</version>
<configuration> <configuration>
<forkMode>once</forkMode> <forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine> <argLine>-Dfile.encoding=UTF-8</argLine>
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
<!-- 分发管理:管理distribution和supporting files --> <!-- 分发管理:管理distribution和supporting files -->
<distributionManagement> <distributionManagement>
<snapshotRepository> <snapshotRepository>
<id>nexus-releases</id> <id>nexus-releases</id>
<name>User Porject Snapshot</name> <name>User Porject Snapshot</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url> <url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url>
<uniqueVersion>true</uniqueVersion> <uniqueVersion>true</uniqueVersion>
</snapshotRepository> </snapshotRepository>
<repository> <repository>
<id>nexus-releases</id> <id>nexus-releases</id>
<name>User Porject Release</name> <name>User Porject Release</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url> <url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url>
</repository> </repository>
</distributionManagement> </distributionManagement>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.4-SNAPSHOT</version> <version>0.1.4-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.2-SNAPSHOT</version> <version>0.5.5.6-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
//package com.zhiwei.wechat.example; package com.zhiwei.wechat.example;
//
//import java.io.UnsupportedEncodingException; import java.io.IOException;
//import java.net.UnknownHostException; import java.io.UnsupportedEncodingException;
//import java.util.ArrayList; import java.net.Proxy;
//import java.util.List; import java.net.URLEncoder;
// import java.net.UnknownHostException;
//import org.slf4j.Logger; import java.util.ArrayList;
//import org.slf4j.LoggerFactory; import java.util.HashMap;
// import java.util.List;
//import com.zhiwei.common.config.GroupType; import java.util.Map;
//import com.zhiwei.crawler.proxy.ProxyFactory; import java.util.regex.Matcher;
//import com.zhiwei.crawler.proxy.ProxyHolder; import java.util.regex.Pattern;
//import com.zhiwei.wechat.entity.WechatAricle;
//import com.zhiwei.wechat.search.WechatAritcleSearch; import com.zhiwei.crawler.core.HttpBoot;
// import com.zhiwei.crawler.utils.RequestUtils;
///** import org.apache.commons.lang3.StringUtils;
// * @ClassName: WechatSearchExample import org.slf4j.Logger;
// * @Description: TODO(根据关键词等采集数据) import org.slf4j.LoggerFactory;
// * @author hero
// * @date 2016年12月16日 上午9:15:42 import com.zhiwei.common.config.GroupType;
// */ import com.zhiwei.crawler.proxy.ProxyFactory;
//public class WechatSearchExample{ import com.zhiwei.crawler.proxy.ProxyHolder;
// import com.zhiwei.wechat.entity.WechatAricle;
// private Logger logger = LoggerFactory.getLogger(WechatSearchExample.class); import com.zhiwei.wechat.search.WechatAritcleSearch;
// private static final String registry = "zookeeper://192.168.0.36:2181";
// private static final String group = "local"; /**
// * @ClassName: WechatSearchExample
// public static void main(String[] args) { * @Description: TODO(根据关键词等采集数据)
// ProxyFactory.init(registry, group, GroupType.PROVIDER); * @author hero
// try { * @date 2016年12月16日 上午9:15:42
// WechatSearchExample.wechatSearchExample(); */
// } catch (UnknownHostException e) { public class WechatSearchExample{
// e.printStackTrace();
// } private Logger logger = LoggerFactory.getLogger(WechatSearchExample.class);
// } private static final String registry = "zookeeper://192.168.0.36:2181";
// private static final String group = "local";
// private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
// public static void wechatSearchExample() throws UnknownHostException private static Proxy proxy = null;
// {
// List<String> wordList = new ArrayList<String>(); public static void main(String[] args) {
// wordList.add("京东"); ProxyFactory.init(registry, group, GroupType.PROVIDER,10000018);
// for(String word : wordList) proxy = ProxyHolder.SOUGOU_INNER_PROXY.getProxy();
// { try {
// try { WechatSearchExample.wechatSearchExample();
// List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-07-24", "2019-07-24", ProxyHolder.SOUGOU_INNER_PROXY.getProxy(), 21); } catch (UnknownHostException e) {
// System.out.println("======"+list.size()); e.printStackTrace();
// for(WechatAricle wechat : list){ }
//// System.out.println(wechat.getTitle()); }
// }
// } catch (UnsupportedEncodingException e) {
// e.printStackTrace(); public static void wechatSearchExample() throws UnknownHostException
// } catch (Exception e) { {
// e.printStackTrace(); List<String> wordList = new ArrayList<String>();
// } wordList.add("京东");
//// for(String wxId : wechatIds) for(String word : wordList)
//// { {
//// try { try {
//// logger.info("需要采集的wxId:::{}", wxId); List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-10-28", "2019-10-28",proxy, 51);
//// System.out.println("======"+list.size());
//// } catch (UnsupportedEncodingException e) { for(WechatAricle wechat : list){
//// e.printStackTrace(); System.out.println(wechat.getId());
//// } catch (Exception e) { }
//// e.printStackTrace(); } catch (UnsupportedEncodingException e) {
//// } e.printStackTrace();
//// } } catch (Exception e) {
// } e.printStackTrace();
// } }
// // for(String wxId : wechatIds)
// // {
//} // try {
// logger.info("需要采集的wxId:::{}", wxId);
//
// } catch (UnsupportedEncodingException e) {
// e.printStackTrace();
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment