Commit b21d2070 by chenweitao

测试类及项目结构文件、爬虫核心包升级、热搜基础项目构造器更新

parent a2bf4e4f
...@@ -113,6 +113,12 @@ ...@@ -113,6 +113,12 @@
<artifactId>jedis</artifactId> <artifactId>jedis</artifactId>
<version>2.8.1</version> <version>2.8.1</version>
</dependency> </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies> </dependencies>
......
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:3.12.2" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei:sendmail:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: javax.mail:mail:1.4.7" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.tools:zhiwei-tools:0.1.6-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.58" level="project" />
<orderEntry type="library" name="Maven: de.ruedigermoeller:fst:2.57" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.8.8" level="project" />
<orderEntry type="library" name="Maven: org.javassist:javassist:3.21.0-GA" level="project" />
<orderEntry type="library" name="Maven: org.objenesis:objenesis:2.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.8.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:crawler-core:0.6.7.4-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.14.9" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.17.2" level="project" />
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.13.1" level="project" />
<orderEntry type="library" name="Maven: cn.wanghaomiao:JsoupXpath:2.3.2" level="project" />
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.7" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.20" level="project" />
<orderEntry type="library" name="Maven: org.brotli:dec:0.1.2" level="project" />
<orderEntry type="library" name="Maven: com.ibm.icu:icu4j:67.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:29.0-jre" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:failureaccess:1.0.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava" level="project" />
<orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:3.0.2" level="project" />
<orderEntry type="library" name="Maven: org.checkerframework:checker-qual:2.11.1" level="project" />
<orderEntry type="library" name="Maven: com.google.errorprone:error_prone_annotations:2.3.4" level="project" />
<orderEntry type="library" name="Maven: com.google.j2objc:j2objc-annotations:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-1.2-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.async:task-boot:0.0.3-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:proxy-client:1.0.5-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.apache.dubbo:dubbo:2.7.4.1" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-all:4.1.25.Final" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.8" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.7.0.Final" level="project" />
<orderEntry type="library" name="Maven: com.kohlschutter.boilerpipe:boilerpipe-extractor:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.8" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: aopalliance:aopalliance:1.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-test:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context-support:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-web:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.8.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
</component>
</module>
\ No newline at end of file
...@@ -102,7 +102,7 @@ public class HotSearchList implements Serializable{ ...@@ -102,7 +102,7 @@ public class HotSearchList implements Serializable{
public HotSearchList(){} public HotSearchList(){}
public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){ public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){
this.id = name + "_" + new Date().getTime() + "_" + type; this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -116,7 +116,7 @@ public class HotSearchList implements Serializable{ ...@@ -116,7 +116,7 @@ public class HotSearchList implements Serializable{
public HotSearchList(String url, String name, Integer count,Integer rank,String type,Date date){ public HotSearchList(String url, String name, Integer count,Integer rank,String type,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -129,7 +129,7 @@ public class HotSearchList implements Serializable{ ...@@ -129,7 +129,7 @@ public class HotSearchList implements Serializable{
public HotSearchList(String url, String name, Integer count,Integer rank,String type, Integer commentCount, String topicLead,Date date){ public HotSearchList(String url, String name, Integer count,Integer rank,String type, Integer commentCount, String topicLead,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -143,7 +143,7 @@ public class HotSearchList implements Serializable{ ...@@ -143,7 +143,7 @@ public class HotSearchList implements Serializable{
} }
public HotSearchList(String url, String name, Integer count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){ public HotSearchList(String url, String name, Integer count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){
this.id = name + "_" + new Date().getTime() + "_" + type; this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.hot = hot; this.hot = hot;
...@@ -157,7 +157,7 @@ public class HotSearchList implements Serializable{ ...@@ -157,7 +157,7 @@ public class HotSearchList implements Serializable{
} }
public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage, String pictureUrl) { public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage, String pictureUrl) {
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.topicLead = topicLead; this.topicLead = topicLead;
......
package com.zhiwei.searchhotcrawler.run; package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig; import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
......
package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBLocalTemplate;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Log4j2
public class Job51Test {
public static void main(String[] args) {
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
List<HotSearchList> list = new ArrayList<>();
String url = "https://search.51job.com/list/080300,000000,0000,00,9,99,java,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=";
Map<String,Object> header = new HashMap<>();
header.put("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
header.put("Accept-Encoding","gzip, deflate, br");
header.put("Accept-Language","zh-CN,zh;q=0.9");
header.put("Cache-Control","max-age=0");
header.put("Connection","keep-alive");
header.put("Cookie","guid=1925f996c7ae446cdf1f579f113bff6e; _ujz=MTg3NDg4MTM4MA%3D%3D; ps=needv%3D0; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20210318%26%7C%26securetime%3DBztcaVQzWTsEZlJrWmJdPwQ2Ajw%253D; track=registertype%3D1; 51job=cuid%3D187488138%26%7C%26cusername%3Dphone_15757871020_202103189219%26%7C%26cpassword%3D%26%7C%26cname%3D%25B3%25C2%25EC%25BF%25CC%25CE%26%7C%26cemail%3D15757871020%2540163.com%26%7C%26cemailstatus%3D0%26%7C%26cnickname%3D%26%7C%26ccry%3D.0b4qUteozwmg%26%7C%26cconfirmkey%3D%25241%2524UXfAYBHG%2524Hni.5zaFu5kr7BN.eVcOU%252F%26%7C%26cautologin%3D1%26%7C%26cenglish%3D0%26%7C%26sex%3D0%26%7C%26cnamekey%3D%25241%2524CN04lL8j%2524kCHAFcf4TNh%252F2odmIqujW1%26%7C%26to%3D8019a57bb26817913b5f3c2080ba5792605354bf%26%7C%26; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; search=jobarea%7E%60080300%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21");
header.put("Host","search.51job.com");
header.put("Referer","https://search.51job.com/list/080300,000000,0000,00,9,99,%2B,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=");
header.put("sec-ch-ua","\"Google Chrome\";v=\"89\", \"Chromium\";v=\"89\", \";Not A Brand\";v=\"99\"");
header.put("Sec-Fetch-Dest","document");
header.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36");
// header.put("","");
JSONObject jsonObject = null;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,header);
for (int t = 0; t < 1 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody);
log.info("document:{}",document);
log.info("======================");
String html = document.getElementsByClass("j_joblist").first().html();
log.info("html:{}",html);
jsonObject = JSONObject.parseObject(html);
if (jsonObject != null) {
// JSONArray dataJson = jsonObject.getJSONObject("initialState").getJSONObject("topsearch").getJSONArray("data");
// for (int i = 0; i < dataJson.size(); i++) {
// Integer rank = i + 1;
// JSONObject data = dataJson.getJSONObject(i);
// String name = data.getString("queryDisplay");
// String realQuery = data.getString("realQuery");
// String zhihuUrl = "https://www.zhihu.com/search?q=" + realQuery + "&utm_content=search_hot&type=content";
//
// }
}
} else {
log.error("临时爬取出问题");
}
}
}
}
#redis.host=127.0.0.1 #redis.host=115.236.59.91
#redis.port=6379 #redis.port=7382
#redis.password= #redis.password=
#redis #redis
#redis.host = 192.168.0.39
#redis.port = 7382
#redis.database = 3
#redis
redis.host = 192.168.0.39 redis.host = 192.168.0.39
redis.port = 6379 redis.port = 6379
redis.database = 1 redis.database = 1
#maxIdle #maxIdle
redis.maxIdle=20 redis.maxIdle=20
#minIdle #minIdle
......
/**
* ***************************************************
* Copyright (C), NingBo ZhiWeiReach info. Co., Ltd. *
*****************************************************
* 类的详细说明
*
* @author 东临碣石
* @Date 2016年1月16日
* @version 1.00
*/
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Parent class for Spring-based tests; it loads the base configuration file.
 *
 * <p>The class has no members of its own. It is run with
 * {@link SpringJUnit4ClassRunner} and points the test context at
 * {@code classpath:applicationContext.xml}, so subclasses pick up that
 * configuration through inheritance.
 *
 * @date 2016-01-16 11:40:14
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations =
{ "classpath:applicationContext.xml" })
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests
{
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment