Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
3d198f56
Commit
3d198f56
authored
Jun 12, 2020
by
chenweiyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫核心包版本升级 版本升级0.4.4
parent
862fa762
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
117 additions
and
122 deletions
+117
-122
pom.xml
+2
-7
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoQuestionAnswerParse.java
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoQuestionParse.java
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoSearchParse.java
+2
-2
src/main/java/com/zhiwei/wangyi/parse/WangyiNewParse.java
+1
-1
src/test/java/com/zhiwei/toutiao/test/TouTiaoAccountExample.java
+44
-44
src/test/java/com/zhiwei/toutiao/test/TouTiaoQuestionExample.java
+58
-58
No files found.
pom.xml
View file @
3d198f56
...
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<artifactId>
toutiao
</artifactId>
<version>
0.4.
3
-SNAPSHOT
</version>
<version>
0.4.
4
-SNAPSHOT
</version>
<dependencies>
<dependency>
...
...
@@ -15,7 +15,7 @@
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.
5.6.2-RELEASE
</version>
<version>
0.
6.6.3-SNAPSHOT
</version>
<scope>
provided
</scope>
</dependency>
<dependency>
...
...
@@ -23,11 +23,6 @@
<artifactId>
excelpoi
</artifactId>
<version>
0.0.5-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
org.mongodb
</groupId>
<artifactId>
mongo-java-driver
</artifactId>
<version>
3.8.1
</version>
</dependency>
</dependencies>
<!-- 打包管理 -->
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
View file @
3d198f56
...
...
@@ -16,8 +16,8 @@ import org.apache.logging.log4j.util.Strings;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.toutiao.bean.Signature
;
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
3d198f56
...
...
@@ -34,8 +34,8 @@ import org.jsoup.Jsoup;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.toutiao.bean.Signature
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
View file @
3d198f56
...
...
@@ -15,8 +15,8 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.toutiao.bean.TouTiaoComment
;
import
com.zhiwei.toutiao.util.Tools
;
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoQuestionAnswerParse.java
View file @
3d198f56
...
...
@@ -14,8 +14,8 @@ import org.jsoup.nodes.Document;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.toutiao.bean.TouTiaoQuestionAnswer
;
import
com.zhiwei.toutiao.util.Tools
;
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoQuestionParse.java
View file @
3d198f56
...
...
@@ -13,8 +13,8 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.toutiao.bean.TouTiaoQuestion
;
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoSearchParse.java
View file @
3d198f56
...
...
@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONException
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
proxy.ProxyHolder
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
...
...
src/main/java/com/zhiwei/wangyi/parse/WangyiNewParse.java
View file @
3d198f56
...
...
@@ -12,7 +12,7 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.
core.
utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.toutiao.util.Tools
;
...
...
src/test/java/com/zhiwei/toutiao/test/TouTiaoAccountExample.java
View file @
3d198f56
package
com
.
zhiwei
.
toutiao
.
test
;
import
java.util.List
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.toutiao.bean.TouTiaoAccount
;
import
com.zhiwei.toutiao.parse.TouTiaoAccountParse
;
/**
 * Manual example driver for Toutiao account collection.
 *
 * <p>Originally dated 2017-10-17 16:03:44.
 *
 * @author hero
 */
public class TouTiaoAccountExample {

    /** Registry address passed as the first argument of {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.36:2181";

    /** Group name passed as the second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /**
     * Initialises {@code ProxyFactory} with the registry, the group and
     * {@code GroupType.PROVIDER}, then runs the friends-list example.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER);
        touTiaoAccountFriendTest();
    }

    /**
     * Looks up account information for a fixed account name and prints it
     * behind a separator. The second argument of the lookup is {@code null}
     * (presumably the proxy; confirm against {@code TouTiaoAccountParse}).
     */
    public void touTiaoAccountTest() {
        String accountName = "华尔街瞭望";
        System.out.println(
                "====================" + TouTiaoAccountParse.getTouTiaoAccountInfoByName(accountName, null));
    }

    /**
     * Fetches the friends list of a fixed user id via
     * {@code ProxyHolder.NAT_HEAVY_PROXY} and prints every returned account.
     */
    public static void touTiaoAccountFriendTest() {
        String userId = "3478445819704347";
        List<TouTiaoAccount> friends =
                TouTiaoAccountParse.getFriendsList(userId, ProxyHolder.NAT_HEAVY_PROXY);
        for (TouTiaoAccount friend : friends) {
            System.out.println(friend);
        }
    }
}
//
package com.zhiwei.toutiao.test;
//
//
import java.util.List;
//
//
import com.zhiwei.common.config.GroupType;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
//
import com.zhiwei.crawler.proxy.ProxyHolder;
//
import com.zhiwei.toutiao.bean.TouTiaoAccount;
//
import com.zhiwei.toutiao.parse.TouTiaoAccountParse;
//
/
//
**
//
* @ClassName: TouTiaoAccountExample
//
* @Description: TODO(今日头条帐号采集)
//
* @author hero
//
* @date 2017年10月17日 下午4:03:44
//
*/
//
public class TouTiaoAccountExample {
//
//
private static final String registry = "zookeeper://192.168.0.36:2181";
//
private static final String group = "local";
//
//
//
public static void main(String[] args) {
//
ProxyFactory.init(registry, group, GroupType.PROVIDER);
//
touTiaoAccountFriendTest();
//
//
}
//
//
public void touTiaoAccountTest(){
//
String word = "华尔街瞭望";
//
System.out.println("===================="+TouTiaoAccountParse.getTouTiaoAccountInfoByName(word, null));
//
}
//
//
//
//
public static void touTiaoAccountFriendTest(){
//
String userid = "3478445819704347";
//
List<TouTiaoAccount> userList = TouTiaoAccountParse.getFriendsList(userid, ProxyHolder.NAT_HEAVY_PROXY);
//
for(TouTiaoAccount tta : userList){
//
System.out.println(tta);
//
}
//
//
}
//
}
src/test/java/com/zhiwei/toutiao/test/TouTiaoQuestionExample.java
View file @
3d198f56
package
com
.
zhiwei
.
toutiao
.
test
;
import
java.util.List
;
import
java.util.Map
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DB
;
import
com.mongodb.DBCollection
;
import
com.mongodb.DBObject
;
import
com.mongodb.Mongo
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.excelpoi.bean.ExcelResult
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.toutiao.bean.TouTiaoQuestion
;
import
com.zhiwei.toutiao.parse.TouTiaoQuestionParse
;
/**
 * Manual test driver for Toutiao Q&amp;A collection.
 *
 * <p>Reads keywords from an Excel sheet, searches Toutiao questions for each
 * keyword and stores every question, tagged with its keyword, in a MongoDB
 * collection.
 *
 * <p>Originally dated 2017-07-20 15:06:51.
 *
 * @author hero
 */
public class TouTiaoQuestionExample {

    /** Registry address passed as the first argument of {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.36:2181";

    /** Group name passed as the second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /** Excel column that holds the search keyword. */
    private static final String KEYWORD_COLUMN = "关键词";

    /**
     * Initialises {@code ProxyFactory}, opens the {@code wukong} collection of
     * the {@code wukong} database and runs the collection test against it.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER);

        Mongo mongo = new Mongo("192.168.0.81", 27017);
        try {
            DB db = mongo.getDB("wukong");
            DBCollection coll = db.getCollection("wukong");
            touTiaoQuestionTest(coll);
        } finally {
            // Release the client's connections even when the run fails.
            mongo.close();
        }
    }

    /**
     * Imports the keyword sheet, searches questions for every keyword and
     * saves each question into {@code coll} with an extra {@code word} field
     * holding the keyword that produced it.
     *
     * <p>Rows without a value in the keyword column are reported on
     * {@code System.err} and skipped.
     *
     * @param coll collection that receives one document per question
     */
    public static void touTiaoQuestionTest(DBCollection coll) {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        ExcelResult excelResult =
                poi.importExcelResult("C:\\Users\\qq859\\Desktop\\悟空问答关键词.xlsx", 0);
        List<Map<String, Object>> dataList = excelResult.getBodyList();

        for (Map<String, Object> data : dataList) {
            Object keywordCell = data.get(KEYWORD_COLUMN);
            if (keywordCell == null) {
                // Map.get returns null for a missing column; skip instead of throwing.
                System.err.println("skipping row without keyword: " + data);
                continue;
            }
            String word = keywordCell.toString();
            System.out.println("word================" + word);

            List<TouTiaoQuestion> list = TouTiaoQuestionParse.getSearchTouTiaoQuestion(word);
            System.out.println(list.size());

            for (TouTiaoQuestion question : list) {
                String jsonStr = JSONObject.toJSONString(question);
                // JSONObject is itself a Map<String, Object>, so no raw Map is needed.
                Map<String, Object> dataMap = JSONObject.parseObject(jsonStr);
                dataMap.put("word", word);
                coll.save(new BasicDBObject(dataMap));
            }
        }
    }
}
//
package com.zhiwei.toutiao.test;
//
//
import java.util.List;
//
import java.util.Map;
//
import com.alibaba.fastjson.JSONObject;
//
import com.mongodb.BasicDBObject;
//
import com.mongodb.DB;
//
import com.mongodb.DBCollection;
//
import com.mongodb.DBObject;
//
import com.mongodb.Mongo;
//
import com.zhiwei.common.config.GroupType;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
//
import com.zhiwei.excelpoi.bean.ExcelResult;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//
import com.zhiwei.toutiao.bean.TouTiaoQuestion;
//
import com.zhiwei.toutiao.parse.TouTiaoQuestionParse;
//
/
//
**
//
* @ClassName: TouTiaoQuestionExample
//
* @Description: TODO(头条问答采集测试类)
//
* @author hero
//
* @date 2017年7月20日 下午3:06:51
//
*/
//
public class TouTiaoQuestionExample {
//
//
private static final String registry = "zookeeper://192.168.0.36:2181";
//
private static final String group = "local";
//
//
public static void main(String[] args) {
//
ProxyFactory.init(registry, group, GroupType.PROVIDER);
//
//
Mongo mongo = new Mongo("192.168.0.81", 27017);
//
DB db = mongo.getDB("wukong");
//
DBCollection coll = db.getCollection("wukong");
//
//
touTiaoQuestionTest(coll);
//
//
}
//
//
public static void touTiaoQuestionTest(DBCollection coll) {
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
ExcelResult excelResult = poi.importExcelResult("C:\\Users\\qq859\\Desktop\\悟空问答关键词.xlsx", 0);
//
List<Map<String,Object>> dataList = excelResult.getBodyList();
//
for(Map<String,Object> data : dataList) {
//
String word = data.get("关键词").toString();
//
System.out.println("word================"+word);
//
List<TouTiaoQuestion> list = TouTiaoQuestionParse.getSearchTouTiaoQuestion(word);
//
System.out.println(list.size());
//
for(TouTiaoQuestion question : list){
//
String jsonStr = JSONObject.toJSONString(question);
//
Map dataMap = JSONObject.toJavaObject(JSONObject.parseObject(jsonStr), Map.class);
//
dataMap.put("word", word);
//
coll.save(new BasicDBObject(dataMap));
//
}
//
}
//
}
//
//
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment