Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
weibohotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
weibohotcrawler
Commits
05561321
Commit
05561321
authored
Sep 20, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
去重中间件版本的新包,并修改了http采集方式
parent
cd252c2d
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
83 additions
and
134 deletions
+83
-134
pom.xml
+10
-31
src/main/java/com/zhiwei/weibocrawler/crawler/DataCrawlerStart.java
+3
-3
src/main/java/com/zhiwei/weibocrawler/crawler/DataUpdate.java
+2
-2
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboBangdanData.java
+2
-2
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboCrawlerAnalysis.java
+31
-25
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboHotData.java
+1
-1
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboSocietyData.java
+1
-1
src/main/java/com/zhiwei/weibocrawler/httpclient/HttpClientDemo.java
+10
-53
src/main/java/com/zhiwei/weibocrawler/rsidClient/RsidClientDAO.java
+21
-14
src/test/java/weibotest/HotWeiboTest.java
+2
-2
No files found.
pom.xml
View file @
05561321
...
@@ -3,44 +3,17 @@
...
@@ -3,44 +3,17 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
weibohotcrawler
</artifactId>
<artifactId>
weibohotcrawler
</artifactId>
<version>
0.0.
1
-SNAPSHOT
</version>
<version>
0.0.
2
-SNAPSHOT
</version>
<name>
weibohotcrawler
</name>
<name>
weibohotcrawler
</name>
<description>
微博热搜1小时榜单,社会、热点采集程序
</description>
<description>
微博热搜1小时榜单,社会、热点采集程序
</description>
<dependencies>
<dependencies>
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
rsid-client
</artifactId>
<version>
0.0.2-SNAPSHOT
</version>
</dependency>
<dependency>
<dependency>
<groupId>
javax.mail
</groupId>
<groupId>
javax.mail
</groupId>
<artifactId>
mail
</artifactId>
<artifactId>
mail
</artifactId>
<version>
1.4.7
</version>
<version>
1.4.7
</version>
</dependency>
</dependency>
<dependency>
<groupId>
junit
</groupId>
<artifactId>
junit
</artifactId>
<version>
3.8.1
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.apache.httpcomponents
</groupId>
<artifactId>
httpcore
</artifactId>
<version>
4.4.6
</version>
</dependency>
<dependency>
<groupId>
org.apache.httpcomponents
</groupId>
<artifactId>
httpclient
</artifactId>
<version>
4.5.3
</version>
</dependency>
<dependency>
<groupId>
org.jsoup
</groupId>
<artifactId>
jsoup
</artifactId>
<version>
1.8.3
</version>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
...
@@ -48,10 +21,16 @@
...
@@ -48,10 +21,16 @@
<version>
0.0.5-SNAPSHOT
</version>
<version>
0.0.5-SNAPSHOT
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
.middleware
</groupId>
<artifactId>
zhiweiTools
</artifactId>
<artifactId>
cleaner-unified-urlfilter
</artifactId>
<version>
0.0.6
-SNAPSHOT
</version>
<version>
1.0
-SNAPSHOT
</version>
</dependency>
</dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.0.5-SNAPSHOT
</version>
</dependency>
</dependencies>
</dependencies>
<!-- 打包管理 -->
<!-- 打包管理 -->
<build>
<build>
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/DataCrawlerStart.java
View file @
05561321
...
@@ -17,9 +17,9 @@ public class DataCrawlerStart{
...
@@ -17,9 +17,9 @@ public class DataCrawlerStart{
}
}
public
void
start
()
{
public
void
start
()
{
scheduled
.
scheduleWithFixedDelay
(
new
WeiboBangdanData
(),
2000
,
15
*
1000
,
TimeUnit
.
MILLISECONDS
);
scheduled
.
scheduleWithFixedDelay
(
new
WeiboBangdanData
(),
2000
,
3
*
60
*
1000
,
TimeUnit
.
MILLISECONDS
);
scheduled
.
scheduleWithFixedDelay
(
new
WeiboHotData
(),
1000
,
2
0
*
1000
,
TimeUnit
.
MILLISECONDS
);
scheduled
.
scheduleWithFixedDelay
(
new
WeiboHotData
(),
1000
,
3
*
6
0
*
1000
,
TimeUnit
.
MILLISECONDS
);
scheduled
.
scheduleWithFixedDelay
(
new
WeiboSocietyData
(),
3000
,
19
*
1000
,
TimeUnit
.
MILLISECONDS
);
scheduled
.
scheduleWithFixedDelay
(
new
WeiboSocietyData
(),
3000
,
3
*
60
*
1000
,
TimeUnit
.
MILLISECONDS
);
}
}
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/DataUpdate.java
View file @
05561321
...
@@ -6,12 +6,11 @@ import java.util.List;
...
@@ -6,12 +6,11 @@ import java.util.List;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.weibobusiness.weibo4j.model.Status
;
import
com.zhiwei.weibobusiness.weibo4j.model.Status
;
import
com.zhiwei.weibocrawler.crawler.getdata.WeiboCrawlerAnalysis
;
import
com.zhiwei.weibocrawler.crawler.getdata.WeiboCrawlerAnalysis
;
import
com.zhiwei.weibocrawler.rsidClient.DataQueue
;
import
com.zhiwei.weibocrawler.rsidClient.DataQueue
;
import
com.zhiwei.weibocrawler.rsidClient.UpdateQueue
;
import
com.zhiwei.weibocrawler.rsidClient.UpdateQueue
;
import
com.zhiwei.zhiweiTools.timeParse.TimeParse
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
/***
/***
*
*
* @ClassName DataUpdate
* @ClassName DataUpdate
...
@@ -56,6 +55,7 @@ public class DataUpdate implements Runnable{
...
@@ -56,6 +55,7 @@ public class DataUpdate implements Runnable{
}
}
i
=
1
;
i
=
1
;
}
else
{
}
else
{
logger
.
error
(
"目前数据量不足50,目前队列中得数据量为:::{}"
,
DataQueue
.
linkQueue
.
size
());
i
++;
i
++;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboBangdanData.java
View file @
05561321
...
@@ -5,7 +5,7 @@ import java.util.Date;
...
@@ -5,7 +5,7 @@ import java.util.Date;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.
zhiweiT
ools.tools.ZhiWeiTools
;
import
com.zhiwei.
t
ools.tools.ZhiWeiTools
;
/**
/**
*
*
* @ClassName WeiboBangdanData
* @ClassName WeiboBangdanData
...
@@ -28,7 +28,7 @@ public class WeiboBangdanData implements Runnable{
...
@@ -28,7 +28,7 @@ public class WeiboBangdanData implements Runnable{
weiboCrawlerAnalysis
.
getWeiboHotMid
(
url
);
weiboCrawlerAnalysis
.
getWeiboHotMid
(
url
);
ZhiWeiTools
.
sleep
(
12000
);
ZhiWeiTools
.
sleep
(
12000
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"出错====榜单的出错了"
,
e
.
getMessage
()
);
logger
.
error
(
"出错====榜单的出错了"
,
e
);
e
.
printStackTrace
();
e
.
printStackTrace
();
ZhiWeiTools
.
sleep
(
20
);
ZhiWeiTools
.
sleep
(
20
);
continue
;
continue
;
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboCrawlerAnalysis.java
View file @
05561321
package
com
.
zhiwei
.
weibocrawler
.
crawler
.
getdata
;
package
com
.
zhiwei
.
weibocrawler
.
crawler
.
getdata
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
...
@@ -14,6 +11,7 @@ import org.slf4j.Logger;
...
@@ -14,6 +11,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.weibobusiness.business.SearchBusiness
;
import
com.zhiwei.weibobusiness.business.SearchBusiness
;
import
com.zhiwei.weibobusiness.weibo4j.model.Status
;
import
com.zhiwei.weibobusiness.weibo4j.model.Status
;
import
com.zhiwei.weibobusiness.weibo4j.model.StatusWapper
;
import
com.zhiwei.weibobusiness.weibo4j.model.StatusWapper
;
...
@@ -21,7 +19,7 @@ import com.zhiwei.weibobusiness.weibo4j.model.WeiboException;
...
@@ -21,7 +19,7 @@ import com.zhiwei.weibobusiness.weibo4j.model.WeiboException;
import
com.zhiwei.weibocrawler.httpclient.HttpClientDemo
;
import
com.zhiwei.weibocrawler.httpclient.HttpClientDemo
;
import
com.zhiwei.weibocrawler.rsidClient.DataQueue
;
import
com.zhiwei.weibocrawler.rsidClient.DataQueue
;
import
com.zhiwei.weibocrawler.rsidClient.RsidClientDAO
;
import
com.zhiwei.weibocrawler.rsidClient.RsidClientDAO
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
/**
/**
*
*
* @ClassName WeiboCrawlerAnalysis
* @ClassName WeiboCrawlerAnalysis
...
@@ -33,7 +31,6 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
...
@@ -33,7 +31,6 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public
class
WeiboCrawlerAnalysis
{
public
class
WeiboCrawlerAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboCrawlerAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboCrawlerAnalysis
.
class
);
/**
/**
*
*
* @Description (mid获取微博数据)
* @Description (mid获取微博数据)
...
@@ -41,38 +38,38 @@ public class WeiboCrawlerAnalysis {
...
@@ -41,38 +38,38 @@ public class WeiboCrawlerAnalysis {
* @param businessToken
* @param businessToken
* @return
* @return
*/
*/
public
static
List
<
Status
>
getWeiboData
(
List
<
String
>
midsList
,
String
businessToken
)
{
public
static
List
<
Status
>
getWeiboData
(
List
<
String
>
midsList
,
String
businessToken
)
{
SearchBusiness
searchBusiness
=
new
SearchBusiness
(
businessToken
);
SearchBusiness
searchBusiness
=
new
SearchBusiness
(
businessToken
);
if
(
midsList
.
size
()
<
1
)
{
if
(
midsList
.
size
()
<
1
)
{
return
null
;
return
null
;
}
}
List
<
Status
>
statuses
=
new
ArrayList
<
Status
>();
List
<
Status
>
statuses
=
new
ArrayList
<
Status
>();
String
mids
=
""
;
String
mids
=
""
;
int
i
=
0
;
int
i
=
0
;
for
(
String
mid
:
midsList
)
{
for
(
String
mid
:
midsList
)
{
mids
=
mids
+
mid
+
","
;
mids
=
mids
+
mid
+
","
;
i
++;
i
++;
if
(
i
>
48
)
{
if
(
i
>
48
)
{
try
{
try
{
mids
=
mids
.
substring
(
0
,
mids
.
length
()-
1
);
mids
=
mids
.
substring
(
0
,
mids
.
length
()
-
1
);
StatusWapper
statusWapper
=
searchBusiness
.
showStatusBusniess
(
mids
);
StatusWapper
statusWapper
=
searchBusiness
.
showStatusBusniess
(
mids
);
statuses
.
addAll
(
statusWapper
.
getStatuses
());
statuses
.
addAll
(
statusWapper
.
getStatuses
());
i
=
0
;
i
=
0
;
mids
=
""
;
mids
=
""
;
}
catch
(
WeiboException
e
)
{
}
catch
(
WeiboException
e
)
{
logger
.
error
(
"数据更新出错部分mids========="
+
mids
);
logger
.
error
(
"数据更新出错部分mids========="
+
mids
);
e
.
printStackTrace
();
e
.
printStackTrace
();
continue
;
continue
;
}
}
}
}
}
}
try
{
try
{
mids
=
mids
.
substring
(
0
,
mids
.
length
()-
1
);
mids
=
mids
.
substring
(
0
,
mids
.
length
()
-
1
);
StatusWapper
statusWapper
=
searchBusiness
.
showStatusBusniess
(
mids
);
StatusWapper
statusWapper
=
searchBusiness
.
showStatusBusniess
(
mids
);
statuses
.
addAll
(
statusWapper
.
getStatuses
());
statuses
.
addAll
(
statusWapper
.
getStatuses
());
}
catch
(
WeiboException
e
)
{
}
catch
(
WeiboException
e
)
{
logger
.
error
(
"数据更新出错部分mids========="
+
mids
);
logger
.
error
(
"数据更新出错部分mids========="
+
mids
);
logger
.
error
(
"数据出错"
,
e
.
getMessage
());
logger
.
error
(
"数据出错"
,
e
.
getMessage
());
e
.
printStackTrace
();
e
.
printStackTrace
();
return
null
;
return
null
;
}
}
...
@@ -104,26 +101,35 @@ public class WeiboCrawlerAnalysis {
...
@@ -104,26 +101,35 @@ public class WeiboCrawlerAnalysis {
JSONObject
json
=
(
JSONObject
)
JSONObject
.
parse
(
result
);
JSONObject
json
=
(
JSONObject
)
JSONObject
.
parse
(
result
);
String
s
=
json
.
getString
(
"data"
);
String
s
=
json
.
getString
(
"data"
);
Document
document
=
Jsoup
.
parse
(
s
);
Document
document
=
Jsoup
.
parse
(
s
);
Elements
elements
=
document
.
select
(
"div.UG_contents"
).
select
(
"ul.clearfix"
).
select
(
"div[action-type=feed_list_item]"
);
Elements
elements
=
document
.
select
(
"div.UG_contents"
).
select
(
"ul.clearfix"
)
for
(
Element
element
:
elements
)
{
.
select
(
"div[action-type=feed_list_item]"
);
System
.
out
.
println
(
"elements size is "
+
elements
.
size
());
List
<
String
>
midsList
=
new
ArrayList
<
String
>();
for
(
Element
element
:
elements
)
{
try
{
try
{
String
mid
=
element
.
attr
(
"mid"
);
String
mid
=
element
.
attr
(
"mid"
);
if
(
mid
.
length
()
>
16
)
{
if
(
mid
.
length
()
>
16
)
{
mid
=
mid
.
substring
(
mid
.
length
()-
16
,
mid
.
length
());
mid
=
mid
.
substring
(
mid
.
length
()
-
16
,
mid
.
length
());
}
}
if
(
RsidClientDAO
.
isWeiboExit
(
mid
))
{
if
(!
midsList
.
contains
(
mid
))
{
DataQueue
.
offer
(
mid
);
midsList
.
add
(
mid
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"数据解析出错"
,
e
.
getMessage
());
logger
.
error
(
"数据解析出错"
,
e
.
getMessage
());
ZhiWeiTools
.
sleep
(
200
);
ZhiWeiTools
.
sleep
(
200
);
e
.
printStackTrace
();
e
.
printStackTrace
();
continue
;
continue
;
}
}
}
if
(!
midsList
.
isEmpty
()){
for
(
String
mid
:
midsList
){
if
(!
RsidClientDAO
.
isWeiboExit
(
mid
))
{
DataQueue
.
offer
(
mid
);
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"数据解析出错"
,
e
.
getMessage
());
logger
.
error
(
"数据解析出错"
,
e
.
getMessage
());
ZhiWeiTools
.
sleep
(
200
);
ZhiWeiTools
.
sleep
(
200
);
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboHotData.java
View file @
05561321
...
@@ -5,7 +5,7 @@ import java.util.Date;
...
@@ -5,7 +5,7 @@ import java.util.Date;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.
zhiweiT
ools.tools.ZhiWeiTools
;
import
com.zhiwei.
t
ools.tools.ZhiWeiTools
;
/**
/**
*
*
* @ClassName WeiboHotData
* @ClassName WeiboHotData
...
...
src/main/java/com/zhiwei/weibocrawler/crawler/getdata/WeiboSocietyData.java
View file @
05561321
...
@@ -5,7 +5,7 @@ import java.util.Date;
...
@@ -5,7 +5,7 @@ import java.util.Date;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.
zhiweiT
ools.tools.ZhiWeiTools
;
import
com.zhiwei.
t
ools.tools.ZhiWeiTools
;
/**
/**
*
*
* @ClassName WeiboSocietyData
* @ClassName WeiboSocietyData
...
...
src/main/java/com/zhiwei/weibocrawler/httpclient/HttpClientDemo.java
View file @
05561321
package
com
.
zhiwei
.
weibocrawler
.
httpclient
;
package
com
.
zhiwei
.
weibocrawler
.
httpclient
;
import
java.io.IOException
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Map.Entry
;
import
org.apache.http.client.config.RequestConfig
;
import
org.apache.http.client.methods.HttpGet
;
import
org.apache.http.impl.client.CloseableHttpClient
;
import
org.apache.http.impl.client.HttpClients
;
import
org.apache.http.util.EntityUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.
zhiweiTools.httpC
lient.HttpClientTemplateOK
;
import
com.zhiwei.
tools.httpc
lient.HttpClientTemplateOK
;
public
class
HttpClientDemo
{
public
class
HttpClientDemo
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClientDemo
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClientDemo
.
class
);
// public static String executeHttpRequestGet(String url) throws IOException {
public
static
String
executeHttpRequestGet
(
String
url
)
{
// String result = null;
// Map<String, String> headerMap = new HashMap<String, String>();
// headerMap.put("User-Agent",
// "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
// headerMap.put("Accept","*/*");
// headerMap.put("Accept-Encoding", "gzip, deflate, br");
// headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
// headerMap.put("Connection", "keep-alive");
// headerMap.put("Content-Type", "application/x-www-form-urlencoded");
// headerMap.put("Host", "weibo.com");
// CloseableHttpClient httpClient = null;
// for(int j = 1;j <= 3;j++) {
// try {
// HttpGet httpGet = new HttpGet(url);
// RequestConfig requestConfig = RequestConfig.custom()
// .setSocketTimeout(8000).setConnectTimeout(8000).build();
// httpClient = HttpClients.custom()
// .setDefaultRequestConfig(requestConfig).build();
// if (headerMap != null) {
// for (Entry<String, String> header : headerMap.entrySet()) {
// httpGet.setHeader(header.getKey(), header.getValue());
// }
// }
// result = EntityUtils
// .toString(httpClient.execute(httpGet).getEntity());
// return result;
// }catch (Exception e) {
// e.printStackTrace();
// continue;
// }finally {
// if (httpClient != null) {
// httpClient.close();
// }
// }
// }
// return result;
//
// }
public
static
String
executeHttpRequestGet
(
String
url
)
throws
IOException
{
String
result
=
null
;
String
result
=
null
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"User-Agent"
,
headerMap
.
put
(
"User-Agent"
,
...
@@ -72,11 +23,17 @@ public class HttpClientDemo {
...
@@ -72,11 +23,17 @@ public class HttpClientDemo {
headerMap
.
put
(
"Content-Type"
,
"application/x-www-form-urlencoded"
);
headerMap
.
put
(
"Content-Type"
,
"application/x-www-form-urlencoded"
);
headerMap
.
put
(
"Host"
,
"weibo.com"
);
headerMap
.
put
(
"Host"
,
"weibo.com"
);
try
{
try
{
System
.
out
.
println
(
"开始下载"
);
// Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url, headerMap));
// result = response.body().string();
result
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
result
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
System
.
out
.
println
(
"下载结束"
);
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
logger
.
info
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
e
.
printStackTrace
();
}
}
return
result
;
return
result
;
}
}
...
...
src/main/java/com/zhiwei/weibocrawler/rsidClient/RsidClientDAO.java
View file @
05561321
...
@@ -3,9 +3,11 @@ package com.zhiwei.weibocrawler.rsidClient;
...
@@ -3,9 +3,11 @@ package com.zhiwei.weibocrawler.rsidClient;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.rsid.core.RsidClient
;
import
com.zhiwei.middleware.cleaner.ptenum.PTENUM
;
import
com.zhiwei.middleware.cleaner.urlfilter.UnifiedUrlFilterClient
;
import
com.zhiwei.middleware.filter.config.Definition
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.weibocrawler.config.Config
;
import
com.zhiwei.weibocrawler.config.Config
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
/**
/**
* @ClassName RsidClientDAO
* @ClassName RsidClientDAO
...
@@ -16,16 +18,21 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
...
@@ -16,16 +18,21 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
*/
*/
public
class
RsidClientDAO
{
public
class
RsidClientDAO
{
// private static final String rsidUrl = "zookeeper://192.168.0.203:2181"; //中间件zookkeeper地址,服务器地址
private
static
UnifiedUrlFilterClient
client
;
//
// private static final String rsidGroup = "rsidserver"; //中间件分组
//
// private static final String redisWeiboKey = "weibo"; //去重的分组
private
static
RsidClient
client
=
RsidClient
.
build
(
Config
.
rsidUrl
,
Config
.
rsidGroup
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
RsidClientDAO
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
RsidClientDAO
.
class
);
static
{
if
(
client
==
null
){
synchronized
(
RsidClientDAO
.
class
)
{
if
(
client
==
null
)
{
try
{
client
=
UnifiedUrlFilterClient
.
getClient
(
Config
.
rsidUrl
,
Config
.
rsidGroup
,
Definition
.
GroupType
.
PROVIDER
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"链接清洗中间件时出现错误,错误为:::{}"
,
e
);
}
}
}
}
}
/**
/**
* @Description 验证微博是否重复
* @Description 验证微博是否重复
...
@@ -36,7 +43,9 @@ public class RsidClientDAO {
...
@@ -36,7 +43,9 @@ public class RsidClientDAO {
//循环3次避免连接超时引起的验证失效
//循环3次避免连接超时引起的验证失效
for
(
int
i
=
0
;
i
<
3
;
i
++){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
try
{
return
client
.
addFilterUrl
(
mid
,
false
,
Config
.
redisWeiboKey
);
boolean
f
=
client
.
contains
(
mid
,
PTENUM
.
COMMON
);
System
.
out
.
println
(
mid
+
"==========="
+
f
);
return
f
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"判断此条微博消息是否存在出现问题"
,
e
.
fillInStackTrace
());
logger
.
error
(
"判断此条微博消息是否存在出现问题"
,
e
.
fillInStackTrace
());
ZhiWeiTools
.
sleep
(
200
);
ZhiWeiTools
.
sleep
(
200
);
...
@@ -46,6 +55,4 @@ public class RsidClientDAO {
...
@@ -46,6 +55,4 @@ public class RsidClientDAO {
return
false
;
return
false
;
}
}
}
}
src/test/java/weibotest/HotWeiboTest.java
View file @
05561321
...
@@ -3,14 +3,14 @@ package weibotest;
...
@@ -3,14 +3,14 @@ package weibotest;
import
java.util.Map
;
import
java.util.Map
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.weibocrawler.crawler.GetData
;
import
com.zhiwei.weibocrawler.crawler.GetData
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
public
class
HotWeiboTest
{
public
class
HotWeiboTest
{
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
//开启采集
//开启采集
String
token
=
"2.00HUuC3C3_jZ8E
0c00a67ab8xbOHqB
"
;
String
token
=
"2.00HUuC3C3_jZ8E
36c5026e390AzIOP
"
;
GetData
.
start
(
token
);
GetData
.
start
(
token
);
// //获取数据
// //获取数据
while
(
true
){
while
(
true
){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment