Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
a65b651d
Commit
a65b651d
authored
Mar 18, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改log文件,修改接口发送请求方式
parent
07f90bdc
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
54 additions
and
151 deletions
+54
-151
pom.xml
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+2
-9
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/util/HttpRequest.java
+0
-97
src/main/java/com/zhiwei/searchhotcrawler/util/MyX509TrustManager.java
+0
-26
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
+17
-5
src/main/resources/log4j.properties
+0
-11
src/main/resources/log4j2.xml
+32
-0
No files found.
pom.xml
View file @
a65b651d
...
...
@@ -38,12 +38,12 @@
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.
1.1
-RELEASE
</version>
<version>
0.
3.0
-RELEASE
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.1.
1
-SNAPSHOT
</version>
<version>
0.1.
2
-SNAPSHOT
</version>
</dependency>
</dependencies>
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
a65b651d
...
...
@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiboHotSearch
;
import
com.zhiwei.searchhotcrawler.mail.SendMailWeibo
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
...
@@ -41,18 +41,11 @@ public class WeiboHotSearchCrawler {
public
static
List
<
WeiboHotSearch
>
weiboHotSearch
(){
String
url
=
"https://s.weibo.com/top/summary?cate=realtimehot"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
// headerMap.put("Referer", "https://s.weibo.com/top/summary?cate=realtimehot");
headerMap
.
put
(
"Host"
,
"s.weibo.com"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"
);
headerMap
.
put
(
"Upgrade-Insecure-Requests"
,
"1"
);
List
<
WeiboHotSearch
>
list
=
new
ArrayList
<
WeiboHotSearch
>();
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"pl_top_realtimehot"
)){
try
{
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
a65b651d
...
...
@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/HttpRequest.java
deleted
100644 → 0
View file @
07f90bdc
package
com
.
zhiwei
.
searchhotcrawler
.
util
;
import
java.io.BufferedReader
;
import
java.io.InputStream
;
import
java.io.InputStreamReader
;
import
java.io.OutputStream
;
import
java.net.ConnectException
;
import
java.net.URL
;
import
javax.net.ssl.HttpsURLConnection
;
import
javax.net.ssl.SSLContext
;
import
javax.net.ssl.SSLSocketFactory
;
import
javax.net.ssl.TrustManager
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
/**
 * Utility for sending an HTTPS request and parsing the response body as JSON.
 *
 * <p>SECURITY NOTE: every connection is configured with {@link MyX509TrustManager},
 * whose check methods are empty, so the server certificate is not validated.
 *
 * @author liufeng
 * @date 2013-08-08
 */
public class HttpRequest {

    private static final Logger log = LoggerFactory.getLogger(HttpRequest.class);

    /**
     * Sends an HTTPS request and returns the response parsed as a JSON object.
     *
     * <p>Failures are logged and swallowed; the caller only sees a {@code null} result.
     *
     * @param requestUrl
     *            the address to request (must be an {@code https} URL, otherwise the
     *            connection cast fails and {@code null} is returned)
     * @param requestMethod
     *            the request method (GET, POST)
     * @param outputStr
     *            the data to submit as the request body, or {@code null} for no body
     * @return the parsed response (read properties with {@code JSONObject.get(key)}),
     *         or {@code null} if the request or the parsing failed
     */
    public static JSONObject httpRequest(String requestUrl, String requestMethod, String outputStr) {
        JSONObject jsonObject = null;
        HttpsURLConnection httpUrlConn = null;
        try {
            // Create the SSLContext and initialise it with our own trust manager.
            TrustManager[] tm = { new MyX509TrustManager() };
            SSLContext sslContext = SSLContext.getInstance("SSL", "SunJSSE");
            sslContext.init(null, tm, new java.security.SecureRandom());
            // Obtain the SSLSocketFactory from that SSLContext.
            SSLSocketFactory ssf = sslContext.getSocketFactory();

            URL url = new URL(requestUrl);
            httpUrlConn = (HttpsURLConnection) url.openConnection();
            httpUrlConn.setSSLSocketFactory(ssf);

            httpUrlConn.setDoOutput(true);
            httpUrlConn.setDoInput(true);
            httpUrlConn.setUseCaches(false);
            // Set the request method (GET/POST).
            httpUrlConn.setRequestMethod(requestMethod);

            if ("GET".equalsIgnoreCase(requestMethod)) {
                httpUrlConn.connect();
            }

            // Submit the request body when there is one.
            // NOTE(review): a body may be supplied together with "GET"; verify how
            // HttpsURLConnection treats such a request once getOutputStream() is used.
            if (null != outputStr) {
                try (OutputStream outputStream = httpUrlConn.getOutputStream()) {
                    // Encode explicitly as UTF-8 so non-ASCII text is not garbled.
                    outputStream.write(outputStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                }
            }

            jsonObject = JSONObject.parseObject(readBody(httpUrlConn));
        } catch (ConnectException ce) {
            log.error("Weixin server connection timed out.", ce);
        } catch (Exception e) {
            // The trailing Throwable is logged with its stack trace; the placeholder
            // is filled by the explicit toString() argument.
            log.error("https request error: {}", e.toString(), e);
        } finally {
            // Release the connection on success and on failure alike.
            if (httpUrlConn != null) {
                httpUrlConn.disconnect();
            }
        }
        return jsonObject;
    }

    /**
     * Reads the whole response body as UTF-8 text, concatenating its lines without
     * line terminators, and closes the underlying streams even if reading fails.
     */
    private static String readBody(HttpsURLConnection httpUrlConn) throws java.io.IOException {
        StringBuilder buffer = new StringBuilder();
        try (InputStream inputStream = httpUrlConn.getInputStream();
                InputStreamReader inputStreamReader =
                        new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8);
                BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {
            String str;
            while ((str = bufferedReader.readLine()) != null) {
                buffer.append(str);
            }
        }
        return buffer.toString();
    }
}
src/main/java/com/zhiwei/searchhotcrawler/util/MyX509TrustManager.java
deleted
100644 → 0
View file @
07f90bdc
package
com
.
zhiwei
.
searchhotcrawler
.
util
;
import
java.security.cert.CertificateException
;
import
java.security.cert.X509Certificate
;
import
javax.net.ssl.X509TrustManager
;
/**
 * Certificate trust manager (used for HTTPS requests).
 *
 * <p>SECURITY NOTE: both check methods are intentionally empty, so every client and
 * server certificate chain is accepted without validation. Connections that use this
 * trust manager are therefore not protected against man-in-the-middle attacks.
 *
 * @author liufeng
 * @date 2013-08-08
 */
public class MyX509TrustManager implements X509TrustManager {

    /** Shared empty result for {@link #getAcceptedIssuers()}; zero-length, so safe to share. */
    private static final X509Certificate[] NO_ISSUERS = new X509Certificate[0];

    /**
     * Accepts any client certificate chain; no validation is performed.
     *
     * @param chain the peer certificate chain (ignored)
     * @param authType the authentication type (ignored)
     * @throws CertificateException never thrown by this implementation
     */
    @Override
    public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        // Intentionally empty: trust every client certificate.
    }

    /**
     * Accepts any server certificate chain; no validation is performed.
     *
     * @param chain the peer certificate chain (ignored)
     * @param authType the key exchange algorithm (ignored)
     * @throws CertificateException never thrown by this implementation
     */
    @Override
    public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        // Intentionally empty: trust every server certificate.
    }

    /**
     * Returns the accepted issuer certificates.
     *
     * @return an empty, non-null array; the interface contract forbids {@code null}
     */
    @Override
    public X509Certificate[] getAcceptedIssuers() {
        return NO_ISSUERS;
    }
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
View file @
a65b651d
...
...
@@ -5,13 +5,16 @@ import java.util.HashMap;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.dubbo.rpc.protocol.rest.support.ContentType
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils.HttpMethod
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
public
class
WechatCodeUtil
{
...
...
@@ -62,7 +65,9 @@ public class WechatCodeUtil {
int
msgid
=
0
;
String
url
=
WechatConstant
.
WECHAT_TEMPLET_SEND_URL
.
replace
(
"ACCESS_TOKEN"
,
getToken
());
try
{
JSONObject
jsonObject
=
HttpRequest
.
httpRequest
(
url
,
"POST"
,
templateJson
.
toString
());
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
"application/json"
,
templateJson
.
toJSONString
())).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
);
if
(
null
!=
jsonObject
)
{
if
(
"ok"
.
equals
(
jsonObject
.
getString
(
"errmsg"
)))
{
msgid
=
jsonObject
.
getIntValue
(
"msgid"
);
...
...
@@ -71,6 +76,7 @@ public class WechatCodeUtil {
logger
.
info
(
"消息推送失败,错误为::{}"
,
jsonObject
.
toString
());
}
}
}
}
catch
(
Exception
e
)
{
logger
.
error
(
"消息推送失败,错误为::{}"
,
e
.
fillInStackTrace
());
msgid
=
0
;
...
...
@@ -97,7 +103,10 @@ public class WechatCodeUtil {
JSONObject
postData
=
new
JSONObject
();
postData
.
put
(
"tagid"
,
getGroupIp
(
groupName
));
postData
.
put
(
"next_openid"
,
""
);
JSONObject
jsonObject
=
HttpRequest
.
httpRequest
(
url
,
"GET"
,
postData
.
toString
());
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
"application/json"
,
postData
.
toJSONString
())).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
))
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
);
if
(
null
!=
jsonObject
)
{
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
...
...
@@ -105,6 +114,7 @@ public class WechatCodeUtil {
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
}
else
{
logger
.
info
(
"token 获取失败"
);
}
...
...
@@ -126,7 +136,9 @@ public class WechatCodeUtil {
JSONObject
postData
=
new
JSONObject
();
postData
.
put
(
"tagid"
,
groupId
);
postData
.
put
(
"next_openid"
,
""
);
JSONObject
jsonObject
=
HttpRequest
.
httpRequest
(
url
,
"GET"
,
postData
.
toString
());
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
"application/json"
,
postData
.
toJSONString
())).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
))
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
);
if
(
null
!=
jsonObject
)
{
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
...
...
@@ -134,10 +146,10 @@ public class WechatCodeUtil {
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
}
else
{
logger
.
info
(
"token 获取失败"
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
...
...
src/main/resources/log4j.properties
deleted
100644 → 0
View file @
07f90bdc
log4j.rootLogger
=
INFO,stdout,ROLLING_FILE
log4j.appender.stdout
=
org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout
=
org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
log4j.appender.ROLLING_FILE
=
org.apache.log4j.DailyRollingFileAppender
log4j.appender.ROLLING_FILE.Threshold
=
INFO
log4j.appender.ROLLING_FILE.File
=
./Log/searchhotcrawler.log
log4j.appender.ROLLING_FILE.Append
=
true
log4j.appender.ROLLING_FILE.layout
=
org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
\ No newline at end of file
src/main/resources/log4j2.xml
0 → 100644
View file @
a65b651d
<?xml version="1.0" encoding="UTF-8"?>
<!-- Status level for log4j2's own internal logging -->
<Configuration
status=
"WARN"
>
<properties>
<property
name=
"LOG_HOME"
>
Log/
</property>
<property
name=
"LOG_FILE"
>
crawler
</property>
</properties>
<Appenders>
<!-- Define the log output destinations -->
<Console
name=
"Console"
target=
"SYSTEM_OUT"
>
<PatternLayout
pattern=
"%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} - %msg%n"
/>
</Console>
<RollingRandomAccessFile
name=
"LogFile"
fileName=
"${LOG_HOME}/${LOG_FILE}.log"
filePattern=
"${LOG_HOME}/$${date:yyyy-MM}/${LOG_FILE}-%d{yyyy-MM-dd}-%i.log"
>
<PatternLayout
pattern=
"%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} - %msg%n"
/>
<Policies>
<TimeBasedTriggeringPolicy
interval=
"1"
/>
<SizeBasedTriggeringPolicy
size=
"20 MB"
/>
</Policies>
<DefaultRolloverStrategy
max=
"20"
/>
</RollingRandomAccessFile>
</Appenders>
<Loggers>
<Root
level=
"all"
>
<AppenderRef
ref=
"Console"
level=
"info"
/>
<AppenderRef
ref=
"LogFile"
level=
"info"
/>
</Root>
</Loggers>
</Configuration>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment