Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
b6fe1572
Commit
b6fe1572
authored
Aug 28, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微信正文获取修改
parent
87e9aaf3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
4 deletions
+27
-4
src/main/java/com/zhiwei/source_forward/crawler/UrlLiveCrawler.java
+1
-1
src/main/java/com/zhiwei/source_forward/run/ContentMatch.java
+9
-0
src/main/java/com/zhiwei/source_forward/util/MatchContent.java
+17
-3
No files found.
src/main/java/com/zhiwei/source_forward/crawler/UrlLiveCrawler.java
View file @
b6fe1572
...
@@ -60,7 +60,7 @@ public class UrlLiveCrawler {
...
@@ -60,7 +60,7 @@ public class UrlLiveCrawler {
logger
.
info
(
"当前处理 URL: {}"
,
url
);
logger
.
info
(
"当前处理 URL: {}"
,
url
);
Request
request
=
HttpRequestBuilder
.
newGetRequest
(
url
,
null
);
Request
request
=
HttpRequestBuilder
.
newGetRequest
(
url
,
null
);
counter
.
increase
();
counter
.
increase
();
HttpBoot
.
asyncCall
(
request
,
ProxyClientUtil
.
getNATProxy
(),
false
).
addListeners
(
future
->
{
HttpBoot
.
asyncCall
(
request
,
ProxyClientUtil
.
getNATProxy
(),
false
,
false
).
addListeners
(
future
->
{
if
(
future
.
isSuccess
())
{
if
(
future
.
isSuccess
())
{
Response
response
=
future
.
result
();
Response
response
=
future
.
result
();
try
{
try
{
...
...
src/main/java/com/zhiwei/source_forward/run/ContentMatch.java
View file @
b6fe1572
...
@@ -55,6 +55,15 @@ public class ContentMatch {
...
@@ -55,6 +55,15 @@ public class ContentMatch {
return
dataList
;
return
dataList
;
}
}
/**
 * Manual smoke test: runs {@code getContentMatch} on a single sample WeChat
 * article URL and prints every returned {@code ContentBean} to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    List<String> urlList = new ArrayList<>();
    // Query parameters are separated by a literal '&'. The "&timestamp" parameter
    // had been mangled into "×tamp" by HTML-entity decoding of "&times".
    urlList.add("https://mp.weixin.qq.com/s?src=11&timestamp=1535449515&ver=1088&signature=9kByOydse2KaausR0FP5HoQpSeSXs097LR-akxhJxfCV*onfJuoWkznZ8UEk5OfFox4aVzDqx0n0xwbtTm6KUzPpNz2desfNiQ4Uevp4LaTSyoH3OKysG2qxy2jisojb&new=1");
    List<ContentBean> results = getContentMatch(urlList);
    for (ContentBean cb : results) {
        // println(Object) calls toString() itself and tolerates a null element.
        System.out.println(cb);
    }
}
static
class
ContentMatchCrawlerThread
extends
Thread
{
static
class
ContentMatchCrawlerThread
extends
Thread
{
private
static
List
<
ContentBean
>
getContentMatch
(
List
<
String
>
urlList
){
private
static
List
<
ContentBean
>
getContentMatch
(
List
<
String
>
urlList
){
...
...
src/main/java/com/zhiwei/source_forward/util/MatchContent.java
View file @
b6fe1572
...
@@ -29,8 +29,12 @@ public class MatchContent {
...
@@ -29,8 +29,12 @@ public class MatchContent {
public
static
String
matchContent
(
String
url
,
String
html
)
{
public
static
String
matchContent
(
String
url
,
String
html
)
{
String
content
=
null
;
String
content
=
null
;
try
{
try
{
Document
document
=
Jsoup
.
parse
(
html
);
Document
document
=
Jsoup
.
parse
(
html
);
content
=
mathchContent
(
html
,
document
);
if
(
url
.
contains
(
"weixin.qq.com"
))
{
content
=
matchContentWeixin
(
document
);
}
else
{
content
=
mathchContent
(
html
,
document
);
}
return
content
;
return
content
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
debug
(
"获取全文失败"
,
e
.
fillInStackTrace
());
logger
.
debug
(
"获取全文失败"
,
e
.
fillInStackTrace
());
...
@@ -39,8 +43,18 @@ public class MatchContent {
...
@@ -39,8 +43,18 @@ public class MatchContent {
return
content
;
return
content
;
}
}
/**
/**
 * Extracts the article text from a WeChat page.
 *
 * @param document parsed HTML document of the page
 * @return the text of the elements matched by the CSS query
 *         {@code div.rich_media_content}
 */
private static String matchContentWeixin(Document document) {
    // CSS query for the container that holds the WeChat article body.
    final String articleBodyQuery = "div.rich_media_content";
    return document.select(articleBodyQuery).text();
}
/**
* @Title: mathchContent
* @Title: mathchContent
* @author hero
* @author hero
* @Description: 匹配正文数据
* @Description: 匹配正文数据
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment