Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
252ee982
Commit
252ee982
authored
Aug 18, 2020
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
来源转发添加腾讯自选股
parent
dd6b6b30
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
61 additions
and
57 deletions
+61
-57
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
+14
-2
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+2
-3
src/test/java/com/zhiwei/source_forward/sourceforward/test/SourceForwardTest.java
+45
-52
No files found.
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
View file @
252ee982
...
...
@@ -5,6 +5,7 @@ import java.util.List;
import
java.util.Map
;
import
java.util.Objects
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
...
...
@@ -70,6 +71,11 @@ public class SourceForwardCrawler {
if
(
url
.
contains
(
"china.prcfe.com"
))
{
url
=
"http://china.prcfe.com/e/extend/ShowSource/?id="
+
url
.
split
(
"/"
)[
url
.
split
(
"/"
).
length
-
1
].
split
(
"\\."
)[
0
];
}
if
(
url
.
contains
(
"gu.qq.com"
))
{
String
id
=
url
.
split
(
"\\?id="
)[
1
];
url
=
"https://snp.tenpay.com/cgi-bin/snpgw_unified_newsinfo.fcgi?&filter=0&zappid=zxg_h5&sign=b2aceeb8a8ef093862608d806c1d6ab8&nonce=8464&reserve=1572995&&channel=zxg&user_openid=undefined&user_skey=undefined&&news_id="
+
id
;
headers
.
put
(
"referer"
,
"https://gu.qq.com/resources/shy/news/detail-v2/index.html"
);
}
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headers
);
counter
.
add
();
httpBoot
.
asyncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
,
true
).
whenComplete
((
rs
,
ex
)
->
{
...
...
@@ -94,8 +100,8 @@ public class SourceForwardCrawler {
String
channel
=
"新闻"
;
String
isforward
=
"未知"
;
try
{
Document
document
=
Jsoup
.
parse
(
body
);
if
(
attr
.
get
().
toString
().
contains
(
"mp.weixin.qq.com"
)){
Document
document
=
Jsoup
.
parse
(
body
);
isforward
=
document
.
select
(
"div#meta_content"
).
select
(
"span#copyright_logo"
).
text
();
if
(
isforward
.
contains
(
"原创"
)){
isforward
=
"原创"
;
...
...
@@ -106,15 +112,21 @@ public class SourceForwardCrawler {
if
(
body
.
contains
(
"isOriginal"
)
&&
body
.
contains
(
"isOriginal: true"
)){
isforward
=
"原创"
;
}
}
else
if
(
attr
.
get
().
toString
().
contains
(
"snp.tenpay.com"
)
||
attr
.
get
().
toString
().
contains
(
"gu.qq.com"
)){
if
(
body
.
contains
(
"source"
)){
source
=
body
.
split
(
"\"media_name\":\""
)[
1
].
split
(
"\""
)[
0
];
}
}
else
{
Document
document
=
Jsoup
.
parse
(
body
);
source
=
MatchSource
.
matchSource
(
attr
.
get
().
toString
(),
document
.
toString
(),
sourceList
);
channel
=
MatchChannel
.
verifyChannel
(
attr
.
get
().
toString
());
if
(
channel
==
null
){
List
<
Node
>
nodeList
=
document
.
head
().
childNodes
();
channel
=
MatchChannel
.
matchChannel
(
nodeList
);
}
source
=
MatchSource
.
matchSource
(
attr
.
get
().
toString
(),
document
.
toString
(),
sourceList
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
source
=
null
;
channel
=
"新闻"
;
}
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
252ee982
...
...
@@ -80,10 +80,10 @@ public class SourceForward {
public
static
void
main
(
String
[]
args
)
{
ProxyInit
.
initProxy
();
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
"http://
www.wangjiaozixun.com/html/zx20/2020/0730/1396388.html
"
);
urlList
.
add
(
"http://
gu.qq.com/resources/shy/news/detail-v2/index.html?#/index?id=SN202006091653447945411f
"
);
List
<
SourceForwardBean
>
da
=
SourceForward
.
getSourceForward
(
urlList
);
for
(
SourceForwardBean
sfb
:
da
)
{
System
.
out
.
println
(
sfb
.
toString
());
System
.
out
.
println
(
"=============="
+
sfb
.
toString
());
}
}
...
...
@@ -94,7 +94,6 @@ public class SourceForward {
try
{
SourceForwardCrawler
crawler
=
new
SourceForwardCrawler
();
SourceForwardDataCallBack
callback
=
new
SourceForwardDataCallBack
()
{
@Override
public
void
onData
(
SourceForwardBean
data
,
Attribution
attr
)
{
list
.
add
(
data
);
...
...
src/test/java/com/zhiwei/source_forward/sourceforward/test/SourceForwardTest.java
View file @
252ee982
//package com.zhiwei.source_forward.sourceforward.test;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//import java.util.Map.Entry;
//
//import org.junit.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.source_forward.run.SourceForward;
//import com.zhiwei.source_forward.util.ReadMediaData;
//
///**
// * @ClassName: SourceForwardTest
// * @Description: 来源验证
// * @author hero
// * @date 2017年12月6日 上午9:55:13
// */
//public class SourceForwardTest {
//
// @Test
// public void sourceForwardTest(){
// String path = "E://稿件汇总网媒数据//JD稿件转载情况-1206.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// Map<String,Object> data = poi.importExcel(path, 0);
// @SuppressWarnings("unchecked")
// List<String> headList = (List<String>)data.get("head");
// headList.add("频道");
// headList.add("原来源");
// headList.add("是否转发");
// @SuppressWarnings("unchecked")
// List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
//
// Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrl(dataList);
// dataMap = SourceForward.getSourceForward(dataMap);
//
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
// bodyList.add(dataEntry.getValue());
// }
// poi.exportExcel(path ,"匹配后数据", headList, bodyList);
// }
//
//
//
//
//
//
//
//
//}
//package com.zhiwei.source_forward.sourceforward.test;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//import java.util.Map.Entry;
//
//import org.junit.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.source_forward.run.SourceForward;
//import com.zhiwei.source_forward.util.ReadMediaData;
//
///**
// * @ClassName: SourceForwardTest
// * @Description: 来源验证
// * @author hero
// * @date 2017年12月6日 上午9:55:13
// */
//public class SourceForwardTest {
//
// @Test
// public void sourceForwardTest(){
// String path = "E://稿件汇总网媒数据//JD稿件转载情况-1206.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// Map<String,Object> data = poi.importExcel(path, 0);
// @SuppressWarnings("unchecked")
// List<String> headList = (List<String>)data.get("head");
// headList.add("频道");
// headList.add("原来源");
// headList.add("是否转发");
// @SuppressWarnings("unchecked")
// List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
//
// Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrl(dataList);
// dataMap = SourceForward.getSourceForward(dataMap);
//
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
// bodyList.add(dataEntry.getValue());
// }
// poi.exportExcel(path ,"匹配后数据", headList, bodyList);
// }
//
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment