Commit 9ef31c31 by [zhangzhiwei]

因修改采集核心包版本,修改相应的方法

parent 9205aa61
<?xml version="1.0" encoding="UTF-8"?>
<!-- Compiler settings for the project: configures annotation processing for the media_data_crawler module. -->
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<!-- The profile is enabled; generated sources and generated test sources are written under target/,
     and outputRelativeToContentRoot is true, so these directories are resolved against the content root. -->
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<!-- Module this profile applies to. -->
<module name="media_data_crawler" />
</profile>
</annotationProcessing>
</component>
</project>
\ No newline at end of file
<!-- Declares one project dictionary named "bewiler hk".
     NOTE(review): the name looks like a local user account, so this is presumably a per-user IDE file;
     confirm whether it should be tracked in version control at all. -->
<component name="ProjectDictionaryState">
<dictionary name="bewiler hk" />
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- File-encoding settings: UTF-8 is the default charset for properties files and for the PROJECT entry. -->
<project version="4">
<component name="Encoding" defaultCharsetForPropertiesFiles="UTF-8">
<!-- "PROJECT" presumably stands for the whole project rather than a single path; verify against the IDE documentation. -->
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- General project settings: external storage flag, Kotlin-to-JS compiler options,
     the Maven project file list, and the project JDK / language level. -->
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<!-- Both source-map options are declared without a value. -->
<component name="Kotlin2JsCompilerArguments">
<option name="sourceMapEmbedSources" />
<option name="sourceMapPrefix" />
</component>
<!-- The only registered Maven project file is pom.xml in the project directory. -->
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<!-- Language level JDK_1_8 with a JavaSDK named "1.8"; output goes to the "out" directory under the project directory. -->
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- UI designer palette definition. A single group named "Swing" lists the component classes offered by the
     palette. Each item carries its icon path, palette flags (removable, auto-create-binding, can-attach-label)
     and default layout constraints; some items also declare an initial "text" property or a preferred size. -->
<project version="4">
<component name="Palette2">
<group name="Swing">
<!-- Spacers provided by the designer itself (com.intellij.uiDesigner), not by javax.swing. -->
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
</item>
<!-- Containers. -->
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
</item>
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.png" removable="false" auto-create-binding="false" can-attach-label="true">
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
</item>
<!-- Buttons and label: each starts with a default "text" property value. -->
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
<initial-values>
<property name="text" value="Button" />
</initial-values>
</item>
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="RadioButton" />
</initial-values>
</item>
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="CheckBox" />
</initial-values>
</item>
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
<initial-values>
<property name="text" value="Label" />
</initial-values>
</item>
<!-- Single-line text inputs: preferred width 150, height -1. -->
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<!-- Multi-line text components: preferred size 150 x 50. -->
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<!-- Selection and data components. -->
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
</item>
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<!-- Tabbed and split panes: preferred size 200 x 200. -->
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<!-- Remaining controls. -->
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.png" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
</item>
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
<preferred-size width="-1" height="20" />
</default-constraints>
</item>
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.png" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
</item>
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
</item>
</group>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version-control mapping: a single mapping that assigns Git to the directory "". -->
<project version="4">
<component name="VcsDirectoryMappings">
<!-- The empty directory value presumably denotes the project root; confirm against the IDE documentation. -->
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.0.5-SNAPSHOT</version> <version>0.0.8-SNAPSHOT</version>
</dependency> </dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
...@@ -28,6 +28,7 @@ import java.util.regex.Pattern; ...@@ -28,6 +28,7 @@ import java.util.regex.Pattern;
public class BaiduNewsCrawlerParse { public class BaiduNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduNewsCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduNewsCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "百度新闻"; private static final String pt = "百度新闻";
/** /**
...@@ -200,7 +201,7 @@ public class BaiduNewsCrawlerParse { ...@@ -200,7 +201,7 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap)); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap));
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -246,7 +247,7 @@ public class BaiduNewsCrawlerParse { ...@@ -246,7 +247,7 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -270,7 +271,7 @@ public class BaiduNewsCrawlerParse { ...@@ -270,7 +271,7 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -24,7 +24,7 @@ import java.util.regex.Matcher; ...@@ -24,7 +24,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class BaiduTiebaCrawlerParse { public class BaiduTiebaCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot();
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
/** /**
* @Title: getBaiduTiebaData * @Title: getBaiduTiebaData
...@@ -217,7 +217,7 @@ public class BaiduTiebaCrawlerParse { ...@@ -217,7 +217,7 @@ public class BaiduTiebaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().toString(); return response.body().toString();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -255,7 +255,7 @@ public class BaiduTiebaCrawlerParse { ...@@ -255,7 +255,7 @@ public class BaiduTiebaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -24,6 +24,7 @@ import java.util.Map; ...@@ -24,6 +24,7 @@ import java.util.Map;
public class DoubanCrawlerParse { public class DoubanCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
* @Title: getDoubanData * @Title: getDoubanData
...@@ -93,7 +94,7 @@ public class DoubanCrawlerParse { ...@@ -93,7 +94,7 @@ public class DoubanCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -115,7 +116,7 @@ public class DoubanCrawlerParse { ...@@ -115,7 +116,7 @@ public class DoubanCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().toString(); return response.body().toString();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -28,6 +28,7 @@ import java.util.Map; ...@@ -28,6 +28,7 @@ import java.util.Map;
public class SoCrawlerParse { public class SoCrawlerParse {
private static Logger logger = LogManager.getLogger(SoCrawlerParse.class); private static Logger logger = LogManager.getLogger(SoCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "360网页"; private static final String pt = "360网页";
/** /**
...@@ -103,7 +104,7 @@ public class SoCrawlerParse { ...@@ -103,7 +104,7 @@ public class SoCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -283,7 +284,7 @@ public class SoCrawlerParse { ...@@ -283,7 +284,7 @@ public class SoCrawlerParse {
String url = null; String url = null;
if(link != null) { if(link != null) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy,false);
String htmlBody = response.body().toString(); String htmlBody = response.body().toString();
if(htmlBody!=null) { if(htmlBody!=null) {
url = htmlBody.split("window.location.replace\\(\"")[1].split("\"\\)")[0]; url = htmlBody.split("window.location.replace\\(\"")[1].split("\"\\)")[0];
......
...@@ -25,6 +25,7 @@ import java.util.Map; ...@@ -25,6 +25,7 @@ import java.util.Map;
public class SoNewsCrawlerParse { public class SoNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(SoNewsCrawlerParse.class); private static Logger logger = LogManager.getLogger(SoNewsCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "360新闻"; private static final String pt = "360新闻";
/** /**
...@@ -135,7 +136,7 @@ public class SoNewsCrawlerParse { ...@@ -135,7 +136,7 @@ public class SoNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -26,6 +26,7 @@ import java.util.Map; ...@@ -26,6 +26,7 @@ import java.util.Map;
public class SougouNewsCrawlerParse { public class SougouNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "搜狗新闻"; private static final String pt = "搜狗新闻";
...@@ -129,7 +130,7 @@ public class SougouNewsCrawlerParse { ...@@ -129,7 +130,7 @@ public class SougouNewsCrawlerParse {
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -153,7 +154,7 @@ public class SougouNewsCrawlerParse { ...@@ -153,7 +154,7 @@ public class SougouNewsCrawlerParse {
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -25,6 +25,7 @@ import java.util.regex.Pattern; ...@@ -25,6 +25,7 @@ import java.util.regex.Pattern;
public class SougouZhihuCrawlerParse{ public class SougouZhihuCrawlerParse{
private static Logger logger = LogManager.getLogger(SougouZhihuCrawlerParse.class); private static Logger logger = LogManager.getLogger(SougouZhihuCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "搜狗知乎"; private static final String pt = "搜狗知乎";
...@@ -96,7 +97,7 @@ public class SougouZhihuCrawlerParse{ ...@@ -96,7 +97,7 @@ public class SougouZhihuCrawlerParse{
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -123,7 +124,7 @@ public class SougouZhihuCrawlerParse{ ...@@ -123,7 +124,7 @@ public class SougouZhihuCrawlerParse{
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -22,6 +22,7 @@ import java.util.*; ...@@ -22,6 +22,7 @@ import java.util.*;
public class TianYaCrawlerParse { public class TianYaCrawlerParse {
private static Logger logger = LogManager.getLogger(TianYaCrawlerParse.class); private static Logger logger = LogManager.getLogger(TianYaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static final String pt = "天涯论坛"; private static final String pt = "天涯论坛";
/** /**
* @Title: getBaiduTiebaData * @Title: getBaiduTiebaData
...@@ -86,7 +87,7 @@ public class TianYaCrawlerParse { ...@@ -86,7 +87,7 @@ public class TianYaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer; import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import java.net.Proxy; import java.net.Proxy;
import java.util.*; import java.util.*;
/** /**
* 知乎评论采集 * 知乎评论采集
*/ */
public class ZhihuAnwserCrawlerParse { public class ZhihuAnwserCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot();
/**
* 获取数据 /**
* @param url * 获取数据
* @param endDate * @param url
* @param proxy * @param endDate
* @return * @param proxy
* @throws Exception * @return
*/ * @throws Exception
public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, Proxy proxy) throws Exception{ */
try{ public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, Proxy proxy) throws Exception{
List<ZhihuAnswer> answerList = new ArrayList<>(); try{
String questionId = getQuestionId(url); List<ZhihuAnswer> answerList = new ArrayList<>();
String bord = getNumberBoard(url, proxy); String questionId = getQuestionId(url);
boolean more = true; String bord = getNumberBoard(url, proxy);
int page = 0; boolean more = true;
while(more){ int page = 0;
try{ while(more){
Map<String,Object> dataMap = analsis(questionId,endDate,page,bord,proxy); try{
if(dataMap!=null && !dataMap.isEmpty()){ Map<String,Object> dataMap = analsis(questionId,endDate,page,bord,proxy);
more = (boolean)dataMap.get("more"); if(dataMap!=null && !dataMap.isEmpty()){
List<ZhihuAnswer> list = (List<ZhihuAnswer>)dataMap.get("data"); more = (boolean)dataMap.get("more");
if(list!=null && !list.isEmpty()){ @SuppressWarnings("unchecked")
answerList.addAll(list); List<ZhihuAnswer> list = (List<ZhihuAnswer>)dataMap.get("data");
}else{ if(list!=null && !list.isEmpty()){
more = false; answerList.addAll(list);
} }else{
} more = false;
//单线程采集避免被封休眠8s }
ZhiWeiTools.sleep(8000); }
page++; //单线程采集避免被封休眠8s
}catch (Exception e){ ZhiWeiTools.sleep(8000);
more = false; page++;
} }catch (Exception e){
} more = false;
return answerList; }
}catch (Exception e){ }
throw e; return answerList;
} }catch (Exception e){
} throw e;
}
}
/**
* 获取问题的关注者和浏览量
* @param url /**
* @param proxy * 获取问题的关注者和浏览量
* @return * @param url
* @throws Exception * @param proxy
*/ * @return
private static String getNumberBoard(String url, Proxy proxy) throws Exception{ * @throws Exception
try{ */
String body = download(url, proxy); private static String getNumberBoard(String url, Proxy proxy) throws Exception{
Document document = Jsoup.parse(body); try{
Elements views = document.select("strong.NumberBoard-itemValue"); String body = download(url, proxy);
String fllow = "0"; Document document = Jsoup.parse(body);
String view = "0"; Elements views = document.select("strong.NumberBoard-itemValue");
if (views.size() >= 2) { String fllow = "0";
fllow=views.get(0).attr("title"); String view = "0";
view=views.get(1).attr("title"); if (views.size() >= 2) {
} fllow=views.get(0).attr("title");
return fllow+","+view; view=views.get(1).attr("title");
} catch (Exception e){ }
throw e; return fllow+","+view;
} } catch (Exception e){
} throw e;
}
}
/**
* 获取单页数据
* @param url /**
* @param page * 获取单页数据
* @param endDate * @param url
* @param proxy * @param page
* @return * @param endDate
* @throws Exception * @param proxy
*/ * @return
public static Map<String,Object> getAnswerList(String url, int page, Date endDate, Proxy proxy) throws Exception{ * @throws Exception
try{ */
String questionId = getQuestionId(url); public static Map<String,Object> getAnswerList(String url, int page, Date endDate, Proxy proxy) throws Exception{
String bord = getNumberBoard(url, proxy); try{
return analsis(questionId,endDate,page,bord ,proxy); String questionId = getQuestionId(url);
}catch (Exception e){ String bord = getNumberBoard(url, proxy);
throw e; return analsis(questionId,endDate,page,bord ,proxy);
} }catch (Exception e){
} throw e;
}
}
/**
* 解析数据
* @param questionId /**
* @param endDate * 解析数据
* @param page * @param questionId
* @param proxy * @param endDate
* @return * @param page
* @throws Exception * @param proxy
*/ * @return
private static Map<String,Object> analsis(String questionId, Date endDate, int page, String bord, Proxy proxy) throws Exception{ * @throws Exception
try{ */
boolean more = true; private static Map<String,Object> analsis(String questionId, Date endDate, int page, String bord, Proxy proxy) throws Exception{
List<ZhihuAnswer> answerList = new ArrayList<>(); try{
String urlNext = getUrl(questionId, page); boolean more = true;
String body = download(urlNext, proxy); List<ZhihuAnswer> answerList = new ArrayList<>();
JSONObject dataJson = JSONObject.parseObject(body); String urlNext = getUrl(questionId, page);
Integer count = dataJson.getJSONObject("paging").getInteger("totals"); String body = download(urlNext, proxy);
JSONArray jsonArray = dataJson.getJSONArray("data"); JSONObject dataJson = JSONObject.parseObject(body);
Integer count = dataJson.getJSONObject("paging").getInteger("totals");
String from_url = "https://www.zhihu.com/question/" + questionId; JSONArray jsonArray = dataJson.getJSONArray("data");
for(int i=0; i<jsonArray.size(); i++){
JSONObject answerJson = jsonArray.getJSONObject(i); String from_url = "https://www.zhihu.com/question/" + questionId;
Date time = new Date(answerJson.getLong("created_time")*1000); for(int i=0; i<jsonArray.size(); i++){
if(time.after(endDate)){ JSONObject answerJson = jsonArray.getJSONObject(i);
String answerId = answerJson.getString("id"); Date time = new Date(answerJson.getLong("created_time")*1000);
String link = from_url+"/answers/" + answerId; if(time.after(endDate)){
String author = answerJson.getJSONObject("author").getString("name"); String answerId = answerJson.getString("id");
String authorUrl = "https://www.zhihu.com/people/"+answerJson.getJSONObject("author").getString("url_token"); String link = from_url+"/answers/" + answerId;
String content = ZhiWeiTools.delHTMLTag(answerJson.getString("content")); String author = answerJson.getJSONObject("author").getString("name");
String title = answerJson.getJSONObject("question").getString("title"); String authorUrl = "https://www.zhihu.com/people/"+answerJson.getJSONObject("author").getString("url_token");
Integer voteup_count = answerJson.getInteger("voteup_count"); String content = ZhiWeiTools.delHTMLTag(answerJson.getString("content"));
Integer comment_count = answerJson.getInteger("comment_count"); String title = answerJson.getJSONObject("question").getString("title");
Integer guanzhu_count = Integer.valueOf(bord.split(",")[0]); Integer voteup_count = answerJson.getInteger("voteup_count");
Integer bord_count = Integer.valueOf(bord.split(",")[1]); Integer comment_count = answerJson.getInteger("comment_count");
ZhihuAnswer zhihuAnswer = new ZhihuAnswer(link, from_url, title, time, author, authorUrl, content,voteup_count ,comment_count, guanzhu_count, bord_count); Integer guanzhu_count = Integer.valueOf(bord.split(",")[0]);
answerList.add(zhihuAnswer); Integer bord_count = Integer.valueOf(bord.split(",")[1]);
} ZhihuAnswer zhihuAnswer = new ZhihuAnswer(link, from_url, title, time, author, authorUrl, content,voteup_count ,comment_count, guanzhu_count, bord_count);
} answerList.add(zhihuAnswer);
if(count<page*20){ }
more = false; }
} if(count<page*20){
Map<String,Object> resultMap = new HashMap<>(); more = false;
resultMap.put("data", answerList); }
resultMap.put("more", more); Map<String,Object> resultMap = new HashMap<>();
return resultMap; resultMap.put("data", answerList);
}catch (Exception e){ resultMap.put("more", more);
throw e; return resultMap;
} }catch (Exception e){
} throw e;
}
}
/**
* 根据链接获取数据
* @param url /**
* @param proxy * 根据链接获取数据
* @return * @param url
* @throws Exception * @param proxy
*/ * @return
private static String download(String url, Proxy proxy) throws Exception{ * @throws Exception
try(Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){ */
return response.body().string(); private static String download(String url, Proxy proxy) throws Exception{
}catch (Exception e){ try(Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
throw e; return response.body().string();
} }catch (Exception e){
} throw e;
}
}
/**
* 根据链接获取问题id
* @param url /**
* @return * 根据链接获取问题id
* @throws Exception * @param url
*/ * @return
private static String getQuestionId(String url) throws Exception{ * @throws Exception
try{ */
if(url.contains("question")){ private static String getQuestionId(String url) throws Exception{
return url.split("question/")[1].split("/")[0]; try{
} if(url.contains("question")){
}catch (Exception e){ return url.split("question/")[1].split("/")[0];
throw e; }
} }catch (Exception e){
throw new Exception("链接不符合要求,不是正常的知乎问题链接"); throw e;
}
} throw new Exception("链接不符合要求,不是正常的知乎问题链接");
/*** }
* 获取数据页链接
* @param questionId /***
* @param page * 获取数据页链接
* @return * @param questionId
*/ * @param page
private static String getUrl(String questionId, int page){ * @return
return "https://www.zhihu.com/api/v4/questions/"+questionId+"/answers?include=data%5B*%5D.is_normal%2Cadmin_closed_comment%2" + */
"Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit" + private static String getUrl(String questionId, int page){
"%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2" + return "https://www.zhihu.com/api/v4/questions/"+questionId+"/answers?include=data%5B*%5D.is_normal%2Cadmin_closed_comment%2" +
"Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp" + "Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit" +
"%3Bdata%5B*%5D.mark_infos%5B*%5D.url%3Bdata%5B*%5D.author.follower_count%2Cbadge%5B*%5D.topics&offset="+page*20+"&limit=20&sort_by=created"; "%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2" +
} "Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp" +
"%3Bdata%5B*%5D.mark_infos%5B*%5D.url%3Bdata%5B*%5D.author.follower_count%2Cbadge%5B*%5D.topics&offset="+page*20+"&limit=20&sort_by=created";
}
public static void main(String[] args){
String url = "https://www.zhihu.com/question/288128510";
Date endDate = TimeParse.stringFormartDate("2018-09-20 08:00:00"); public static void main(String[] args){
try{ String url = "https://www.zhihu.com/question/288128510";
getAnswerList(url,endDate, null); Date endDate = TimeParse.stringFormartDate("2018-09-20 08:00:00");
}catch (Exception e){ try{
e.fillInStackTrace(); getAnswerList(url,endDate, null);
} }catch (Exception e){
} e.fillInStackTrace();
}
}
}
}
package com.zhiwei.media_data_crawler.data; package com.zhiwei.media_data_crawler.data;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.zhiwei.media_data_crawler.crawler.*; import com.zhiwei.media_data_crawler.crawler.*;
import com.zhiwei.media_data_crawler.entity.*; import com.zhiwei.media_data_crawler.entity.*;
import com.zhiwei.tools.tools.ZhiWeiTools;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
public class DataCrawler { public class DataCrawler {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment