Commit f2fc1084 by win 10

删除mongo和excel的pom依赖

parent 88e4e8c0
...@@ -19,20 +19,6 @@ ...@@ -19,20 +19,6 @@
<version>0.6.1.0-SNAPSHOT</version> <version>0.6.1.0-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<!-- excel导出 -->
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>excelpoi</artifactId>
<version>0.0.3-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>2.0.0-beta3</version>
<scope>provided</scope>
</dependency>
</dependencies> </dependencies>
<!-- 打包管理 --> <!-- 打包管理 -->
......
...@@ -22,7 +22,6 @@ import com.zhiwei.crawler.proxy.ProxyHolder; ...@@ -22,7 +22,6 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.TiebaData; import com.zhiwei.media_data_crawler.entity.TiebaData;
import com.zhiwei.media_data_crawler.excelentity.DataExcel;
import com.zhiwei.proxy.config.SimpleConfig; import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
...@@ -40,38 +39,6 @@ public class BaiduTiebaCrawlerParse { ...@@ -40,38 +39,6 @@ public class BaiduTiebaCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
// public static void main(String[] args) {
// ProxyFactory.init(SimpleConfig.builder().registry("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181")
// .appName("xumiaoxin").appId(10000008).group("local").build());
//
// List<DataExcel> bodyList = new ArrayList<>();
//
// try {
// List<String> wordList = WordsReadFile.getWords("D:\\crawlerdata\\关键词6.txt");
// for(String s:wordList) {
// List<TiebaData> dataList = getBaiduTiebaData(s, null, null);
// dataList.forEach(data -> {
// DataExcel dataExcel = new DataExcel();
// dataExcel.setAuthor(data.getAuthor());
// dataExcel.setContent(data.getContent());
// dataExcel.setSource(data.getSource());
// dataExcel.setTid(data.getTid());
// dataExcel.setTime(data.getTime());
// dataExcel.setTitle(data.getTitle());
// dataExcel.setUrl(data.getUrl());
// dataExcel.setWord(data.getWord());
//
// bodyList.add(dataExcel);
// });
// }
// } catch (Exception e) {
// e.toString();
// }
//
// EasyExcel.write("D:\\crawlerdata\\百度贴吧-花木兰2.xlsx", DataExcel.class).sheet("数据").doWrite(bodyList);
// System.out.println("导出成功");
// }
/** /**
* @Title: getBaiduTiebaData * @Title: getBaiduTiebaData
* @author hero * @author hero
......
package com.zhiwei.media_data_crawler.crawler;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * Manual driver that exports Zhihu answer data (including picture count and ranking columns)
 * to a spreadsheet.
 *
 * <p>Flow: initialise the proxy factory, read keywords from a local text file, call
 * {@code ZhihuAnwserCrawlerParse.getPictureCount} once per keyword, and hand one row per
 * returned answer to {@code PoiExcelUtil.exportExcel}.
 *
 * @author xMx
 * @date 2019-10-19 11:01:29
 */
public class CrawlerTest {

    /** Column titles in the order they are passed to the exporter. */
    private static final String[] COLUMN_TITLES = {
        "地址", "问题地址", "标题", "时间", "发布者", "作者地址", "内容",
        "回答点赞数", "回答评论数", "问题点赞数", "问题评论数", "图片数量", "排名"
    };

    public static void main(String[] args) throws Exception {
        // Proxy registry address and application identity.
        String address = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";
        String appName = "xumaioxin";
        long appId = 10000008L;
        ProxyFactory.init(
                SimpleConfig.builder()
                        .registry(address)
                        .appName(appName)
                        .appId(appId)
                        .group("local")
                        .build());

        String wordFileName = "D://crawlerdata/关键词5.txt";
        String dataFileName = "D://crawlerdata/知乎2.xlsx";
        // Only referenced by the alternative fetch noted inside the loop below.
        String endTime = "1970-01-01 23:59:59";

        List<String> keywords = WordsReadFile.getWords(wordFileName);
        List<Map<String, Object>> rows = new ArrayList<>();
        for (String keyword : keywords) {
            // Alternative fetch that was previously used here:
            // ZhihuAnwserCrawlerParse.getAnswerList(keyword, TimeParse.stringFormartDate(endTime), ProxyHolder.NAT_HEAVY_PROXY)
            List<ZhihuAnswer> answers = ZhihuAnwserCrawlerParse.getPictureCount(keyword);
            for (ZhihuAnswer answer : answers) {
                rows.add(toRow(answer));
            }
        }

        PoiExcelUtil exporter = PoiExcelUtil.getInstance();
        exporter.exportExcel(dataFileName, "数据", buildHeadList(), rows);
        System.out.println("导出成功");
    }

    /** Copies the fields of one answer into a map keyed by column title. */
    private static Map<String, Object> toRow(ZhihuAnswer answer) {
        Map<String, Object> row = new HashMap<>();
        row.put("地址", answer.getUrl());
        row.put("问题地址", answer.getFrom_url());
        row.put("标题", answer.getTitle());
        row.put("时间", answer.getTime());
        row.put("发布者", answer.getAuthor());
        row.put("作者地址", answer.getAuthorUrl());
        row.put("内容", answer.getContent());
        row.put("回答点赞数", answer.getAttitudes_count());
        row.put("回答评论数", answer.getComment_count());
        row.put("问题点赞数", answer.getFollow_count());
        row.put("问题评论数", answer.getBord_count());
        row.put("图片数量", answer.getImgCount());
        row.put("排名", answer.getSort());
        return row;
    }

    /** Builds a fresh mutable list holding the column titles in export order. */
    private static List<String> buildHeadList() {
        List<String> headList = new ArrayList<>();
        for (String title : COLUMN_TITLES) {
            headList.add(title);
        }
        return headList;
    }
}
package com.zhiwei.media_data_crawler.excelentity;
import com.alibaba.excel.annotation.ExcelProperty;
/**
 * Row model whose {@code @ExcelProperty} annotations supply the column headers
 * for an EasyExcel export.
 *
 * <p>Field declaration order is kept as-is; only {@code url} carries an explicit
 * column index.
 *
 * @author xMx
 * @date 2019-10-29 09:15:40
 */
public class DataExcel {

    /** Exported under the header "地址" at column index 0. */
    @ExcelProperty(value = "地址",index = 0)
    private String url;

    /** Exported under the header "标题". */
    @ExcelProperty("标题")
    private String title;

    /** Exported under the header "时间". */
    @ExcelProperty("时间")
    private String time;

    /** Exported under the header "tid". */
    @ExcelProperty("tid")
    private String tid;

    /** Exported under the header "来源". */
    @ExcelProperty("来源")
    private String source;

    /** Exported under the header "回复者或楼主". */
    @ExcelProperty("回复者或楼主")
    private String author;

    /** Exported under the header "回复内容". */
    @ExcelProperty("回复内容")
    private String content;

    /** Exported under the header "关键词". */
    @ExcelProperty("关键词")
    private String word;

    /** @return the current {@code url} value */
    public String getUrl() {
        return this.url;
    }

    /** @param url new {@code url} value */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the current {@code title} value */
    public String getTitle() {
        return this.title;
    }

    /** @param title new {@code title} value */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the current {@code time} value */
    public String getTime() {
        return this.time;
    }

    /** @param time new {@code time} value */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return the current {@code tid} value */
    public String getTid() {
        return this.tid;
    }

    /** @param tid new {@code tid} value */
    public void setTid(String tid) {
        this.tid = tid;
    }

    /** @return the current {@code source} value */
    public String getSource() {
        return this.source;
    }

    /** @param source new {@code source} value */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the current {@code author} value */
    public String getAuthor() {
        return this.author;
    }

    /** @param author new {@code author} value */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the current {@code content} value */
    public String getContent() {
        return this.content;
    }

    /** @param content new {@code content} value */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the current {@code word} value */
    public String getWord() {
        return this.word;
    }

    /** @param word new {@code word} value */
    public void setWord(String word) {
        this.word = word;
    }
}
package com.zhiwei.media_data_crawler.test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.media_data_crawler.crawler.WordsReadFile;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.LunTanData;
import com.zhiwei.proxy.config.SimpleConfig;
/**
 * Manual driver that fetches Tianya forum data and exports it to a spreadsheet.
 *
 * <p>Flow: initialise the proxy factory, read keywords from a local text file, collect the
 * results of {@code DataCrawler.getLunTanData} for every keyword within the configured time
 * window, convert each record to a row map, and pass everything to
 * {@code PoiExcelUtil.exportExcel}.
 *
 * @author xMx
 * @date 2019-11-08 16:08:29
 */
public class GetTiayaDataTest {

    /** Column titles in the order they are passed to the exporter. */
    private static final String[] COLUMN_TITLES = {
        "地址", "标题", "时间", "来源", "回复者或楼主", "回复内容", "回复数", "平台", "关键词"
    };

    public static void main(String[] args) {
        String wordFilePath = "D:\\crawlerdata\\关键词6.txt"; // keyword file
        String filePath = "D:\\crawlerdata\\天涯论坛-精装房.xlsx";
        String startTime = "2019-01-01 00:00:00"; // start of the time window
        String endTime = "2019-11-08 23:59:59"; // end of the time window

        // Proxy registry address and application identity.
        String address = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";
        String appName = "xumaioxin";
        long appId = 10000008L;
        ProxyFactory.init(
                SimpleConfig.builder()
                        .registry(address)
                        .appName(appName)
                        .appId(appId)
                        .group("local")
                        .build());

        // Phase 1: gather every record for every keyword.
        List<String> keywords = WordsReadFile.getWords(wordFilePath);
        List<LunTanData> records = new ArrayList<>();
        for (String keyword : keywords) {
            records.addAll(DataCrawler.getLunTanData(keyword, null, startTime, endTime));
        }

        // Phase 2: turn each record into a row keyed by column title.
        List<Map<String, Object>> bodyList = new ArrayList<>();
        for (LunTanData record : records) {
            bodyList.add(toRow(record));
        }

        List<String> headList = new ArrayList<>();
        for (String title : COLUMN_TITLES) {
            headList.add(title);
        }

        PoiExcelUtil exporter = PoiExcelUtil.getInstance();
        exporter.exportExcel(filePath, "数据", headList, bodyList);
        System.out.println("导出成功");
    }

    /** Copies the fields of one forum record into a map keyed by column title. */
    private static Map<String, Object> toRow(LunTanData record) {
        Map<String, Object> row = new HashMap<>();
        row.put("地址", record.getUrl());
        row.put("标题", record.getTitle());
        row.put("时间", record.getTime());
        row.put("来源", record.getSource());
        row.put("回复者或楼主", record.getAuthor());
        row.put("回复内容", record.getContent());
        row.put("回复数", record.getReply_count());
        row.put("平台", record.getPt());
        row.put("关键词", record.getWord());
        return row;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment