Commit 0b03ebf6 by [zhangzhiwei]

新版kafka地址及redis地址

parent 1a02bc2f
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.middleware</groupId> <groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-urlfilter</artifactId> <artifactId>cleaner-unified-urlfilter</artifactId>
<version>1.0-SNAPSHOT</version> <version>1.0.0.RELEASE</version>
</dependency> </dependency>
<dependency> <dependency>
......
package com.zhiwei.crawler.config;
import java.io.InputStream;
import java.util.Properties;
/**
 * Holds the Kafka settings read from the {@code kafka.properties} classpath resource.
 *
 * <p>The resource is read once, when this class is initialized. Loading is best-effort:
 * if the resource is missing or cannot be read, the problem is reported on standard error
 * and both fields are left {@code null}; class initialization itself never fails.
 */
public class KafkaConfig {

    /** Value of the {@code kafka.ip} property, or {@code null} if it could not be loaded. */
    public static String kafkaIp;

    /** Value of the {@code kafka.topic} property, or {@code null} if it could not be loaded. */
    public static String kafkaTopic;

    static {
        load();
    }

    /** Reads {@code kafka.properties} via the context class loader and fills the static fields. */
    private static void load() {
        // try-with-resources closes the stream even when Properties.load throws;
        // a null resource is permitted and simply is not closed.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("kafka.properties")) {
            if (is == null) {
                System.err.println("kafka.properties not found on the classpath; "
                        + "kafkaIp and kafkaTopic are left unset");
                return;
            }
            Properties conf = new Properties();
            conf.load(is);
            kafkaIp = conf.getProperty("kafka.ip");
            kafkaTopic = conf.getProperty("kafka.topic");
        } catch (Exception e) {
            // Deliberately broad: a failure here must not abort class initialization,
            // which would make every later use of this class throw NoClassDefFoundError.
            e.printStackTrace();
        }
    }
}
package com.zhiwei.crawler.dao; package com.zhiwei.crawler.dao;
import java.util.Date;
import java.util.Properties; import java.util.Properties;
import java.util.concurrent.Future; import java.util.concurrent.Future;
...@@ -13,34 +12,28 @@ import org.apache.logging.log4j.Logger; ...@@ -13,34 +12,28 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.crawler.dbtemplate.RsidClientTemplate; import com.zhiwei.crawler.config.KafkaConfig;
public class ProducerKafka { public class ProducerKafka {
private static final Logger logger = LogManager.getLogger(ProducerKafka.class); private static final Logger logger = LogManager.getLogger(ProducerKafka.class);
private static final String topic = "crawler-test";
private static Producer<String, String> producer; private static Producer<String, String> producer;
static{ static{
if(producer == null){ if(producer == null){
Properties props = new Properties(); Properties props = new Properties();
props.put("bootstrap.servers","kafka1.irybd.com:9092"); props.put("bootstrap.servers", KafkaConfig.kafkaIp);
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("enable.auto.commit", "true"); props.put("enable.auto.commit", "true");
props.put("compression.type", "snappy"); props.put("compression.type", "snappy");
// props.put("acks", "all");
// props.put("retries", 0);
// props.put("batch.size", 16384);
// props.put("linger.ms", 1);
producer = new KafkaProducer<String, String>(props); producer = new KafkaProducer<String, String>(props);
} }
} }
public static void add(String spiderName, DBObject doc) { public static void add(String key ,DBObject doc) {
String data = JSONObject.toJSONString(doc); String data = JSONObject.toJSONString(doc);
Future<RecordMetadata> future = producer.send(new ProducerRecord<String, String>(topic, spiderName, data)); Future<RecordMetadata> future = producer.send(new ProducerRecord<String, String>(KafkaConfig.kafkaTopic, key, data));
try { try {
RecordMetadata recMeta = future.get(); RecordMetadata recMeta = future.get();
logger.info("添加成功。。。。。。,信息为:::{}", recMeta); logger.info("添加成功。。。。。。,信息为:::{}", recMeta);
...@@ -49,12 +42,4 @@ public class ProducerKafka { ...@@ -49,12 +42,4 @@ public class ProducerKafka {
} }
} }
public static void main(String[] args) {
boolean f =RsidClientTemplate.addFilterUrl("http://www.sougou.com/15.html", "测试标题222", "百度", new Date(), "网媒");
System.out.println("f======"+f);
}
} }
package com.zhiwei.crawler.dbtemplate; package com.zhiwei.crawler.dbtemplate;
import java.util.Arrays;
import com.mongodb.DB; import com.mongodb.DB;
import com.mongodb.DBCollection; import com.mongodb.DBCollection;
import com.mongodb.Mongo; import com.mongodb.Mongo;
...@@ -28,13 +30,13 @@ public class MongoDBTemplate ...@@ -28,13 +30,13 @@ public class MongoDBTemplate
try { try {
if(reader==null) if(reader==null)
{ {
// reader = new MongoClient(address, Arrays.asList(credential)); reader = new MongoClient(address, Arrays.asList(credential));
reader = new MongoClient(address); // reader = new MongoClient(address);
} }
if(writer==null) if(writer==null)
{ {
// writer = new MongoClient(address, Arrays.asList(credential)); writer = new MongoClient(address, Arrays.asList(credential));
writer = new MongoClient(address); // writer = new MongoClient(address);
} }
} catch (MongoException e ) { } catch (MongoException e ) {
e.printStackTrace(); e.printStackTrace();
......
...@@ -8,10 +8,10 @@ import java.util.Map; ...@@ -8,10 +8,10 @@ import java.util.Map;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.config.Config; import com.zhiwei.crawler.config.Config;
import com.zhiwei.middleware.cleaner.ptenum.PTENUM; import com.zhiwei.middleware.cleaner.ptenum.PTENUM;
import com.zhiwei.middleware.cleaner.urlfilter.UnifiedUrlFilterClient; import com.zhiwei.middleware.cleaner.urlfilter.UnifiedUrlFilterClient;
import com.zhiwei.middleware.filter.config.Definition;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
/** /**
...@@ -31,7 +31,7 @@ public class RsidClientTemplate { ...@@ -31,7 +31,7 @@ public class RsidClientTemplate {
if(client==null) { if(client==null) {
try { try {
client = UnifiedUrlFilterClient.getClient(Config.rsidUrl, client = UnifiedUrlFilterClient.getClient(Config.rsidUrl,
Config.rsidGroup, Definition.GroupType.PROVIDER); Config.rsidGroup, GroupType.PROVIDER);
} catch (Exception e) { } catch (Exception e) {
logger.error("链接清洗中间件时出现错误,错误为:::{}", e); logger.error("链接清洗中间件时出现错误,错误为:::{}", e);
} }
...@@ -53,6 +53,7 @@ public class RsidClientTemplate { ...@@ -53,6 +53,7 @@ public class RsidClientTemplate {
for(int i=0; i<3; i++){ for(int i=0; i<3; i++){
try { try {
Map<String,Object> filterMap = new HashMap<String,Object>(); Map<String,Object> filterMap = new HashMap<String,Object>();
filterMap.put("_id", url);
filterMap.put("url", url); filterMap.put("url", url);
filterMap.put("title", title); filterMap.put("title", title);
filterMap.put("source", source); filterMap.put("source", source);
......
#####################生产环境################################# #####################service#################################
#mongoIp=192.168.0.101 mongoIp=192.168.0.101
#mongoPort=30000 mongoPort=30000
#db.username=zzwno
#db.paasword=zzwno1q2w3e4r
#db.certifiedDB=admin
###save data dbInfo
#savedbName=mediaspider
#saveCollName=net_media
###crawler word dbInfo
#crawlerdbName=qbjcPhoenix
#crawlerCollName=qbjc_crawlerword
#rsid.zookeeper.url = zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#rsid.zookeeper.group=rsidservernew
#redisKey=media
#####################测试环境#################################
mongoIp=127.0.0.1
mongoPort=27017
db.username=zzwno db.username=zzwno
db.paasword=zzwno1q2w3e4r db.paasword=zzwno1q2w3e4r
db.certifiedDB=admin db.certifiedDB=admin
...@@ -25,6 +10,21 @@ saveCollName=net_media ...@@ -25,6 +10,21 @@ saveCollName=net_media
##crawler word dbInfo ##crawler word dbInfo
crawlerdbName=qbjcPhoenix crawlerdbName=qbjcPhoenix
crawlerCollName=qbjc_crawlerword crawlerCollName=qbjc_crawlerword
rsid.zookeeper.url = zookeeper://192.168.0.36:2181; rsid.zookeeper.url = zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
rsid.zookeeper.group=local rsid.zookeeper.group=crawler-filter
redisKey=media redisKey=media
#####################local#################################
#mongoIp=192.168.0.233
#mongoPort=27017
#db.username=zzwno
#db.paasword=zzwno1q2w3e4r
#db.certifiedDB=admin
###save data dbInfo
#savedbName=mediaspider
#saveCollName=net_media
###crawler word dbInfo
#crawlerdbName=qbjcPhoenix
#crawlerCollName=qbjc_crawlerword
#rsid.zookeeper.url = zookeeper://192.168.0.36:2181;
#rsid.zookeeper.group=local
#redisKey=media
\ No newline at end of file
##########################local##############################
#kafka.ip=kafka1.irybd.com:9092,kafka1.irybd.com:9093,kafka1.irybd.com:9094
#kafka.topic=crawler-test
##########################service##############################
kafka.ip=10.123.52.76:9092,10.123.52.76:9093,10.123.52.76:9094
kafka.topic=crawler-media
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment