Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
soubao_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
soubao_crawler
Commits
0b03ebf6
Commit
0b03ebf6
authored
Oct 09, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新版kafka地址及redis地址
parent
1a02bc2f
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
69 additions
and
48 deletions
+69
-48
pom.xml
+1
-1
src/main/java/com/zhiwei/crawler/config/KafkaConfig.java
+26
-0
src/main/java/com/zhiwei/crawler/dao/ProducerKafka.java
+4
-19
src/main/java/com/zhiwei/crawler/dbtemplate/MongoDBTemplate.java
+6
-4
src/main/java/com/zhiwei/crawler/dbtemplate/RsidClientTemplate.java
+3
-2
src/main/resources/db.properties
+22
-22
src/main/resources/kafka.properties
+7
-0
No files found.
pom.xml
View file @
0b03ebf6
...
...
@@ -40,7 +40,7 @@
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
cleaner-unified-urlfilter
</artifactId>
<version>
1.0
-SNAPSHOT
</version>
<version>
1.0
.0.RELEASE
</version>
</dependency>
<dependency>
...
...
src/main/java/com/zhiwei/crawler/config/KafkaConfig.java
0 → 100644
View file @
0b03ebf6
package
com
.
zhiwei
.
crawler
.
config
;
import
java.io.InputStream
;
import
java.util.Properties
;
/**
 * Holds the Kafka connection settings read from the {@code kafka.properties}
 * classpath resource.
 *
 * <p>The settings are loaded once, when this class is initialized. Loading is
 * best-effort: if the resource is missing or cannot be read, the problem is
 * reported on standard error and both fields are left {@code null}; class
 * initialization itself does not fail.
 */
public class KafkaConfig {

    /** Value of the {@code kafka.ip} property, or {@code null} if loading failed. */
    public static String kafkaIp;

    /** Value of the {@code kafka.topic} property, or {@code null} if loading failed. */
    public static String kafkaTopic;

    /** Name of the classpath resource the settings are read from. */
    private static final String RESOURCE_NAME = "kafka.properties";

    static {
        load();
    }

    /**
     * Reads {@link #RESOURCE_NAME} through the context class loader and copies
     * the two properties into the public fields.
     */
    private static void load() {
        Properties conf = new Properties();
        // try-with-resources closes the stream even when load() throws;
        // a null resource is permitted and is simply not closed.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(RESOURCE_NAME)) {
            if (is == null) {
                // Say exactly what is wrong instead of failing with a
                // NullPointerException from Properties.load(null).
                System.err.println("KafkaConfig: resource '" + RESOURCE_NAME
                        + "' not found on the classpath");
                return;
            }
            conf.load(is);
            kafkaIp = conf.getProperty("kafka.ip");
            kafkaTopic = conf.getProperty("kafka.topic");
        } catch (java.io.IOException | RuntimeException e) {
            // Deliberately non-fatal: keep the class usable and report the cause.
            System.err.println("KafkaConfig: failed to load '" + RESOURCE_NAME + "'");
            e.printStackTrace();
        }
    }
}
src/main/java/com/zhiwei/crawler/dao/ProducerKafka.java
View file @
0b03ebf6
package
com
.
zhiwei
.
crawler
.
dao
;
import
java.util.Date
;
import
java.util.Properties
;
import
java.util.concurrent.Future
;
...
...
@@ -13,34 +12,28 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.crawler.
dbtemplate.RsidClientTemplate
;
import
com.zhiwei.crawler.
config.KafkaConfig
;
public
class
ProducerKafka
{
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
ProducerKafka
.
class
);
private
static
final
String
topic
=
"crawler-test"
;
private
static
Producer
<
String
,
String
>
producer
;
static
{
if
(
producer
==
null
){
Properties
props
=
new
Properties
();
props
.
put
(
"bootstrap.servers"
,
"kafka1.irybd.com:9092"
);
props
.
put
(
"bootstrap.servers"
,
KafkaConfig
.
kafkaIp
);
props
.
put
(
"key.serializer"
,
"org.apache.kafka.common.serialization.StringSerializer"
);
props
.
put
(
"value.serializer"
,
"org.apache.kafka.common.serialization.StringSerializer"
);
props
.
put
(
"enable.auto.commit"
,
"true"
);
props
.
put
(
"compression.type"
,
"snappy"
);
// props.put("acks", "all");
// props.put("retries", 0);
// props.put("batch.size", 16384);
// props.put("linger.ms", 1);
producer
=
new
KafkaProducer
<
String
,
String
>(
props
);
}
}
public
static
void
add
(
String
spiderName
,
DBObject
doc
)
{
public
static
void
add
(
String
key
,
DBObject
doc
)
{
String
data
=
JSONObject
.
toJSONString
(
doc
);
Future
<
RecordMetadata
>
future
=
producer
.
send
(
new
ProducerRecord
<
String
,
String
>(
topic
,
spiderName
,
data
));
Future
<
RecordMetadata
>
future
=
producer
.
send
(
new
ProducerRecord
<
String
,
String
>(
KafkaConfig
.
kafkaTopic
,
key
,
data
));
try
{
RecordMetadata
recMeta
=
future
.
get
();
logger
.
info
(
"添加成功。。。。。。,信息为:::{}"
,
recMeta
);
...
...
@@ -49,12 +42,4 @@ public class ProducerKafka {
}
}
public
static
void
main
(
String
[]
args
)
{
boolean
f
=
RsidClientTemplate
.
addFilterUrl
(
"http://www.sougou.com/15.html"
,
"测试标题222"
,
"百度"
,
new
Date
(),
"网媒"
);
System
.
out
.
println
(
"f======"
+
f
);
}
}
src/main/java/com/zhiwei/crawler/dbtemplate/MongoDBTemplate.java
View file @
0b03ebf6
package
com
.
zhiwei
.
crawler
.
dbtemplate
;
import
java.util.Arrays
;
import
com.mongodb.DB
;
import
com.mongodb.DBCollection
;
import
com.mongodb.Mongo
;
...
...
@@ -28,13 +30,13 @@ public class MongoDBTemplate
try
{
if
(
reader
==
null
)
{
//
reader = new MongoClient(address, Arrays.asList(credential));
reader
=
new
MongoClient
(
address
);
reader
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
//
reader = new MongoClient(address);
}
if
(
writer
==
null
)
{
//
writer = new MongoClient(address, Arrays.asList(credential));
writer
=
new
MongoClient
(
address
);
writer
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
//
writer = new MongoClient(address);
}
}
catch
(
MongoException
e
)
{
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/crawler/dbtemplate/RsidClientTemplate.java
View file @
0b03ebf6
...
...
@@ -8,10 +8,10 @@ import java.util.Map;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.config.Config
;
import
com.zhiwei.middleware.cleaner.ptenum.PTENUM
;
import
com.zhiwei.middleware.cleaner.urlfilter.UnifiedUrlFilterClient
;
import
com.zhiwei.middleware.filter.config.Definition
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
...
...
@@ -31,7 +31,7 @@ public class RsidClientTemplate {
if
(
client
==
null
)
{
try
{
client
=
UnifiedUrlFilterClient
.
getClient
(
Config
.
rsidUrl
,
Config
.
rsidGroup
,
Definition
.
GroupType
.
PROVIDER
);
Config
.
rsidGroup
,
GroupType
.
PROVIDER
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"链接清洗中间件时出现错误,错误为:::{}"
,
e
);
}
...
...
@@ -53,6 +53,7 @@ public class RsidClientTemplate {
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
Map
<
String
,
Object
>
filterMap
=
new
HashMap
<
String
,
Object
>();
filterMap
.
put
(
"_id"
,
url
);
filterMap
.
put
(
"url"
,
url
);
filterMap
.
put
(
"title"
,
title
);
filterMap
.
put
(
"source"
,
source
);
...
...
src/main/resources/db.properties
View file @
0b03ebf6
#####################生产环境#################################
#mongoIp=192.168.0.101
#mongoPort=30000
#db.username=zzwno
#db.paasword=zzwno1q2w3e4r
#db.certifiedDB=admin
###save data dbInfo
#savedbName=mediaspider
#saveCollName=net_media
###crawler word dbInfo
#crawlerdbName=qbjcPhoenix
#crawlerCollName=qbjc_crawlerword
#rsid.zookeeper.url = zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#rsid.zookeeper.group=rsidservernew
#redisKey=media
#####################测试环境#################################
mongoIp
=
127.0.0.1
mongoPort
=
27017
#####################service#################################
mongoIp
=
192.168.0.101
mongoPort
=
30000
db.username
=
zzwno
db.paasword
=
zzwno1q2w3e4r
db.certifiedDB
=
admin
...
...
@@ -25,6 +10,21 @@ saveCollName=net_media
##crawler word dbInfo
crawlerdbName
=
qbjcPhoenix
crawlerCollName
=
qbjc_crawlerword
rsid.zookeeper.url
=
zookeeper://192.168.0.36:2181;
rsid.zookeeper.group
=
local
redisKey
=
media
\ No newline at end of file
rsid.zookeeper.url
=
zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
rsid.zookeeper.group
=
crawler-filter
redisKey
=
media
#####################local#################################
#mongoIp=192.168.0.233
#mongoPort=27017
#db.username=zzwno
#db.paasword=zzwno1q2w3e4r
#db.certifiedDB=admin
###save data dbInfo
#savedbName=mediaspider
#saveCollName=net_media
###crawler word dbInfo
#crawlerdbName=qbjcPhoenix
#crawlerCollName=qbjc_crawlerword
#rsid.zookeeper.url = zookeeper://192.168.0.36:2181;
#rsid.zookeeper.group=local
#
redisKey
=
media
\ No newline at end of file
src/main/resources/kafka.properties
0 → 100644
View file @
0b03ebf6
##########################local##############################
#kafka.ip=kafka1.irybd.com:9092,kafka1.irybd.com:9093,kafka1.irybd.com:9094
#kafka.topic=crawler-test
##########################service##############################
kafka.ip
=
10.123.52.76:9092,10.123.52.76:9093,10.123.52.76:9094
kafka.topic
=
crawler-media
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment