Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
soubao_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
soubao_crawler
Commits
cb82a2d5
Commit
cb82a2d5
authored
Aug 13, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新版字段相应的配置及相应jar
parent
c1281636
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
89 additions
and
103 deletions
+89
-103
pom.xml
+12
-7
src/main/java/com/zhiwei/crawler/config/CleanerFilterConfig.java
+25
-0
src/main/java/com/zhiwei/crawler/config/DBConfig.java
+1
-14
src/main/java/com/zhiwei/crawler/dao/ProducerKafka.java
+2
-2
src/main/java/com/zhiwei/crawler/dao/WordsDao.java
+3
-3
src/main/java/com/zhiwei/crawler/dbtemplate/MongoDBTemplate.java
+3
-3
src/main/java/com/zhiwei/crawler/dbtemplate/RsidClientTemplate.java
+8
-37
src/main/java/com/zhiwei/crawler/util/TreatData.java
+21
-17
src/main/resources/cleanerfilter.properties
+6
-0
src/main/resources/db.properties
+2
-15
src/main/resources/kafka.properties
+5
-4
src/main/resources/proxyip.properties
+1
-1
No files found.
pom.xml
View file @
cb82a2d5
...
@@ -29,14 +29,9 @@
...
@@ -29,14 +29,9 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.1.
3
-SNAPSHOT
</version>
<version>
0.1.
4
-SNAPSHOT
</version>
</dependency>
</dependency>
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
cleaner-unified-urlfilter
</artifactId>
<version>
1.0.6.RELEASE
</version>
</dependency>
<dependency>
<dependency>
<groupId>
org.apache.kafka
</groupId>
<groupId>
org.apache.kafka
</groupId>
...
@@ -53,7 +48,17 @@
...
@@ -53,7 +48,17 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<artifactId>
crawler-core
</artifactId>
<version>
0.3.6-RELEASE
</version>
<version>
0.5.2-RELEASE
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
cleaner-unified-filter
</artifactId>
<version>
0.0.7-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.base
</groupId>
<artifactId>
base-objects
</artifactId>
<version>
0.2.7-SNAPSHOT
</version>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/crawler/config/CleanerFilterConfig.java
0 → 100644
View file @
cb82a2d5
package
com
.
zhiwei
.
crawler
.
config
;
import
java.io.InputStream
;
import
java.util.Properties
;
/**
 * Static configuration for the cleaner / de-duplication filter middleware.
 *
 * <p>The two settings are read once, during class initialisation, from a properties file that is
 * looked up through the thread context class loader. {@code cleanerfilter.properties} is tried
 * first; when it is not on the class path the legacy name {@code redis.properties} (the only
 * file the previous implementation read) is tried instead.
 *
 * <p>Class initialisation never throws: if no file can be read, both fields stay {@code null}
 * and a diagnostic is written to standard error.
 */
public class CleanerFilterConfig {

    /** Value of {@code rsid.zookeeper.url}, or {@code null} when it is not configured. */
    public static String rsidUrl;

    /** Value of {@code rsid.zookeeper.group}, or {@code null} when it is not configured. */
    public static String rsidGroup;

    /** Candidate resource names, in lookup order; the first one that loads wins. */
    private static final String[] RESOURCE_NAMES = {"cleanerfilter.properties", "redis.properties"};

    static {
        Properties conf = loadFirstAvailable();
        if (conf != null) {
            rsidUrl = conf.getProperty("rsid.zookeeper.url");
            rsidGroup = conf.getProperty("rsid.zookeeper.group");
        }
    }

    /**
     * Loads the first resource from {@link #RESOURCE_NAMES} that exists and can be parsed.
     *
     * @return the loaded properties, or {@code null} when none of the candidates could be read
     */
    private static Properties loadFirstAvailable() {
        // The context class loader may legitimately be null; fall back to this class's loader.
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = CleanerFilterConfig.class.getClassLoader();
        }
        for (String name : RESOURCE_NAMES) {
            // try-with-resources tolerates a null resource and closes the stream even if load() fails.
            try (InputStream is = (loader == null)
                    ? ClassLoader.getSystemResourceAsStream(name)
                    : loader.getResourceAsStream(name)) {
                if (is == null) {
                    continue; // resource not on the class path, try the next candidate
                }
                Properties conf = new Properties();
                conf.load(is);
                return conf;
            } catch (java.io.IOException | IllegalArgumentException e) {
                // Unreadable or malformed file: report it and keep looking.
                e.printStackTrace();
            }
        }
        System.err.println("CleanerFilterConfig: could not load cleanerfilter.properties"
                + " (or legacy redis.properties) from the class path");
        return null;
    }
}
src/main/java/com/zhiwei/crawler/config/Config.java
→
src/main/java/com/zhiwei/crawler/config/
DB
Config.java
View file @
cb82a2d5
...
@@ -3,7 +3,7 @@ package com.zhiwei.crawler.config;
...
@@ -3,7 +3,7 @@ package com.zhiwei.crawler.config;
import
java.io.InputStream
;
import
java.io.InputStream
;
import
java.util.Properties
;
import
java.util.Properties
;
public
class
Config
{
public
class
DB
Config
{
static
{
static
{
Properties
conf
=
null
;
Properties
conf
=
null
;
try
{
try
{
...
@@ -17,13 +17,8 @@ public class Config {
...
@@ -17,13 +17,8 @@ public class Config {
userName
=
conf
.
getProperty
(
"db.username"
);
userName
=
conf
.
getProperty
(
"db.username"
);
userPwd
=
conf
.
getProperty
(
"db.paasword"
);
userPwd
=
conf
.
getProperty
(
"db.paasword"
);
authDB
=
conf
.
getProperty
(
"db.certifiedDB"
);
authDB
=
conf
.
getProperty
(
"db.certifiedDB"
);
savedbName
=
conf
.
getProperty
(
"savedbName"
);
saveCollName
=
conf
.
getProperty
(
"saveCollName"
);
crawlerdbName
=
conf
.
getProperty
(
"crawlerdbName"
);
crawlerdbName
=
conf
.
getProperty
(
"crawlerdbName"
);
crawlerCollName
=
conf
.
getProperty
(
"crawlerCollName"
);
crawlerCollName
=
conf
.
getProperty
(
"crawlerCollName"
);
redisKey
=
conf
.
getProperty
(
"redisKey"
);
rsidUrl
=
conf
.
getProperty
(
"rsid.zookeeper.url"
);
rsidGroup
=
conf
.
getProperty
(
"rsid.zookeeper.group"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
@@ -35,15 +30,7 @@ public class Config {
...
@@ -35,15 +30,7 @@ public class Config {
public
static
String
userName
;
public
static
String
userName
;
public
static
String
userPwd
;
public
static
String
userPwd
;
public
static
String
authDB
;
public
static
String
authDB
;
public
static
String
savedbName
;
public
static
String
saveCollName
;
public
static
String
crawlerdbName
;
public
static
String
crawlerdbName
;
public
static
String
crawlerCollName
;
public
static
String
crawlerCollName
;
public
static
String
redisKey
;
public
static
String
rsidUrl
;
public
static
String
rsidGroup
;
public
static
String
filePath
;
public
static
int
saveCount
;
public
static
int
queueCount
;
}
}
src/main/java/com/zhiwei/crawler/dao/ProducerKafka.java
View file @
cb82a2d5
...
@@ -11,7 +11,7 @@ import org.apache.logging.log4j.LogManager;
...
@@ -11,7 +11,7 @@ import org.apache.logging.log4j.LogManager;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.
mongodb.DBObjec
t
;
import
com.
zhiwei.base.entity.subclass.CompleteTex
t
;
import
com.zhiwei.crawler.config.KafkaConfig
;
import
com.zhiwei.crawler.config.KafkaConfig
;
public
class
ProducerKafka
{
public
class
ProducerKafka
{
...
@@ -31,7 +31,7 @@ public class ProducerKafka {
...
@@ -31,7 +31,7 @@ public class ProducerKafka {
}
}
}
}
public
static
void
add
(
String
key
,
DBObjec
t
doc
)
{
public
static
void
add
(
String
key
,
CompleteTex
t
doc
)
{
String
data
=
JSONObject
.
toJSONString
(
doc
);
String
data
=
JSONObject
.
toJSONString
(
doc
);
Future
<
RecordMetadata
>
future
=
producer
.
send
(
new
ProducerRecord
<
String
,
String
>(
KafkaConfig
.
kafkaTopic
,
key
,
data
));
Future
<
RecordMetadata
>
future
=
producer
.
send
(
new
ProducerRecord
<
String
,
String
>(
KafkaConfig
.
kafkaTopic
,
key
,
data
));
try
{
try
{
...
...
src/main/java/com/zhiwei/crawler/dao/WordsDao.java
View file @
cb82a2d5
...
@@ -10,15 +10,15 @@ import org.apache.logging.log4j.Logger;
...
@@ -10,15 +10,15 @@ import org.apache.logging.log4j.Logger;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.crawler.config.Config
;
import
com.zhiwei.crawler.config.
DB
Config
;
import
com.zhiwei.crawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.crawler.dbtemplate.MongoDBTemplate
;
public
class
WordsDao
extends
MongoDBTemplate
{
public
class
WordsDao
extends
MongoDBTemplate
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
WordsDao
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
WordsDao
.
class
);
public
WordsDao
(){
public
WordsDao
(){
super
.
setDbName
(
Config
.
crawlerdbName
);
super
.
setDbName
(
DB
Config
.
crawlerdbName
);
super
.
setCollName
(
Config
.
crawlerCollName
);
super
.
setCollName
(
DB
Config
.
crawlerCollName
);
}
}
...
...
src/main/java/com/zhiwei/crawler/dbtemplate/MongoDBTemplate.java
View file @
cb82a2d5
...
@@ -10,7 +10,7 @@ import com.mongodb.MongoClient;
...
@@ -10,7 +10,7 @@ import com.mongodb.MongoClient;
import
com.mongodb.MongoCredential
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.MongoException
;
import
com.mongodb.MongoException
;
import
com.mongodb.ServerAddress
;
import
com.mongodb.ServerAddress
;
import
com.zhiwei.crawler.config.Config
;
import
com.zhiwei.crawler.config.
DB
Config
;
/**
/**
*
*
* @author LihuaTang
* @author LihuaTang
...
@@ -25,8 +25,8 @@ public class MongoDBTemplate
...
@@ -25,8 +25,8 @@ public class MongoDBTemplate
@SuppressWarnings
(
"deprecation"
)
@SuppressWarnings
(
"deprecation"
)
public
MongoDBTemplate
()
{
public
MongoDBTemplate
()
{
MongoCredential
credential
=
MongoCredential
.
createCredential
(
Config
.
userName
,
Config
.
authDB
,
Config
.
userPwd
.
toCharArray
());
MongoCredential
credential
=
MongoCredential
.
createCredential
(
DBConfig
.
userName
,
DBConfig
.
authDB
,
DB
Config
.
userPwd
.
toCharArray
());
ServerAddress
address
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
ServerAddress
address
=
new
ServerAddress
(
DBConfig
.
mongoIp
,
DB
Config
.
mongoPort
);
try
{
try
{
if
(
reader
==
null
)
if
(
reader
==
null
)
{
{
...
...
src/main/java/com/zhiwei/crawler/dbtemplate/RsidClientTemplate.java
View file @
cb82a2d5
package
com
.
zhiwei
.
crawler
.
dbtemplate
;
package
com
.
zhiwei
.
crawler
.
dbtemplate
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.Map
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.base.entity.subclass.CompleteText
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.config.Config
;
import
com.zhiwei.crawler.config.CleanerFilterConfig
;
import
com.zhiwei.middleware.cleaner.ptenum.PTENUM
;
import
com.zhiwei.middleware.cleaner.filter.UnifiedFilterClient
;
import
com.zhiwei.middleware.cleaner.urlfilter.UnifiedUrlFilterClient
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
/**
...
@@ -23,15 +19,15 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
...
@@ -23,15 +19,15 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
RsidClientTemplate
{
public
class
RsidClientTemplate
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
RsidClientTemplate
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
RsidClientTemplate
.
class
);
private
static
Unified
Url
FilterClient
client
;
private
static
UnifiedFilterClient
client
;
static
{
static
{
if
(
client
==
null
){
if
(
client
==
null
){
synchronized
(
RsidClientTemplate
.
class
)
{
synchronized
(
RsidClientTemplate
.
class
)
{
if
(
client
==
null
)
{
if
(
client
==
null
)
{
try
{
try
{
client
=
Unified
UrlFilterClient
.
getClient
(
Config
.
rsidUrl
,
client
=
Unified
FilterClient
.
getClient
(
CleanerFilter
Config
.
rsidUrl
,
Config
.
rsidGroup
,
GroupType
.
PROVIDER
);
C
leanerFilterC
onfig
.
rsidGroup
,
GroupType
.
PROVIDER
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"链接清洗中间件时出现错误,错误为:::{}"
,
e
);
logger
.
error
(
"链接清洗中间件时出现错误,错误为:::{}"
,
e
);
}
}
...
@@ -49,42 +45,17 @@ public class RsidClientTemplate {
...
@@ -49,42 +45,17 @@ public class RsidClientTemplate {
* @param @return 设定文件
* @param @return 设定文件
* @return boolean 返回类型
* @return boolean 返回类型
*/
*/
public
static
boolean
addFilterUrl
(
String
url
,
String
title
,
String
source
,
Date
date
,
String
p
t
){
public
static
boolean
addFilterUrl
(
CompleteText
completeTex
t
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
try
{
Map
<
String
,
Object
>
filterMap
=
new
HashMap
<
String
,
Object
>();
return
client
.
contains
(
completeText
.
filterInfo
());
filterMap
.
put
(
"_id"
,
url
);
filterMap
.
put
(
"url"
,
url
);
filterMap
.
put
(
"title"
,
title
);
filterMap
.
put
(
"source"
,
source
);
filterMap
.
put
(
"time"
,
String
.
valueOf
(
date
.
getTime
()));
filterMap
.
put
(
"pt"
,
pt
);
if
(
client
==
null
){
System
.
out
.
println
(
"client is null"
);
}
return
client
.
contains
(
filterMap
,
PTENUM
.
COMMON
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"判断此条网媒消息是否存在出现问题"
,
e
);
logger
.
error
(
"判断此条网媒消息是否存在出现问题"
,
e
);
ZhiWeiTools
.
sleep
(
500
);
ZhiWeiTools
.
sleep
(
500
);
continue
;
}
}
}
}
return
false
;
return
false
;
}
}
public
static
boolean
addFilterUrl
(
String
url
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
return
client
.
contains
(
url
,
PTENUM
.
COMMON
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"判断此条网媒消息是否存在出现问题"
,
e
);
ZhiWeiTools
.
sleep
(
500
);
continue
;
}
}
return
false
;
}
}
}
src/main/java/com/zhiwei/crawler/util/TreatData.java
View file @
cb82a2d5
...
@@ -6,8 +6,13 @@ import java.util.Map;
...
@@ -6,8 +6,13 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.zhiwei.base.category.ClassA
;
import
com.mongodb.DBObject
;
import
com.zhiwei.base.category.ClassA.TypeA
;
import
com.zhiwei.base.category.ClassB.TypeB
;
import
com.zhiwei.base.category.ClassC.TypeC
;
import
com.zhiwei.base.category.ClassD
;
import
com.zhiwei.base.category.ClassD.TypeD
;
import
com.zhiwei.base.entity.subclass.CompleteText
;
import
com.zhiwei.crawler.dao.ProducerKafka
;
import
com.zhiwei.crawler.dao.ProducerKafka
;
import
com.zhiwei.crawler.dbtemplate.RsidClientTemplate
;
import
com.zhiwei.crawler.dbtemplate.RsidClientTemplate
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
...
@@ -31,7 +36,7 @@ public class TreatData {
...
@@ -31,7 +36,7 @@ public class TreatData {
* @return void 返回类型
* @return void 返回类型
*/
*/
public
static
void
treatDataAccount
(
Map
<
String
,
Object
>
dataMap
)
{
public
static
void
treatDataAccount
(
Map
<
String
,
Object
>
dataMap
)
{
if
(!(
dataMap
.
get
(
"
_id"
)
==
null
||
dataMap
.
get
(
"_id
"
).
equals
(
""
)
if
(!(
dataMap
.
get
(
"
url"
)
==
null
||
dataMap
.
get
(
"url
"
).
equals
(
""
)
||
dataMap
.
get
(
"source"
)
==
null
||
dataMap
.
get
(
"source"
).
equals
(
""
)
||
dataMap
.
get
(
"source"
)
==
null
||
dataMap
.
get
(
"source"
).
equals
(
""
)
||
dataMap
.
get
(
"time"
)==
null
||
dataMap
.
get
(
"time"
).
equals
(
""
)
||
dataMap
.
get
(
"time"
)==
null
||
dataMap
.
get
(
"time"
).
equals
(
""
)
||
dataMap
.
get
(
"title"
).
equals
(
""
)||
dataMap
.
get
(
"title"
)==
null
)
||
dataMap
.
get
(
"title"
).
equals
(
""
)||
dataMap
.
get
(
"title"
)==
null
)
...
@@ -39,23 +44,22 @@ public class TreatData {
...
@@ -39,23 +44,22 @@ public class TreatData {
Date
now
=
new
Date
(
new
Date
().
getTime
()
-
49
*
60
*
60
*
1000
);
//避免时间为:2018-06-22 00:00:00 时间格式的数据丢掉
Date
now
=
new
Date
(
new
Date
().
getTime
()
-
49
*
60
*
60
*
1000
);
//避免时间为:2018-06-22 00:00:00 时间格式的数据丢掉
Date
date
=
TimeParse
.
stringFormartDate
(
dataMap
.
get
(
"time"
).
toString
());
Date
date
=
TimeParse
.
stringFormartDate
(
dataMap
.
get
(
"time"
).
toString
());
if
(
date
.
after
(
now
))
{
if
(
date
.
after
(
now
))
{
logger
.
info
(
"去重的链接为:{}"
,
dataMap
.
get
(
"
_id
"
));
logger
.
info
(
"去重的链接为:{}"
,
dataMap
.
get
(
"
url
"
));
String
url
=
dataMap
.
get
(
"
_id
"
).
toString
();
String
url
=
dataMap
.
get
(
"
url
"
).
toString
();
String
title
=
dataMap
.
get
(
"title"
).
toString
();
String
title
=
dataMap
.
get
(
"title"
).
toString
();
String
source
=
dataMap
.
get
(
"source"
).
toString
();
String
source
=
dataMap
.
get
(
"source"
).
toString
();
if
(!
RsidClientTemplate
.
addFilterUrl
(
url
,
title
,
source
,
date
,
"平媒"
)){
String
content
=
dataMap
.
get
(
"content"
)+
""
.
replaceAll
(
"<[^>]*>"
,
""
);
DBObject
doc
=
new
BasicDBObject
();
ClassD
cd
=
ClassA
.
selectA
(
TypeA
.
TEXT
).
selectB
(
TypeB
.
COMPLETE
).
selectC
(
TypeC
.
PGC
).
selectD
(
TypeD
.
平媒
);
doc
.
put
(
"_id"
,
url
);
long
ctime
=
System
.
currentTimeMillis
();
doc
.
put
(
"url"
,
url
);
CompleteText
completeText
=
new
CompleteText
.
Builder
(
date
.
getTime
(),
ctime
,
source
.
trim
()
doc
.
put
(
"title"
,
dataMap
.
get
(
"title"
));
,
cd
.
encodeA
(),
cd
.
encodeB
(),
cd
.
encodeC
(),
cd
.
encodeD
(),
cd
.
combineEncode
(),
"PC"
,
null
,
100010005L
)
doc
.
put
(
"time"
,
String
.
valueOf
(
date
.
getTime
()));
.
url
(
url
)
doc
.
put
(
"source"
,
dataMap
.
get
(
"source"
));
.
title
(
title
)
doc
.
put
(
"content"
,
dataMap
.
get
(
"content"
)+
""
.
replaceAll
(
"<[^>]*>"
,
""
));
.
content
(
content
)
doc
.
put
(
"pt"
,
"平媒"
);
.
build
();
doc
.
put
(
"type"
,
"平媒"
);
doc
.
put
(
"savetime"
,
System
.
currentTimeMillis
());
ProducerKafka
.
add
(
"搜报网"
,
doc
);
if
(!
RsidClientTemplate
.
addFilterUrl
(
completeText
)){
ProducerKafka
.
add
(
"搜报网采集程序"
,
completeText
);
}
else
{
}
else
{
logger
.
info
(
"搜报网地址为:{},此条数据重复"
,
dataMap
.
get
(
"_id"
));
logger
.
info
(
"搜报网地址为:{},此条数据重复"
,
dataMap
.
get
(
"_id"
));
}
}
...
...
src/main/resources/cleanerfilter.properties
0 → 100644
View file @
cb82a2d5
rsid.zookeeper.url
=
zookeeper://192.168.0.203:2182;zookeeper://192.168.0.104:2182;zookeeper://192.168.0.105:2182
rsid.zookeeper.group
=
zhiwei-bloom-filter
########################local##############################
#rsid.zookeeper.url=zookeeper://192.168.0.36:2181
#
rsid.zookeeper.group
=
filter-test_sjj
\ No newline at end of file
src/main/resources/db.properties
View file @
cb82a2d5
#####################service#################################
#####################service#################################
mongoIp
=
192.168.0.101
mongoIp
=
192.168.0.101
mongoPort
=
30000
mongoPort
=
30000
db.username
=
zzwno
db.username
=
zzwno
db.paasword
=
zzwno1q2w3e4r
db.paasword
=
zzwno1q2w3e4r
db.certifiedDB
=
admin
db.certifiedDB
=
admin
##save data dbInfo
savedbName
=
mediaspider
saveCollName
=
net_media
##crawler word dbInfo
##crawler word dbInfo
crawlerdbName
=
qbjcPhoenix
crawlerdbName
=
qbjcPhoenix
crawlerCollName
=
qbjc_crawlerword
crawlerCollName
=
qbjc_crawlerword
rsid.zookeeper.url
=
zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#####################local#################################
rsid.zookeeper.group
=
crawler-filter
redisKey
=
media
#####################local#################################
#mongoIp=192.168.0.233
#mongoIp=192.168.0.233
#mongoPort=27017
#mongoPort=27017
#db.username=zzwno
#db.username=zzwno
#db.paasword=zzwno1q2w3e4r
#db.paasword=zzwno1q2w3e4r
#db.certifiedDB=admin
#db.certifiedDB=admin
###save data dbInfo
###save data dbInfo
#savedbName=mediaspider
#saveCollName=net_media
###crawler word dbInfo
###crawler word dbInfo
#crawlerdbName=qbjcPhoenix
#crawlerdbName=qbjcPhoenix
#
crawlerCollName
=
qbjc_crawlerword
#
crawlerCollName
=
qbjc_crawlerword
\ No newline at end of file
#rsid.zookeeper.url = zookeeper://192.168.0.36:2181;
#rsid.zookeeper.group=local
#
redisKey
=
media
\ No newline at end of file
src/main/resources/kafka.properties
View file @
cb82a2d5
##########################local##############################
##########################local##############################
,kafka1.irybd.com:9093,kafka1.irybd.com:9094
#kafka.ip=kafka1.irybd.com:9092
,kafka1.irybd.com:9093,kafka1.irybd.com:9094
#kafka.ip=kafka1.irybd.com:9092
#kafka.topic=crawler-
test
#kafka.topic=crawler-
media
##########################service##############################
##########################service##############################
kafka.ip
=
192.168.0.203:909
2,192.168.0.203:9093,192.168.0.203:9094
kafka.ip
=
192.168.0.203:909
5,192.168.0.104:9093,192.168.0.105:9093
kafka.topic
=
crawler-media
kafka.topic
=
crawler-media
\ No newline at end of file
src/main/resources/proxyip.properties
View file @
cb82a2d5
registry
=
zookeeper://192.168.0.203:218
1;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
registry
=
zookeeper://192.168.0.203:218
2;zookeeper://192.168.0.104:2182;zookeeper://192.168.0.105:2182
group
=
hangzhou
group
=
hangzhou
########################################################
########################################################
#registry=zookeeper://192.168.0.36:2181
#registry=zookeeper://192.168.0.36:2181
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment