-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathconfig.conf
75 lines (62 loc) · 1.92 KB
/
config.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Database-related settings (this section and the database sections that follow).
# [mongodb]: host, port and database name.
[mongodb]
ip = localhost
port = 27017
db = internet_content_detection
# [oracledb]: host, port, database name and login credentials.
# NOTE(review): `db` is presumably the Oracle SID / service name - confirm.
# NOTE(review): user_pass is stored here in plain text; consider loading it
# from an environment variable or a secrets store instead.
[oracledb]
ip = 192.168.60.31
port = 1521
db = orcl
user_name = iimp_user
user_pass = speech
# [mysql]: host, port, database name and login credentials.
# NOTE(review): db, user_name and user_pass are identical to the [oracledb]
# values ("orcl" is the conventional default Oracle SID) - verify these are the
# intended MySQL settings and not a copy-paste leftover.
# NOTE(review): user_pass is stored here in plain text.
[mysql]
ip = localhost
port = 3306
db = orcl
user_name = iimp_user
user_pass = speech
[elasticsearch]
# Node addresses in host:port form; separate multiple addresses with commas.
address = 192.168.60.40:9200,192.168.60.30:9200,192.168.110.141:9200
# Crawler-related settings (this section and the crawler sections that follow).
# NOTE(review): units and exact meaning of these keys are not documented here;
# sleep_time is presumably in seconds - confirm against the collector code.
[collector]
sleep_time = 5
depth = 7
url_count = 400
max_size = 1000
allowed_null_times = 10
# Parser settings.
# NOTE(review): parser_count presumably sets how many parsers run and
# sleep_time is presumably in seconds - confirm against the parser code.
[parser]
parser_count = 10
sleep_time = 5
url_count = 20
max_retry_times = 15
# Site selection for the spider.
# spider_site_name: either `all`, or a comma-separated list such as
#   site_name1,site_name2
# except_site_name: site(s) to exclude, e.g. except_site_name = 百度
[spider_site]
# Known site names (kept verbatim; presumably they must match the names the
# crawler uses - confirm):
# 网盘搜,百度,bt磁力链,新浪微博,微信,映客,搜百度盘,豆瓣网,花椒直播,陌陌直播,应用宝,360手机助手,百度手机助手,安卓市场,喜马拉雅FM,今日头条,坐享小说,极速漫画,动漫啦漫画,cctv
spider_site_name = all
except_site_name =
# Task settings.
# NOTE(review): search_task_sleep_time is presumably a wait interval in
# seconds for the search task - confirm against the task code.
[task]
search_task_sleep_time = 10
# Image-recognition-related settings (this section and the image section that follows).
# NOTE(review): sleep_time is presumably in seconds - confirm.
[image_collector]
images_count = 10
sleep_time = 2
max_size = 1000
allowed_null_times = 10
# Image parser
[image_porn]
# Thread count (number of threads used to process each table).
porn_thread_count = 1
# Number of images each thread fetches at a time.
deal_image_count = 1
sleep_time = 1
# Names of the tables whose images need parsing; separate multiple tables with commas.
image_tables = WWA_wechat_article,WWA_wechat_official_accounts,WWA_weibo_info_info,WWA_weibo_user_info,WWA_app_content_info
# Whether to process the tables one at a time (0 = no, 1 = yes).
# 0 means all tables are processed concurrently, in which case the number of
# image-parsing threads is: number of tables * porn_thread_count.
deal_image_tables_one_by_one = 1
# Save paths (directories) used by the program.
# NOTE(review): all paths are hard-coded to the Windows D: drive; they must be
# changed when deploying on a host without that drive or on a non-Windows OS.
[files]
headlines_save_path = D:/headlines_today/
kuaibao_save_path = D:/kuaibao/
wwa_save_path = D:/pattek/wwa/
program_save_path = D:/program/