File tree: 1 file changed (+23 -21 lines changed)
Original file line number | Diff line number | Diff line change
1
1
# coding=gbk
2
2
"""
3
- 作者:川川
4
- @时间 : 2023/12/9 0:44
3
+ 作者:川川
4
+ @时间 : 202/02/18 0:44
5
5
"""
6
6
# pip install beautifulsoup4 icrawler
7
+ import os
7
8
from icrawler .builtin import BingImageCrawler
8
9
9
- # 需要爬取的关键字
10
- list_word = ['宝马车' , '奔驰车 ' ]
10
+ # 需要爬取的关键字
11
+ list_word = ['比亚迪汽车 ' ]
11
12
12
- filters = dict (
13
- size = 'large' ,
14
- color = 'color' ,
15
- license = 'commercial,modify' ,
16
- date = 'pastyear'
17
- )
13
+ # 确保路径存在
14
+ if not os .path .exists ('photo' ):
15
+ os .makedirs ('photo' )
18
16
19
17
for word in list_word :
20
- # bing爬虫
21
- # 保存路径
22
- bing_storage = {'root_dir' : 'photo\\ ' + word } # photo为主文件名
23
- # 从上到下依次是解析器线程数,下载线程数,还有上面设置的保存路径
24
- bing_crawler = BingImageCrawler (parser_threads = 4 ,
25
- downloader_threads = 8 ,
26
- storage = bing_storage )
27
- # 开始爬虫,关键字+图片数量
28
- bing_crawler .crawl (keyword = word ,
29
- filters = filters ,
30
- max_num = 10 )
18
+ # 保存路径
19
+ bing_storage = {'root_dir' : os .path .join ('photo' , word )} # 使用os.path.join处理路径
20
+
21
+ # 创建BingImageCrawler实例
22
+ bing_crawler = BingImageCrawler (
23
+ parser_threads = 3 , # 使用3个解析线程
24
+ downloader_threads = 4 , # 使用4个下载线程
25
+ storage = bing_storage
26
+ )
27
+
28
+ # 开始爬虫,关键字+图片数量
29
+ bing_crawler .crawl (
30
+ keyword = word , # 关键字
31
+ max_num = 10 # 最大下载10张图片
32
+ )
You can’t perform that action at this time.
0 commit comments