Update 1.py

sfvsfv · web-flow · commit 331b2180f514 · 2025-02-18T18:54:16.000+08:00
移除过滤器，检查是否能正常抓取。
降低线程数（解析线程 4→3，下载线程 8→4），避免因为线程数设置不当导致资源分配问题。
调试模式：补充代码注释，以便更容易调试问题（日志输出尚未包含在本次改动中）。
diff --git a/第七章/7.4 批量内容爬取/1.py b/第七章/7.4 批量内容爬取/1.py
@@ -1,30 +1,32 @@
 # coding=gbk
 """
-作者：川川
-@时间  : 2023/12/9 0:44
+作者：川川
+@时间  : 2025/02/18 0:44
 """
 # pip install beautifulsoup4  icrawler
+import os
 from icrawler.builtin import BingImageCrawler
 
-# 需要爬取的关键字
-list_word = ['������','���۳�']
+# 需要爬取的关键字
+list_word = ['比亚迪汽车']
 
-filters = dict(
-    size='large',
-    color='color',
-    license='commercial,modify',
-    date='pastyear'
-)
+# 确保路径存在
+if not os.path.exists('photo'):
+    os.makedirs('photo')
 
 for word in list_word:
-    # bing爬虫
-    # 保存路径
-    bing_storage = {'root_dir': 'photo\\' + word}  # photo为主文件夹
-    # ���ϵ��������ǽ������߳����������߳����������������õı���·��
-    bing_crawler = BingImageCrawler(parser_threads=4,
-                                    downloader_threads=8,
-                                    storage=bing_storage)
-    # 开始爬虫，关键字+图片数量
-    bing_crawler.crawl(keyword=word,
-                       filters=filters,
-                       max_num=10)
+    # 保存路径
+    bing_storage = {'root_dir': os.path.join('photo', word)}  # 使用os.path.join处理路径
+
+    # 创建BingImageCrawler实例
+    bing_crawler = BingImageCrawler(
+        parser_threads=3,  # 使用3个解析线程
+        downloader_threads=4,  # 使用4个下载线程
+        storage=bing_storage
+    )
+
+    # 开始爬虫，关键字+图片数量
+    bing_crawler.crawl(
+        keyword=word,  # 关键字
+        max_num=10  # 最大下载10张图片
+    )