-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpicture_parser.py
30 lines (27 loc) · 1.08 KB
/
picture_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import sys
sys.path.append('../../')
import base.base_parser as base_parser
import init
import utils.tools as tools
from utils.log import log
def _safe_file_name(name):
    """Return *name* with characters that are unsafe in a file name replaced by '_'."""
    # The name comes from scraped page markup; characters such as '/' or ':'
    # would otherwise act as path separators or be rejected on Windows.
    unsafe = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    return name.translate(unsafe).strip()


def spider_picture(p_url, end, page_count=10, local_path='d:'):
    """Download the images referenced on a run of numbered list pages.

    Each page URL is built as ``p_url + <page number> + end`` for page
    numbers ``1..page_count``. For every page, image URLs and their titles
    are extracted with two regular expressions and each image is handed to
    ``tools.download_file`` under ``/picture/<title>.jpg``.

    Args:
        p_url: URL prefix that precedes the page number.
        end: URL suffix that follows the page number.
        page_count: Number of pages to visit, starting at page 1
            (default 10, the value previously hard-coded).
        local_path: Root passed to ``tools.download_file`` as the local
            storage location (default ``'d:'``, as before).

    Returns:
        None.
    """
    # Both patterns are loop-invariant, so build them once.
    regex = 'title=".*?".*?src = "(.*?)".*?<div class="wrapper-listTitle">'
    regex_name = 'rseat="dsjp7".*?title="(.*?)".*?src = ".*?"'

    for page in range(1, page_count + 1):
        url = p_url + str(page) + end
        html, _ = tools.get_html_by_requests(url)
        if not html:
            # NOTE(review): assumes a failed fetch yields an empty/None html;
            # skip the page instead of passing it on to the regex helper.
            continue

        img_urls = tools.get_info(html, regex)
        names = tools.get_info(html, regex_name)

        # The two patterns match independently and may return different
        # counts; zip stops at the shorter list instead of raising IndexError.
        for img_url, name in zip(img_urls, names):
            name = _safe_file_name(tools.del_html_tag(name))
            sto_path = '/picture/' + name + '.jpg'
            tools.download_file(img_url, local_path, sto_path)
if __name__ == '__main__':
    # Script entry point: crawl each list-page URL prefix in turn, using the
    # same page suffix for all of them.
    page_suffix = '-1-iqiyi--.html'
    url_prefixes = (
        'http://list.iqiyi.com/www/2/-------------11-',
        'http://list.iqiyi.com/www/1/-------------11-',
    )
    for url_prefix in url_prefixes:
        spider_picture(url_prefix, page_suffix)