-
Notifications
You must be signed in to change notification settings - Fork 12
/
mz.py
66 lines (58 loc) · 2.34 KB
/
mz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
#-*- coding: utf-8 -*-
__author__ = 'ZYSzys'
import requests
from bs4 import BeautifulSoup
import os
class Mz:
    """Scrape the albums linked from the mzitu.com front page and save their images.

    The three public methods are meant to be called in order:
    ``Domainhtml()`` collects album links, ``Getmaxpage()`` reads each album's
    title and page count, and ``Downloadimg()`` fetches one image per album
    page into a folder named after the album.

    ``all_a``, ``all_a_title`` and ``all_a_max`` are parallel lists: index
    ``i`` of each one describes the same album (URL, title, page count).
    """

    # Path separators and characters rejected in file names on common file
    # systems; each is replaced before a title is used as a folder name.
    _UNSAFE_CHARS = '\\/:*?"<>|'

    def __init__(self):
        """Create the HTTP session and enter the ``Mzitu`` output directory.

        Side effect: ``Mzitu`` is created under the current working directory
        (an existing directory is reused) and becomes the process's working
        directory. Its path is stored in ``self.initpwd``.

        Raises:
            OSError: if ``Mzitu`` cannot be created or entered.
        """
        self.url = 'http://www.mzitu.com'
        # Sent with every request this class makes.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
            'Referer': 'http://www.mzitu.com/'
        }
        self.req = requests.session()
        self.all_a = []
        self.all_a_title = []
        self.all_a_max = []
        output_dir = os.path.join(os.getcwd(), 'Mzitu')
        # A plain os.makedirs() would raise on every run after the first.
        self._ensure_dir(output_dir)
        os.chdir(output_dir)
        self.initpwd = os.getcwd()

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and parents); an already existing directory is fine."""
        try:
            os.makedirs(path)
        except OSError:
            # Re-raise real failures (permissions, a file in the way, ...).
            if not os.path.isdir(path):
                raise

    @staticmethod
    def _safe_dirname(title):
        """Return ``title`` reduced to a single, usable directory name."""
        cleaned = ''.join(
            '_' if ch in Mz._UNSAFE_CHARS else ch for ch in title
        )
        # Trailing dots/spaces are also stripped so '.' and '..' cannot
        # escape the output directory.
        cleaned = cleaned.lstrip().rstrip(' .')
        return cleaned or 'untitled'

    def Domainhtml(self):
        """Fetch the front page and append every album link to ``all_a``.

        Raises:
            requests.HTTPError: if the front page answers with an error status.
            AttributeError, TypeError: if the expected ``div.postlist`` /
                ``li > a`` markup is not present in the response.
        """
        page = self.req.get(self.url, headers=self.headers)
        page.raise_for_status()
        postlist = BeautifulSoup(page.text, 'lxml').find('div', class_='postlist')
        for item in postlist.find_all('li'):
            self.all_a.append(item.find('a')['href'])

    def Getmaxpage(self):
        """Record the title and page count of every album in ``all_a``.

        Appends one entry per album to ``all_a_title`` and ``all_a_max``.

        Raises:
            requests.HTTPError: if an album page answers with an error status.
            AttributeError, IndexError, ValueError: if the expected
                ``h2.main-title`` / ``div.pagenavi`` markup is not present or
                the page count is not an integer.
        """
        for album_url in self.all_a:
            page = self.req.get(album_url, headers=self.headers)
            page.raise_for_status()
            # Parse once and reuse the tree for both lookups.
            soup = BeautifulSoup(page.text, 'lxml')
            heading = soup.find('h2', class_='main-title')
            # .string is None when the heading has nested tags; fall back to
            # its concatenated text so the title is always a string.
            title = heading.string or heading.get_text()
            spans = soup.find('div', class_='pagenavi').find_all('span')
            # The count is read from the second-to-last <span>; presumably the
            # last one is the "next page" control - confirm against live markup.
            last_page = int(spans[-2].string)
            self.all_a_title.append(title)
            self.all_a_max.append(last_page)

    def Downloadimg(self):
        """Download every page image of every album into ``self.initpwd``.

        Each album gets its own folder (named after its sanitized title) and
        its images are written as ``1.jpg`` .. ``<page count>.jpg``. Existing
        folders are reused and existing files are overwritten, so the method
        can be re-run. The process's working directory is not changed.

        Raises:
            IndexError: if ``all_a_title`` / ``all_a_max`` are shorter than
                ``all_a`` (``Getmaxpage()`` was not run first).
            requests.HTTPError: if a page or image answers with an error status.
            AttributeError, TypeError: if a page lacks the expected
                ``div.main-image > img`` markup.
        """
        print('total: %s' % len(self.all_a))
        for index, album_url in enumerate(self.all_a):
            print('Downloading %s now...' % (index + 1))
            album_dir = os.path.join(
                self.initpwd, self._safe_dirname(self.all_a_title[index])
            )
            self._ensure_dir(album_dir)
            # Avoid '//' in page URLs when the album link ends with a slash.
            base_url = album_url.rstrip('/')
            for page_no in range(1, self.all_a_max[index] + 1):
                page = self.req.get(
                    '%s/%d' % (base_url, page_no), headers=self.headers
                )
                page.raise_for_status()
                holder = BeautifulSoup(page.text, 'lxml').find('div', class_='main-image')
                image_url = holder.find('img')['src']
                image = self.req.get(image_url, headers=self.headers)
                # Do not save an HTTP error body as if it were an image.
                image.raise_for_status()
                # 'wb', not 'ab': appending on a re-run would corrupt the file.
                target = os.path.join(album_dir, '%d.jpg' % page_no)
                with open(target, 'wb') as out:
                    out.write(image.content)
        print('Dowmload completed!')
if __name__ == '__main__':
    # Script entry point: run the whole pipeline in its required order -
    # collect album links, read titles/page counts, then download the images.
    crawler = Mz()
    for step in (crawler.Domainhtml, crawler.Getmaxpage, crawler.Downloadimg):
        step()