Skip to content

Commit 63b115f

Browse files
committed
UPDATED: scraplatostadora.py completed generating store items in store.md
1 parent 10636fd commit 63b115f

File tree

2 files changed

+66
-4
lines changed

2 files changed

+66
-4
lines changed

scraplatostadora.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,68 @@
1-
We scrape the images
1+
# We scrape the images
22

33
# https://www.latostadora.com/afaces
4+
5+
import requests
6+
from bs4 import BeautifulSoup
7+
8+
# Store pages to scrape; only the first entry is used.
playlists = ["https://www.latostadora.com/afaces"]

# Site root used to resolve the product links found on the page.
BASE_URL = "https://www.latostadora.com/"
# Markdown file that receives one linked thumbnail per product.
OUTPUT_FILE = "store.md"
# Seconds to wait for the store page before giving up.
REQUEST_TIMEOUT = 30

# Images whose direct parent is one of these tags are ignored.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    # there may be more elements you don't want, such as "style", etc.
]


def fetch_soup(url):
    """Download *url* and return its HTML parsed with ``html.parser``.

    Raises ``requests.HTTPError`` on a non-2xx response instead of
    silently parsing an error page.
    """
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return BeautifulSoup(res.content, 'html.parser')


def extract_image_urls(soup):
    """Return the ``data-original`` URL of every ``zoomable-images`` <img>.

    The attribute is read from the parsed tag rather than sliced out of
    the serialized HTML, so the result does not depend on attribute order
    and an empty page simply yields an empty list.
    """
    urls = []
    for img in soup.find_all('img', class_='zoomable-images'):
        if img.parent.name in blacklist:
            continue
        source = img.get('data-original')
        if source:
            urls.append(source)
    return urls


def absolute_url(href):
    """Resolve a product *href* against the site root.

    Already-absolute links are returned unchanged.
    """
    from urllib.parse import urljoin

    return urljoin(BASE_URL, href)


def extract_product_urls(soup):
    """Return the absolute link of every ``m-product-card`` <div>.

    Cards without an anchor or without an ``href`` are skipped instead of
    raising.
    """
    urls = []
    for card in soup.find_all('div', attrs={'class': 'm-product-card'}):
        anchor = card.a
        href = anchor.get('href') if anchor is not None else None
        if href:
            urls.append(absolute_url(href))
    return urls


def markdown_lines(image_urls, product_urls):
    """Build one markdown linked-image line per (image, product) pair.

    Pairing is positional; surplus entries on either side are dropped.
    """
    return [
        "[![Alt text](" + image + ")](" + link + ")\n"
        for image, link in zip(image_urls, product_urls)
    ]


def main():
    """Scrape the first store page and write its products to ``store.md``."""
    import sys

    soup = fetch_soup(playlists[0])
    image_urls = extract_image_urls(soup)
    product_urls = extract_product_urls(soup)

    # The two lists are matched by position, so a count mismatch means
    # some thumbnails may be linked to the wrong product.
    if len(image_urls) != len(product_urls):
        print(
            "warning: found {} images but {} product links".format(
                len(image_urls), len(product_urls)
            ),
            file=sys.stderr,
        )

    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.writelines(markdown_lines(image_urls, product_urls))


if __name__ == "__main__":
    main()

store.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,2 @@
1-
21
[![Alt text](https://srv.latostadora.com/designall.dll/afaces_bag--i:13562392855110135623211;c:9285511;s:B_D1;k:b5d0a30634e31556e2a8771b49fe9e74.jpg)](https://www.latostadora.com/afaces/afaces_bag/9285511)
3-
42
[![Alt text](https://srv.latostadora.com/designall.dll/afaces--i:135623929285901356231;c:9292859;s:K_A1;k:a008b19e1cdc06490193758fd1b01d97.jpg)](https://www.latostadora.com/afaces/afaces/9292859)
5-

0 commit comments

Comments
 (0)