-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathImage.py
61 lines (51 loc) · 1.34 KB
/
Image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# -*- coding: utf-8 -*-
import os
import shutil
import time

import cv2
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Re-download player pictures that OpenCV cannot decode.
#
# Steps:
#   1. Try to read every file in ``base`` with ``cv2.imread``; collect the
#      names of the files that fail to decode.
#   2. Look up the page path of each such player in PlayerNames.csv.
#   3. Fetch the player page, take the first <img> tag and save the image
#      it points to over the broken picture.

# Directory that holds one picture per player.
base = '/home/soumitra/Webscraping-Fifa-Ratings/Pictures/'
a = os.listdir(base)
incorrect = []
df = pd.read_csv("PlayerNames.csv")
print("Read complete")
url = "https://www.fifaindex.com"
proxies = {
    'http': 'http://10.4.22.5:3128',
    'https': 'https://10.4.22.5:3128',
}

# Network tuning: bound every request so a dead proxy or a permanently bad
# URL cannot hang or spin the script forever.
MAX_ATTEMPTS = 5        # tries per URL before giving up
REQUEST_TIMEOUT = 30    # seconds, passed to requests.get
RETRY_DELAY = 2         # seconds to wait between failed attempts


def _fetch(target, stream=False):
    """Return a successful response for *target*, or None after all retries.

    The request goes through ``proxies``. Connection errors and non-2xx
    status codes are printed and retried up to ``MAX_ATTEMPTS`` times with
    ``RETRY_DELAY`` seconds between attempts.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            reply = requests.get(target, stream=stream, proxies=proxies,
                                 timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print(e)
        else:
            if reply.ok:
                return reply
            print("HTTP " + str(reply.status_code) + " for " + target)
            # Release the connection of the rejected response before retrying.
            reply.close()
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_DELAY)
    return None


# Step 1: a file that cv2 cannot decode yields None from imread.
for filename in a:
    if cv2.imread(os.path.join(base, filename)) is None:
        # splitext strips only the extension, so names that contain dots
        # (e.g. "L. Messi.png") keep their full player name.
        incorrect.append(os.path.splitext(filename)[0])

# Step 2: one Series of page paths per broken picture (may be empty when the
# name is not present in the CSV).
row = []
for element in incorrect:
    row.append(df[df['Name'] == element]['url'])
    print(element)
print(len(incorrect))

# Step 3: fetch each player page and save its first image.
for name, matches in zip(incorrect, row):
    for parts in matches:
        url_temp = url + parts
        print("Getting page " + url_temp)
        page = _fetch(url_temp)
        if page is None:
            print("Giving up on " + url_temp)
            continue

        soup = BeautifulSoup(page.content, 'lxml')
        # NOTE(review): takes the first <img> on the page; presumably that is
        # the wanted picture - confirm against the site's markup.
        Nat = soup.find('img')
        src = Nat.get('src') if Nat is not None else None
        if not src:
            print("No image found on " + url_temp)
            continue

        response = _fetch(url + src, stream=True)
        if response is None:
            print("Giving up on " + url + src)
            continue

        try:
            # NOTE(review): this path is relative to the working directory,
            # while the files were read from ``base`` - confirm both refer to
            # the same folder.
            with open('Pictures/' + name + '.png', 'wb') as out_file:
                # iter_content streams the body in chunks and applies any
                # content decoding, unlike reading response.raw directly.
                for chunk in response.iter_content(chunk_size=8192):
                    out_file.write(chunk)
        finally:
            response.close()