match_scraper.py
"""
Python3
parses XML from sportradar API and exports CSV of match facts
"""
import xml.etree.ElementTree as ET
import urllib.request
import csv
## Enter the output filename
output_file = ''
## Enter your API key
api_key = ''
## Enter the match URL ('...' stands for the match-specific path)
target_url = 'https://api.sportradar.us/soccer-t2/eu/matches/...?api_key=' + api_key
def main(file_name, match_url):
    ## Download XML
    with urllib.request.urlopen(match_url) as webpage:
        data = webpage.read()
    text = data.decode('utf-8')
    ## Parse XML
    parsed = ET.fromstring(text)
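
    # A sketch (not in the original) of guarding the download step; a bad API
    # key or match path makes urlopen raise urllib.error.HTTPError (note the
    # extra `import urllib.error`):
    #
    #   try:
    #       with urllib.request.urlopen(match_url) as webpage:
    #           text = webpage.read().decode('utf-8')
    #   except urllib.error.HTTPError as err:
    #       raise SystemExit('download failed: ' + str(err))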
    ## Find team names
    def team_names(root):
        """
        Takes the XML root as input and returns a dictionary mapping
        team ID to team name for the home and away sides.
        """
        homebranch = root[0][0].find('{http://feed.elasticstats.com/schema/soccer/sr/v2/matches-summary.xsd}home')
        homename = homebranch.attrib.get('alias')
        homeid = homebranch.attrib.get('id')
        awaybranch = root[0][0].find('{http://feed.elasticstats.com/schema/soccer/sr/v2/matches-summary.xsd}away')
        awayname = awaybranch.attrib.get('alias')
        awayid = awaybranch.attrib.get('id')
        teamnames = {homeid: homename, awayid: awayname}
        return teamnames
    names = team_names(parsed)
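
    # An equivalent namespace-aware lookup (a sketch, not in the original)
    # using ElementTree's `namespaces` argument instead of Clark notation:
    #
    #   nsmap = {'sr': 'http://feed.elasticstats.com/schema/soccer/sr/v2/matches-summary.xsd'}
    #   home = parsed[0][0].find('sr:home', nsmap)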
    ## Extract facts
    # Create the CSV header row.
    rowlist = [['time', 'facttype', 'team', 'clock', 'x', 'y']]
    # Add one row per match fact; parsed[0][0][9] is the hard-coded position
    # of the match-facts branch in this feed, so it will break if the feed
    # layout changes. Missing attributes are recorded as 'NA'.
    for fact in parsed[0][0][9]:
        time = fact.get('time')
        facttype = fact.get('type')
        clock = fact.get('clock')
        team_id = fact.get('team_id')
        if team_id:
            factteam = names.get(team_id)
        else:
            factteam = 'NA'
        fact_x = fact.get('x')
        if fact_x:
            x = fact_x
        else:
            x = 'NA'
        fact_y = fact.get('y')
        if fact_y:
            y = fact_y
        else:
            y = 'NA'
        rowlist.append([time, facttype, factteam, clock, x, y])
    ## Write CSV
    with open(file_name, 'w', newline='') as csvfile:
        my_writer = csv.writer(csvfile, delimiter=',')
        for row in rowlist:
            my_writer.writerow(row)
    print(match_url[:50] + '...' + ' scraped and saved to ' + file_name)

if __name__ == '__main__':
    main(output_file, target_url)
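
# A minimal sketch (not part of the original script) of reading the exported
# CSV back for a quick sanity check; `preview_rows` is a hypothetical helper
# and is never called by the script itself.
def preview_rows(file_name, limit=5):
    """Print the header and the first few fact rows from the exported CSV."""
    with open(file_name, newline='') as csvfile:
        for i, row in enumerate(csv.reader(csvfile)):
            print(row)
            if i >= limit:
                break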