-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
46 lines (34 loc) · 1.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from bs4 import BeautifulSoup
import json
import requests
# Scrape Wikipedia's list of countries and their adjectival forms, print the
# resulting mapping, and save it as JSON.
URL = "https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations"
OUTPUT_PATH = "Countries And Adjectivals.json"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever


def _strip_footnote(text):
    """Return *text* cut at its first '[' and trimmed of surrounding whitespace.

    Text without a '[' is returned whole (apart from the trimming).
    """
    return text.partition("[")[0].strip()


def _cell_adjectivals(cell):
    """Return the adjectival form(s) found in one table cell.

    A cell containing ``<li>`` items yields a list holding the text of the
    first ``<a>`` of each item (items without a link contribute nothing).
    Any other cell yields its own text, cut at the first '[', as a string.
    """
    items = cell.find_all("li")
    if not items:
        return _strip_footnote(cell.text)
    forms = []
    for item in items:
        # Only the first link of each bullet is kept; further links in the
        # same bullet are ignored.
        link = item.find("a")
        if link is not None:
            forms.append(link.text)
    return forms


page = requests.get(URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
# For offline testing, parse a saved copy of the page instead:
#     with open("countries.html", encoding="utf-8") as file:
#         soup = BeautifulSoup(file.read(), "html.parser")

results = soup.find("tbody")
if results is None:
    raise RuntimeError("no <tbody> element found in the fetched page")
trs = results.find_all("tr")

# Maps country name -> adjectival form (str) or forms (list of str).
adjectivals = {}
for tr in trs[1:]:  # the first row is skipped (presumably the table header)
    tds = tr.find_all("td")
    if len(tds) < 2:
        # A row without both a country cell and an adjectival cell cannot be
        # mapped; skip it rather than crash on the index.
        continue
    adjectivals[_strip_footnote(tds[0].text)] = _cell_adjectivals(tds[1])

print(adjectivals)
# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding must be pinned instead of relying on the platform default.
with open(OUTPUT_PATH, "w", encoding="utf-8") as outfile:
    json.dump(adjectivals, outfile, ensure_ascii=False)