-
Notifications
You must be signed in to change notification settings - Fork 0
/
kathmandujob.py
83 lines (79 loc) · 3.63 KB
/
kathmandujob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from bs4 import BeautifulSoup
import requests
import json
def kathmandujob():
links = []
count = 0
with open('C:/Projects/itjobseeker/public/jsondata/kathmandujob.json', 'r') as readfile:
try:
data = json.load(readfile)
stored_links = []
for single_data in data:
stored_links.append(single_data['Page_URL'])
except:
data = []
stored_links = []
nums = [1, 14, 28, 42, 56, 70]
# 84, 98, 112, 126, 140]
# [ 154, 168, 182, 196, 210, 224, 238, 252, 266, 280, 294, 308, 322, 336]
# [ 350, 364, 378, 392, 406, 420, 434, 448, 462, 476]
# nums = [ 490, 504, 518, 532, 546, 560, 574, 588, 602, 616, 630,
# 644, 658, 672, 686, 700, 714, 728, 742, 756, 770, 784, 798,
# 812, 826, 840, 854, 868, 882, 896, 910, 924, 938]
for num in nums:
num = str(num)
hyper_source = requests.get('https://kathmandujobs.com/jobs/' + num).text
soup = BeautifulSoup(hyper_source, 'lxml')
jobs = soup.find('div', class_='post-list')
titles = jobs.find_all('div', class_='titles')
for title in titles:
link = title.find_all('a')[0]
link = link['href']
links.append(link)
for link in links:
if link not in stored_links:
print('New job found', link)
stored_links.append(link)
count += 1
source = requests.get(link).text
soup = BeautifulSoup(source, 'lxml')
job = soup.find('div', class_='details')
name = job.find('div', class_='titles').h4.get_text(strip=True)
company = job.find('div', class_='titles').h6.get_text(strip=True)
experience = job.find_all('p', class_='address')[0].get_text(strip=True)
experience = experience.split(':', 1)[1]
level = job.find_all('p', class_='address')[1].get_text(strip=True)
level = level.split(':', 1)[1]
vacancy = job.find_all('p', class_='address')[2].get_text(strip=True)
vacancy = vacancy.split(':', 1)[1]
time = job.find_all('p', class_='address')[3].get_text(strip=True)
time = time.split(':', 1)[1]
salary = job.find_all('p', class_='address')[4].get_text(strip=True)
salary = salary.split(':', 1)[1]
education = job.find_all('p', class_='address')[5].get_text(strip=True)
education = education.split(':', 1)[1]
address = job.find_all('p', class_='address')[6].get_text(strip=True)
address = address.split(':', 1)[1]
deadline = job.find_all('p', class_='address')[7].get_text(strip=True)
deadline = deadline.split(':', 1)[1]
desct = job.get_text(strip=True)
data.append({
'name': name,
'company': company,
'vacancy': vacancy,
'time': time,
'address': address,
'deadline': deadline,
'education': education,
'experience': experience,
'level': level,
'salary': salary,
'desct': desct,
'Page_URL': link,
'websitename': 'kathmandujobs.com'
})
else:
print("Alredy in the database")
with open('C:/Projects/itjobseeker/public/jsondata/kathmandujob.json', 'w') as outfile:
json.dump(data, outfile)
print("Kathmandu jobs done")