-
Notifications
You must be signed in to change notification settings - Fork 0
/
globaljob.py
76 lines (67 loc) · 3.02 KB
/
globaljob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from bs4 import BeautifulSoup
import requests
import json
def globaljob():
    """Scrape IT job listings from globaljob.com.np into a local JSON store.

    Loads previously saved jobs from ``globaljob.json``, fetches the category
    index page, visits every listing link not already stored, extracts the job
    fields, and rewrites the JSON file with the merged result.

    Side effects: network requests, console output, and overwriting the JSON
    file. Returns None.
    """
    json_path = 'C:/Projects/itjobseeker/public/jsondata/globaljob.json'
    count = 0

    # Load previously stored jobs. Start fresh when the file is missing,
    # unreadable, malformed, or an entry lacks 'Page_URL'. (The original
    # bare `except:` never ran for a missing file because open() sat
    # outside the try block — that case crashed the script.)
    try:
        with open(json_path, 'r') as readfile:
            data = json.load(readfile)
        stored_links = [entry['Page_URL'] for entry in data]
    except (OSError, json.JSONDecodeError, KeyError, TypeError):
        data = []
        stored_links = []

    # Collect listing URLs from the category index page. A timeout keeps a
    # dead server from hanging the scraper indefinitely.
    index_html = requests.get(
        'https://globaljob.com.np/category/it/it/649', timeout=30
    ).text
    index_soup = BeautifulSoup(index_html, 'lxml')
    links = [card.a['href'] for card in index_soup('div', class_='col-md-6')]

    # Set gives O(1) membership tests; stored_links stays the source of
    # truth appended in order, matching the original behavior.
    seen = set(stored_links)
    for link in links:
        if link in seen:
            print("Already in the database")
            continue
        seen.add(link)
        stored_links.append(link)
        count += 1
        print("[" + str(count) + "]", "New job found !", link)

        detail_html = requests.get(link, timeout=30).text
        detail_soup = BeautifulSoup(detail_html, 'lxml')

        name = detail_soup.find('div', class_='headline').get_text(strip=True)
        details = detail_soup.find('div', class_='vacancies-details')

        # Hoist the repeated row lookup: the original re-ran
        # details.find_all('div', class_='row') once per field.
        rows = details.find_all('div', class_='row')
        level = rows[2].find_all('p')[2].get_text(strip=True)
        vacancy = rows[3].find_all('p')[2].get_text(strip=True)
        salary = rows[4].find_all('p')[2].get_text(strip=True)
        experience = rows[5].find_all('p')[2].get_text(strip=True)
        time = rows[6].find_all('p')[2].get_text(strip=True)
        # Deadline usually lives in row 8; some pages only have row 7,
        # so fall back to it. Narrowed from a bare `except:` to the
        # IndexError that a missing row/paragraph actually raises.
        deadline = rows[7].find_all('p')[2].get_text(strip=True)
        try:
            deadline = rows[8].find_all('p')[2].get_text(strip=True)
        except IndexError:
            pass

        education = detail_soup.find('div', class_='elements').li.get_text(strip=True)
        desct = details.get_text(strip=True)

        about = detail_soup.find('section', class_='about-company')
        # Labels look like "Company: Acme" — keep the part after the colon.
        company = about.p.get_text(strip=True).split(':')[1]
        address = about.find_all('p')[2].get_text(strip=True).split(':')[1]
        print(company, address)

        data.append({
            'name': name,
            'company': company,
            'level': level,
            'vacancy': vacancy,
            'address': address,
            'salary': salary,
            'deadline': deadline,
            'time': time,
            'education': education,
            'desct': desct,
            'experience': experience,
            'Page_URL': link,
            'websitename': 'globaljob.com'
        })

    # Rewrite the store with old + newly scraped jobs.
    with open(json_path, 'w') as outfile:
        json.dump(data, outfile)
    print("globaljob done")