From de9e617c8bc9a4b23c1f86345853e84181311000 Mon Sep 17 00:00:00 2001 From: Rarescode Date: Thu, 25 Jan 2024 22:50:52 +0200 Subject: [PATCH] Fixed bugs on some scrapers regarding job type and cities --- sites/affidearomania.py | 4 ++-- sites/ensemblesoftware.py | 3 +-- sites/htecgroup.py | 2 ++ sites/intelligentbee.py | 7 ++++++- sites/interbrandsorbico.py | 2 +- sites/iuliuscompany.py | 2 +- sites/jobshop.py | 10 ++++++++-- sites/kinetic.py | 2 +- sites/script_runner.py | 1 + sites/sonrisatechnologies.py | 2 +- 10 files changed, 24 insertions(+), 11 deletions(-) diff --git a/sites/affidearomania.py b/sites/affidearomania.py index 2bd235ecc0..8fbccc3083 100644 --- a/sites/affidearomania.py +++ b/sites/affidearomania.py @@ -29,8 +29,8 @@ def scrape_jobs(self): Scrape job data from affidearomania website. """ - job_elements = self.get_jobs_elements('css_', 'h5 > a') - job_cities_elements = self.get_jobs_elements('class_', 'job-location') + job_elements = self.get_jobs_elements('class_', 'sc-6exb5d-1 cTfiAE') + job_cities_elements = self.get_jobs_elements('class_', 'custom-css-style-job-location-city') self.job_titles = self.get_jobs_details_text(job_elements) self.job_cities = self.get_jobs_details_text(job_cities_elements) diff --git a/sites/ensemblesoftware.py b/sites/ensemblesoftware.py index 51310469cf..a715c56357 100644 --- a/sites/ensemblesoftware.py +++ b/sites/ensemblesoftware.py @@ -36,7 +36,6 @@ def scrape_jobs(self): self.format_data() - def sent_to_future(self): self.send_to_viitor() @@ -51,7 +50,7 @@ def format_data(self): """ for job_title, job_url in zip(self.job_titles, self.job_urls): job_url = f"https://www.ensemblesoftware.ro/{job_url}" - self.create_jobs_dict(job_title, job_url, "România", ['Baia Mare', 'Brasov', 'Cluj']) + self.create_jobs_dict(job_title, job_url, "România", ['Baia Mare', 'Brasov', 'Cluj-Napoca'], 'remote') if __name__ == "__main__": ensemblesoftware = ensemblesoftwareScraper() diff --git a/sites/htecgroup.py b/sites/htecgroup.py index bc875b389b..1228d37763 100644 --- a/sites/htecgroup.py +++ b/sites/htecgroup.py @@ -64,6 +64,8 @@ def format_data(self): Iterate over all job details and send to the create jobs dictionary. """ for job_title, job_url in zip(self.job_titles, self.job_urls): + if "Kragujevac" in job_title: + continue self.create_jobs_dict(job_title, job_url, "România", "Bucuresti", "remote") diff --git a/sites/intelligentbee.py b/sites/intelligentbee.py index 7d9e831d40..f2fe3773c2 100644 --- a/sites/intelligentbee.py +++ b/sites/intelligentbee.py @@ -49,8 +49,13 @@ def format_data(self): Iterate over all job details and send to the create jobs dictionary. """ for job_title, job_url in zip(self.job_titles, self.job_urls): + remote = "On-site" + if "remote" in job_title: + remote = "remote" + elif "hybrid" in job_title: + remote = "hybrid" job_url = "https://intelligentbee.com" + job_url - self.create_jobs_dict(job_title, job_url, "România", "Iasi") + self.create_jobs_dict(job_title, job_url, "România", "Iasi", remote) if __name__ == "__main__": intelligentbee = intelligentbeeScraper() diff --git a/sites/interbrandsorbico.py b/sites/interbrandsorbico.py index cfbc390b61..397f504a80 100644 --- a/sites/interbrandsorbico.py +++ b/sites/interbrandsorbico.py @@ -29,7 +29,7 @@ def scrape_jobs(self): Scrape job data from InterbrandsOrbico website. """ - job_titles_elements = self.get_jobs_elements('class_', "sc-6exb5d-1 dLpFZe") + job_titles_elements = self.get_jobs_elements('class_', "sc-6exb5d-1 jnZALp") locations = self.get_jobs_elements('class_', "custom-css-style-job-location") job_urls = self.get_jobs_elements('class_', "sc-s03za1-0 iCILJS") diff --git a/sites/iuliuscompany.py b/sites/iuliuscompany.py index d44d7ac7cf..189e8818f7 100644 --- a/sites/iuliuscompany.py +++ b/sites/iuliuscompany.py @@ -52,7 +52,7 @@ def format_data(self): """ for job_title, job_url, job_city in zip(self.job_titles, self.job_urls, self.job_cities): job_url = self.url + job_url - self.create_jobs_dict(job_title, job_url, "România", job_city.replace("LOCAȚIE: ", "")) + self.create_jobs_dict(job_title, job_url, "România", job_city.replace("LOCAȚIE: ", "").replace("Cluj", "Cluj-Napoca")) if __name__ == "__main__": iuliuscompany = iuliuscompanyScraper() diff --git a/sites/jobshop.py b/sites/jobshop.py index 630bc2b632..4ad6317ca1 100644 --- a/sites/jobshop.py +++ b/sites/jobshop.py @@ -52,9 +52,15 @@ def format_data(self): Iterate over all job details and send to the create jobs dictionary. """ for job_title, job_city, job_url in zip(self.job_titles, self.job_cities, self.job_urls): - job_city = job_city.split("•")[1][1:] + if "Remote" in job_city: + remote = "remote" + job_city = "Iasi" + else: + remote = "On-site" + job_city = job_city.replace("ș", "s").split("•")[1][1:-1] + job_url = f"https://www.jobshop.bestis.ro{job_url}" - self.create_jobs_dict(job_title, job_url, "România", job_city) + self.create_jobs_dict(job_title, job_url, "România", job_city, remote) if __name__ == "__main__": jobshop = jobshopScraper() diff --git a/sites/kinetic.py b/sites/kinetic.py index 18c9574505..214db49bf4 100644 --- a/sites/kinetic.py +++ b/sites/kinetic.py @@ -50,7 +50,7 @@ def format_data(self): """ for job_title in self.job_titles: job_url = self.url + "#" + str(self.job_count) - self.create_jobs_dict(job_title, job_url, "România", ["Iasi", "Bucuresti", "Cluj"]) + self.create_jobs_dict(job_title, job_url, "România", ["Iasi", "Bucuresti", "Cluj-Napoca"]) self.job_count += 1 if __name__ == "__main__": diff --git a/sites/script_runner.py b/sites/script_runner.py index f71773322b..ce73328682 100644 --- a/sites/script_runner.py +++ b/sites/script_runner.py @@ -36,6 +36,7 @@ def run(self): 'typingdna.py', # This does not have a career's page now 'netrom.py', # This does not have a career's page now 'kaizengaming.py', # Website changed + 'sonrisatechnologies.py', # Deactivated as no jobs in romania are available 'brillio.py', # there are no jobs available 'aeroportoradea.py', # Removed as they changed the page layout, to be fixed 'mennekes.py', # This does not have jobs at this moment diff --git a/sites/sonrisatechnologies.py b/sites/sonrisatechnologies.py index daf5084040..15cac1b43d 100644 --- a/sites/sonrisatechnologies.py +++ b/sites/sonrisatechnologies.py @@ -59,7 +59,7 @@ def format_data(self): sonrisatechnologies = sonrisatechnologiesScraper() sonrisatechnologies.get_response() sonrisatechnologies.scrape_jobs() - sonrisatechnologies.sent_to_future() + # sonrisatechnologies.sent_to_future()