Skip to content

Commit

Permalink
Fixed Sii scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
lalalaurentiu committed Oct 12, 2023
1 parent afe28da commit 53cb067
Showing 1 changed file with 92 additions and 37 deletions.
129 changes: 92 additions & 37 deletions sites/sii.js
Original file line number Diff line number Diff line change
@@ -1,42 +1,97 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const uuid = require("uuid");

const url = "https://www.siiromania.ro/jobopportunities/#section";

const company = { company: "SII" };
let finalJobs = [];
const apiKey = process.env.KNOX
const s = new scraper.Scraper(url);

s.soup
.then((soup) => {
const jobs = soup.findAll('td', {class: 'title'});
jobs.forEach((job) => {
const id = uuid.v4();
const job_title = job.find('a').text.trim();
const job_link = job.find('a').attrs.href;
finalJobs.push({
id: id,
job_title: job_title,
job_link: job_link,
country: "Romania",
city:"Bucuresti",
company: company.company,
const { Scraper, postApiPeViitor } = require("peviitor_jsscraper");
const { getTownAndCounty } = require("../getTownAndCounty.js");

/**
 * Build a single job record in the shape this scraper collects.
 *
 * @param {string} job_title - Title of the position.
 * @param {string} job_link - URL of the job posting.
 * @param {string} [city="Bucuresti"] - Town name; the default applies when `undefined` is passed.
 * @param {string} [county="Bucuresti"] - County name; the default applies when `undefined` is passed.
 * @param {*} remote - Work-mode information, stored as given.
 * @returns {{job_title: string, job_link: string, country: string, city: string, county: string, remote: *}}
 */
const generateJob = (
  job_title,
  job_link,
  city = "Bucuresti",
  county = "Bucuresti",
  remote
) => {
  // Key order is kept stable so serialized output does not change.
  const record = {
    job_title: job_title,
    job_link: job_link,
    country: "Romania",
    city: city,
    county: county,
    remote: remote,
  };
  return record;
};

/**
 * Scrape the SII Romania job listing, page by page, and build one job record
 * per table row.
 *
 * For each `<tr>` the first cell supplies the title and link, and the third
 * cell supplies a "-"-separated location string. The last segment of that
 * string is looked up with `getTownAndCounty`; segments containing "Remote"
 * or "Hybrid" become the job's work modes.
 *
 * Pagination stops at the first page that yields no rows (or has no `<tbody>`).
 *
 * @returns {Promise<Array<object>>} Job records produced by `generateJob`.
 */
const getJobs = async () => {
  let url = "https://www.siiromania.ro/jobopportunities/#section";
  const jobs = [];
  let pages = 1;
  const scraper = new Scraper(url);

  let res = await scraper.get_soup("HTML");
  let items = res.find("tbody").findAll("tr");

  while (items.length > 0) {
    items.forEach((row) => {
      const cells = row.findAll("td");
      const job_title = cells[0].text.trim();
      const job_link = cells[0].find("a").attrs.href;

      // Trim every segment up front: the text around "-" usually carries
      // whitespace, which previously made the "Bucharest" comparison miss.
      // NOTE(review): splitting on "-" also splits hyphenated town names —
      // confirm the site's location format.
      const segments = cells[2].text.split("-").map((part) => part.trim());

      let cityName = segments[segments.length - 1];
      if (cityName === "Bucharest") {
        cityName = "Bucuresti";
      }

      // Single lookup per row; `foudedTown` is the property name the helper
      // is read with throughout this file.
      const place = getTownAndCounty(cityName);
      const city = place.foudedTown ? place.foudedTown : undefined;
      const county = place.foudedTown ? place.county : undefined;

      const jobtypes = [];
      segments.forEach((segment) => {
        if (segment.includes("Remote")) {
          jobtypes.push("Remote");
        } else if (segment.includes("Hybrid")) {
          jobtypes.push("Hybrid");
        }
      });

      // Passing `undefined` lets generateJob apply its own city/county defaults.
      jobs.push(generateJob(job_title, job_link, city, county, jobtypes));
    });

    pages++;
    url = `https://www.siiromania.ro/jobopportunities/page/${pages}/#section`;
    scraper.url = url;
    res = await scraper.get_soup("HTML");
    try {
      items = res.find("tbody").findAll("tr");
    } catch (e) {
      // Deliberate best-effort: a page without a job table ends pagination.
      items = [];
    }
  }

  return jobs;
};

/**
 * Assemble the company metadata passed along with the scraped jobs.
 *
 * @returns {{company: string, logo: string, apikey: (string|undefined)}}
 *   Company name, logo URL, and the API key read from the `APIKEY`
 *   environment variable at call time.
 */
const getParams = () => ({
  company: "SII",
  logo: "https://www.siiromania.ro/wp-content/themes/corporate-sii-romania/img/logo.png",
  apikey: process.env.APIKEY,
});

/**
 * Entry point: scrape the jobs, then hand them to `postApiPeViitor` together
 * with the company parameters.
 *
 * The call to `postApiPeViitor` is not awaited, so the returned promise
 * resolves (with no value) as soon as that call has been issued.
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  // Arguments evaluate left to right: jobs are fetched before params are built.
  postApiPeViitor(await getJobs(), getParams());
};

let logo =
"https://www.siiromania.ro/wp-content/themes/corporate-sii-romania/img/logo.png";
// Start the scraper only when this file is executed directly with Node;
// when it is loaded via require() (e.g. by tests) nothing runs on import.
if (require.main === module) {
run();
}

let postLogo = new scraper.ApiScraper(
"https://api.peviitor.ro/v1/logo/add/"
);
postLogo.headers.headers["Content-Type"] = "application/json";
postLogo.post(JSON.stringify([{ id: company.company, logo: logo }]));
});
module.exports = { run, getJobs, getParams }; // this is needed for our unit test job

0 comments on commit 53cb067

Please sign in to comment.