Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor ddroidd, decorfloor, delonghi, deutschebank and drmax scrapers #350

Merged
merged 2 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 57 additions & 38 deletions forRefactor/ddroidd.js
Original file line number Diff line number Diff line change
@@ -1,50 +1,69 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const url =
"https://api.storyblok.com/v2/cdn/stories/?version=published&starts_with=vacancies%2F&&&excluding_ids=-1&token=4pOFw3LnvRlerPVVh0AB1Qtt&cv=undefined";
const _counties = new Counties();

const company = { company: "DDroidd" };
let finalJobs = [];
const getJobs = async () => {
const url =
"https://api.storyblok.com/v2/cdn/stories/?version=published&starts_with=vacancies%2F&&&excluding_ids=-1&token=4pOFw3LnvRlerPVVh0AB1Qtt&cv=undefined";

const s = new scraper.ApiScraper(url);
const jobs = [];
const scraper = new Scraper(url);
const type = "JSON";
const res = await scraper.get_soup(type);
const json = res.stories;

s.get()
.then((response) => {
const jobs = response.stories;

jobs.forEach((job) => {
const job_title = job.name;
const job_link = "https://www.ddroidd.com/" + job.full_slug;
const remote = job.content.type.toLowerCase().includes("remote")
await Promise.all(
json.map(async (item) => {
const job_title = item.name;
const job_link = "https://www.ddroidd.com/" + item.full_slug;
const remote = item.content.type.toLowerCase().includes("remote")
? ["Remote"]
: [];
let city = "";
let county = "";

const obj = getTownAndCounty(
translate_city(job.content.location.toLowerCase())
let cities = [];
let counties = [];

const { city: c, county: co } = await _counties.getCounties(
translate_city(item.content.location)
);

if (obj.foudedTown && obj.county) {
city = obj.foudedTown;
county = obj.county;
if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}

finalJobs.push({
job_title: job_title,
job_link: job_link,
company: company.company,
city: city,
county: county,
country: "Romania",
remote: remote,
});
});
})
.then(() => {
console.log(JSON.stringify(finalJobs, null, 2));
scraper.postApiPeViitor(finalJobs, company);
});
const job = generateJob(
job_title,
job_link,
"Romania",
cities,
counties,
remote
);
jobs.push(job);
})
);
return jobs;
};

/**
 * Entry point for the DDroidd scraper: collects the jobs and posts them.
 *
 * Builds the request params from the company name and logo URL via
 * getParams, then hands the jobs returned by getJobs to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles after the post call has settled.
 */
const run = async () => {
  const company = "DDroidd";
  const logo = "https://www.ddroidd.com/img/header-logo.svg";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so run() does not resolve before the upload finishes, and so a
  // rejected post surfaces through run()'s own promise instead of floating.
  await postApiPeViitor(jobs, params);
};

// Start the scraper only when this file is executed directly (it is the
// process entry module), not when it is loaded through require().
if (require.main === module) {
run();
}

// Public surface of this module: the entry point, the job collector and the
// params builder re-exported from the scraper library.
module.exports = { run, getJobs, getParams }; // this is needed for our unit test job

129 changes: 69 additions & 60 deletions forRefactor/decorfloor.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const _counties = new Counties();

const getAditionalCity = async (url) => {
const s = new scraper.Scraper(url);
const soup = await s.get_soup();
const scraper = new Scraper(url);
const soup = await scraper.get_soup("HTML");

let location;

Expand All @@ -20,60 +26,63 @@ const getAditionalCity = async (url) => {
location = "Unknown";
}

const { foudedTown, county } = getTownAndCounty(
translate_city(location.trim().toLowerCase())
let cities = [];
let counties = [];

const { city: c, county: co } = await _counties.getCounties(
translate_city(location.trim())
);
return { foudedTown, county };

if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}

return { city: cities, county: counties }
};
const url = "https://decorfloor.ro/careers/";

const company = { company: "Decorfloor" };
let finalJobs = [];

const s = new scraper.Scraper(url);

s.soup
.then(async (soup) => {
const jobs = soup.findAll("div", { class: "vc_gitem-col" });
await Promise.all(
jobs.map(async (job) => {
const job_title = job.find("h4").text.trim();
const job_link = job.find("a").attrs.href;

const { foudedTown, county } = await getAditionalCity(job_link);

if (foudedTown && county) {
finalJobs.push({
job_title: job_title,
job_link: job_link,
city: foudedTown,
county: county,
country: "Romania",
company: company.company,
});
} else {
finalJobs.push({
job_title: job_title,
job_link: job_link,
city: ["Bucuresti", "Cluj-Napoca"],
county: ["Bucuresti", "Cluj"],
country: "Romania",
company: company.company,
});
}
})
);
})
.then(() => {
console.log(JSON.stringify(finalJobs, null, 2));

scraper.postApiPeViitor(finalJobs, company);

let logo = "https://decorfloor.ro/wp-content/uploads/2015/08/logo.png";

let postLogo = new scraper.ApiScraper(
"https://api.peviitor.ro/v1/logo/add/"
);
postLogo.headers.headers["Content-Type"] = "application/json";
postLogo.post(JSON.stringify([{ id: company.company, logo: logo }]));
});

/**
 * Scrapes the Decorfloor careers page and builds one job per listing.
 *
 * Loads https://decorfloor.ro/careers/ as HTML, takes every
 * `div.vc_gitem-col` element, reads the title from its `h4` and the link
 * from its `a` href, resolves the location through getAditionalCity(link)
 * and builds the job with generateJob (country is always "Romania").
 *
 * @returns {Promise<Array>} Jobs in the same order as the listing elements
 *   on the page.
 */
const getJobs = async () => {
  const url = "https://decorfloor.ro/careers/";

  const scraper = new Scraper(url);
  const soup = await scraper.get_soup("HTML");

  const jobsElements = soup.findAll("div", { class: "vc_gitem-col" });

  // Each mapper returns its job instead of pushing into a shared array:
  // Promise.all keeps the input order, so the output no longer depends on
  // which detail-page request happens to finish first.
  return Promise.all(
    jobsElements.map(async (elem) => {
      const job_title = elem.find("h4").text.trim();
      const job_link = elem.find("a").attrs.href;

      const { city, county } = await getAditionalCity(job_link);

      // Copy the cities as-is; counties are de-duplicated.
      const cities = city ? [...city] : [];
      const counties = city ? [...new Set(county)] : [];

      return generateJob(job_title, job_link, "Romania", cities, counties);
    })
  );
};

/**
 * Entry point for the Decorfloor scraper: collects the jobs and posts them.
 *
 * Builds the request params from the company name and logo URL via
 * getParams, then hands the jobs returned by getJobs to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles after the post call has settled.
 */
const run = async () => {
  const company = "Decorfloor";
  const logo = "https://decorfloor.ro/wp-content/uploads/2015/08/logo.png";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so run() does not resolve before the upload finishes, and so a
  // rejected post surfaces through run()'s own promise instead of floating.
  await postApiPeViitor(jobs, params);
};

// Start the scraper only when this file is executed directly (it is the
// process entry module), not when it is loaded through require().
if (require.main === module) {
run();
}

// Public surface of this module: the entry point, the job collector and the
// params builder re-exported from the scraper library.
module.exports = { run, getJobs, getParams }; // this is needed for our unit test job
110 changes: 61 additions & 49 deletions forRefactor/delonghi.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const Jssoup = require("jssoup").default;
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const obj = {
url: "https://www.delonghigroup.com/en/views/ajax?_wrapper_format=drupal_ajax",
params: {
const _counties = new Counties();

const getJobs = async () => {
const url =
"https://www.delonghigroup.com/en/views/ajax?_wrapper_format=drupal_ajax";

const scraper = new Scraper(url);
scraper.config.headers["Content-Type"] =
"application/x-www-form-urlencoded; charset=UTF-8";

const data = {
"MIME Type": "application/x-www-form-urlencoded; charset=UTF-8",
view_name: "jobs_positions",
view_display_id: "block_1",
Expand All @@ -17,30 +30,30 @@ const obj = {
"ajax_page_state[theme]": "delonghi",
"ajax_page_state[libraries]":
"better_exposed_filters/auto_submit,better_exposed_filters/general,better_exposed_filters/select_all_none,classy/base,classy/messages,colorbox/colorbox,colorbox/default,core/html5shiv,core/normalize,delonghi/banner,delonghi/global,delonghi/paragraph--body-element,delonghi/paragraph--drupal-block,delonghi/paragraph--row,delonghi/views-view--jobs-positions,eu_cookie_compliance/eu_cookie_compliance_bare,media/filter.caption,msg_useless_options/useless_options,msg_zip/msg_zip,paragraphs/drupal.paragraphs.unpublished,system/base,views/views.ajax,views/views.module",
},
};
};

const company = { company: "DeLonghi" };

const fetchData = async () => {
const jobs = [];
const s = new scraper.ApiScraper(obj.url);
s.headers.headers["Content-Type"] =
"application/x-www-form-urlencoded; charset=UTF-8";
const res = await s.post(obj.params).then((res) => {
const soup = scraper.soup(res[2].data);
const jobsContainer = soup.findAll("div", {
class: "views-row",
});
jobsContainer.forEach((job) => {
const job_title = job.find("h3").text;

const form = new FormData();

for (const key in data) {
form.append(key, data[key]);
}

const res = await scraper.post(form);
const soup = new Jssoup(res[2].data);
const elements = soup.findAll("div", { class: "views-row" });

await Promise.all(
elements.map(async (elem) => {
const job_title = elem.find("h3").text;
const job_link =
"https://www.delonghigroup.com" + job.find("a").attrs.href;
const job_location = job.find("div", {
"https://www.delonghigroup.com" + elem.find("a").attrs.href;
const job_location = elem.find("div", {
class: "job-country-location",
}).text;
let city_element = translate_city(job_location.split(",")[1].trim());
const job_country = job_location.split(","); //[0].split(" ")[0].trim();
const job_country = job_location.split(",");

let country;
if (job_country[0] === "CEE") {
Expand All @@ -49,37 +62,36 @@ const fetchData = async () => {
country = job_country[0].split(" ")[0].trim();
}

const job_element = {
job_title: job_title,
job_link: job_link,
company: company.company,
country: country,
};
let cities = [];
let counties = [];

if (country === "Romania") {
const { foudedTown, county } = getTownAndCounty(city_element);

job_element["city"] = foudedTown;
job_element["county"] = county;
} else {
job_element["city"] = city_element;
const { city: c, county: co } =
await _counties.getCounties(city_element);
if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}
const job = generateJob(job_title, job_link, country, cities, counties);
jobs.push(job);
}

jobs.push(job_element);
});
});
})
);
return jobs;
};

fetchData().then((jobs) => {
console.log(JSON.stringify(jobs, null, 2));
/**
 * Entry point for the DeLonghi scraper: collects the jobs and posts them.
 *
 * Builds the request params from the company name and logo URL via
 * getParams, then hands the jobs returned by getJobs to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles after the post call has settled.
 */
const run = async () => {
  const company = "DeLonghi";
  const logo =
    "https://logos-world.net/wp-content/uploads/2020/12/DeLonghi-Logo-700x394.png";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so run() does not resolve before the upload finishes, and so a
  // rejected post surfaces through run()'s own promise instead of floating.
  await postApiPeViitor(jobs, params);
};

scraper.postApiPeViitor(jobs, company);
// Start the scraper only when this file is executed directly (it is the
// process entry module), not when it is loaded through require().
if (require.main === module) {
run();
}

let logo =
"https://logos-world.net/wp-content/uploads/2020/12/DeLonghi-Logo-700x394.png";
module.exports = { run, getJobs, getParams }; // this is needed for our unit test job

let postLogo = new scraper.ApiScraper("https://api.peviitor.ro/v1/logo/add/");
postLogo.headers.headers["Content-Type"] = "application/json";
postLogo.post(JSON.stringify([{ id: company.company, logo: logo }]));
});
Loading
Loading