Skip to content

Commit

Permalink
Merge pull request #350 from lalalaurentiu/main
Browse files Browse the repository at this point in the history
Refactor ddroidd, decorfloor, delonghi, deutschebank and drmax scrapers
  • Loading branch information
lalalaurentiu authored May 8, 2024
2 parents 2b77eac + 7e7b7da commit e6faefb
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 246 deletions.
95 changes: 57 additions & 38 deletions forRefactor/ddroidd.js
Original file line number Diff line number Diff line change
@@ -1,50 +1,69 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const url =
"https://api.storyblok.com/v2/cdn/stories/?version=published&starts_with=vacancies%2F&&&excluding_ids=-1&token=4pOFw3LnvRlerPVVh0AB1Qtt&cv=undefined";
const _counties = new Counties();

const company = { company: "DDroidd" };
let finalJobs = [];
const getJobs = async () => {
const url =
"https://api.storyblok.com/v2/cdn/stories/?version=published&starts_with=vacancies%2F&&&excluding_ids=-1&token=4pOFw3LnvRlerPVVh0AB1Qtt&cv=undefined";

const s = new scraper.ApiScraper(url);
const jobs = [];
const scraper = new Scraper(url);
const type = "JSON";
const res = await scraper.get_soup(type);
const json = res.stories;

s.get()
.then((response) => {
const jobs = response.stories;

jobs.forEach((job) => {
const job_title = job.name;
const job_link = "https://www.ddroidd.com/" + job.full_slug;
const remote = job.content.type.toLowerCase().includes("remote")
await Promise.all(
json.map(async (item) => {
const job_title = item.name;
const job_link = "https://www.ddroidd.com/" + item.full_slug;
const remote = item.content.type.toLowerCase().includes("remote")
? ["Remote"]
: [];
let city = "";
let county = "";

const obj = getTownAndCounty(
translate_city(job.content.location.toLowerCase())
let cities = [];
let counties = [];

const { city: c, county: co } = await _counties.getCounties(
translate_city(item.content.location)
);

if (obj.foudedTown && obj.county) {
city = obj.foudedTown;
county = obj.county;
if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}

finalJobs.push({
job_title: job_title,
job_link: job_link,
company: company.company,
city: city,
county: county,
country: "Romania",
remote: remote,
});
});
})
.then(() => {
console.log(JSON.stringify(finalJobs, null, 2));
scraper.postApiPeViitor(finalJobs, company);
});
const job = generateJob(
job_title,
job_link,
"Romania",
cities,
counties,
remote
);
jobs.push(job);
})
);
return jobs;
};

/**
 * Entry point for the DDroidd scraper: collects the jobs with getJobs(),
 * builds the request parameters from the company name and logo URL, and
 * hands both to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles once the upload call has finished.
 */
const run = async () => {
  const company = "DDroidd";
  const logo = "https://www.ddroidd.com/img/header-logo.svg";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so that run() does not settle before the upload does and so an
  // upload failure rejects run() instead of becoming a detached rejection.
  // (Awaiting is a no-op if postApiPeViitor turns out to be synchronous.)
  await postApiPeViitor(jobs, params);
};

// Run the scraper only when this file is executed directly, not when it is
// required by another module (for example the unit test job).
if (require.main === module) {
  // Report the failure explicitly and flag the process as failed rather than
  // leaving the promise returned by run() without a rejection handler.
  run().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}

module.exports = { run, getJobs, getParams }; // this is needed for our unit test job

129 changes: 69 additions & 60 deletions forRefactor/decorfloor.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const _counties = new Counties();

const getAditionalCity = async (url) => {
const s = new scraper.Scraper(url);
const soup = await s.get_soup();
const scraper = new Scraper(url);
const soup = await scraper.get_soup("HTML");

let location;

Expand All @@ -20,60 +26,63 @@ const getAditionalCity = async (url) => {
location = "Unknown";
}

const { foudedTown, county } = getTownAndCounty(
translate_city(location.trim().toLowerCase())
let cities = [];
let counties = [];

const { city: c, county: co } = await _counties.getCounties(
translate_city(location.trim())
);
return { foudedTown, county };

if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}

return { city: cities, county: counties }
};
const url = "https://decorfloor.ro/careers/";

const company = { company: "Decorfloor" };
let finalJobs = [];

const s = new scraper.Scraper(url);

s.soup
.then(async (soup) => {
const jobs = soup.findAll("div", { class: "vc_gitem-col" });
await Promise.all(
jobs.map(async (job) => {
const job_title = job.find("h4").text.trim();
const job_link = job.find("a").attrs.href;

const { foudedTown, county } = await getAditionalCity(job_link);

if (foudedTown && county) {
finalJobs.push({
job_title: job_title,
job_link: job_link,
city: foudedTown,
county: county,
country: "Romania",
company: company.company,
});
} else {
finalJobs.push({
job_title: job_title,
job_link: job_link,
city: ["Bucuresti", "Cluj-Napoca"],
county: ["Bucuresti", "Cluj"],
country: "Romania",
company: company.company,
});
}
})
);
})
.then(() => {
console.log(JSON.stringify(finalJobs, null, 2));

scraper.postApiPeViitor(finalJobs, company);

let logo = "https://decorfloor.ro/wp-content/uploads/2015/08/logo.png";

let postLogo = new scraper.ApiScraper(
"https://api.peviitor.ro/v1/logo/add/"
);
postLogo.headers.headers["Content-Type"] = "application/json";
postLogo.post(JSON.stringify([{ id: company.company, logo: logo }]));
});

/**
 * Scrapes the Decorfloor careers page and builds one job entry per listing.
 *
 * Each `div.vc_gitem-col` element yields a title (its `h4` text) and a link
 * (the `href` of its first `a`). The link is passed to getAditionalCity to
 * resolve the locations, and the entry is created with generateJob using
 * "Romania" as the country.
 *
 * @returns {Promise<Array>} The generated jobs, in the same order as the
 *   listing elements appear on the page.
 */
const getJobs = async () => {
  const url = "https://decorfloor.ro/careers/";

  const scraper = new Scraper(url);
  const soup = await scraper.get_soup("HTML");

  const jobsElements = soup.findAll("div", { class: "vc_gitem-col" });

  // Promise.all resolves to the mapper results in input order. Returning the
  // job from each mapper (instead of pushing into a shared array) keeps the
  // output order stable no matter which detail page responds first.
  const jobs = await Promise.all(
    jobsElements.map(async (elem) => {
      const job_title = elem.find("h4").text.trim();
      const job_link = elem.find("a").attrs.href;

      // NOTE(review): relies on getAditionalCity resolving to
      // { city: string[], county: string[] } — confirm against its definition.
      const { city, county } = await getAditionalCity(job_link);

      const cities = city ? [...city] : [];
      // Counties are de-duplicated; they are only kept when a city list exists.
      const counties = city ? [...new Set(county)] : [];

      return generateJob(job_title, job_link, "Romania", cities, counties);
    })
  );

  return jobs;
};

/**
 * Entry point for the Decorfloor scraper: collects the jobs with getJobs(),
 * builds the request parameters from the company name and logo URL, and
 * hands both to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles once the upload call has finished.
 */
const run = async () => {
  const company = "Decorfloor";
  const logo = "https://decorfloor.ro/wp-content/uploads/2015/08/logo.png";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so that run() does not settle before the upload does and so an
  // upload failure rejects run() instead of becoming a detached rejection.
  // (Awaiting is a no-op if postApiPeViitor turns out to be synchronous.)
  await postApiPeViitor(jobs, params);
};

// Run the scraper only when this file is executed directly, not when it is
// required by another module (for example the unit test job).
if (require.main === module) {
  // Report the failure explicitly and flag the process as failed rather than
  // leaving the promise returned by run() without a rejection handler.
  run().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}

module.exports = { run, getJobs, getParams }; // this is needed for our unit test job
110 changes: 61 additions & 49 deletions forRefactor/delonghi.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
"use strict";
const scraper = require("../peviitor_scraper.js");
const { getTownAndCounty } = require("../getTownAndCounty.js");
const { translate_city } = require("../utils.js");
const Jssoup = require("jssoup").default;
const {
Scraper,
postApiPeViitor,
generateJob,
getParams,
} = require("peviitor_jsscraper");
const { Counties } = require("../getTownAndCounty.js");

const obj = {
url: "https://www.delonghigroup.com/en/views/ajax?_wrapper_format=drupal_ajax",
params: {
const _counties = new Counties();

const getJobs = async () => {
const url =
"https://www.delonghigroup.com/en/views/ajax?_wrapper_format=drupal_ajax";

const scraper = new Scraper(url);
scraper.config.headers["Content-Type"] =
"application/x-www-form-urlencoded; charset=UTF-8";

const data = {
"MIME Type": "application/x-www-form-urlencoded; charset=UTF-8",
view_name: "jobs_positions",
view_display_id: "block_1",
Expand All @@ -17,30 +30,30 @@ const obj = {
"ajax_page_state[theme]": "delonghi",
"ajax_page_state[libraries]":
"better_exposed_filters/auto_submit,better_exposed_filters/general,better_exposed_filters/select_all_none,classy/base,classy/messages,colorbox/colorbox,colorbox/default,core/html5shiv,core/normalize,delonghi/banner,delonghi/global,delonghi/paragraph--body-element,delonghi/paragraph--drupal-block,delonghi/paragraph--row,delonghi/views-view--jobs-positions,eu_cookie_compliance/eu_cookie_compliance_bare,media/filter.caption,msg_useless_options/useless_options,msg_zip/msg_zip,paragraphs/drupal.paragraphs.unpublished,system/base,views/views.ajax,views/views.module",
},
};
};

const company = { company: "DeLonghi" };

const fetchData = async () => {
const jobs = [];
const s = new scraper.ApiScraper(obj.url);
s.headers.headers["Content-Type"] =
"application/x-www-form-urlencoded; charset=UTF-8";
const res = await s.post(obj.params).then((res) => {
const soup = scraper.soup(res[2].data);
const jobsContainer = soup.findAll("div", {
class: "views-row",
});
jobsContainer.forEach((job) => {
const job_title = job.find("h3").text;

const form = new FormData();

for (const key in data) {
form.append(key, data[key]);
}

const res = await scraper.post(form);
const soup = new Jssoup(res[2].data);
const elements = soup.findAll("div", { class: "views-row" });

await Promise.all(
elements.map(async (elem) => {
const job_title = elem.find("h3").text;
const job_link =
"https://www.delonghigroup.com" + job.find("a").attrs.href;
const job_location = job.find("div", {
"https://www.delonghigroup.com" + elem.find("a").attrs.href;
const job_location = elem.find("div", {
class: "job-country-location",
}).text;
let city_element = translate_city(job_location.split(",")[1].trim());
const job_country = job_location.split(","); //[0].split(" ")[0].trim();
const job_country = job_location.split(",");

let country;
if (job_country[0] === "CEE") {
Expand All @@ -49,37 +62,36 @@ const fetchData = async () => {
country = job_country[0].split(" ")[0].trim();
}

const job_element = {
job_title: job_title,
job_link: job_link,
company: company.company,
country: country,
};
let cities = [];
let counties = [];

if (country === "Romania") {
const { foudedTown, county } = getTownAndCounty(city_element);

job_element["city"] = foudedTown;
job_element["county"] = county;
} else {
job_element["city"] = city_element;
const { city: c, county: co } =
await _counties.getCounties(city_element);
if (c) {
cities.push(c);
counties = [...new Set([...counties, ...co])];
}
const job = generateJob(job_title, job_link, country, cities, counties);
jobs.push(job);
}

jobs.push(job_element);
});
});
})
);
return jobs;
};

fetchData().then((jobs) => {
console.log(JSON.stringify(jobs, null, 2));
/**
 * Entry point for the DeLonghi scraper: collects the jobs with getJobs(),
 * builds the request parameters from the company name and logo URL, and
 * hands both to postApiPeViitor.
 *
 * @returns {Promise<void>} Settles once the upload call has finished.
 */
const run = async () => {
  const company = "DeLonghi";
  const logo =
    "https://logos-world.net/wp-content/uploads/2020/12/DeLonghi-Logo-700x394.png";
  const jobs = await getJobs();
  const params = getParams(company, logo);
  // Awaited so that run() does not settle before the upload does and so an
  // upload failure rejects run() instead of becoming a detached rejection.
  // (Awaiting is a no-op if postApiPeViitor turns out to be synchronous.)
  await postApiPeViitor(jobs, params);
};

scraper.postApiPeViitor(jobs, company);
// Run the scraper only when this file is executed directly, not when it is
// required by another module (for example the unit test job).
if (require.main === module) {
  // Report the failure explicitly and flag the process as failed rather than
  // leaving the promise returned by run() without a rejection handler.
  run().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}

let logo =
"https://logos-world.net/wp-content/uploads/2020/12/DeLonghi-Logo-700x394.png";
module.exports = { run, getJobs, getParams }; // this is needed for our unit test job

let postLogo = new scraper.ApiScraper("https://api.peviitor.ro/v1/logo/add/");
postLogo.headers.headers["Content-Type"] = "application/json";
postLogo.post(JSON.stringify([{ id: company.company, logo: logo }]));
});
Loading

0 comments on commit e6faefb

Please sign in to comment.