Skip to content

Commit 05edb88

Browse files
committed
feat: improve handling of queryVars.
1 parent 54b89ec commit 05edb88

File tree

11 files changed

+29
-33
lines changed

11 files changed

+29
-33
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,6 @@ An object of additional query params to add to the Google News URL string, forma
128128
const articles = await googleNewsScraper({
129129
searchTerm: "Últimas noticias en Madrid",
130130
queryVars: {
131-
hl:"es-ES",
132131
gl:"ES",
133132
ceid:"ES:es"
134133
},

dist/cjs/index.js

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2725,14 +2725,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
27252725
limit: 99
27262726
}, userConfig);
27272727
const logger = getLogger(config.logLevel);
2728-
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
2728+
const queryVars = config.queryVars
2729+
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
27292730
if (userConfig.searchTerm) {
27302731
queryVars.q = userConfig.searchTerm;
27312732
}
2732-
const queryString = queryVars ? buildQueryString(queryVars) : '';
2733+
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
27332734
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
2734-
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
2735-
const url = `${baseUrl}${queryString}${timeString}`;
2735+
const url = `${baseUrl}${queryString}`;
27362736
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
27372737
const requiredArgs = [
27382738
'--disable-extensions-except=/path/to/manifest/folder/',
@@ -2778,15 +2778,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
27782778
const $ = cheerio__namespace.load(content);
27792779
const articles = $('article');
27802780
let results = [];
2781-
$(articles).each(function () {
2781+
$(articles).each(function (i) {
27822782
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
27832783
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
27842784
const srcset = (_g = $(this).find('figure').find('img').attr('srcset')) === null || _g === void 0 ? void 0 : _g.split(' ');
27852785
const image = srcset && srcset.length
27862786
? srcset[srcset.length - 2]
27872787
: $(this).find('figure').find('img').attr('src');
27882788
const articleType = getArticleType($(this));
2789-
// TODO: Done up to here
27902789
const title = getTitle($(this), articleType);
27912790
const mainArticle = {
27922791
title,

dist/cjs/min/index.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/cjs/min/index.min.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/esm/index.mjs

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2704,14 +2704,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
27042704
limit: 99
27052705
}, userConfig);
27062706
const logger = getLogger(config.logLevel);
2707-
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
2707+
const queryVars = config.queryVars
2708+
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
27082709
if (userConfig.searchTerm) {
27092710
queryVars.q = userConfig.searchTerm;
27102711
}
2711-
const queryString = queryVars ? buildQueryString(queryVars) : '';
2712+
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
27122713
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
2713-
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
2714-
const url = `${baseUrl}${queryString}${timeString}`;
2714+
const url = `${baseUrl}${queryString}`;
27152715
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
27162716
const requiredArgs = [
27172717
'--disable-extensions-except=/path/to/manifest/folder/',
@@ -2757,15 +2757,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
27572757
const $ = cheerio.load(content);
27582758
const articles = $('article');
27592759
let results = [];
2760-
$(articles).each(function () {
2760+
$(articles).each(function (i) {
27612761
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
27622762
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
27632763
const srcset = (_g = $(this).find('figure').find('img').attr('srcset')) === null || _g === void 0 ? void 0 : _g.split(' ');
27642764
const image = srcset && srcset.length
27652765
? srcset[srcset.length - 2]
27662766
: $(this).find('figure').find('img').attr('src');
27672767
const articleType = getArticleType($(this));
2768-
// TODO: Done up to here
27692768
const title = getTitle($(this), articleType);
27702769
const mainArticle = {
27712770
title,

dist/esm/index.mjs.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/esm/min/index.min.mjs

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dist/esm/min/index.min.mjs.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/tsc/index.js

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,14 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
2828
limit: 99
2929
}, userConfig);
3030
const logger = getLogger(config.logLevel);
31-
const queryVars = (_a = config.queryVars) !== null && _a !== void 0 ? _a : {};
31+
const queryVars = config.queryVars
32+
? Object.assign(Object.assign({}, config.queryVars), { when: config.timeframe }) : { when: config.timeframe };
3233
if (userConfig.searchTerm) {
3334
queryVars.q = userConfig.searchTerm;
3435
}
35-
const queryString = queryVars ? buildQueryString(queryVars) : '';
36+
const queryString = (_a = buildQueryString(queryVars)) !== null && _a !== void 0 ? _a : '';
3637
const baseUrl = (_b = config.baseUrl) !== null && _b !== void 0 ? _b : `https://news.google.com/search`;
37-
const timeString = config.timeframe ? ` when:${config.timeframe}` : '';
38-
const url = `${baseUrl}${queryString}${timeString}`;
38+
const url = `${baseUrl}${queryString}`;
3939
logger.info(`📰 SCRAPING NEWS FROM: ${url}`);
4040
const requiredArgs = [
4141
'--disable-extensions-except=/path/to/manifest/folder/',
@@ -83,7 +83,7 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
8383
let results = [];
8484
let i = 0;
8585
const urlChecklist = [];
86-
$(articles).each(function () {
86+
$(articles).each(function (i) {
8787
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
8888
const link = ((_c = (_b = (_a = $(this)) === null || _a === void 0 ? void 0 : _a.find('a[href^="./article"]')) === null || _b === void 0 ? void 0 : _b.attr('href')) === null || _c === void 0 ? void 0 : _c.replace('./', 'https://news.google.com/')) || ((_f = (_e = (_d = $(this)) === null || _d === void 0 ? void 0 : _d.find('a[href^="./read"]')) === null || _e === void 0 ? void 0 : _e.attr('href')) === null || _f === void 0 ? void 0 : _f.replace('./', 'https://news.google.com/')) || "";
8989
link && urlChecklist.push(link);
@@ -92,7 +92,6 @@ const googleNewsScraper = (userConfig) => __awaiter(void 0, void 0, void 0, func
9292
? srcset[srcset.length - 2]
9393
: $(this).find('figure').find('img').attr('src');
9494
const articleType = getArticleType($(this));
95-
// TODO: Done up to here
9695
const title = getTitle($(this), articleType);
9796
const mainArticle = {
9897
title,

src/buildQueryString.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import { QueryVars } from "./types"
22

3-
const buildQueryString = (query: QueryVars) => {
3+
const buildQueryString = ( query: QueryVars ) => {
44

55
// Bail if there's nothing in the Object
66
if (Object.keys(query).length === 0) return "";

0 commit comments

Comments
 (0)