Skip to content

Commit

Permalink
Update scraping logic for GitHub
Browse files Browse the repository at this point in the history
Fixes #5
  • Loading branch information
grubersjoe committed Nov 17, 2023
1 parent bbf22d4 commit 391c8f4
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 53 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "github-contribution-api",
"description": "Scrape contribution data of a Github user",
"version": "4.5.3",
"version": "4.6.0",
"author": "Jonathan Gruber <[email protected]>",
"license": "MIT",
"private": true,
Expand Down
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ app.get('/v4/:username', async (req: Request, res, next) => {
next(
new Error(
`Error scraping contribution data of '${username}': ${
error instanceof Error ? error.message : 'Unknown error'
}.`,
error instanceof Error ? error.message : 'Unknown error.'
}`,
),
);
}
Expand Down
114 changes: 65 additions & 49 deletions src/scrape.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import cheerio from 'cheerio';

import { ParsedQuery } from '.';
import TagElement = cheerio.TagElement;
import Cheerio = cheerio.Cheerio;

type Level = 0 | 1 | 2 | 3 | 4;
type Year = number | 'lastYear';
Expand Down Expand Up @@ -79,9 +81,9 @@ async function scrapeContributionsForYear(
const $ = cheerio.load(await page.text());
const $days = $('.js-calendar-graph-table .ContributionCalendar-day');

const sortedDays: Array<Element> = $days.get().sort((a, b) => {
const dateA = $(a).attr('data-date') ?? '';
const dateB = $(b).attr('data-date') ?? '';
const sortedDays= $days.get().sort((a: TagElement, b: TagElement) => {
const dateA = a.attribs['data-date'] ?? '';
const dateB = b.attribs['data-date'] ?? '';

return dateA.localeCompare(dateB, 'en');
});
Expand All @@ -97,49 +99,17 @@ async function scrapeContributionsForYear(

const total = parseInt(totalMatch[0].replace(/,/g, ''));

/**
 * Turns one calendar day node into a contribution record.
 *
 * The date and level come from the node's `data-date` and `data-level`
 * attributes; the count is the run of digits at the start of the node's
 * trimmed text.
 *
 * @param day - The calendar day node to read.
 * @returns The date split on `-` into integers, plus the contribution record.
 * @throws Error when the date or level attribute is missing or empty, when the
 *   text does not start with digits, or when the count or level is not a number.
 */
const parseDay = (day: Node) => {
  const $cell = $(day);
  const rawDate = $cell.attr('data-date');
  const rawLevel = $cell.attr('data-level');

  if (!rawDate) {
    throw Error('Unable to parse date attribute');
  }
  if (!rawLevel) {
    throw Error('Unable to parse level attribute');
  }

  // The count is only available as the leading digits of the cell text.
  const leadingDigits = /^\d+/.exec($cell.text().trim());
  if (leadingDigits === null) {
    throw Error('Unable to parse contribution count');
  }

  const count = parseInt(leadingDigits[0]);
  const level = parseInt(rawLevel) as Level;

  if (Number.isNaN(count)) {
    throw Error('Unable to parse contribution count for day');
  }
  if (Number.isNaN(level)) {
    throw Error('Unable to parse contribution level for day');
  }

  const contribution: Contribution = { date: rawDate, count, level };
  const dateParts = rawDate.split('-').map((part) => parseInt(part));

  return { date: dateParts, contribution };
};
// Required for contribution count
const tooltipsByDayId = $('.js-calendar-graph tool-tip')
.toArray()
.reduce<Record<string, Cheerio>>((map, elem) => {
const $elem = $(elem);
const dayId = $elem.attr('for');
if (dayId) {
map[dayId] = $elem
}
return map;
}, {});

const response = {
total: {
Expand All @@ -149,8 +119,8 @@ async function scrapeContributionsForYear(
};

if (format === 'nested') {
return sortedDays.reduce<NestedResponse>((data, day: Node) => {
const { date, contribution } = parseDay(day);
return sortedDays.reduce<NestedResponse>((data, day) => {
const { date, contribution } = parseDay(day, tooltipsByDayId);
const [y, m, d] = date;

if (!data.contributions[y]) data.contributions[y] = {};
Expand All @@ -164,10 +134,56 @@ async function scrapeContributionsForYear(

return {
...response,
contributions: sortedDays.map((day) => parseDay(day).contribution),
contributions: sortedDays.map((day) => parseDay(day, tooltipsByDayId).contribution, tooltipsByDayId),
};
}

/**
 * Converts one calendar day element into a contribution record.
 *
 * The date and level are read from the element's `data-date` and `data-level`
 * attributes. The count is not read from the element itself: it is taken from
 * the leading number of the tooltip stored under the element's `id`.
 *
 * @param day - The calendar day element to read.
 * @param tooltipsByDayId - Tooltips keyed by the `id` of the day element they
 *   belong to.
 * @returns The date split on `-` into integers, plus the contribution record.
 * @throws Error when the date or level attribute is missing or empty, or when
 *   the level is not a number.
 */
const parseDay = (day: TagElement, tooltipsByDayId: Record<string, Cheerio>) => {
  const attr = {
    id: day.attribs['id'],
    date: day.attribs['data-date'],
    level: day.attribs['data-level'],
  };

  if (!attr.date) {
    throw Error('Unable to parse date attribute.');
  }

  if (!attr.level) {
    throw Error('Unable to parse level attribute.');
  }

  // A day without an id, without a tooltip, or whose tooltip text does not
  // start with a number is reported as zero contributions.
  let count = 0;
  const tooltip = attr.id ? tooltipsByDayId[attr.id] : undefined;
  if (tooltip) {
    // Accept thousands separators ("1,234 contributions") so that large counts
    // are not truncated at the first comma.
    const countMatch = tooltip.text().trim().match(/^\d[\d,]*/);
    if (countMatch) {
      count = parseInt(countMatch[0].replace(/,/g, ''), 10);
    }
  }

  const level = parseInt(attr.level, 10) as Level;

  if (isNaN(level)) {
    throw Error('Unable to parse contribution level.');
  }

  const contribution = {
    date: attr.date,
    count,
    level,
  } satisfies Contribution;

  return {
    date: attr.date.split('-').map((d) => parseInt(d, 10)),
    contribution,
  };
};

/**
* @throws UserNotFoundError
*/
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"include": ["src"],
"compilerOptions": {
"esModuleInterop": true,
"lib": ["ES2022", "dom"],
"lib": ["ES2023"],
"module": "commonjs",
"target": "ES2022",
"moduleResolution": "node",
Expand Down

0 comments on commit 391c8f4

Please sign in to comment.