-
Notifications
You must be signed in to change notification settings - Fork 12
Refactor getSubTags and getParentTags in order to use cheerio maps #86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f9e6471
6093f89
f9bb38e
c77fcca
18eb2e2
388238a
1404884
cadee7d
04d4e64
97aa54a
2fd0ad9
df74cf0
d78dd30
e79039c
f7c98f9
aa1a73e
6a987b4
4a1738e
9c661f3
2ff33b3
b4e9f5f
fa7a062
b8d42f2
05f91a7
f766803
a372329
f9c5eef
3ff1a64
7d4b8d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
import { Author, Series, SeriesWorkSummary } from "types/entities"; | ||
import { CheerioAPI, load } from "cheerio"; | ||
import { CheerioAPI, Element, load } from "cheerio"; | ||
import { SeriesPage, WorkPage } from "../page-loaders"; | ||
import { | ||
getWorkBookmarkCount, | ||
|
@@ -31,48 +31,34 @@ export const getSeriesTitle = ($seriesPage: SeriesPage): string => { | |
return $seriesPage("h2.heading").text().trim(); | ||
}; | ||
|
||
export const getSeriesAuthors = ( | ||
$seriesPage: SeriesPage | ||
): Series["authors"] => { | ||
export const getSeriesAuthors = ($seriesPage: SeriesPage): Series["authors"] => { | ||
const authorLinks = $seriesPage("dl.meta a[rel=author]"); | ||
const authors: Author[] = []; | ||
|
||
if ( | ||
$seriesPage("dl.meta > dd:nth-of-type(1)").text().trim() === "Anonymous" | ||
) { | ||
if ($seriesPage("dl.meta > dd:nth-of-type(1)").text().trim() === "Anonymous") { | ||
return [{ username: "Anonymous", pseud: "Anonymous", anonymous: true }]; | ||
} | ||
|
||
if (authorLinks.length !== 0) { | ||
authorLinks.each((i, element) => { | ||
return authorLinks.map((_, element) => { | ||
const url = element.attribs.href; | ||
const [, username, pseud] = url.match(/users\/(.+)\/pseuds\/(.+)/)!; | ||
|
||
authors.push({ | ||
return { | ||
username: username, | ||
pseud: decodeURI(pseud), | ||
anonymous: false, | ||
}); | ||
}); | ||
} as Author | ||
}).get(); | ||
} | ||
|
||
return authors; | ||
return [] as Author[]; | ||
}; | ||
|
||
export const getSeriesDescription = ( | ||
$seriesPage: SeriesPage | ||
): string | null => { | ||
const description = $seriesPage("dl.series blockquote.userstuff").html(); | ||
return description ? description.trim() : null; | ||
export const getSeriesDescription = ($seriesPage: SeriesPage): string | null => { | ||
return $seriesPage("dl.series blockquote.userstuff").html()?.trim() || null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's the original behavior, which I preserved: if the description is an empty string, the conditional is falsy. |
||
}; | ||
|
||
export const getSeriesNotes = ($seriesPage: SeriesPage): string | null => { | ||
const notes = $seriesPage("dl.series dd:nth-of-type(5)"); | ||
if (notes.prevAll().first().text().trim() === "Notes:") { | ||
return notes.html()!.trim(); | ||
} else { | ||
return null; | ||
} | ||
return notes.prevAll().first().text().trim() === "Notes:" ? notes.html()!.trim() : null; | ||
}; | ||
|
||
export const getSeriesPublishDate = ($seriesPage: SeriesPage): string => { | ||
|
@@ -114,18 +100,11 @@ export const getSeriesBookmarkCount = ($seriesPage: SeriesPage): number => { | |
); | ||
}; | ||
|
||
export const getSeriesWorks = ( | ||
$seriesPage: SeriesPage | ||
): SeriesWorkSummary[] => { | ||
const works: SeriesWorkSummary[] = []; | ||
|
||
$seriesPage("ul.index > li.work").each((index, element) => { | ||
works[index] = getSeriesWork($seriesPage(element).html() as string); | ||
}); | ||
|
||
return works; | ||
}; | ||
|
||
export const getSeriesWorks = ($seriesPage: SeriesPage): SeriesWorkSummary[] => { | ||
return $seriesPage("ul.index > li.work").map((index, element) => { | ||
return getSeriesWork($seriesPage(element).html() as string); | ||
}).get(); | ||
} | ||
// Helpers for series' works | ||
interface SeriesWork extends CheerioAPI { | ||
kind: "SeriesWork"; | ||
|
@@ -187,63 +166,46 @@ const getSeriesWorkSummary = ($work: SeriesWork) => { | |
}; | ||
|
||
const getSeriesWorkFandoms = ($work: SeriesWork): string[] => { | ||
const fandoms: string[] = []; | ||
|
||
$work("h5.fandoms a.tag").each(function (i, element) { | ||
fandoms[i] = $work(element).text().trim(); | ||
}); | ||
return fandoms; | ||
}; | ||
return $work("h5.fandoms a.tag").map((i, element) => { | ||
return $work(element).text().trim(); | ||
}).get(); | ||
} | ||
|
||
const getSeriesWorkCharacters = ($work: SeriesWork): string[] => { | ||
const characters: string[] = []; | ||
|
||
$work("li.characters a.tag").each(function (i, character) { | ||
characters[i] = $work(character).text().trim(); | ||
}); | ||
return characters; | ||
return $work("li.characters a.tag").map((i, character) => { | ||
return $work(character).text().trim(); | ||
}).get(); | ||
}; | ||
|
||
const getSeriesWorkRelationships = ($work: SeriesWork): string[] => { | ||
const ships: string[] = []; | ||
|
||
$work("li.relationships a.tag").each(function (i, ship) { | ||
ships[i] = $work(ship).text().trim(); | ||
}); | ||
return ships; | ||
return $work("li.relationships a.tag").map((i, ship) => { | ||
return $work(ship).text().trim(); | ||
}).get(); | ||
}; | ||
|
||
const getSeriesWorkAdditionalTags = ($work: SeriesWork): string[] => { | ||
const tags: string[] = []; | ||
|
||
$work("li.freeforms a.tag").each(function (i) { | ||
tags[i] = $work(this).text().trim(); | ||
}); | ||
return tags; | ||
return $work("li.freeforms a.tag").map((_, element) => { | ||
return $work(element).text().trim(); | ||
}).get(); | ||
}; | ||
|
||
const getSeriesWorkAuthors = ( | ||
$work: SeriesWork | ||
): SeriesWorkSummary["authors"] => { | ||
const getSeriesWorkAuthors = ($work: SeriesWork): SeriesWorkSummary["authors"] => { | ||
const authorLinks = $work("h4.heading a[rel='author']"); | ||
const authors: Author[] = []; | ||
|
||
if ($work("h4.heading").text().split("by")[1].trim() === "Anonymous") { | ||
return [{ username: "Anonymous", pseud: "Anonymous", anonymous: true }]; | ||
} | ||
|
||
if (authorLinks.length !== 0) { | ||
authorLinks.each((i, element) => { | ||
return authorLinks.map((_, element) => { | ||
const url = element.attribs.href; | ||
const [, username, pseud] = url.match(/users\/(.+)\/pseuds\/(.+)/)!; | ||
|
||
authors.push({ | ||
return { | ||
username: username, | ||
pseud: decodeURI(pseud), | ||
anonymous: false, | ||
}); | ||
}); | ||
} | ||
}).get(); | ||
} | ||
|
||
return authors; | ||
return [] as Author[]; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
import { TagCategory } from "types/entities"; | ||
import { TagCategory, Tag } from "types/entities"; | ||
import { TagPage } from "../page-loaders"; | ||
import { Element } from "cheerio"; | ||
|
||
|
@@ -47,11 +47,9 @@ export const getCanonical = ($tagPage: TagPage) => { | |
}; | ||
|
||
export const getParentTags = ($tagPage: TagPage) => { | ||
const parentTags: string[] = []; | ||
$tagPage(".parent ul.tags li").each((_, element) => { | ||
parentTags.push($tagPage(element).text()); | ||
}); | ||
return parentTags; | ||
return $tagPage(".parent ul.tags li").map((_, element) => { | ||
return $tagPage(element).text(); | ||
}).get(); | ||
}; | ||
export const getChildTags = ($tagPage: TagPage) => { | ||
return $tagPage(".child > div").map((_, divElement) => { | ||
|
@@ -66,19 +64,21 @@ export const getChildTags = ($tagPage: TagPage) => { | |
} | ||
|
||
export const getSubTags = ($tagPage: TagPage) => { | ||
const subTags: { tagName: string; parentSubTag: string | null }[] = []; | ||
$tagPage(".sub > ul.tags > li").each((_, element) => { | ||
subTags.push({ tagName: $tagPage(element).children().first().text(), parentSubTag: null }); | ||
if ($tagPage($tagPage(element)).has("ul.tags").length) { | ||
$tagPage("ul.tags", element).children("li").each((_, child) => { | ||
// each <li> element contains an <a> element, | ||
// which is why `.children().first()` is needed for both the `tagName` and `parentSubTag` | ||
subTags.push({ | ||
tagName: $tagPage(child).children().first().text(), | ||
parentSubTag: $tagPage($tagPage(child)).parents("li").children().first().text() | ||
}); | ||
}); | ||
return $tagPage(".sub > ul.tags > li").map((_, element) => { | ||
if($tagPage($tagPage(element).has("ul.tags")).length) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Change this to an early-return `if` (the thing we discussed on Discord) if you can :) |
||
return [ | ||
{ | ||
tagName: $tagPage(element).children().first().text(), | ||
parentSubTag: null | ||
}, | ||
$tagPage("ul.tags", element).children("li").map((_, child) => { | ||
return { | ||
tagName: $tagPage(child).children().first().text(), | ||
parentSubTag: $tagPage($tagPage(child)).parents("li").children().first().text() | ||
}; | ||
}).get() | ||
].flat(); | ||
} | ||
}); | ||
return subTags; | ||
return { tagName: $tagPage(element).children().first().text(), parentSubTag: null }; | ||
}).get(); | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
import { Element } from "cheerio"; | ||
import { UserProfile } from "../page-loaders"; | ||
import { getUserProfileUrl } from "../urls"; | ||
|
||
|
@@ -18,17 +19,11 @@ export const getUserProfileName = ($userProfile: UserProfile) => { | |
const PSEUD_SUFFIX = ", "; | ||
export const getUserProfilePseuds = ($userProfile: UserProfile) => { | ||
const pseuds = $userProfile("dd.pseuds a"); | ||
const pseudsArray: string[] = []; | ||
|
||
if (pseuds.length !== 0) { | ||
pseuds.each((i, element) => { | ||
const url = element.attribs.href; | ||
const [, username, pseud] = url.match(/users\/(.+)\/pseuds\/(.+)/)!; | ||
|
||
pseudsArray.push(decodeURI(pseud)); | ||
}); | ||
} | ||
return pseudsArray.join(PSEUD_SUFFIX); | ||
return pseuds.length !== 0 | ||
? pseuds.map((_, element) => decodeURI(element.attribs.href.match(/users\/(.+)\/pseuds\/(.+)/)![2])) | ||
.get() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason why you changed this? It makes it harder to understand what's going on. I would…
|
||
.join(PSEUD_SUFFIX) | ||
: ""; | ||
}; | ||
|
||
//Trim the results to only the date: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,9 +4,9 @@ import { getWorkDetailsFromUrl, getWorkUrl } from "src/urls"; | |
import { ChapterIndexPage } from "src/page-loaders"; | ||
|
||
const TITLE_SEPARATOR = ". "; | ||
export const getChaptersList = ($chapterIndexPage: ChapterIndexPage) => { | ||
const chapters: Chapter[] = []; | ||
$chapterIndexPage("ol.index > li").each((index, li) => { | ||
export const getChaptersList = ($chapterIndexPage: ChapterIndexPage): Chapter[] => { | ||
//return chapters; | ||
return $chapterIndexPage("ol.index > li").map((index, li) => { | ||
const link = $chapterIndexPage(li).find("a")[0]; | ||
const chapterText = $chapterIndexPage(link).text(); | ||
const { workId, chapterId } = getWorkDetailsFromUrl({ | ||
|
@@ -18,8 +18,7 @@ export const getChaptersList = ($chapterIndexPage: ChapterIndexPage) => { | |
const dateNode = $chapterIndexPage( | ||
$chapterIndexPage(li).find(".datetime")[0] | ||
); | ||
|
||
chapters.push({ | ||
return { | ||
id: chapterId!, | ||
workId, | ||
index: index + 1, | ||
|
@@ -28,35 +27,29 @@ export const getChaptersList = ($chapterIndexPage: ChapterIndexPage) => { | |
publishedAt: dateNode.text().replace(/[\(\)]/g, ""), | ||
// We rebuild the url so it gets the full path | ||
url: getWorkUrl({ workId, chapterId }), | ||
}); | ||
}); | ||
return chapters; | ||
} | ||
}).get(); | ||
}; | ||
|
||
export const getWorkTitle = ($chapterIndexPage: ChapterIndexPage) => { | ||
return $chapterIndexPage(".works-navigate h2 a[href^='/works/']").text(); | ||
}; | ||
|
||
export const getWorkAuthors = ( | ||
$chapterIndexPage: ChapterIndexPage | ||
): Author[] => { | ||
export const getWorkAuthors = ($chapterIndexPage: ChapterIndexPage): Author[] => { | ||
const authors: Author[] = []; | ||
const authorNode = $chapterIndexPage(".works-navigate h2 a[rel='author']"); | ||
if (authorNode.text().trim() === "Anonymous") { | ||
return [{ username: "Anonymous", pseud: "Anonymous", anonymous: true }]; | ||
} | ||
|
||
if (authorNode.length !== 0) { | ||
authorNode.each((i, element) => { | ||
return authorNode.length !== 0 | ||
? authorNode.map((_, element) => { | ||
const url = element.attribs.href; | ||
const [, username, pseud] = url.match(/users\/(.+)\/pseuds\/(.+)/)!; | ||
|
||
authors.push({ | ||
return { | ||
username: username, | ||
pseud: decodeURI(pseud), | ||
anonymous: false, | ||
}); | ||
}); | ||
} | ||
return authors; | ||
} | ||
}).get() | ||
: [] as Author[]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ternary operators are useful, but they can be cognitively heavy when large. If you immediately return |
||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should figure out why you need "as Author" here. What happens if you change line 42 to be…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm pretty sure I don't need to. I'll check tomorrow.