-
Notifications
You must be signed in to change notification settings - Fork 12
add prompt functionality (in progress) #62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b8367f4
003a992
e2139f8
f6df295
ee7f8be
0936f4c
23e81cd
e387772
0d62c25
c1ffadc
1ef926a
81fba93
12ba058
3bc27cb
09d7f78
5f0a1d8
96bec3a
1addc20
5fedc19
c17f71b
f4c64c4
1077f86
052ef2c
b11ec64
e9c5ebf
478f87c
0cba4c3
42294d1
ae8edb5
4189922
a65b1d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
"scripts": { | ||
"build": "rm -rf dist/ && tsup src/index.ts src/urls.ts --format esm,cjs --dts", | ||
"test": "NODE_OPTIONS=--experimental-vm-modules jest --no-cache .test.ts$ --verbose=true", | ||
"test-prompts": "NODE_OPTIONS=--experimental-vm-modules jest --no-cache prompts.test.ts$ --verbose=true", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the way to do this should be:
|
||
"encode-mock-files": "ts-node-esm tests/mocks/scripts/encode-mock-files.mts" | ||
}, | ||
"repository": "https://github.com/essential-randomness/AO3.js.git", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { | ||
Prompt, | ||
WorkCategory | ||
} from "types/entities" | ||
|
||
import { | ||
getPostedAt, | ||
getPromptSummary, | ||
getCollectionDisplayTitle, | ||
getPromptRatings, | ||
getPromptAuthor, | ||
getPromptFandoms, | ||
getPromptAdditionalTags, | ||
getPromptCharacters, | ||
getPromptRelationships, | ||
getPromptWarnings, | ||
getPromptClaims, | ||
getPromptCategories | ||
} from "./prompt-getters" | ||
|
||
import {loadPromptPage} from "../../page-loaders" | ||
|
||
export const getPrompt = async ({ | ||
promptId, | ||
collectionName, | ||
}: { | ||
promptId: string; | ||
collectionName: string; | ||
}): Promise<Prompt> => { | ||
|
||
const promptPage = await loadPromptPage({id: promptId, collectionName: collectionName});//TODO | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fill in more details or delete this TODO |
||
|
||
return { | ||
postedAt: getPostedAt(promptPage), | ||
summary: getPromptSummary(promptPage), | ||
collectionDisplayTitle: getCollectionDisplayTitle(promptPage), | ||
ratings: getPromptRatings(promptPage), | ||
author: getPromptAuthor(promptPage), | ||
fandoms: getPromptFandoms(promptPage), | ||
//TODO needs work: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fill in more details or delete this TODO |
||
tags: { | ||
warnings: getPromptWarnings(promptPage), | ||
characters: getPromptCharacters(promptPage), | ||
relationships: getPromptRelationships(promptPage), | ||
additional: getPromptAdditionalTags(promptPage) | ||
}, | ||
//TODO: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fill or delete this TODO |
||
claims: getPromptClaims(promptPage), | ||
title: "TODO", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should just not add the field if we cannot scrape it yet. If you want to still have it in the type for future reference you can just comment it out and put a todo there to fill it at some point. |
||
collectionName: collectionName, | ||
id: promptId, | ||
filled: true, | ||
fills: [], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing as above |
||
categories: getPromptCategories(promptPage) | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
import { | ||
Author, | ||
WorkRatings, | ||
WorkWarnings, | ||
Prompt, | ||
WorkCategory | ||
} from "types/entities" | ||
import { PromptPage } from "../../page-loaders"; | ||
|
||
/**
 * Reads the posting date shown on a prompt page.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The text of the first `p.datetime` element, exactly as it appears
 *   on the page (it is neither trimmed nor parsed into a date).
 */
export const getPostedAt = ($promptPage: PromptPage): string => {
  return $promptPage("p.datetime:first").text();
};
|
||
/**
 * Reads the prompt summary as HTML.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The inner HTML of `blockquote.userstuff.summary` with leading and
 *   trailing whitespace removed, or `null` when the element is missing or its
 *   HTML is empty.
 */
export const getPromptSummary = ($promptPage: PromptPage): string | null => {
  const summaryHtml = $promptPage("blockquote.userstuff.summary").html();
  if (!summaryHtml) {
    return null;
  }
  // The markup usually starts and ends with a newline, so strip the padding.
  return summaryHtml.trim();
};
|
||
/**
 * Reads the collection title displayed on a prompt page.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The trimmed text of the `h2.collections` element.
 */
export const getCollectionDisplayTitle = ($promptPage: PromptPage): string => {
  return $promptPage("h2.collections").text().trim();
};
|
||
/**
 * Reads the ratings listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The ratings found in the first `ul.required-tags` block, split on
 *   ", ". When the page shows the literal text "No rating", the result is
 *   `["Not Rated"]`.
 * @throws Error when any listed rating is not a value of `WorkRatings`.
 */
export const getPromptRatings = ($promptPage: PromptPage): WorkRatings[] => {
  const ratingsText = $promptPage("ul.required-tags:first span.rating")
    .text()
    .trim();

  // A prompt without a rating is rendered with the literal text "No rating".
  if (ratingsText === "No rating") {
    return ["Not Rated"] as WorkRatings[];
  }

  const ratings = ratingsText.split(", ");
  for (const rating of ratings) {
    if (!Object.values(WorkRatings).includes(rating as WorkRatings)) {
      throw new Error(`An unknown rating was found on the page: "${rating}"`);
    }
  }

  return ratings as WorkRatings[];
};
|
||
/**
 * Reads who made the request from the page heading.
 *
 * The heading is expected to look like "Request by Anonymous",
 * "Request by <name>" or "Request by <pseud> (<username>)".
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns `"Anonymous"` for an anonymous request; otherwise an object with
 *   `username` and `pseud`. When only one name is present it is used for both.
 * @throws Error when the heading does not contain a "Request by ..." name.
 */
export const getPromptAuthor = ($promptPage: PromptPage): Author | "Anonymous" => {
  const requestHeading = $promptPage("#main.prompts-show.dashboard.region > h2")
    .text()
    .trim();

  if (requestHeading === "Request by Anonymous") return "Anonymous";

  // Group 1: everything up to an optional "(" (the pseud, or the only name).
  // Group 2: everything inside the parentheses (the username), possibly empty.
  // Group 1 is deliberately NOT optional: in JavaScript an optional group that
  // matches the empty string is reported as `undefined`, which used to crash
  // the `.trim()` call for headings such as "Request by (user)".
  const pseudAndUser = /Request by ([^(]*)\(?([^)]*)/;
  const captures = pseudAndUser.exec(requestHeading);

  if (!captures) throw new Error("Could not evaluate Prompt Author String");

  const pseudText = (captures[1] ?? "").trim();
  const userText = (captures[2] ?? "").trim();

  if (pseudText === "" && userText === "") {
    throw new Error("Could not evaluate Prompt Author String");
  }

  // Each name falls back to the other when only one of them was found.
  // The assertion is kept from the original; the full shape of Author is
  // declared elsewhere and is not visible here.
  return {
    username: userText || pseudText,
    pseud: pseudText || userText,
  } as Author;
};
|
||
/**
 * Collects the fandom tags listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The trimmed text of every `a.tag` inside the first
 *   `h5.fandoms.heading`, in page order; empty when none are found.
 */
export const getPromptFandoms = ($promptPage: PromptPage): string[] => {
  const fandoms: string[] = [];

  $promptPage("h5.fandoms.heading:first a.tag").each((_index, fandomLink) => {
    fandoms.push($promptPage(fandomLink).text().trim());
  });

  return fandoms;
};
|
||
/**
 * Collects the additional (freeform) tags listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The trimmed text of every `li.freeforms a.tag` inside the first
 *   `ul.tags.commas`, in page order; empty when none are found.
 */
export const getPromptAdditionalTags = ($promptPage: PromptPage): string[] => {
  const freeformTags: string[] = [];

  // Uses the element argument rather than `this`, matching the sibling getters.
  $promptPage("ul.tags.commas:first li.freeforms a.tag").each((_index, tagLink) => {
    freeformTags.push($promptPage(tagLink).text().trim());
  });

  return freeformTags;
};
|
||
/**
 * Collects the warning tags listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The warnings found under `li.warnings` in the first
 *   `ul.tags.commas`. When none are listed, the result is
 *   `["Creator Chose Not To Use Archive Warnings"]`.
 * @throws Error when a listed warning is not a value of `WorkWarnings`.
 */
export const getPromptWarnings = ($promptPage: PromptPage): WorkWarnings[] => {
  const warnings: WorkWarnings[] = [];

  $promptPage("ul.tags.commas:first li.warnings a.tag").each((_index, warningLink) => {
    const warningText = $promptPage(warningLink).text().trim();
    if (!Object.values(WorkWarnings).includes(warningText as WorkWarnings)) {
      throw new Error("An unknown warning was found on the page");
    }
    warnings.push(warningText as WorkWarnings);
  });

  // No warning tags on the page: fall back to the default warning value.
  if (warnings.length === 0) {
    return ["Creator Chose Not To Use Archive Warnings"] as WorkWarnings[];
  }
  return warnings;
};
|
||
/**
 * Collects the character tags listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The trimmed text of every `li.characters a.tag` inside the first
 *   `ul.tags.commas`, in page order; empty when none are found.
 */
export const getPromptCharacters = ($promptPage: PromptPage): string[] => {
  const characters: string[] = [];

  $promptPage("ul.tags.commas:first li.characters a.tag").each((_index, characterLink) => {
    characters.push($promptPage(characterLink).text().trim());
  });

  return characters;
};
|
||
/**
 * Collects the relationship tags listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The trimmed text of every `li.relationships a.tag` inside the first
 *   `ul.tags.commas`, in page order; empty when none are found.
 */
export const getPromptRelationships = ($promptPage: PromptPage): string[] => {
  const relationships: string[] = [];

  $promptPage("ul.tags.commas:first li.relationships a.tag").each((_index, shipLink) => {
    relationships.push($promptPage(shipLink).text().trim());
  });

  return relationships;
};
|
||
/**
 * Reads the claims made on a prompt.
 *
 * Three shapes are produced, depending on the text of `.commas.index.group`:
 * - empty text: `{ count: 0 }`;
 * - text containing "<number> anonymous": `{ count, isAnonCollection: true }`;
 * - anything else: the claimants listed under `div.claims li`, as
 *   `{ count, isAnonCollection: false, claimantUsernames }`.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The claim information in one of the shapes above.
 * @throws Error when named claims are expected but no `div.claims li` entries
 *   exist, or when the anonymous claim count is not a number.
 */
export const getPromptClaims = ($promptPage: PromptPage): Prompt["claims"] => {
  const claimsText = $promptPage(".commas.index.group").text().trim();

  // There are no claims.
  if (claimsText === "") return { count: 0 } as Prompt["claims"];

  const anonymousMatch = /([^ ]*) anonymous/.exec(claimsText);

  if (!anonymousMatch) {
    // No anonymous claims, so read the named claimants instead.
    const claimantUsernames: string[] = [];
    const claimItems = $promptPage("div.claims li");

    claimItems.each((_index, claimItem) => {
      claimantUsernames.push($promptPage(claimItem).text().trim());
    });

    if (claimItems.length === 0) throw new Error("Could not process Claimants");
    return {
      count: claimItems.length,
      isAnonCollection: false,
      claimantUsernames,
    } as Prompt["claims"];
  }

  // There are anonymous claims; the count may use thousands separators.
  const countText = (anonymousMatch[1] ?? "").replace(/,/g, "");
  const anonCount = Number(countText);
  // Number("") is 0, so an empty capture has to be rejected explicitly.
  if (countText === "" || Number.isNaN(anonCount)) {
    throw new Error("Error processing prompt anonymous claim count: NaN");
  }

  return { count: anonCount, isAnonCollection: true } as Prompt["claims"];
};
|
||
/**
 * Reads the categories listed for a prompt.
 *
 * @param $promptPage - Loaded prompt page, invoked as a selector function.
 * @returns The category names from the first `span.category`, split on ", ",
 *   or `null` when the page shows "No category" or no category text at all.
 */
export const getPromptCategories = ($promptPage: PromptPage): Prompt["categories"] => {
  const categoriesText = $promptPage("span.category:first span.text").text().trim();

  // An empty string would otherwise split into [""], which is not a category.
  if (categoriesText === "" || categoriesText === "No category") return null;

  return categoriesText.split(", ") as WorkCategory[];
};
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,3 +57,45 @@ export const getWorkDetailsFromUrl = ({ | |
collectionName: url.match(/collections\/(\w+)/)?.[1], | ||
}; | ||
}; | ||
|
||
|
||
export const getPromptDetailsFromUrl = ({ | ||
// defining the input structure, eg. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might work better as a single function-level comment. You could write it as: /**
* Extracts promptId and collectionName from the URL of a collection.
*
* For example, given the url "https://archiveofourown.org/collections/mo_dao_zu_shi_kink_meme_2020/prompts/2644428"
* this function will return:
* - promptId: 2644428
* - collectionName: mo_dao_zu_shi_kink_meme_2020
**/
export const getPromptDetailsFromUrl = ({ url,
}: {
url: string;
}): {
promptId: string;
collectionName: string
} => { |
||
// url: "https://archiveofourown.org/collections/mo_dao_zu_shi_kink_meme_2020/prompts/2644428" | ||
url, | ||
}: { | ||
url: string; | ||
}): { | ||
//defining the form of the return value/output | ||
//collection name is the url name, not the Display Title | ||
promptId: string; | ||
collectionName: string | ||
} => { | ||
const promptUrlMatch = url.match(/prompts\/(\d+)/); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. You might want to merge these in a single regex test, something like: const collectionUrlMatch = url.match(/collections\/(\w+)\/prompts\/(\d+)/);
if (!collectionUrlMatch) {
throw new Error("Invalid prompt URL");
}
return {
promptId: collectionUrlMatch[2],
collectionName: collectionUrlMatch[1]
}; Double-check that I'm extracting the groups correctly, cause I always forget the syntax. |
||
if (!promptUrlMatch) { | ||
throw new Error("Invalid prompt URL"); | ||
} | ||
|
||
const collectionMatch = url.match(/collections\/(\w+)/); | ||
if (!collectionMatch) { | ||
throw new Error("Invalid prompt URL"); | ||
} | ||
|
||
return { | ||
promptId: promptUrlMatch[1], | ||
collectionName: collectionMatch[1] | ||
}; | ||
}; | ||
|
||
export const getPromptUrl = ({ | ||
promptId, | ||
collectionName | ||
}:{ | ||
promptId: string, | ||
collectionName:string | ||
})=>{ | ||
let workUrl = `https://archiveofourown.org`; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can do this with a single return statement like return `https://archiveofourown.org/collections/${collectionName}/prompts/${promptId}` |
||
workUrl += `/collections/${collectionName}`; | ||
workUrl += `/prompts/${promptId}`; | ||
return workUrl; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please revert this change, we'll add documentation on how to do it without needing to add a new script.