FujoWebDev · rabpitpie · Feb 23, 2024 · Feb 23, 2024 · Feb 23, 2024 · Feb 23, 2024
diff --git a/README.md b/README.md
@@ -201,7 +201,7 @@ To identify what we're missing (and tour the codebase as a bonus):
 
 1. Take a look at our [TypeScript types](./types/entities.ts). If the data you seek is not there, we most likely can't scrape it (yet).
 2. See if there's already [an open issue](https://github.com/essential-randomness/ao3.js/issues/) for the type of data you seek.
-3. See if we're aready [scraping the page](./src/pages-loaders.ts) the data resides in.
+3. See if we're already [scraping the page](./src/page-loaders.ts) the data resides in.
 
 Congratulations, you now have your first contribution carved for you!
 

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -34,6 +34,7 @@
   "scripts": {
     "build": "rm -rf dist/ && tsup src/index.ts src/urls.ts --format esm,cjs --dts",
     "test": "NODE_OPTIONS=--experimental-vm-modules jest --no-cache .test.ts$ --verbose=true",
+    "test-prompts": "NODE_OPTIONS=--experimental-vm-modules jest --no-cache prompts.test.ts$ --verbose=true",
     "encode-mock-files": "ts-node-esm tests/mocks/scripts/encode-mock-files.mts"
   },
   "repository": "https://github.com/essential-randomness/AO3.js.git",

diff --git a/src/collections/prompts/index.ts b/src/collections/prompts/index.ts
@@ -0,0 +1,57 @@
+import {
+  Prompt,
+  WorkCategory
+} from "types/entities"
+
+import {
+  getPostedAt,
+  getPromptSummary,
+  getCollectionDisplayTitle,
+  getPromptRatings,
+  getPromptAuthor,
+  getPromptFandoms,
+  getPromptAdditionalTags,
+  getPromptCharacters,
+  getPromptRelationships,
+  getPromptWarnings,
+  getPromptClaims,
+  getPromptCategories
+} from "./prompt-getters"
+
+import {loadPromptPage} from "../../page-loaders"
+
+// Loads the page of one prompt in a collection and scrapes it into a `Prompt`.
+// `title`, `filled` and `fills` are still hard-coded placeholders, not page data.
+export const getPrompt = async ({
+  promptId,
+  collectionName,
+}: {
+  promptId: string;
+  collectionName: string;
+}): Promise<Prompt> => {
+  const page = await loadPromptPage({ id: promptId, collectionName }); //TODO
+  const prompt: Prompt = {
+    postedAt: getPostedAt(page),
+    summary: getPromptSummary(page),
+    collectionDisplayTitle: getCollectionDisplayTitle(page),
+    ratings: getPromptRatings(page),
+    author: getPromptAuthor(page),
+    fandoms: getPromptFandoms(page),
+    //TODO needs work:
+    tags: {
+      warnings: getPromptWarnings(page),
+      characters: getPromptCharacters(page),
+      relationships: getPromptRelationships(page),
+      additional: getPromptAdditionalTags(page),
+    },
+    //TODO:
+    claims: getPromptClaims(page),
+    title: "TODO",
+    collectionName,
+    id: promptId,
+    filled: true,
+    fills: [],
+    categories: getPromptCategories(page),
+  };
+  return prompt;
+};
diff --git a/src/collections/prompts/prompt-getters.ts b/src/collections/prompts/prompt-getters.ts
@@ -0,0 +1,160 @@
+import {
+  Author,
+  WorkRatings,
+  WorkWarnings,
+  Prompt,
+  WorkCategory
+} from "types/entities"
+import { PromptPage } from "../../page-loaders";
+
+// Reads the text of the first `p.datetime` element on the prompt page.
+// The text is returned exactly as extracted (no trimming or date parsing).
+export const getPostedAt = ($promptPage: PromptPage): string => {
+  const postedAt = $promptPage("p.datetime:first").text();
+  return postedAt;
+};
+
+// Inner HTML of the summary blockquote, trimmed (it usually starts and ends with
+// a "\n"); null when the blockquote is missing or its HTML is empty.
+export const getPromptSummary = ($promptPage: PromptPage): string | null => {
+  const summaryHtml = $promptPage("blockquote.userstuff.summary").html();
+  return !summaryHtml ? null : summaryHtml.trim();
+};
+
+// Display title of the collection, taken from the trimmed text of `h2.collections`.
+export const getCollectionDisplayTitle = ($promptPage: PromptPage): string => {
+  const heading = $promptPage("h2.collections");
+  return heading.text().trim();
+};
+
+// Collects the ratings shown in the first `ul.required-tags` block of the page.
+//
+// The page lists ratings as one comma-separated string. Every entry must be a
+// known `WorkRatings` value, otherwise an error naming the offending entry is
+// thrown. The literal text "No rating" is reported as ["Not Rated"].
+export const getPromptRatings = ($promptPage: PromptPage): WorkRatings[] => {
+  const ratingsText = $promptPage("ul.required-tags:first span.rating").text().trim();
+  // An unrated prompt displays "No rating" instead of a list of ratings.
+  if (ratingsText === "No rating") {
+    return ["Not Rated"] as WorkRatings[];
+  }
+
+  const knownRatings = Object.values(WorkRatings);
+  const ratings = ratingsText.split(', ');
+  for (const rating of ratings) {
+    if (!knownRatings.includes(rating as WorkRatings)) {
+      throw new Error("An unknown rating was found on the page: \""+ rating+"\"");
+    }
+  }
+
+  return ratings as WorkRatings[];
+};
+
+// Extracts the requester from the page heading, which is expected to read
+// "Request by Anonymous", "Request by pseud (username)" or "Request by username".
+// Throws when the heading does not name a requester.
+export const getPromptAuthor = ($promptPage: PromptPage): Author | "Anonymous"=> {
+  const requestHeading = $promptPage("#main.prompts-show.dashboard.region > h2").text().trim();
+
+  if (requestHeading === "Request by Anonymous") return "Anonymous";
+
+  // Group 1: text before an optional "(" (the pseud); group 2: text inside the parentheses.
+  // Matched once, so the stateful `g` flag is not needed.
+  const captures = /Request by ([^\(]*)?\(?([^\)]*)/.exec(requestHeading);
+  // An optional group that matched nothing is `undefined`, e.g. for "Request by (name)".
+  const pseud = captures?.[1]?.trim();
+  if (!captures || pseud === undefined) throw new Error("Could not evaluate Prompt Author String");
+
+  // Without a parenthesised username the pseud doubles as the username.
+  const user = captures[2] ? captures[2] : pseud;
+
+  return { username: user, pseud: pseud } as Author;
+};
+
+// Lists the fandom tag names linked under the first `h5.fandoms.heading`
+// element, in selection order.
+export const getPromptFandoms = ($promptPage: PromptPage): string[] => {
+  const fandomLinks = $promptPage("h5.fandoms.heading:first a.tag").toArray();
+
+  // Each link's text is the tag name; surrounding whitespace is dropped.
+  return fandomLinks.map((link) => $promptPage(link).text().trim());
+};
+
+// Lists the freeform ("additional") tag names from the first `ul.tags.commas` list.
+export const getPromptAdditionalTags = ($promptPage: PromptPage): string[] => {
+  const freeformLinks = $promptPage("ul.tags.commas:first li.freeforms a.tag").toArray();
+
+  // Each link's text is the tag name; surrounding whitespace is dropped.
+  return freeformLinks.map((link) => $promptPage(link).text().trim());
+};
+
+// Collects the archive warnings tagged in the first `ul.tags.commas` list.
+//
+// Each warning must be a known `WorkWarnings` value, otherwise an error is thrown.
+// When the list holds no warning tags at all, the result defaults to
+// ["Creator Chose Not To Use Archive Warnings"].
+export const getPromptWarnings = ($promptPage: PromptPage): WorkWarnings[] => {
+  const knownWarnings = Object.values(WorkWarnings);
+  const warnings = $promptPage("ul.tags.commas:first li.warnings a.tag").toArray().map((link) => {
+    const warning = $promptPage(link).text().trim();
+    if (knownWarnings.includes(warning as WorkWarnings)) return warning as WorkWarnings;
+    throw new Error("An unknown warning was found on the page");
+  });
+  return warnings.length === 0 ? (["Creator Chose Not To Use Archive Warnings"] as WorkWarnings[]) : warnings;
+};
+
+// Lists the character tag names from the first `ul.tags.commas` list, in
+// selection order.
+export const getPromptCharacters = ($promptPage: PromptPage): string[] => {
+  const characterLinks = $promptPage("ul.tags.commas:first li.characters a.tag").toArray();
+
+  // Each link's text is the tag name; surrounding whitespace is dropped.
+  return characterLinks.map((link) => $promptPage(link).text().trim());
+};
+
+// Lists the relationship tag names from the first `ul.tags.commas` list, in
+// selection order.
+export const getPromptRelationships = ($promptPage: PromptPage): string[] => {
+  const relationshipLinks = $promptPage("ul.tags.commas:first li.relationships a.tag").toArray();
+
+  // Each link's text is the tag name; surrounding whitespace is dropped.
+  return relationshipLinks.map((link) => $promptPage(link).text().trim());
+};
+
+// Summarises who has claimed the prompt.
+//
+// - no text in `.commas.index.group`: `{count: 0}`
+// - text contains "<count> anonymous": the count (with "," separators removed)
+//   is reported with `isAnonCollection: true`
+// - otherwise: the trimmed text of every `div.claims li` is reported as a claimant
+//   username with `isAnonCollection: false`
+// Throws when the anonymous count is not a number or no claimant item is found.
+export const getPromptClaims = ($promptPage: PromptPage): Prompt["claims"] => {
+  const claimsText = $promptPage(".commas.index.group").text().trim();
+  if (claimsText === "") return {count: 0} as Prompt["claims"];
+
+  const anonMatch = /([^ ]*) anonymous/g.exec(claimsText);
+  if (anonMatch) {
+    // The word right before " anonymous" is the claim count, possibly written like "1,234".
+    const anonCount = Number(anonMatch[1].replace(/,/g, ""));
+    if (Number.isNaN(anonCount)) throw new Error("Error processing prompt anonymous claim count: NaN");
+    return {count: anonCount, isAnonCollection: true} as Prompt["claims"];
+  }
+
+  // No anonymous claims: every list item names one claimant.
+  const claimItems = $promptPage("div.claims li").toArray();
+  if (claimItems.length === 0) throw new Error("Could not process Claimants");
+
+  const claimants = claimItems.map((item) => $promptPage(item).text().trim());
+  return {count: claimItems.length, isAnonCollection: false, claimantUsernames: claimants} as Prompt["claims"];
+};
+
+// Reads the categories from the first `span.category` of the page, which lists
+// them as one ", "-separated string.
+// Returns null for the literal text "No category" and also when no category text
+// was found at all (splitting "" would otherwise yield the bogus entry [""]).
+export const getPromptCategories = ($promptPage: PromptPage): Prompt["categories"] => {
+  const categoriesString = $promptPage("span.category:first span.text").text().trim();
+  if (categoriesString === "" || categoriesString === "No category") return null;
+  return categoriesString.split(", ") as WorkCategory[];
+};
diff --git a/src/index.ts b/src/index.ts
@@ -2,5 +2,6 @@ export * from "./users";
 export * from "./tags";
 export * from "./works";
 export * from "./series";
+export * from "./collections/prompts";
 
 export { setFetcher } from "./fetcher";
diff --git a/src/page-loaders.ts b/src/page-loaders.ts
@@ -4,6 +4,7 @@ import {
   getTagWorksFeedUrl,
   getUserProfileUrl,
   getWorkUrl,
+  getPromptUrl
 } from "./urls";
 
 import { CheerioAPI } from "cheerio";
@@ -14,6 +15,18 @@ import { getFetcher } from "./fetcher";
 // correct type of page is passed to each method that extracts data.
 // Other than this, all pages are instances of CheerioAPI and can be used interchangeably.
 
+// A page showing a single, specified prompt from a particular collection
+// Sample: https://archiveofourown.org/collections/mo_dao_zu_shi_kink_meme_2020/prompts/2644428
+export interface PromptPage extends CheerioAPI {
+  kind: "PromptPage"; // literal tag that tells this page type apart from the other CheerioAPI page types
+}
+// Fetches a prompt's page through the configured fetcher and parses its text with `load`.
+export const loadPromptPage = async ({ id, collectionName }: { id: string, collectionName: string }) => {
+  const response = await getFetcher()(getPromptUrl({ promptId: id, collectionName }));
+  return load(await response.text()) as PromptPage;
+};
+
+
 // A page showing the most recent works featuring a tag.
 // Sample: https://archiveofourown.org/tags/Git%20(The%20Fujoshi%20Guide%20to%20Web%20Development)/works
 export interface TagWorksFeed extends CheerioAPI {

diff --git a/src/urls.ts b/src/urls.ts
@@ -57,3 +57,45 @@ export const getWorkDetailsFromUrl = ({
     collectionName: url.match(/collections\/(\w+)/)?.[1],
   };
 };
+
+
+// Extracts the prompt id and the collection's URL name from a prompt URL such as
+// "https://archiveofourown.org/collections/mo_dao_zu_shi_kink_meme_2020/prompts/2644428".
+//
+// The id is the run of digits after "prompts/"; the collection name is the run of
+// word characters after "collections/" (the name used in URLs, not the display title).
+// Throws "Invalid prompt URL" when either part is missing.
+export const getPromptDetailsFromUrl = ({
+  url,
+}: {
+  url: string;
+}): {
+  promptId: string;
+  collectionName: string;
+} => {
+  // The prompt id is checked first, then the collection name.
+  const promptId = /prompts\/(\d+)/.exec(url)?.[1];
+  if (promptId === undefined) {
+    throw new Error("Invalid prompt URL");
+  }
+
+  const collectionName = /collections\/(\w+)/.exec(url)?.[1];
+  if (collectionName === undefined) {
+    throw new Error("Invalid prompt URL");
+  }
+
+  return { promptId, collectionName };
+};
+
+// Builds the archiveofourown.org URL of a prompt inside a collection:
+//   https://archiveofourown.org/collections/<collectionName>/prompts/<promptId>
+// Both values are inserted verbatim (no URL encoding is applied).
+export const getPromptUrl = ({
+  promptId,
+  collectionName,
+}: {
+  promptId: string;
+  collectionName: string;
+}) => {
+  return ["https://archiveofourown.org", "collections", collectionName, "prompts", promptId].join("/");
+};