Skip to content

Commit

Permalink
Recursive summarization
Browse files Browse the repository at this point in the history
  • Loading branch information
sawyerh authored Dec 29, 2022
1 parent f2d5cab commit fb4bd92
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 84 deletions.
81 changes: 0 additions & 81 deletions index.js

This file was deleted.

22 changes: 21 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"version": "1.0.0",
"type": "module",
"scripts": {
"x": "node index.js"
"x": "node src/index.js"
},
"engines": {
"node": ">=18.0.0"
Expand All @@ -20,6 +20,7 @@
},
"devDependencies": {
"@types/jsdom": "^20.0.1",
"@types/node": "^18.11.18"
"@types/node": "^18.11.18",
"typescript": "^4.9.4"
}
}
54 changes: 54 additions & 0 deletions src/cli-prompts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import inquirer from "inquirer";

// Label of the extra list entry that lets the user type their own prompts.
const customPromptChoice = "[Custom prompt]";

/**
 * Built-in prompts offered in the CLI.
 *
 * - `prompt` is run against each chunk of the page's content.
 * - `combinationPrompt` is run against the joined per-chunk responses when
 *   the content had to be split into several chunks.
 *
 * Neither string ends with a colon: getCompletion (src/completions.js)
 * appends ":" itself when it builds the request text, so a trailing colon
 * here would be sent as "::".
 */
const prompts = [
  {
    prompt: "Summarize this",
    combinationPrompt: "Combine these summaries into an overall summary",
  },
  {
    prompt: "List 10 key takeaways",
    combinationPrompt:
      "Combine these takeaways into an overall list of 10 key takeaways",
  },
  {
    prompt: "List all entities, grouped by type or category",
    combinationPrompt:
      "Combine these lists of entities, but preserve the grouping",
  },
  {
    prompt: "Write an abstract for this",
    combinationPrompt: "Write an abstract for this",
  },
];

/**
 * Interactively ask which prompt to run.
 *
 * The user picks one of the built-in prompts or the custom entry; the two
 * free-text questions are only asked when the custom entry was picked.
 *
 * @returns an object with the `prompt` to run against the content and the
 *   `combinationPrompt` to run against combined responses. For a built-in
 *   prompt the combination prompt is looked up in `prompts`.
 */
export async function runCliPrompts() {
  const pickedCustom = (given) => given.prompt === customPromptChoice;
  const builtInChoices = prompts.map((entry) => entry.prompt);

  const questions = [
    {
      type: "list",
      name: "prompt",
      message: "Select prompt:",
      choices: [...builtInChoices, customPromptChoice],
    },
    {
      type: "input",
      name: "customPrompt",
      message: "Custom prompt (e.g 'Summarize this')",
      when: pickedCustom,
    },
    {
      type: "input",
      name: "customCombinationPrompt",
      message:
        "Custom combination prompt (e.g 'Combine these summaries into an overall summary')",
      when: pickedCustom,
    },
  ];

  const answers = await inquirer.prompt(questions);

  // A custom answer, when present, takes precedence over the list selection.
  const prompt = answers.customPrompt ?? answers.prompt;

  let combinationPrompt = answers.customCombinationPrompt;
  combinationPrompt ??= prompts.find(
    (entry) => entry.prompt === prompt
  )?.combinationPrompt;

  return { prompt, combinationPrompt };
}
102 changes: 102 additions & 0 deletions src/completions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { Configuration, OpenAIApi } from "openai";
import cliSpinners from "cli-spinners";
import { oraPromise } from "ora";
import { logger } from "./logger.js";

/**
 * Send one completion request to the OpenAI API.
 *
 * The API key is read from the `OPENAI_KEY` environment variable and a new
 * client is created on every call.
 *
 * @param {string} prompt - Full text sent as the completion prompt
 * @param {"text-davinci-003"|"text-curie-001"} model - Davinci is the most powerful model, but it's also the most expensive
 * @returns whatever `openai.createCompletion` resolves with; callers in this
 *   file read the generated text from `.data.choices[0].text`
 */
async function createCompletion(prompt, model = "text-davinci-003") {
  const configuration = new Configuration({
    apiKey: process.env.OPENAI_KEY,
  });
  const client = new OpenAIApi(configuration);

  const request = {
    prompt,
    model,
    // 0.1 provides more straightforward and consistent responses. Higher numbers provides more diverse responses.
    temperature: 0.1,
    max_tokens: 500,
  };

  const response = await client.createCompletion(request);
  return response;
}

/**
 * Break the page's content into chunks made of whole sentences, so that a
 * single request doesn't exceed the API's max token limit.
 *
 * Sentences (split after ".", "?" or "!" followed by whitespace) are packed
 * greedily: a chunk is closed as soon as adding the next sentence would push
 * it past `maxChunkSize`. Every sentence ends up in exactly one chunk,
 * including the sentences left over after the last full chunk.
 *
 * A single sentence longer than `maxChunkSize` is not split; it becomes a
 * chunk of its own that is larger than the limit.
 *
 * @param {string} content - Text to split
 * @param {number} [maxChunkSize] - Maximum characters per chunk. Defaults to
 *   3500 * 4, i.e. about 3500 tokens at ~4 characters per token
 * @returns {string[]} `[content]` unchanged when everything fits in a single
 *   chunk; otherwise the chunks in order, each sentence followed by one space
 */
function chunkTheContent(content, maxChunkSize = 3500 * 4) {
  const chunks = [];
  let chunk = "";

  for (const sentence of content.split(/(?<=[.?!])\s+/)) {
    const wouldOverflow = chunk.length + sentence.length > maxChunkSize;
    // Never close an empty chunk: an oversized first sentence would
    // otherwise produce a "" chunk and a wasted API request.
    if (wouldOverflow && chunk.length > 0) {
      chunks.push(chunk);
      chunk = "";
    }
    chunk += `${sentence} `;
  }

  if (chunks.length === 0) return [content];

  // Flush the sentences collected after the last full chunk. Skip a
  // whitespace-only remainder, which happens when content ends in whitespace.
  if (chunk.trim().length > 0) chunks.push(chunk);

  return chunks;
}

/**
 * Run `prompt` against `content` and return the generated text.
 *
 * The content is split into chunks and one completion is requested per chunk
 * (at most 40), all started concurrently. If the content produced a single
 * chunk, that chunk's response text is returned directly. Otherwise the
 * per-chunk texts are joined and `combinationPrompt` is run against them to
 * produce the final text, and a warning explaining this is logged.
 *
 * @param {object} options
 * @param {string} options.content - Text to run the prompt against
 * @param {string} options.prompt - Prompt applied to every chunk
 * @param {string} options.combinationPrompt - Prompt applied to the joined chunk responses
 * @returns the text of the first choice of the final completion
 */
export async function getCompletion({ content, prompt, combinationPrompt }) {
  const chunks = chunkTheContent(content);
  // limit to 40 chunks to avoid excessive API usage
  const limitedChunks = chunks.slice(0, 40);

  const pendingRequests = limitedChunks.map((chunk, index) => {
    const requestChunk = async () => ({
      index,
      response: await createCompletion(`${prompt}:\n\n###${chunk}\n\n###`),
    });

    return oraPromise(requestChunk, {
      spinner: cliSpinners.earth,
      text: "Generating response...",
    });
  });

  const settledRequests = await Promise.all(pendingRequests);
  // Preserve the order of the content completions
  settledRequests.sort((first, second) => first.index - second.index);
  const texts = settledRequests.map(
    ({ response }) => response.data.choices[0].text
  );

  if (chunks.length === 1) {
    return texts[0];
  }

  /**
   * Do one final completion against the combination of all the completions
   */
  const combinedCompletions = texts.join("\n----\n");
  const combinationRequest = createCompletion(
    `${combinationPrompt}:\n\n###${combinedCompletions}\n\n###`
  );
  const finalCompletion = await oraPromise(combinationRequest, {
    spinner: cliSpinners.moon,
    text: "Combining responses...",
  });

  logger.warn(
    `Since the page's content was so long, the following response is formed by running a combination prompt against a series of responses (${chunks.length}) to smaller chunks of the content.`
  );

  return finalCompletion.data.choices[0].text;
}
33 changes: 33 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { config } from "dotenv";
import { runCliPrompts } from "./cli-prompts.js";
import { parseUrl } from "./parse-url.js";
import { logger } from "./logger.js";
import { getCompletion } from "./completions.js";

// Load .env file
config();

/**
 * 1. Get the main content of the URL
 */
const { textContent, title } = await parseUrl();
// Collapse newlines so the content is a single line of text.
const content = textContent.replace(/\n/g, " ");
logger.info(title);

/**
 * 2. Get the prompt from the user
 */
const promptSelection = await runCliPrompts();

/**
 * 3. Run the prompt against the URL's content
 */
const completion = await getCompletion({
  content,
  prompt: promptSelection.prompt,
  combinationPrompt: promptSelection.combinationPrompt,
});

logger.success("Response ⤵️ ");

logger.log(completion);
12 changes: 12 additions & 0 deletions src/logger.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import * as colors from "yoctocolors";

/**
 * Print `message` padded with one space on each side, styled by `paint`,
 * followed by a blank line.
 */
const printBadge = (paint, message) => {
  console.log(`${paint(` ${message} `)}\n`);
};

/**
 * Console logger with colored output. Every level writes through
 * `console.log`; all levels except `log` add a trailing blank line.
 */
export const logger = {
  /** Plain, unstyled output. */
  log(message) {
    console.log(message);
  },
  /** Black text on a cyan background. */
  info(message) {
    printBadge((text) => colors.bgCyan(colors.black(text)), message);
  },
  /** White text on a red background. */
  error(message) {
    printBadge((text) => colors.bgRed(colors.white(text)), message);
  },
  /** Black text on a green background. */
  success(message) {
    printBadge((text) => colors.bgGreen(colors.black(text)), message);
  },
  /** Yellow text, no background or padding. */
  warn(message) {
    console.log(`${colors.yellow(message)}\n`);
  },
};
19 changes: 19 additions & 0 deletions src/parse-url.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { JSDOM } from "jsdom";
import { Readability } from "@mozilla/readability";

/**
 * Load the URL given as the first CLI argument (`process.argv[2]`) and
 * extract its article with Readability.
 *
 * Prints an error and exits the process with code 1 when no URL was passed
 * or when Readability returns no article.
 *
 * @returns the article object produced by `Readability#parse`
 */
export async function parseUrl() {
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };

  const [, , url] = process.argv;
  if (!url) {
    fail("Pass a URL as the last argument");
  }

  const { window } = await JSDOM.fromURL(url);
  const reader = new Readability(window.document);
  const article = reader.parse();

  if (!article) {
    fail("Couldn't parse the URL");
  }

  return article;
}
12 changes: 12 additions & 0 deletions tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"compilerOptions": {
"outDir": "./lib",
"allowJs": true,
"noImplicitAny": false,
"checkJs": true,
"forceConsistentCasingInFileNames": true,
"module": "NodeNext",
"strict": true,
"target": "ES2022"
}
}

0 comments on commit fb4bd92

Please sign in to comment.