From 8d76ea620b8e5f0b45500ca45afd5ce8d6c14a7d Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Wed, 25 Sep 2024 00:42:28 -0500 Subject: [PATCH 1/6] fix docker commands and refactor callAssembly --- docs/examples.md | 9 +++---- src/autoshow.js | 15 ++--------- src/llms/chatgpt.js | 2 +- src/llms/claude.js | 2 +- src/llms/cohere.js | 2 +- src/llms/gemini.js | 2 +- src/llms/llama.js | 8 +++--- src/llms/mistral.js | 2 +- src/llms/prompt.js | 2 +- src/transcription/assembly.js | 38 ++++++++++++++------------- src/transcription/whisper.js | 48 ++++++++++++++++++++--------------- src/utils/cleanUpFiles.js | 8 +++--- src/utils/runLLM.js | 2 +- src/utils/runTranscription.js | 14 +++------- test/all.test.js | 4 +-- test/local.test.js | 2 +- 16 files changed, 74 insertions(+), 86 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index f3961ba..4ebcda0 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -243,7 +243,7 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly Include speaker labels and number of speakers: ```bash -npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speaker-labels --speakers-expected 2 +npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels ``` ### Whisper.cpp @@ -267,10 +267,10 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper me npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper large ``` -Run `whisper.cpp` in a Docker container with `--whisper-docker`: +Run `whisper.cpp` in a Docker container with `--whisperDocker`: ```bash -npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper-docker tiny +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base ``` ## Docker Compose @@ -278,8 +278,7 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper-do This will run both `whisper.cpp` and the AutoShow Commander CLI in their own Docker containers. ```bash -docker-compose up --build -d -docker-compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper-docker base +docker-compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base ``` Currently working on the `llama.cpp` Docker integration so the entire project can be encapsulated in one local Docker Compose file. diff --git a/src/autoshow.js b/src/autoshow.js index ff38713..0a9d65a 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -40,7 +40,6 @@ import { env } from 'node:process' * @property {boolean} [deepgram=false] - Use Deepgram for transcription. * @property {boolean} [assembly=false] - Use AssemblyAI for transcription. * @property {boolean} [speakerLabels=false] - Use speaker labels for AssemblyAI transcription. - * @property {number} [speakersExpected=1] - Number of expected speakers for AssemblyAI transcription. * @property {boolean} [noCleanUp=false] - Do not delete intermediary files after processing. 
*/ @@ -61,11 +60,10 @@ program .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)', 'newest') .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt, 0) .option('--whisper [modelType]', 'Use Whisper.cpp for transcription (non-Docker version)') - .option('--whisper-docker [modelType]', 'Use Whisper.cpp for transcription (Docker version)') + .option('--whisperDocker [modelType]', 'Use Whisper.cpp for transcription (Docker version)') .option('--deepgram', 'Use Deepgram for transcription') .option('--assembly', 'Use AssemblyAI for transcription') - .option('--speaker-labels', 'Use speaker labels for AssemblyAI transcription') - .option('--speakers-expected ', 'Number of expected speakers for AssemblyAI transcription', parseInt, 1) + .option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription') .option('--chatgpt [model]', 'Use ChatGPT for processing with optional model specification') .option('--claude [model]', 'Use Claude for processing with optional model specification') .option('--cohere [model]', 'Use Cohere for processing with optional model specification') @@ -199,15 +197,6 @@ const INQUIRER_PROMPT = [ when: (answers) => answers.transcriptOpt === 'assembly', default: false, }, - { - type: 'number', - name: 'speakersExpected', - message: 'How many speakers are expected?', - when: (answers) => answers.speakerLabels, - default: 1, - validate: (input) => - input > 0 && input <= 25 ? true : 'Please enter a number between 1 and 25.', - }, { type: 'checkbox', name: 'prompt', diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index d4eafeb..7421b6d 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -44,7 +44,7 @@ export async function callChatGPT(transcriptContent, outputFilePath, model = 'GP // Write the generated content to the output file await writeFile(outputFilePath, content) - console.log(`Transcript saved to ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) // console.log(`\nChatGPT response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nFinish Reason: ${finish_reason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens\n`) diff --git a/src/llms/claude.js b/src/llms/claude.js index 8f4afc2..296003f 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -45,7 +45,7 @@ export async function callClaude(transcriptContent, outputFilePath, model = 'CLA // Write the generated text to the output file await writeFile(outputFilePath, text) - console.log(`Transcript saved to ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) // console.log(`\nClaude response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nStop Reason: ${stop_reason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens\n`) diff --git a/src/llms/cohere.js b/src/llms/cohere.js index 6e9c765..912121d 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -42,7 +42,7 @@ export async function callCohere(transcriptContent, outputFilePath, model = 'COM // Write the generated text to the output file await writeFile(outputFilePath, text) - console.log(`Transcript saved to ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) // console.log(`\nCohere 
response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nFinish Reason: ${finishReason}\nModel: ${actualModel}`) console.log(`Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens\n`) diff --git a/src/llms/gemini.js b/src/llms/gemini.js index bd3ea6b..ac23617 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -53,7 +53,7 @@ export async function callGemini(transcriptContent, outputFilePath, model = 'GEM // Write the generated text to the output file await writeFile(outputFilePath, text) - console.log(`Transcript saved to ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) console.log(`\nModel: ${actualModel}`) // Return the model name used diff --git a/src/llms/llama.js b/src/llms/llama.js index 28582b7..3c1a006 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -84,11 +84,11 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { /** * Main function to call the local Llama model. * @param {string} promptAndTranscript - The combined prompt and transcript content. - * @param {string} tempPath - The temporary file path to save the output. + * @param {string} outputFilePath - The file path to save the output. * @param {string} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. * @returns {Promise} */ -export async function callLlama(promptAndTranscript, tempPath, modelName = 'GEMMA_2_2B_Q4_MODEL') { +export async function callLlama(promptAndTranscript, outputFilePath, modelName = 'GEMMA_2_2B_Q4_MODEL') { try { // Ensure the model is downloaded const modelPath = await downloadModel(modelName) @@ -103,8 +103,8 @@ export async function callLlama(promptAndTranscript, tempPath, modelName = 'GEMM // Generate a response and write the response to a temporary file const response = await session.prompt(promptAndTranscript) - await writeFile(tempPath, response) - console.log(`\nTranscript saved to:\n - ${tempPath}`) + await writeFile(outputFilePath, response) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) console.log(`\nModel used:\n - ${modelName}\n`) } catch (error) { // Log and re-throw any errors that occur during the process diff --git a/src/llms/mistral.js b/src/llms/mistral.js index 23e8f0f..8b0b320 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -43,7 +43,7 @@ export async function callMistral(transcriptContent, outputFilePath, model = 'MI // Write the generated content to the specified output file await writeFile(outputFilePath, content) - console.log(`Transcript saved to ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${outputFilePath}`) // Log finish reason, used model, and token usage console.log(`\nFinish Reason: ${finishReason}\nModel: ${usedModel}`) diff --git a/src/llms/prompt.js b/src/llms/prompt.js index 6a68991..8ee3650 100644 --- a/src/llms/prompt.js +++ b/src/llms/prompt.js @@ -112,7 +112,7 @@ const sections = { 7. What are the main considerations when choosing a database for a web application? 8. How do containerization technologies like Docker impact web development and deployment? 9. What role does responsive design play in modern web development? - 10. How can developers ensure the security of user data in web applications?`, + 10. 
How can developers ensure the security of user data in web applications?\n`, }, } diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index 8de4f55..76ae71c 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -9,26 +9,27 @@ const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) /** * Main function to handle transcription using AssemblyAI. - * @param {string} input - The audio file path or URL to transcribe. - * @param {string} id - The identifier used for naming output files. - * @param {boolean} [useSpeakerLabels=false] - Whether to use speaker labels. - * @param {number} [speakersExpected=1] - The expected number of speakers. - * @returns {Promise} + * @param {string} finalPath - The identifier used for naming output files. + * @param {string} transcriptOpt - The transcription service to use. + * @param {object} options - Additional options for processing. + * @returns {Promise} - Returns the formatted transcript content. */ -export async function callAssembly(input, id, useSpeakerLabels = false, speakersExpected = 1) { +export async function callAssembly(finalPath, transcriptOpt, options) { try { + const { speakerLabels } = options + console.log(`Parameters passed to callAssembly:`) + console.log(` - finalPath: ${finalPath}\n - transcriptOpt: ${transcriptOpt}\n - speakerLabels: ${speakerLabels}`) // Request transcription from AssemblyAI const transcript = await client.transcripts.transcribe({ - audio: input, // The audio file to transcribe - speech_model: 'nano', // Use the 'nano' speech model for transcription - ...(useSpeakerLabels && { // Conditionally add speaker labeling options + audio: `${finalPath}.wav`, // The audio file to transcribe + speech_model: 'nano', // Use the 'nano' speech model for transcription + ...(speakerLabels && { // Conditionally add speaker labeling options speaker_labels: true, - speakers_expected: Math.max(1, Math.min(speakersExpected, 25)) // Ensure speakers are between 1 and 25 }) }) // Initialize output string - let output = '' + let txtContent = '' // Helper function to format timestamps const formatTime = timestamp => { @@ -39,8 +40,8 @@ export async function callAssembly(input, id, useSpeakerLabels = false, speakers // Process the transcript based on whether utterances are available if (transcript.utterances) { // If utterances are available, format each utterance with speaker labels if used - output = transcript.utterances.map(utt => - `${useSpeakerLabels ? `Speaker ${utt.speaker} ` : ''}(${formatTime(utt.start)}): ${utt.text}` + txtContent = transcript.utterances.map(utt => + `${speakerLabels ? 
`Speaker ${utt.speaker} ` : ''}(${formatTime(utt.start)}): ${utt.text}` ).join('\n') } else if (transcript.words) { // If only words are available, group them into lines with timestamps @@ -49,7 +50,7 @@ export async function callAssembly(input, id, useSpeakerLabels = false, speakers transcript.words.forEach(word => { if (currentLine.length + word.text.length > 80) { // Start a new line if the current line exceeds 80 characters - output += `[${currentTimestamp}] ${currentLine.trim()}\n` + txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` currentLine = '' currentTimestamp = formatTime(word.start) } @@ -57,16 +58,17 @@ export async function callAssembly(input, id, useSpeakerLabels = false, speakers }) // Add the last line if there's any remaining text if (currentLine.length > 0) { - output += `[${currentTimestamp}] ${currentLine.trim()}\n` + txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` } } else { // If no structured data is available, use the plain text or a default message - output = transcript.text || 'No transcription available.' + txtContent = transcript.text || 'No transcription available.' } // Write the formatted transcript to a file - await writeFile(`${id}.txt`, output) - console.log('Transcript saved.') + await writeFile(`${finalPath}.txt`, txtContent) + console.log(`\nTranscript saved:\n - ${finalPath}.txt`) + return txtContent } catch (error) { // Log any errors that occur during the transcription process console.error('Error processing the transcription:', error) diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index adaed70..ac230f0 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -3,8 +3,7 @@ import { readFile, writeFile, access } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' -import { existsSync } from 'node:fs' -import path from 'node:path' +import { basename, join } from 'node:path' const execPromise = promisify(exec) @@ -32,7 +31,7 @@ export async function callWhisper(finalPath, transcriptOpt, options) { const modelName = WHISPER_MODELS[whisperModel] // Call the appropriate Whisper function based on the transcription service - await (transcriptOpt === 'whisper-docker' ? callWhisperDocker : callWhisperMain)(finalPath, modelName, whisperModel) + await (transcriptOpt === 'whisperDocker' ? callWhisperDocker : callWhisperMain)(finalPath, modelName, whisperModel) // Read, process, and format the generated LRC file const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') @@ -58,32 +57,37 @@ async function callWhisperDocker(finalPath, modelName, whisperModel) { const WHISPER_CONTAINER_NAME = 'autoshow-whisper-1' const CONTENT_DIR = '/app/content' const MODELS_DIR = '/app/models' - const modelPathHost = `./whisper.cpp/models/${modelName}` - - // Check if the model exists locally, download if not - if (!existsSync(modelPathHost)) { - console.log(`Model ${modelName} not found locally. Downloading...`) - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - console.log(`Model ${modelName} downloaded.`) - } + const modelPathContainer = `${MODELS_DIR}/${modelName}` // Check if the Whisper container is running, start it if not try { await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) - console.log('Whisper container is already running.') + console.log('\nWhisper container is already running.') } catch { - console.log('Whisper container is not running. 
Starting it...') + console.log('\nWhisper container is not running. Starting it...') await execPromise('docker-compose up -d whisper') console.log('Whisper container started successfully.') } + // Check if the model exists in the container, download if not + try { + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) + console.log(`\nWhisper.cpp ${whisperModel} model found:`) + console.log(` - ${modelName} model selected\n - Model located at ${modelPathContainer}`) + } catch { + console.log(`\nWhisper.cpp ${whisperModel} model not found in container:`) + console.log(` - ${modelName} model selected\n - Model downloading to ${modelPathContainer}`) + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${whisperModel}`) + console.log(` - Model downloaded successfully`) + } + // Execute Whisper transcription in the Docker container - const fileName = path.basename(finalPath) + const fileName = basename(finalPath) await execPromise( `docker exec ${WHISPER_CONTAINER_NAME} /app/main \ - -m ${path.join(MODELS_DIR, modelName)} \ - -f ${path.join(CONTENT_DIR, `${fileName}.wav`)} \ - -of ${path.join(CONTENT_DIR, fileName)} \ + -m ${modelPathContainer} \ + -f ${join(CONTENT_DIR, `${fileName}.wav`)} \ + -of ${join(CONTENT_DIR, fileName)} \ --output-lrc` ) console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) @@ -102,11 +106,13 @@ async function callWhisperMain(finalPath, modelName, whisperModel) { // Check if the model exists locally, download if not try { await access(modelPath) - console.log(`\nWhisper model found:\n - ${modelPath}`) + console.log(`\nWhisper.cpp ${whisperModel} model found:`) + console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`) } catch { - console.log(`\nWhisper model not found: ${modelPath}\n - Downloading model: ${whisperModel}`) + console.log(`\nWhisper.cpp ${whisperModel} model not found:`) + console.log(` - ${modelName} model selected\n - Model downloading to ${modelPath}`) await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - console.log(` - Model downloaded: ${modelPath}`) + console.log(` - Model downloaded successfully`) } /** @@ -115,5 +121,5 @@ async function callWhisperMain(finalPath, modelName, whisperModel) { await execPromise( `./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc` ) - console.log(`Whisper.cpp Model Selected:\n - ${modelPath}\n\nTranscript LRC file completed:\n - ${finalPath}.lrc`) + console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) } \ No newline at end of file diff --git a/src/utils/cleanUpFiles.js b/src/utils/cleanUpFiles.js index 4b44781..512f25c 100644 --- a/src/utils/cleanUpFiles.js +++ b/src/utils/cleanUpFiles.js @@ -17,10 +17,6 @@ export async function cleanUpFiles(id) { await unlink(`${id}.wav`) console.log(` - ${id}.wav`) - // Remove .lrc file - await unlink(`${id}.lrc`) - console.log(` - ${id}.lrc`) - // Remove .txt file await unlink(`${id}.txt`) console.log(` - ${id}.txt`) @@ -28,6 +24,10 @@ export async function cleanUpFiles(id) { // Remove .md file await unlink(`${id}.md`) console.log(` - ${id}.md`) + + // Remove .lrc file + await unlink(`${id}.lrc`) + console.log(` - ${id}.lrc`) } catch (error) { // If the error is not "file not found", log the error if (error.code !== 'ENOENT') { diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index bf7b1a7..45e9995 100644 --- a/src/utils/runLLM.js +++ 
b/src/utils/runLLM.js @@ -53,7 +53,7 @@ export async function runLLM(finalPath, frontMatter, llmOpt, options) { // Remove the temporary file await unlink(tempPath) - console.log(`Updated markdown file: ${finalPath}-${llmOpt}-shownotes.md`) + console.log(`Updated markdown file:\n - ${finalPath}-${llmOpt}-shownotes.md`) } else { // If no LLM is selected, just write the prompt and transcript await writeFile(`${finalPath}-prompt.md`, `${frontMatter}\n${promptAndTranscript}`) diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index 1b93b92..d84ef63 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -8,7 +8,6 @@ import { callAssembly } from '../transcription/assembly.js' /** * @typedef {Object} transcriptOptions * @property {boolean} [speakerLabels=false] - Whether to use speaker labels. - * @property {number} [speakersExpected=1] - The expected number of speakers. * @property {string[]} [prompt] - Sections to include in the prompt. * @property {string} [whisper] - Whisper model type. * @property {string} [whisperDocker] - Whisper model type for Docker. @@ -42,18 +41,11 @@ export async function runTranscription( break case 'assembly': - // Use AssemblyAI for transcription, pass options for speaker labels and number of speakers - await callAssembly( - `${finalPath}.wav`, - finalPath, - options.speakerLabels, - options.speakersExpected - ) - // Read the transcription result - txtContent = await readFile(`${finalPath}.txt`, 'utf8') + // Use AssemblyAI for transcription and pass option for speaker labels + txtContent = await callAssembly(finalPath, transcriptOpt, options) break - case 'whisper-docker': + case 'whisperDocker': case 'whisper': // Use Whisper (either local or Docker version) for transcription txtContent = await callWhisper(finalPath, transcriptOpt, options) diff --git a/test/all.test.js b/test/all.test.js index e044a82..ca6f004 100644 --- a/test/all.test.js +++ b/test/all.test.js @@ -115,12 +115,12 @@ const commands = [ newName: '23---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' }, { - cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speaker-labels --speakers-expected 2', + cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels', expectedFile: '2024-05-08-fsjam-short-prompt.md', newName: '24---2024-05-08-fsjam-short-prompt.md' }, { - cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speaker-labels --speakers-expected 2 --llama', + cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels --llama', expectedFile: '2024-05-08-fsjam-short-llama-shownotes.md', newName: '25---2024-05-08-fsjam-short-llama-shownotes.md' }, diff --git a/test/local.test.js b/test/local.test.js index 1565f72..576f8ab 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -47,7 +47,7 @@ const commands = [ newName: '09---2024-09-24-ep0-fsjam-podcast-prompt.md' }, { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters mediumChapters longChapters takeaways questions', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary mediumChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '10---2024-09-24-ep0-fsjam-podcast-prompt.md' }, From edbf7396556db7bd9be35dfc4fc553ba04708d09 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> 
Date: Wed, 25 Sep 2024 16:12:42 -0500 Subject: [PATCH 2/6] run ossf security check in gh workflow --- .github/workflows/scorecard.yml | 73 +++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/scorecard.yml diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..6f74a97 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,73 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '15 11 * * 5' + push: + branches: [ "main" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@97a0fba1372883ab732affbe8f94b823f91727db # v3.pre.node20 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard (optional). 
+ # Commenting out will disable upload of results to your repo's Code Scanning dashboard + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif \ No newline at end of file From 080bd01438924abe4c2ee4b1edbe6c8d1bdf3f15 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:25:39 -0500 Subject: [PATCH 3/6] improve jsdoc types --- src/autoshow.js | 58 +++++------- src/commands/processFile.js | 18 +++- src/commands/processPlaylist.js | 27 +++++- src/commands/processRSS.js | 70 +++++++++----- src/commands/processURLs.js | 14 ++- src/commands/processVideo.js | 14 ++- src/llms/chatgpt.js | 14 ++- src/llms/claude.js | 11 ++- src/llms/cohere.js | 11 ++- src/llms/gemini.js | 16 +++- src/llms/llama.js | 12 ++- src/llms/mistral.js | 10 +- src/llms/octo.js | 10 +- src/llms/prompt.js | 5 +- src/transcription/assembly.js | 22 ++++- src/transcription/deepgram.js | 22 +++-- src/transcription/whisper.js | 162 +++++++++++++++++++------------- src/types.js | 74 +++++++++++++++ src/utils/cleanUpFiles.js | 36 +++---- src/utils/generateMarkdown.js | 16 +--- src/utils/runLLM.js | 14 ++- src/utils/runTranscription.js | 16 ++-- 22 files changed, 445 insertions(+), 207 deletions(-) create mode 100644 src/types.js diff --git a/src/autoshow.js b/src/autoshow.js index 0a9d65a..cb1c9d0 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -15,32 +15,16 @@ import { processPlaylist } from './commands/processPlaylist.js' import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' -import { env } from 'node:process' +import { argv } from 'node:process' +import './types.js' /** - * @typedef {Object} ProcessingOptions - * @property {string[]} [prompt] - Specify prompt sections to include. - * @property {string} [video] - URL of the YouTube video to process. - * @property {string} [playlist] - URL of the YouTube playlist to process. - * @property {string} [urls] - File path containing URLs to process. - * @property {string} [file] - File path of the local audio/video file to process. - * @property {string} [rss] - URL of the podcast RSS feed to process. - * @property {string[]} [item] - Specific items in the RSS feed to process. - * @property {string} [order='newest'] - Order for RSS feed processing ('newest' or 'oldest'). - * @property {number} [skip=0] - Number of items to skip when processing RSS feed. - * @property {string} [whisper] - Whisper model type for non-Docker version. - * @property {string} [whisperDocker] - Whisper model type for Docker version. - * @property {string} [chatgpt] - ChatGPT model to use for processing. - * @property {string} [claude] - Claude model to use for processing. - * @property {string} [cohere] - Cohere model to use for processing. - * @property {string} [mistral] - Mistral model to use for processing. - * @property {string} [octo] - Octo model to use for processing. - * @property {boolean} [llama=false] - Use Node Llama for processing. - * @property {string} [gemini] - Gemini model to use for processing. - * @property {boolean} [deepgram=false] - Use Deepgram for transcription. - * @property {boolean} [assembly=false] - Use AssemblyAI for transcription. - * @property {boolean} [speakerLabels=false] - Use speaker labels for AssemblyAI transcription. - * @property {boolean} [noCleanUp=false] - Do not delete intermediary files after processing. 
+ * Custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + * @typedef {InquirerAnswers} InquirerAnswers + * @typedef {HandlerFunction} HandlerFunction */ // Initialize the command-line interface @@ -226,7 +210,7 @@ const INQUIRER_PROMPT = [ * @returns {Promise} - The updated options after user input. */ async function handleInteractivePrompt(options) { - /** @type {ProcessingOptions} */ + /** @type {InquirerAnswers} */ const answers = await inquirer.prompt(INQUIRER_PROMPT) options = { ...options, @@ -279,7 +263,7 @@ program.action(async (options) => { /** * Map actions to their respective handler functions - * @type {Object.>} + * @type {Object.} */ const handlers = { video: processVideo, @@ -290,20 +274,20 @@ program.action(async (options) => { } /** - * Determine the selected LLM option - * @type {string | undefined} - */ - const llmOpt = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'gemini'].find( + * Determine the selected LLM option + * @type {LLMOption | undefined} + */ + const llmOpt = /** @type {LLMOption} */ (['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'gemini'].find( (option) => options[option] - ) + )) /** - * Determine the transcription service to use - * @type {string | undefined} - */ - const transcriptOpt = ['whisper', 'whisperDocker', 'deepgram', 'assembly'].find( + * Determine the transcription service to use + * @type {TranscriptOption | undefined} + */ + const transcriptOpt = /** @type {TranscriptOption} */ (['whisper', 'whisperDocker', 'deepgram', 'assembly'].find( (option) => options[option] - ) + )) // Execute the appropriate handler based on the action for (const [key, handler] of Object.entries(handlers)) { @@ -314,4 +298,4 @@ program.action(async (options) => { }) // Parse the command-line arguments -program.parse(env.argv) \ No newline at end of file +program.parse(argv) \ No newline at end of file diff --git a/src/commands/processFile.js b/src/commands/processFile.js index cc2dafe..af1d842 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -5,13 +5,21 @@ import { downloadFileAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import '../types.js' + +/** + * Import custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ /** * Main function to process a local audio or video file. * @param {string} filePath - The path to the local file to process. - * @param {string} llmOpt - The selected Language Model option. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ export async function processFile(filePath, llmOpt, transcriptOpt, options) { @@ -22,8 +30,10 @@ export async function processFile(filePath, llmOpt, transcriptOpt, options) { // Download or convert the audio file await downloadFileAudio(filePath, filename) - // Run transcription on the file and process the transcript with the selected LLM + // Run transcription on the file await runTranscription(finalPath, transcriptOpt, options, frontMatter) + + // Process the transcript with the selected Language Model await runLLM(finalPath, frontMatter, llmOpt, options) // Clean up temporary files if the noCleanUp option is not set diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index f22eb32..5f1ca88 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -4,15 +4,23 @@ import { writeFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' +import '../types.js' const execFilePromise = promisify(execFile) +/** + * Custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ + /** * Main function to process a YouTube playlist. * @param {string} playlistUrl - The URL of the YouTube playlist to process. - * @param {string} llmOpt - The selected Language Model option. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) { @@ -21,19 +29,28 @@ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, option console.log(`Processing playlist: ${playlistUrl}`) // Use yt-dlp to fetch video URLs from the playlist - const { stdout } = await execFilePromise('yt-dlp', [ + const { stdout, stderr } = await execFilePromise('yt-dlp', [ '--flat-playlist', '--print', 'url', '--no-warnings', playlistUrl ]) + // Check for errors in stderr + if (stderr) { + console.error(`yt-dlp error: ${stderr}`) + } + // Split the stdout into an array of video URLs const urls = stdout.trim().split('\n').filter(Boolean) console.log(`Found ${urls.length} videos in the playlist`) // Write the URLs to a file for reference - await writeFile('content/urls.md', urls.join('\n')) + try { + await writeFile('content/urls.md', urls.join('\n')) + } catch (writeError) { + console.error('Error writing URLs to file:', writeError) + } // Process each video in the playlist for (const [index, url] of urls.entries()) { diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index 27b3993..edc47fb 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -11,6 +11,15 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import '../types.js' + +/** + * Import custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + * @typedef {RSSItem} RSSItem + */ // Initialize XML parser with specific options const parser = new XMLParser({ @@ -21,10 +30,11 @@ const parser = new XMLParser({ /** * Process a single item from the RSS feed. - * @param {Object} item - The item to process. - * @param {string} transcriptOpt - The transcription service to use. - * @param {string} llmOpt - The selected Language Model option. - * @param {Object} options - Additional options for processing. + * @param {RSSItem} item - The item to process. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {ProcessingOptions} options - Additional options for processing. + * @returns {Promise} */ async function processItem(item, transcriptOpt, llmOpt, options) { try { @@ -53,9 +63,10 @@ async function processItem(item, transcriptOpt, llmOpt, options) { /** * Main function to process an RSS feed. * @param {string} rssUrl - The URL of the RSS feed to process. - * @param {string} llmOpt - The selected Language Model option. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. 
+ * @returns {Promise} */ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { try { @@ -71,14 +82,30 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { console.log(` - Skipping first ${options.skip} items`) } - // Fetch the RSS feed - const response = await fetch(rssUrl, { - method: 'GET', - headers: { - 'Accept': 'application/rss+xml', - }, - timeout: 5000, // Set a timeout of 5 seconds - }) + // Set a timeout of 5 seconds using AbortController + const controller = new AbortController() + const timeout = setTimeout(() => { + controller.abort() + }, 5000) + + let response + try { + response = await fetch(rssUrl, { + method: 'GET', + headers: { + 'Accept': 'application/rss+xml', + }, + signal: controller.signal, + }) + clearTimeout(timeout) + } catch (error) { + if (error.name === 'AbortError') { + console.error('Fetch request timed out') + } else { + console.error('Error fetching RSS feed:', error) + } + throw error + } // Check if the response is successful if (!response.ok) { @@ -86,8 +113,7 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { } // Parse the RSS feed content - const buffer = await response.arrayBuffer() - const text = Buffer.from(buffer).toString('utf-8') + const text = await response.text() const feed = parser.parse(text) // Extract channel information @@ -108,8 +134,11 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { day: '2-digit', }) + // Ensure feedItems is an array + const feedItemsArray = Array.isArray(feedItems) ? feedItems : [feedItems] + // Filter and map feed items to extract necessary information - const items = feedItems + const items = feedItemsArray .filter((item) => { // Ensure the item has an enclosure with a valid type if (!item.enclosure || !item.enclosure.type) return false @@ -153,8 +182,7 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { console.log( - `\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title - }` + `\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title}` ) await processItem(item, transcriptOpt, llmOpt, options) } @@ -163,7 +191,7 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { console.log('RSS feed processing completed') } catch (error) { // Log any errors that occur during RSS feed processing - console.error('Error fetching or parsing feed:', error) + console.error('Error processing RSS feed:', error) throw error } } \ No newline at end of file diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index f637155..4455662 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -3,13 +3,21 @@ import { readFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' import { resolve } from 'node:path' +import '../types.js' + +/** + * Import custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ /** * Main function to process URLs from a file. * @param {string} filePath - The path to the file containing URLs. - * @param {string} llmOpt - The selected Language Model option. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. 
+ * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index ef46d71..38b0ebb 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -5,13 +5,21 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import '../types.js' + +/** + * Custom types + * @typedef {LLMOption} LLMOption + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ /** * Main function to process a single video. * @param {string} url - The URL of the video to process. - * @param {string} llmOpt - The selected Language Model option. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ export async function processVideo(url, llmOpt, transcriptOpt, options) { diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index 7421b6d..815530e 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -4,7 +4,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' -// Define available GPT models +/** + * Define available GPT models + * @type {Object.} + */ const gptModel = { GPT_4o_MINI: "gpt-4o-mini", GPT_4o: "gpt-4o", @@ -18,8 +21,14 @@ const gptModel = { * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='GPT_4o_MINI'] - The GPT model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during API call. 
*/ export async function callChatGPT(transcriptContent, outputFilePath, model = 'GPT_4o_MINI') { + // Check for API key + if (!env.OPENAI_API_KEY) { + throw new Error('OPENAI_API_KEY environment variable is not set.') + } + // Initialize the OpenAI client with the API key from environment variables const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY }) @@ -45,14 +54,13 @@ export async function callChatGPT(transcriptContent, outputFilePath, model = 'GP await writeFile(outputFilePath, content) console.log(`\nTranscript saved to:\n - ${outputFilePath}`) - // console.log(`\nChatGPT response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nFinish Reason: ${finish_reason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens\n`) // Return the actual model name used return Object.keys(gptModel).find(key => gptModel[key] === usedModel) || model } catch (error) { - console.error('Error:', error) + console.error('Error in callChatGPT:', error) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/claude.js b/src/llms/claude.js index 296003f..2e802c5 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -4,7 +4,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' -// Define available Claude models +/** + * Define available Claude models + * @type {Object.} + */ const claudeModel = { CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", CLAUDE_3_OPUS: "claude-3-opus-20240229", @@ -18,8 +21,14 @@ const claudeModel = { * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='CLAUDE_3_HAIKU'] - The Claude model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during the API call. */ export async function callClaude(transcriptContent, outputFilePath, model = 'CLAUDE_3_HAIKU') { + // Check if the ANTHROPIC_API_KEY environment variable is set + if (!env.ANTHROPIC_API_KEY) { + throw new Error('ANTHROPIC_API_KEY environment variable is not set.') + } + // Initialize the Anthropic client with the API key from environment variables const anthropic = new Anthropic({ apiKey: env.ANTHROPIC_API_KEY }) diff --git a/src/llms/cohere.js b/src/llms/cohere.js index 912121d..f973c44 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -4,7 +4,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' -// Define available Cohere models +/** + * Define available Cohere models + * @type {Object.} + */ const cohereModel = { COMMAND_R: "command-r", // Standard Command model COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model @@ -16,8 +19,14 @@ const cohereModel = { * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='COMMAND_R'] - The Cohere model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during the API call. 
*/ export async function callCohere(transcriptContent, outputFilePath, model = 'COMMAND_R') { + // Check if the COHERE_API_KEY environment variable is set + if (!env.COHERE_API_KEY) { + throw new Error('COHERE_API_KEY environment variable is not set.') + } + // Initialize the Cohere client with the API key from environment variables const cohere = new CohereClient({ token: env.COHERE_API_KEY }) diff --git a/src/llms/gemini.js b/src/llms/gemini.js index ac23617..f8e0829 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -4,14 +4,21 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" -// Define available Gemini models +/** + * Define available Gemini models + * @type {Object.} + */ const geminiModel = { GEMINI_1_5_FLASH: "gemini-1.5-flash", // GEMINI_1_5_PRO: "gemini-1.5-pro", GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", } -// Utility function to introduce a delay +/** + * Utility function to introduce a delay + * @param {number} ms - Milliseconds to delay + * @returns {Promise} + */ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); /** @@ -20,8 +27,13 @@ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='GEMINI_1_5_FLASH'] - The Gemini model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during the API call. */ export async function callGemini(transcriptContent, outputFilePath, model = 'GEMINI_1_5_FLASH') { + // Check if the GEMINI_API_KEY environment variable is set + if (!env.GEMINI_API_KEY) { + throw new Error('GEMINI_API_KEY environment variable is not set.') + } // Initialize the Google Generative AI client const genAI = new GoogleGenerativeAI(env.GEMINI_API_KEY) diff --git a/src/llms/llama.js b/src/llms/llama.js index 3c1a006..e0514b9 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -8,7 +8,10 @@ import { promisify } from 'node:util' const execAsync = promisify(exec) -// Define local model configurations +/** + * Define local model configurations + * @type {Object.} + */ const localModels = { LLAMA_3_1_8B_Q4_MODEL: { filename: "Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf", @@ -40,6 +43,7 @@ const localModels = { * Function to download the model if it doesn't exist. * @param {string} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. * @returns {Promise} - The path to the downloaded model. + * @throws {Error} - If the model download fails. */ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { // Get the model object from localModels using the provided modelName or default to GEMMA_2_2B_Q4_MODEL @@ -87,6 +91,7 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { * @param {string} outputFilePath - The file path to save the output. * @param {string} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. * @returns {Promise} + * @throws {Error} - If an error occurs during processing. 
*/ export async function callLlama(promptAndTranscript, outputFilePath, modelName = 'GEMMA_2_2B_Q4_MODEL') { try { @@ -101,14 +106,13 @@ export async function callLlama(promptAndTranscript, outputFilePath, modelName = const context = await localModel.createContext() const session = new LlamaChatSession({ contextSequence: context.getSequence() }) - // Generate a response and write the response to a temporary file + // Generate a response and write the response to a file const response = await session.prompt(promptAndTranscript) await writeFile(outputFilePath, response) console.log(`\nTranscript saved to:\n - ${outputFilePath}`) console.log(`\nModel used:\n - ${modelName}\n`) } catch (error) { - // Log and re-throw any errors that occur during the process - console.error('Error:', error) + console.error('Error in callLlama:', error) throw error } } \ No newline at end of file diff --git a/src/llms/mistral.js b/src/llms/mistral.js index 8b0b320..b3dbf32 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -4,7 +4,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' -// Define available Mistral AI models +/** + * Define available Mistral AI models + * @type {Object.} + */ const mistralModel = { MIXTRAL_8x7b: "open-mixtral-8x7b", MIXTRAL_8x22b: "open-mixtral-8x22b", @@ -18,8 +21,13 @@ const mistralModel = { * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='MISTRAL_NEMO'] - The Mistral model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during the API call. */ export async function callMistral(transcriptContent, outputFilePath, model = 'MISTRAL_NEMO') { + // Check if the MISTRAL_API_KEY environment variable is set + if (!env.MISTRAL_API_KEY) { + throw new Error('MISTRAL_API_KEY environment variable is not set.') + } // Initialize Mistral client with API key from environment variables const mistral = new Mistral(env.MISTRAL_API_KEY) diff --git a/src/llms/octo.js b/src/llms/octo.js index 67514f2..c6f6710 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -4,7 +4,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OctoAIClient } from '@octoai/sdk' -// Define available OctoAI models +/** + * Define available OctoAI models + * @type {Object.} + */ const octoModel = { LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", @@ -21,8 +24,13 @@ const octoModel = { * @param {string} outputFilePath - The file path to save the output. * @param {string} [model='LLAMA_3_1_70B'] - The OctoAI model to use. * @returns {Promise} - The actual model name used. + * @throws {Error} - If an error occurs during the API call. 
*/ export async function callOcto(transcriptContent, outputFilePath, model = 'LLAMA_3_1_70B') { + // Check if the OCTOAI_API_KEY environment variable is set + if (!env.OCTOAI_API_KEY) { + throw new Error('OCTOAI_API_KEY environment variable is not set.') + } // Initialize OctoAI client with API key from environment variables const octoai = new OctoAIClient({ apiKey: env.OCTOAI_API_KEY }) diff --git a/src/llms/prompt.js b/src/llms/prompt.js index 8ee3650..8187c35 100644 --- a/src/llms/prompt.js +++ b/src/llms/prompt.js @@ -1,6 +1,9 @@ // src/llms/prompt.js -// Define the structure for different sections of the prompt +/** + * Define the structure for different sections of the prompt + * @type {Object.} + */ const sections = { // Section for generating titles titles: { diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index 76ae71c..e9db1e8 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -3,6 +3,20 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { AssemblyAI } from 'assemblyai' +import '../types.js' + +/** + * Import custom types + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ + +/** + * Check if the ASSEMBLY_API_KEY environment variable is set + */ +if (!env.ASSEMBLY_API_KEY) { + throw new Error('ASSEMBLY_API_KEY environment variable is not set.') +} // Initialize the AssemblyAI client with API key from environment variables const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) @@ -10,9 +24,10 @@ const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) /** * Main function to handle transcription using AssemblyAI. * @param {string} finalPath - The identifier used for naming output files. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} - Returns the formatted transcript content. + * @throws {Error} - If an error occurs during transcription. 
*/ export async function callAssembly(finalPath, transcriptOpt, options) { try { @@ -22,7 +37,7 @@ export async function callAssembly(finalPath, transcriptOpt, options) { // Request transcription from AssemblyAI const transcript = await client.transcripts.transcribe({ audio: `${finalPath}.wav`, // The audio file to transcribe - speech_model: 'nano', // Use the 'nano' speech model for transcription + speech_model: 'nano', // Use the 'nano' speech model for transcription (`best` also an option) ...(speakerLabels && { // Conditionally add speaker labeling options speaker_labels: true, }) @@ -72,5 +87,6 @@ export async function callAssembly(finalPath, transcriptOpt, options) { } catch (error) { // Log any errors that occur during the transcription process console.error('Error processing the transcription:', error) + throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js index 3492eb1..7a8fc7b 100644 --- a/src/transcription/deepgram.js +++ b/src/transcription/deepgram.js @@ -4,16 +4,24 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' import { createClient } from '@deepgram/sdk' +/** + * Check if the DEEPGRAM_API_KEY environment variable is set + */ +if (!env.DEEPGRAM_API_KEY) { + throw new Error('DEEPGRAM_API_KEY environment variable is not set.') +} + +// Initialize the Deepgram client with the API key from environment variables +const deepgram = createClient(env.DEEPGRAM_API_KEY) + /** * Main function to handle transcription using Deepgram. * @param {string} input - The URL or local file path of the audio to transcribe. * @param {string} id - The identifier used for naming output files. * @returns {Promise} + * @throws {Error} - If an error occurs during transcription. 
*/ export async function callDeepgram(input, id) { - // Initialize the Deepgram client with the API key from environment variables - const deepgram = createClient(env.DEEPGRAM_API_KEY) - // Check if the input is a URL or a local file const isUrl = input.startsWith('http://') || input.startsWith('https://') @@ -39,10 +47,10 @@ export async function callDeepgram(input, id) { // Write the formatted transcript to a file await writeFile(`${id}.txt`, formattedTranscript) - console.log('Transcript saved.') - } catch (err) { + console.log(`\nTranscript saved:\n - ${id}.txt`) + } catch (error) { // Log any errors that occur during the transcription process - console.error('Error processing the transcription:', err) - throw err // Re-throw the error for handling in the calling function + console.error('Error processing the transcription:', error) + throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index ac230f0..aac16b5 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -4,10 +4,20 @@ import { readFile, writeFile, access } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { basename, join } from 'node:path' +import '../types.js' const execPromise = promisify(exec) -// Define available Whisper models +/** + * Import custom types + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions + */ + +/** + * Define available Whisper models + * @type {Object.} + */ const WHISPER_MODELS = { 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', 'base': 'ggml-base.bin', 'base.en': 'ggml-base.en.bin', @@ -20,30 +30,42 @@ const WHISPER_MODELS = { /** * Main function to handle transcription using Whisper. * @param {string} finalPath - The base path for the files. - * @param {string} transcriptOpt - The transcription service to use. - * @param {object} options - Additional options for processing. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} - Returns the formatted transcript content. + * @throws {Error} - If an error occurs during transcription. */ export async function callWhisper(finalPath, transcriptOpt, options) { - // Determine which Whisper model to use - const whisperModel = options.whisper || options.whisperDocker || 'base' - if (!(whisperModel in WHISPER_MODELS)) throw new Error(`Unknown model type: ${whisperModel}`) - const modelName = WHISPER_MODELS[whisperModel] + try { + // Determine which Whisper model to use + const whisperModel = options.whisper || options.whisperDocker || 'base' + if (!(whisperModel in WHISPER_MODELS)) { + throw new Error(`Unknown model type: ${whisperModel}`) + } + const modelName = WHISPER_MODELS[whisperModel] - // Call the appropriate Whisper function based on the transcription service - await (transcriptOpt === 'whisperDocker' ? 
callWhisperDocker : callWhisperMain)(finalPath, modelName, whisperModel) + // Call the appropriate Whisper function based on the transcription service + if (transcriptOpt === 'whisperDocker') { + await callWhisperDocker(finalPath, modelName, whisperModel) + } else { + await callWhisperMain(finalPath, modelName, whisperModel) + } - // Read, process, and format the generated LRC file - const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') - const txtContent = lrcContent.split('\n') - .filter(line => !line.startsWith('[by:whisper.cpp]')) - .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) - .join('\n') + // Read, process, and format the generated LRC file + const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + const txtContent = lrcContent.split('\n') + .filter(line => !line.startsWith('[by:whisper.cpp]')) + .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) + .join('\n') - // Write the formatted content to a text file - await writeFile(`${finalPath}.txt`, txtContent) - console.log(`Transcript transformation completed:\n - ${finalPath}.txt`) - return txtContent + // Write the formatted content to a text file + await writeFile(`${finalPath}.txt`, txtContent) + console.log(`Transcript transformation completed:\n - ${finalPath}.txt`) + return txtContent + } catch (error) { + console.error('Error in callWhisper:', error) + throw error + } } /** @@ -52,6 +74,7 @@ export async function callWhisper(finalPath, transcriptOpt, options) { * @param {string} modelName - The model file name. * @param {string} whisperModel - The Whisper model type. * @returns {Promise} + * @throws {Error} - If an error occurs during Docker transcription. */ async function callWhisperDocker(finalPath, modelName, whisperModel) { const WHISPER_CONTAINER_NAME = 'autoshow-whisper-1' @@ -59,38 +82,43 @@ async function callWhisperDocker(finalPath, modelName, whisperModel) { const MODELS_DIR = '/app/models' const modelPathContainer = `${MODELS_DIR}/${modelName}` - // Check if the Whisper container is running, start it if not try { - await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) - console.log('\nWhisper container is already running.') - } catch { - console.log('\nWhisper container is not running. Starting it...') - await execPromise('docker-compose up -d whisper') - console.log('Whisper container started successfully.') - } + // Check if the Whisper container is running, start it if not + try { + await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) + console.log('\nWhisper container is already running.') + } catch { + console.log('\nWhisper container is not running. 
Starting it...') + await execPromise('docker-compose up -d whisper') + console.log('Whisper container started successfully.') + } - // Check if the model exists in the container, download if not - try { - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) - console.log(`\nWhisper.cpp ${whisperModel} model found:`) - console.log(` - ${modelName} model selected\n - Model located at ${modelPathContainer}`) - } catch { - console.log(`\nWhisper.cpp ${whisperModel} model not found in container:`) - console.log(` - ${modelName} model selected\n - Model downloading to ${modelPathContainer}`) - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${whisperModel}`) - console.log(` - Model downloaded successfully`) - } + // Check if the model exists in the container, download if not + try { + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) + console.log(`\nWhisper.cpp ${whisperModel} model found:`) + console.log(` - ${modelName} model selected\n - Model located at ${modelPathContainer}`) + } catch { + console.log(`\nWhisper.cpp ${whisperModel} model not found in container:`) + console.log(` - ${modelName} model selected\n - Model downloading to ${modelPathContainer}`) + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${whisperModel}`) + console.log(` - Model downloaded successfully`) + } - // Execute Whisper transcription in the Docker container - const fileName = basename(finalPath) - await execPromise( - `docker exec ${WHISPER_CONTAINER_NAME} /app/main \ - -m ${modelPathContainer} \ - -f ${join(CONTENT_DIR, `${fileName}.wav`)} \ - -of ${join(CONTENT_DIR, fileName)} \ - --output-lrc` - ) - console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) + // Execute Whisper transcription in the Docker container + const fileName = basename(finalPath) + await execPromise( + `docker exec ${WHISPER_CONTAINER_NAME} /app/main \ + -m ${modelPathContainer} \ + -f ${join(CONTENT_DIR, `${fileName}.wav`)} \ + -of ${join(CONTENT_DIR, fileName)} \ + --output-lrc` + ) + console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) + } catch (error) { + console.error('Error in callWhisperDocker:', error) + throw error + } } /** @@ -99,27 +127,31 @@ async function callWhisperDocker(finalPath, modelName, whisperModel) { * @param {string} modelName - The model file name. * @param {string} whisperModel - The Whisper model type. * @returns {Promise} + * @throws {Error} - If an error occurs during transcription. 
 */
 async function callWhisperMain(finalPath, modelName, whisperModel) {
   const modelPath = `./whisper.cpp/models/${modelName}`
-  // Check if the model exists locally, download if not
   try {
-    await access(modelPath)
-    console.log(`\nWhisper.cpp ${whisperModel} model found:`)
-    console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`)
-  } catch {
-    console.log(`\nWhisper.cpp ${whisperModel} model not found:`)
-    console.log(` - ${modelName} model selected\n - Model downloading to ${modelPath}`)
-    await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`)
-    console.log(` - Model downloaded successfully`)
-  }
+    // Check if the model exists locally, download if not
+    try {
+      await access(modelPath)
+      console.log(`\nWhisper.cpp ${whisperModel} model found:`)
+      console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`)
+    } catch {
+      console.log(`\nWhisper.cpp ${whisperModel} model not found:`)
+      console.log(` - ${modelName} model selected\n - Model downloading to ${modelPath}`)
+      await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`)
+      console.log(` - Model downloaded successfully`)
+    }
-  /**
-   * Execute Whisper transcription.
-   */
-  await execPromise(
-    `./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`
-  )
-  console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`)
+    // Execute Whisper transcription
+    await execPromise(
+      `./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`
+    )
+    console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`)
+  } catch (error) {
+    console.error('Error in callWhisperMain:', error)
+    throw error
+  }
 }
\ No newline at end of file
diff --git a/src/types.js b/src/types.js
new file mode 100644
index 0000000..0ddd6db
--- /dev/null
+++ b/src/types.js
@@ -0,0 +1,74 @@
+// src/types.js
+
+/**
+ * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptOption
+ * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'gemini'} LLMOption
+ * @typedef {function(string, string, any): Promise} LLMFunction
+ * @typedef {Object.<LLMOption, LLMFunction>} LLMFunctions
+ *
+ * @typedef {function(string, LLMOption, TranscriptOption, ProcessingOptions): Promise} HandlerFunction
+ */
+
+/**
+ * @typedef {Object} ProcessingOptions
+ * @property {string[]} [prompt] - Specify prompt sections to include.
+ * @property {string} [video] - URL of the YouTube video to process.
+ * @property {string} [playlist] - URL of the YouTube playlist to process.
+ * @property {string} [urls] - File path containing URLs to process.
+ * @property {string} [file] - File path of the local audio/video file to process.
+ * @property {string} [rss] - URL of the podcast RSS feed to process.
+ * @property {string[]} [item] - Specific items in the RSS feed to process.
+ * @property {string} [order='newest'] - Order for RSS feed processing ('newest' or 'oldest').
+ * @property {number} [skip=0] - Number of items to skip when processing RSS feed.
+ * @property {string} [whisper] - Whisper model type for non-Docker version.
+ * @property {string} [whisperDocker] - Whisper model type for Docker version.
+ * @property {string} [chatgpt] - ChatGPT model to use for processing.
+ * @property {string} [claude] - Claude model to use for processing.
+ * @property {string} [cohere] - Cohere model to use for processing.
+ * @property {string} [mistral] - Mistral model to use for processing. + * @property {string} [octo] - Octo model to use for processing. + * @property {string} [llama] - Llama model to use for processing. + * @property {string} [gemini] - Gemini model to use for processing. + * @property {boolean} [deepgram=false] - Use Deepgram for transcription. + * @property {boolean} [assembly=false] - Use AssemblyAI for transcription. + * @property {boolean} [speakerLabels=false] - Use speaker labels for AssemblyAI transcription. + * @property {boolean} [noCleanUp=false] - Do not delete intermediary files after processing. + * @property {Object} [additionalProps] - Additional dynamic properties. + */ + +/** + * @typedef {Object} InquirerAnswers + * @property {string} [action] + * @property {string} [video] + * @property {string} [playlist] + * @property {string} [urls] + * @property {string} [file] + * @property {string} [rss] + * @property {boolean} [specifyItem] + * @property {string} [item] + * @property {LLMOption} [llmOpt] + * @property {string} [llamaModel] + * @property {TranscriptOption} [transcriptOpt] + * @property {boolean} [useDocker] + * @property {string} [whisperModel] + * @property {boolean} [speakerLabels] + * @property {string[]} [prompt] + * @property {boolean} [noCleanUp] + */ + +/** + * @typedef {Object} MarkdownData + * @property {string} frontMatter - The front matter content for the markdown file. + * @property {string} finalPath - The base path for the files. + * @property {string} filename - The sanitized filename. + */ + +/** + * @typedef {Object} RSSItem + * @property {string} publishDate - The publish date of the item. + * @property {string} title - The title of the item. + * @property {string} coverImage - The cover image URL of the item. + * @property {string} showLink - The show link of the item. + * @property {string} channel - The channel name. + * @property {string} channelURL - The channel URL. + */ \ No newline at end of file diff --git a/src/utils/cleanUpFiles.js b/src/utils/cleanUpFiles.js index 512f25c..7a15252 100644 --- a/src/utils/cleanUpFiles.js +++ b/src/utils/cleanUpFiles.js @@ -9,31 +9,21 @@ import { unlink } from 'node:fs/promises' * @throws {Error} - If an error occurs while deleting files. 
*/ export async function cleanUpFiles(id) { - try { - // Log the start of the cleanup process - console.log(`\nTemporary files removed:`) + // Array of file extensions to delete + const extensions = ['.wav', '.txt', '.md', '.lrc'] - // Remove .wav file - await unlink(`${id}.wav`) - console.log(` - ${id}.wav`) + // Log the start of the cleanup process + console.log(`\nTemporary files removed:`) - // Remove .txt file - await unlink(`${id}.txt`) - console.log(` - ${id}.txt`) - - // Remove .md file - await unlink(`${id}.md`) - console.log(` - ${id}.md`) - - // Remove .lrc file - await unlink(`${id}.lrc`) - console.log(` - ${id}.lrc`) - } catch (error) { - // If the error is not "file not found", log the error - if (error.code !== 'ENOENT') { - console.error(`Error deleting file:`, error) - throw error + for (const ext of extensions) { + try { + await unlink(`${id}${ext}`) + console.log(` - ${id}${ext}`) + } catch (error) { + if (error.code !== 'ENOENT') { + console.error(`Error deleting file ${id}${ext}:`, error) + } + // If the file does not exist, silently continue } - // If the error is "file not found", silently ignore it } } \ No newline at end of file diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.js index b9adfd5..5461f28 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.js @@ -4,26 +4,20 @@ import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' +import '../types.js' // Promisify the execFile function for use with async/await const execFilePromise = promisify(execFile) /** - * @typedef {Object} MarkdownData - * @property {string} frontMatter - The front matter content for the markdown file. - * @property {string} finalPath - The base path for the files. - * @property {string} filename - The sanitized filename. + * Import custom types + * @typedef {MarkdownData} MarkdownData + * @typedef {RSSItem} RSSItem */ /** * Function to generate markdown for RSS feed items. - * @param {object} item - The RSS feed item object. - * @param {string} item.publishDate - The publish date of the item. - * @param {string} item.title - The title of the item. - * @param {string} item.coverImage - The cover image URL of the item. - * @param {string} item.showLink - The show link of the item. - * @param {string} item.channel - The channel name. - * @param {string} item.channelURL - The channel URL. + * @param {RSSItem} item - The RSS feed item object. * @returns {Promise} - Returns an object with frontMatter, finalPath, and filename. * @throws {Error} - If markdown generation fails. */ diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 45e9995..1e505ce 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -10,8 +10,18 @@ import { callLlama } from '../llms/llama.js' // import { callLlamaCpp } from '../llms/llamacpp.js' import { callGemini } from '../llms/gemini.js' import { generatePrompt } from '../llms/prompt.js' +import '../types.js' + +/** + * Import custom types + * @typedef {LLMOption} LLMOption + * @typedef {ProcessingOptions} ProcessingOptions + * @typedef {LLMFunction} LLMFunction + * @typedef {LLMFunctions} LLMFunctions + */ // Object mapping LLM options to their respective functions +/** @type {LLMFunctions} */ const llmFunctions = { chatgpt: callChatGPT, claude: callClaude, @@ -27,8 +37,8 @@ const llmFunctions = { * Main function to run the selected Language Model. 
* @param {string} finalPath - The base path for the files. * @param {string} frontMatter - The front matter content for the markdown file. - * @param {string} llmOpt - The selected Language Model option. - * @param {object} options - Additional options for processing. + * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ export async function runLLM(finalPath, frontMatter, llmOpt, options) { diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index d84ef63..7218433 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -4,21 +4,19 @@ import { readFile, writeFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' +import '../types.js' /** - * @typedef {Object} transcriptOptions - * @property {boolean} [speakerLabels=false] - Whether to use speaker labels. - * @property {string[]} [prompt] - Sections to include in the prompt. - * @property {string} [whisper] - Whisper model type. - * @property {string} [whisperDocker] - Whisper model type for Docker. - * // Include other properties used in options. + * Import custom types + * @typedef {TranscriptOption} TranscriptOption + * @typedef {ProcessingOptions} ProcessingOptions */ /** * Main function to run transcription. * @param {string} finalPath - The base path for the files. - * @param {string} transcriptOpt - The transcription service to use. - * @param {transcriptOptions} [options={}] - Additional options for processing. + * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {ProcessingOptions} [options={}] - Additional options for processing. * @param {string} [frontMatter=''] - Optional front matter content for the markdown file. * @returns {Promise} - Returns the final content including markdown and transcript. * @throws {Error} - If the transcription service fails or an error occurs during processing. 
@@ -41,7 +39,7 @@ export async function runTranscription( break case 'assembly': - // Use AssemblyAI for transcription and pass option for speaker labels + // Use AssemblyAI for transcription and pass options txtContent = await callAssembly(finalPath, transcriptOpt, options) break From 98f98551e63a68863713f9f71506f96ea78eef55 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:39:58 -0500 Subject: [PATCH 4/6] update node imports in test files --- test/all.test.js | 28 +++++++++++++++------------- test/local.test.js | 28 ++++++++++++++-------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/test/all.test.js b/test/all.test.js index ca6f004..0e586d6 100644 --- a/test/all.test.js +++ b/test/all.test.js @@ -1,8 +1,10 @@ +// test/all.test.js + import test from 'node:test' -import assert from 'node:assert/strict' +import { strictEqual } from 'node:assert/strict' import { execSync } from 'node:child_process' -import fs from 'node:fs' -import path from 'node:path' +import { existsSync, renameSync } from 'node:fs' +import { join } from 'node:path' const commands = [ { @@ -187,18 +189,18 @@ test('Autoshow Command Tests', async (t) => { execSync(command.cmd, { stdio: 'inherit' }) if (Array.isArray(command.expectedFiles)) { for (const { file, newName } of command.expectedFiles) { - const filePath = path.join('content', file) - assert.strictEqual(fs.existsSync(filePath), true, `Expected file ${file} was not created`) - const newPath = path.join('content', newName) - fs.renameSync(filePath, newPath) - assert.strictEqual(fs.existsSync(newPath), true, `File was not renamed to ${newName}`) + const filePath = join('content', file) + strictEqual(existsSync(filePath), true, `Expected file ${file} was not created`) + const newPath = join('content', newName) + renameSync(filePath, newPath) + strictEqual(existsSync(newPath), true, `File was not renamed to ${newName}`) } } else { - const filePath = path.join('content', command.expectedFile) - assert.strictEqual(fs.existsSync(filePath), true, `Expected file ${command.expectedFile} was not created`) - const newPath = path.join('content', command.newName) - fs.renameSync(filePath, newPath) - assert.strictEqual(fs.existsSync(newPath), true, `File was not renamed to ${command.newName}`) + const filePath = join('content', command.expectedFile) + strictEqual(existsSync(filePath), true, `Expected file ${command.expectedFile} was not created`) + const newPath = join('content', command.newName) + renameSync(filePath, newPath) + strictEqual(existsSync(newPath), true, `File was not renamed to ${command.newName}`) } }) } diff --git a/test/local.test.js b/test/local.test.js index 576f8ab..5093926 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -1,10 +1,10 @@ -// test/autoshow.test.js +// test/local.test.js import test from 'node:test' -import assert from 'node:assert/strict' +import { strictEqual } from 'node:assert/strict' import { execSync } from 'node:child_process' -import fs from 'node:fs' -import path from 'node:path' +import { existsSync, renameSync } from 'node:fs' +import { join } from 'node:path' const commands = [ { @@ -100,18 +100,18 @@ test('Autoshow Command Tests', async (t) => { if (Array.isArray(command.expectedFiles)) { for (const { file, newName } of command.expectedFiles) { - const filePath = path.join('content', file) - assert.strictEqual(fs.existsSync(filePath), true, `Expected file ${file} was not created`) - const newPath = path.join('content', newName) - 
fs.renameSync(filePath, newPath) - assert.strictEqual(fs.existsSync(newPath), true, `File was not renamed to ${newName}`) + const filePath = join('content', file) + strictEqual(existsSync(filePath), true, `Expected file ${file} was not created`) + const newPath = join('content', newName) + renameSync(filePath, newPath) + strictEqual(existsSync(newPath), true, `File was not renamed to ${newName}`) } } else { - const filePath = path.join('content', command.expectedFile) - assert.strictEqual(fs.existsSync(filePath), true, `Expected file ${command.expectedFile} was not created`) - const newPath = path.join('content', command.newName) - fs.renameSync(filePath, newPath) - assert.strictEqual(fs.existsSync(newPath), true, `File was not renamed to ${command.newName}`) + const filePath = join('content', command.expectedFile) + strictEqual(existsSync(filePath), true, `Expected file ${command.expectedFile} was not created`) + const newPath = join('content', command.newName) + renameSync(filePath, newPath) + strictEqual(existsSync(newPath), true, `File was not renamed to ${command.newName}`) } }) } From 088c17620279f30ffd9c7ede48a446f8cbdd473c Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:41:48 -0500 Subject: [PATCH 5/6] remove temporary llamacpp file --- src/llms/llamacpp.js | 48 -------------------------------------------- src/utils/runLLM.js | 10 ++++----- 2 files changed, 4 insertions(+), 54 deletions(-) delete mode 100644 src/llms/llamacpp.js diff --git a/src/llms/llamacpp.js b/src/llms/llamacpp.js deleted file mode 100644 index 7aefb04..0000000 --- a/src/llms/llamacpp.js +++ /dev/null @@ -1,48 +0,0 @@ -// src/llms/llamacpp.js - -import { exec } from 'node:child_process' -import { promisify } from 'node:util' -import { writeFile } from 'node:fs/promises' - -const execAsync = promisify(exec) - -/** - * Main function to call Llama.cpp using Docker. - * @param {string} fullPrompt - The prompt to send to the model. - * @param {string} tempOutputPath - The temporary file path to save the output. - * @param {object} [options={}] - Additional options like model path, threads, and prediction tokens. 
- * @returns {Promise} - */ -export async function callLlamaCpp(fullPrompt, tempOutputPath, options = {}) { - try { - // Set default options or use provided options - const modelPath = options.modelPath || '/app/models/gemma-2-2b-it-IQ4_XS.gguf' - const nThreads = options.nThreads || 4 - const nPredict = options.nPredict || 1024 - - // Construct the Docker command - const dockerCommand = `docker run --rm \ - -v ${process.cwd()}/content:/app/content \ - llama ./build/bin/main \ - -m ${modelPath} \ - -n ${nPredict} \ - -t ${nThreads} \ - -p "${fullPrompt}"` - console.log('Running llama.cpp with Docker command:', dockerCommand) - - // Execute the Docker command and check for/log any errors - const { stdout, stderr } = await execAsync(dockerCommand) - if (stderr) { - console.error('Error running llama.cpp:', stderr) - } - console.log('llama.cpp output:', stdout) - - // Write the output to the specified temporary file - await writeFile(tempOutputPath, stdout) - console.log(`Llama.cpp output saved to ${tempOutputPath}`) - } catch (error) { - // Log and re-throw any errors that occur during the process - console.error('Error in callLlama:', error) - throw error - } -} \ No newline at end of file diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 1e505ce..84f1a62 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -1,14 +1,13 @@ // src/utils/runLLM.js import { readFile, writeFile, unlink } from 'node:fs/promises' +import { callLlama } from '../llms/llama.js' import { callChatGPT } from '../llms/chatgpt.js' import { callClaude } from '../llms/claude.js' +import { callGemini } from '../llms/gemini.js' import { callCohere } from '../llms/cohere.js' import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' -import { callLlama } from '../llms/llama.js' -// import { callLlamaCpp } from '../llms/llamacpp.js' -import { callGemini } from '../llms/gemini.js' import { generatePrompt } from '../llms/prompt.js' import '../types.js' @@ -23,14 +22,13 @@ import '../types.js' // Object mapping LLM options to their respective functions /** @type {LLMFunctions} */ const llmFunctions = { + llama: callLlama, chatgpt: callChatGPT, claude: callClaude, + gemini: callGemini, cohere: callCohere, mistral: callMistral, octo: callOcto, - llama: callLlama, - // llamacpp: callLlamaCpp, - gemini: callGemini, } /** From aa1ba7b731c2634f6d25e4ab7e3ad1f97b4fbb59 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:01:47 -0500 Subject: [PATCH 6/6] consolidate jsdoc to types file --- .gitignore | 3 +- docs/examples.md | 130 +++++----------- src/autoshow.js | 21 +-- src/commands/processFile.js | 12 +- src/commands/processPlaylist.js | 14 +- src/commands/processRSS.js | 17 +-- src/commands/processURLs.js | 12 +- src/commands/processVideo.js | 12 +- src/llms/chatgpt.js | 25 ++-- src/llms/claude.js | 25 ++-- src/llms/cohere.js | 25 ++-- src/llms/gemini.js | 28 ++-- src/llms/llama.js | 19 ++- src/llms/mistral.js | 25 ++-- src/llms/octo.js | 25 ++-- src/llms/prompt.js | 4 +- src/transcription/assembly.js | 17 +-- src/transcription/deepgram.js | 8 +- src/transcription/whisper.js | 21 +-- src/types.js | 256 +++++++++++++++++++++++++------- src/utils/downloadAudio.js | 7 +- src/utils/generateMarkdown.js | 12 +- src/utils/runLLM.js | 15 +- src/utils/runTranscription.js | 9 +- 24 files changed, 395 insertions(+), 347 deletions(-) diff --git a/.gitignore b/.gitignore index 6e648e7..6609ca4 100644 --- a/.gitignore +++ 
b/.gitignore @@ -7,4 +7,5 @@ package-lock.json src/llms/models .idea build -deno.lock \ No newline at end of file +deno.lock +out \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index 4ebcda0..6bcad4a 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -379,100 +379,38 @@ npm run test-local This can be a useful way of creating a single markdown file of the entire project for giving to an LLM as context to develop new features or debug code. I'll usually start a conversation by including this along with a prompt that explains what I want changed or added. ```bash -cat README.md >> LLM.md && \ - echo '\n\n```' >> LLM.md && \ - tree >> LLM.md && \ - echo '\n```' >> LLM.md && \ - echo '\n\n' >> LLM.md && \ - cat docs/examples.md >> LLM.md && \ - echo '\n## AutoShow CLI Entry Point' >> LLM.md && \ - echo '\n\n```js' >> LLM.md && \ - cat src/autoshow.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '## Utility Functions\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/utils/cleanUpFiles.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/utils/downloadAudio.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/utils/generateMarkdown.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/utils/runLLM.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/utils/runTranscription.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '## Transcription Functions\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/transcription/whisper.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/transcription/deepgram.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/transcription/assembly.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '## LLM Functions\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/prompt.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/chatgpt.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/claude.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/cohere.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/gemini.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/llama.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/llamacpp.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/mistral.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/llms/octo.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '## Process Commands\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/commands/processVideo.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/commands/processURLs.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/commands/processRSS.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/commands/processPlaylist.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```js' >> LLM.md && \ - cat src/commands/processFile.js >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '## Docker Files\n' >> LLM.md && \ - echo 
'```Dockerfile' >> LLM.md && \ - cat .github/whisper.Dockerfile >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```Dockerfile' >> LLM.md && \ - cat .github/llama.Dockerfile >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```Dockerfile' >> LLM.md && \ - cat Dockerfile >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```yml' >> LLM.md && \ - cat docker-compose.yml >> LLM.md && \ - echo '\n```\n' >> LLM.md && \ - echo '```bash' >> LLM.md && \ - cat docker-entrypoint.sh >> LLM.md && \ - echo '\n```\n' >> LLM.md +export MD="LLM.md" && export COMMANDS="src/commands" && export UTILS="src/utils" && \ + export LLMS="src/llms" && export TRANSCRIPT="src/transcription" && \ + export OPEN="\n\n\`\`\`js" && export CLOSE="\n\`\`\`\n\n" && cat README.md >> $MD && \ + echo '\n\n### Directory and File Structure\n\n```' >> $MD && tree >> $MD && \ + echo '```\n\n## Example CLI Commands Test Suite'$OPEN'' >> $MD && cat test/all.test.js >> $MD && \ + echo ''$CLOSE'## JSDoc Types'$OPEN'' >> $MD && cat src/types.js >> $MD && \ + echo ''$CLOSE'## AutoShow CLI Entry Point'$OPEN'' >> $MD && cat src/autoshow.js >> $MD && \ + echo ''$CLOSE'## Utility Functions\n\n### Generate Markdown'$OPEN'' >> $MD && cat $UTILS/generateMarkdown.js >> $MD && \ + echo ''$CLOSE'### Download Audio'$OPEN'' >> $MD && cat $UTILS/downloadAudio.js >> $MD && \ + echo ''$CLOSE'### Run Transcription'$OPEN'' >> $MD && cat $UTILS/runTranscription.js >> $MD && \ + echo ''$CLOSE'### Run LLM'$OPEN'' >> $MD && cat $UTILS/runLLM.js >> $MD && \ + echo ''$CLOSE'### Clean Up Files'$OPEN'' >> $MD && cat $UTILS/cleanUpFiles.js >> $MD && \ + echo ''$CLOSE'## Process Commands\n\n### Process Video'$OPEN'' >> $MD && cat $COMMANDS/processVideo.js >> $MD && \ + echo ''$CLOSE'### Process Playlist'$OPEN'' >> $MD && cat $COMMANDS/processPlaylist.js >> $MD && \ + echo ''$CLOSE'### Process URLs'$OPEN'' >> $MD && cat $COMMANDS/processURLs.js >> $MD && \ + echo ''$CLOSE'### Process RSS'$OPEN'' >> $MD && cat $COMMANDS/processRSS.js >> $MD && \ + echo ''$CLOSE'### Process File'$OPEN'' >> $MD && cat $COMMANDS/processFile.js >> $MD && \ + echo ''$CLOSE'## Transcription Functions\n\n### Call Whisper'$OPEN'' >> $MD && cat $TRANSCRIPT/whisper.js >> $MD && \ + echo ''$CLOSE'### Call Deepgram'$OPEN'' >> $MD && cat $TRANSCRIPT/deepgram.js >> $MD && \ + echo ''$CLOSE'### Call Assembly'$OPEN'' >> $MD && cat $TRANSCRIPT/assembly.js >> $MD && \ + echo ''$CLOSE'## LLM Functions\n\n### Prompt Function'$OPEN'' >> $MD && cat $LLMS/prompt.js >> $MD && \ + echo ''$CLOSE'### Call ChatGPT'$OPEN'' >> $MD && cat $LLMS/chatgpt.js >> $MD && \ + echo ''$CLOSE'### Call Claude'$OPEN'' >> $MD && cat $LLMS/claude.js >> $MD && \ + echo ''$CLOSE'### Call Cohere'$OPEN'' >> $MD && cat $LLMS/cohere.js >> $MD && \ + echo ''$CLOSE'### Call Gemini'$OPEN'' >> $MD && cat $LLMS/gemini.js >> $MD && \ + echo ''$CLOSE'### Call Llama.cpp'$OPEN'' >> $MD && cat $LLMS/llama.js >> $MD && \ + echo ''$CLOSE'### Call Mistral'$OPEN'' >> $MD && cat $LLMS/mistral.js >> $MD && \ + echo ''$CLOSE'### Call Octo'$OPEN'' >> $MD && cat $LLMS/octo.js >> $MD && \ + echo ''$CLOSE'## Docker Files\n\n```Dockerfile' >> $MD && cat .github/whisper.Dockerfile >> $MD && \ + echo ''$CLOSE'```Dockerfile' >> $MD && cat .github/llama.Dockerfile >> $MD && \ + echo ''$CLOSE'```Dockerfile' >> $MD && cat Dockerfile >> $MD && \ + echo ''$CLOSE'```yml' >> $MD && cat docker-compose.yml >> $MD && \ + echo ''$CLOSE'```bash' >> $MD && cat docker-entrypoint.sh >> $MD && \ + echo '\n```\n' >> $MD ``` \ No newline at end of file diff 
--git a/src/autoshow.js b/src/autoshow.js index cb1c9d0..a5189b7 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -16,16 +16,8 @@ import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv } from 'node:process' -import './types.js' -/** - * Custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - * @typedef {InquirerAnswers} InquirerAnswers - * @typedef {HandlerFunction} HandlerFunction - */ +/** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, HandlerFunction, LLMOption, TranscriptOption, WhisperModelType } from './types.js' */ // Initialize the command-line interface const program = new Command() @@ -58,6 +50,7 @@ program .option('--noCleanUp', 'Do not delete intermediary files after processing') // Interactive prompts using inquirer +/** @type {InquirerQuestions} */ const INQUIRER_PROMPT = [ { type: 'list', @@ -170,7 +163,7 @@ const INQUIRER_PROMPT = [ type: 'list', name: 'whisperModel', message: 'Select the Whisper model type:', - choices: ['tiny', 'base', 'small', 'medium', 'large'], + choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2'], when: (answers) => answers.transcriptOpt === 'whisper', default: 'large', }, @@ -225,9 +218,9 @@ async function handleInteractivePrompt(options) { // Handle transcription options if (answers.transcriptOpt === 'whisper') { if (answers.useDocker) { - options.whisperDocker = answers.whisperModel + options.whisperDocker = /** @type {WhisperModelType} */ (answers.whisperModel) } else { - options.whisper = answers.whisperModel + options.whisper = /** @type {WhisperModelType} */ (answers.whisperModel) } } else { options[answers.transcriptOpt] = true @@ -277,7 +270,7 @@ program.action(async (options) => { * Determine the selected LLM option * @type {LLMOption | undefined} */ - const llmOpt = /** @type {LLMOption} */ (['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'gemini'].find( + const llmOpt = /** @type {LLMOption | undefined} */ (['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'gemini'].find( (option) => options[option] )) @@ -285,7 +278,7 @@ program.action(async (options) => { * Determine the transcription service to use * @type {TranscriptOption | undefined} */ - const transcriptOpt = /** @type {TranscriptOption} */ (['whisper', 'whisperDocker', 'deepgram', 'assembly'].find( + const transcriptOpt = /** @type {TranscriptOption | undefined} */ (['whisper', 'whisperDocker', 'deepgram', 'assembly'].find( (option) => options[option] )) diff --git a/src/commands/processFile.js b/src/commands/processFile.js index af1d842..8d02e28 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -5,20 +5,14 @@ import { downloadFileAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import '../types.js' -/** - * Import custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ /** * Main function to process a local audio or video file. 
* @param {string} filePath - The path to the local file to process. - * @param {LLMOption} llmOpt - The selected Language Model option. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. + * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index 5f1ca88..489df70 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -4,22 +4,16 @@ import { writeFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' -import '../types.js' -const execFilePromise = promisify(execFile) +/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ -/** - * Custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +const execFilePromise = promisify(execFile) /** * Main function to process a YouTube playlist. * @param {string} playlistUrl - The URL of the YouTube playlist to process. - * @param {LLMOption} llmOpt - The selected Language Model option. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. + * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index edc47fb..03ab6fd 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -11,15 +11,8 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import '../types.js' -/** - * Import custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - * @typedef {RSSItem} RSSItem - */ +/** @import { LLMOption, TranscriptOption, ProcessingOptions, RSSItem } from '../types.js' */ // Initialize XML parser with specific options const parser = new XMLParser({ @@ -31,8 +24,8 @@ const parser = new XMLParser({ /** * Process a single item from the RSS feed. * @param {RSSItem} item - The item to process. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. - * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ @@ -63,8 +56,8 @@ async function processItem(item, transcriptOpt, llmOpt, options) { /** * Main function to process an RSS feed. * @param {string} rssUrl - The URL of the RSS feed to process. - * @param {LLMOption} llmOpt - The selected Language Model option. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. 
+ * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index 4455662..7c2f732 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -3,20 +3,14 @@ import { readFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' import { resolve } from 'node:path' -import '../types.js' -/** - * Import custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ /** * Main function to process URLs from a file. * @param {string} filePath - The path to the file containing URLs. - * @param {LLMOption} llmOpt - The selected Language Model option. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. + * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index 38b0ebb..1256134 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -5,20 +5,14 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import '../types.js' -/** - * Custom types - * @typedef {LLMOption} LLMOption - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ /** * Main function to process a single video. * @param {string} url - The URL of the video to process. - * @param {LLMOption} llmOpt - The selected Language Model option. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {LLMOption} [llmOpt] - The selected Language Model option. + * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index 815530e..2773ed1 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -4,9 +4,11 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' +/** @import { LLMFunction, ChatGPTModelType } from '../types.js' */ + /** - * Define available GPT models - * @type {Object.} + * Map of ChatGPT model identifiers to their API names + * @type {Record} */ const gptModel = { GPT_4o_MINI: "gpt-4o-mini", @@ -15,15 +17,16 @@ const gptModel = { GPT_4: "gpt-4", } +/** @type {LLMFunction} */ /** * Main function to call ChatGPT API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='GPT_4o_MINI'] - The GPT model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. 
+ * @param {ChatGPTModelType} [model='GPT_4o_MINI'] - The GPT model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during API call. */ -export async function callChatGPT(transcriptContent, outputFilePath, model = 'GPT_4o_MINI') { +export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o_MINI') { // Check for API key if (!env.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY environment variable is not set.') @@ -40,7 +43,7 @@ export async function callChatGPT(transcriptContent, outputFilePath, model = 'GP const response = await openai.chat.completions.create({ model: actualModel, max_tokens: 4000, // Maximum number of tokens in the response - messages: [{ role: 'user', content: transcriptContent }], // The input message (transcript content) + messages: [{ role: 'user', content: promptAndTranscript }], // The input message (transcript content) }) // Destructure the response to get relevant information @@ -51,14 +54,12 @@ export async function callChatGPT(transcriptContent, outputFilePath, model = 'GP } = response // Write the generated content to the output file - await writeFile(outputFilePath, content) + await writeFile(tempPath, content) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${tempPath}`) console.log(`\nFinish Reason: ${finish_reason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens\n`) - // Return the actual model name used - return Object.keys(gptModel).find(key => gptModel[key] === usedModel) || model } catch (error) { console.error('Error in callChatGPT:', error) throw error // Re-throw the error for handling in the calling function diff --git a/src/llms/claude.js b/src/llms/claude.js index 2e802c5..1773b37 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -4,9 +4,11 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' +/** @import { LLMFunction, ClaudeModelType } from '../types.js' */ + /** - * Define available Claude models - * @type {Object.} + * Map of Claude model identifiers to their API names + * @type {Record} */ const claudeModel = { CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", @@ -15,15 +17,16 @@ const claudeModel = { CLAUDE_3_HAIKU: "claude-3-haiku-20240307", } +/** @type {LLMFunction} */ /** * Main function to call Claude API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='CLAUDE_3_HAIKU'] - The Claude model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {ClaudeModelType} [model='CLAUDE_3_HAIKU'] - The Claude model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during the API call. 
*/ -export async function callClaude(transcriptContent, outputFilePath, model = 'CLAUDE_3_HAIKU') { +export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_3_HAIKU') { // Check if the ANTHROPIC_API_KEY environment variable is set if (!env.ANTHROPIC_API_KEY) { throw new Error('ANTHROPIC_API_KEY environment variable is not set.') @@ -40,7 +43,7 @@ export async function callClaude(transcriptContent, outputFilePath, model = 'CLA const response = await anthropic.messages.create({ model: actualModel, max_tokens: 4000, // Maximum number of tokens in the response - messages: [{ role: 'user', content: transcriptContent }] // The input message (transcript content) + messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content) }) // Destructure the response to get relevant information @@ -52,15 +55,13 @@ export async function callClaude(transcriptContent, outputFilePath, model = 'CLA } = response // Write the generated text to the output file - await writeFile(outputFilePath, text) + await writeFile(tempPath, text) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${tempPath}`) // console.log(`\nClaude response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nStop Reason: ${stop_reason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens\n`) - // Return the actual model name used - return Object.keys(claudeModel).find(key => claudeModel[key] === usedModel) || model } catch (error) { console.error('Error:', error) throw error // Re-throw the error for handling in the calling function diff --git a/src/llms/cohere.js b/src/llms/cohere.js index f973c44..67c5f4a 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -4,24 +4,27 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' +/** @import { LLMFunction, CohereModelType } from '../types.js' */ + /** - * Define available Cohere models - * @type {Object.} + * Map of Cohere model identifiers to their API names + * @type {Record} */ const cohereModel = { COMMAND_R: "command-r", // Standard Command model COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model } +/** @type {LLMFunction} */ /** * Main function to call Cohere API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='COMMAND_R'] - The Cohere model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {CohereModelType} [model='COMMAND_R'] - The Cohere model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during the API call. 
*/ -export async function callCohere(transcriptContent, outputFilePath, model = 'COMMAND_R') { +export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND_R') { // Check if the COHERE_API_KEY environment variable is set if (!env.COHERE_API_KEY) { throw new Error('COHERE_API_KEY environment variable is not set.') @@ -38,7 +41,7 @@ export async function callCohere(transcriptContent, outputFilePath, model = 'COM const response = await cohere.chat({ model: actualModel, // max_tokens: ?, // Cohere doesn't seem to have a max_tokens parameter for chat - message: transcriptContent // The input message (transcript content) + message: promptAndTranscript // The input message (prompt and transcript content) }) // Destructure the response to get relevant information @@ -49,15 +52,13 @@ export async function callCohere(transcriptContent, outputFilePath, model = 'COM } = response // Write the generated text to the output file - await writeFile(outputFilePath, text) + await writeFile(tempPath, text) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${tempPath}`) // console.log(`\nCohere response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nFinish Reason: ${finishReason}\nModel: ${actualModel}`) console.log(`Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens\n`) - // Return the actual model name used - return Object.keys(cohereModel).find(key => cohereModel[key] === actualModel) || model } catch (error) { console.error('Error:', error) throw error // Re-throw the error for handling in the calling function diff --git a/src/llms/gemini.js b/src/llms/gemini.js index f8e0829..4aa80d2 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -4,9 +4,11 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" +/** @import { LLMFunction, GeminiModelType } from '../types.js' */ + /** - * Define available Gemini models - * @type {Object.} + * Map of Gemini model identifiers to their API names + * @type {Record} */ const geminiModel = { GEMINI_1_5_FLASH: "gemini-1.5-flash", @@ -21,15 +23,16 @@ const geminiModel = { */ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); +/** @type {LLMFunction} */ /** * Main function to call Gemini API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='GEMINI_1_5_FLASH'] - The Gemini model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {GeminiModelType} [model='GEMINI_1_5_FLASH'] - The Gemini model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during the API call. 
*/ -export async function callGemini(transcriptContent, outputFilePath, model = 'GEMINI_1_5_FLASH') { +export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_1_5_FLASH') { // Check if the GEMINI_API_KEY environment variable is set if (!env.GEMINI_API_KEY) { throw new Error('GEMINI_API_KEY environment variable is not set.') @@ -43,13 +46,13 @@ export async function callGemini(transcriptContent, outputFilePath, model = 'GEM // Get the generative model const gem = genAI.getGenerativeModel({ model: actualModel }) - const maxRetries = 3; // Maximum number of retry attempts + const maxRetries = 3 // Maximum number of retry attempts // Retry loop for (let attempt = 1; attempt <= maxRetries; attempt++) { try { // Generate content using the selected model - const result = await gem.generateContent(transcriptContent) + const result = await gem.generateContent(promptAndTranscript) // Get the response from the generated content const response = await result.response @@ -63,13 +66,12 @@ export async function callGemini(transcriptContent, outputFilePath, model = 'GEM } // Write the generated text to the output file - await writeFile(outputFilePath, text) + await writeFile(tempPath, text) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + console.log(`\nTranscript saved to:\n - ${tempPath}`) console.log(`\nModel: ${actualModel}`) - // Return the model name used - return model + return } catch (error) { console.error(`Error in callGemini (attempt ${attempt}/${maxRetries}):`, error) diff --git a/src/llms/llama.js b/src/llms/llama.js index e0514b9..a58d06c 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -8,9 +8,11 @@ import { promisify } from 'node:util' const execAsync = promisify(exec) +/** @import { LLMFunction, LlamaModelType } from '../types.js' */ + /** - * Define local model configurations - * @type {Object.} + * Map of local model identifiers to their filenames and URLs + * @type {Record} */ const localModels = { LLAMA_3_1_8B_Q4_MODEL: { @@ -41,7 +43,7 @@ const localModels = { /** * Function to download the model if it doesn't exist. - * @param {string} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. + * @param {LlamaModelType} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. * @returns {Promise} - The path to the downloaded model. * @throws {Error} - If the model download fails. */ @@ -85,15 +87,16 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { } } +/** @type {LLMFunction} */ /** * Main function to call the local Llama model. * @param {string} promptAndTranscript - The combined prompt and transcript content. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {LlamaModelType} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. * @returns {Promise} * @throws {Error} - If an error occurs during processing. 
*/ -export async function callLlama(promptAndTranscript, outputFilePath, modelName = 'GEMMA_2_2B_Q4_MODEL') { +export async function callLlama(promptAndTranscript, tempPath, modelName = 'GEMMA_2_2B_Q4_MODEL') { try { // Ensure the model is downloaded const modelPath = await downloadModel(modelName) @@ -108,8 +111,8 @@ export async function callLlama(promptAndTranscript, outputFilePath, modelName = // Generate a response and write the response to a file const response = await session.prompt(promptAndTranscript) - await writeFile(outputFilePath, response) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + await writeFile(tempPath, response) + console.log(`\nTranscript saved to:\n - ${tempPath}`) console.log(`\nModel used:\n - ${modelName}\n`) } catch (error) { console.error('Error in callLlama:', error) diff --git a/src/llms/mistral.js b/src/llms/mistral.js index b3dbf32..212a2a5 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -4,9 +4,11 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' +/** @import { LLMFunction, MistralModelType } from '../types.js' */ + /** - * Define available Mistral AI models - * @type {Object.} + * Map of Mistral model identifiers to their API names + * @type {Record} */ const mistralModel = { MIXTRAL_8x7b: "open-mixtral-8x7b", @@ -15,15 +17,16 @@ const mistralModel = { MISTRAL_NEMO: "open-mistral-nemo" } +/** @type {LLMFunction} */ /** * Main function to call Mistral AI API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='MISTRAL_NEMO'] - The Mistral model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {MistralModelType} [model='MISTRAL_NEMO'] - The Mistral model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during the API call. 
*/ -export async function callMistral(transcriptContent, outputFilePath, model = 'MISTRAL_NEMO') { +export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRAL_NEMO') { // Check if the MISTRAL_API_KEY environment variable is set if (!env.MISTRAL_API_KEY) { throw new Error('MISTRAL_API_KEY environment variable is not set.') @@ -39,7 +42,7 @@ export async function callMistral(transcriptContent, outputFilePath, model = 'MI const response = await mistral.chat.complete({ model: actualModel, // max_tokens: ?, // Uncomment and set if you want to limit the response length - messages: [{ role: 'user', content: transcriptContent }], + messages: [{ role: 'user', content: promptAndTranscript }], }) // Destructure the response to extract relevant information @@ -50,15 +53,13 @@ export async function callMistral(transcriptContent, outputFilePath, model = 'MI } = response // Write the generated content to the specified output file - await writeFile(outputFilePath, content) - console.log(`\nTranscript saved to:\n - ${outputFilePath}`) + await writeFile(tempPath, content) + console.log(`\nTranscript saved to:\n - ${tempPath}`) // Log finish reason, used model, and token usage console.log(`\nFinish Reason: ${finishReason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens\n`) - // Return the name of the model used - return Object.keys(mistralModel).find(key => mistralModel[key] === usedModel) || model } catch (error) { // Log any errors that occur during the process console.error('Error:', error) diff --git a/src/llms/octo.js b/src/llms/octo.js index c6f6710..31d03e2 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -4,9 +4,11 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OctoAIClient } from '@octoai/sdk' +/** @import { LLMFunction, OctoModelType } from '../types.js' */ + /** - * Define available OctoAI models - * @type {Object.} + * Map of OctoAI model identifiers to their API names + * @type {Record} */ const octoModel = { LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", @@ -18,15 +20,16 @@ const octoModel = { WIZARD_2_8X_22B: "wizardlm-2-8x22b", } +/** @type {LLMFunction} */ /** * Main function to call OctoAI API. - * @param {string} transcriptContent - The transcript content to process. - * @param {string} outputFilePath - The file path to save the output. - * @param {string} [model='LLAMA_3_1_70B'] - The OctoAI model to use. - * @returns {Promise} - The actual model name used. + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {OctoModelType} [model='LLAMA_3_1_70B'] - The OctoAI model to use. + * @returns {Promise} * @throws {Error} - If an error occurs during the API call. 
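 * @example
 * // Hypothetical usage (assumes OCTOAI_API_KEY is set; the prompt text and temp path are illustrative):
 * // await callOcto('<prompt and transcript text>', 'content/example-octo-temp.md', 'LLAMA_3_1_70B')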
*/ -export async function callOcto(transcriptContent, outputFilePath, model = 'LLAMA_3_1_70B') { +export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1_70B') { // Check if the OCTOAI_API_KEY environment variable is set if (!env.OCTOAI_API_KEY) { throw new Error('OCTOAI_API_KEY environment variable is not set.') @@ -42,7 +45,7 @@ export async function callOcto(transcriptContent, outputFilePath, model = 'LLAMA const response = await octoai.textGen.createChatCompletion({ model: actualModel, // max_tokens: ?, // Uncomment and set if you want to limit the response length - messages: [{ role: "user", content: transcriptContent }] + messages: [{ role: "user", content: promptAndTranscript }] }) // Destructure the response to extract relevant information @@ -53,15 +56,13 @@ export async function callOcto(transcriptContent, outputFilePath, model = 'LLAMA } = response // Write the generated content to the specified output file - await writeFile(outputFilePath, content) - console.log(`Octo show notes saved to ${outputFilePath}`) + await writeFile(tempPath, content) + console.log(`Octo show notes saved to ${tempPath}`) // Log finish reason, used model, and token usage console.log(`\nFinish Reason: ${finishReason}\nModel: ${usedModel}`) console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens\n`) - // Return the name of the model used - return Object.keys(octoModel).find(key => octoModel[key] === usedModel) || model } catch (error) { // Log any errors that occur during the process console.error('Error:', error) diff --git a/src/llms/prompt.js b/src/llms/prompt.js index 8187c35..c398fe7 100644 --- a/src/llms/prompt.js +++ b/src/llms/prompt.js @@ -1,8 +1,10 @@ // src/llms/prompt.js +/** @import { PromptSection } from '../types.js' */ + /** * Define the structure for different sections of the prompt - * @type {Object.} + * @type {Object.} */ const sections = { // Section for generating titles diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index e9db1e8..80bf373 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -3,13 +3,8 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { AssemblyAI } from 'assemblyai' -import '../types.js' -/** - * Import custom types - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { TranscriptOption, ProcessingOptions } from '../types.js' */ /** * Check if the ASSEMBLY_API_KEY environment variable is set @@ -25,7 +20,7 @@ const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) * Main function to handle transcription using AssemblyAI. * @param {string} finalPath - The identifier used for naming output files. * @param {TranscriptOption} transcriptOpt - The transcription service to use. - * @param {ProcessingOptions} options - Additional options for processing. + * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. 
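 * @example
 * // Hypothetical usage (assumes ASSEMBLY_API_KEY is set and an audio file exists at the corresponding path):
 * // const transcript = await callAssembly('content/example-episode', 'assembly', { speakerLabels: true })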
*/ @@ -47,22 +42,22 @@ export async function callAssembly(finalPath, transcriptOpt, options) { let txtContent = '' // Helper function to format timestamps - const formatTime = timestamp => { + const formatTime = (timestamp) => { const totalSeconds = Math.floor(timestamp / 1000) return `${Math.floor(totalSeconds / 60).toString().padStart(2, '0')}:${(totalSeconds % 60).toString().padStart(2, '0')}` } // Process the transcript based on whether utterances are available if (transcript.utterances) { - // If utterances are available, format each utterance with speaker labels if used - txtContent = transcript.utterances.map(utt => + // If utterances are available, format each with speaker labels if used + txtContent = transcript.utterances.map((utt) => `${speakerLabels ? `Speaker ${utt.speaker} ` : ''}(${formatTime(utt.start)}): ${utt.text}` ).join('\n') } else if (transcript.words) { // If only words are available, group them into lines with timestamps let currentLine = '' let currentTimestamp = formatTime(transcript.words[0].start) - transcript.words.forEach(word => { + transcript.words.forEach((word) => { if (currentLine.length + word.text.length > 80) { // Start a new line if the current line exceeds 80 characters txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js index 7a8fc7b..3246cce 100644 --- a/src/transcription/deepgram.js +++ b/src/transcription/deepgram.js @@ -31,13 +31,13 @@ export async function callDeepgram(input, id) { // Use URL or file content based on input type isUrl ? { url: input } : await readFile(input), // Use the "nova-2" model with smart formatting - { model: "nova-2", smart_format: true } + { model: 'nova-2', smart_format: true } ) // Process and format the transcription result const formattedTranscript = result.results.channels[0].alternatives[0].paragraphs.paragraphs - .flatMap(paragraph => paragraph.sentences) - .map(sentence => { + .flatMap((paragraph) => paragraph.sentences) + .map((sentence) => { // Format timestamp and text for each sentence const minutes = Math.floor(sentence.start / 60).toString().padStart(2, '0') const seconds = Math.floor(sentence.start % 60).toString().padStart(2, '0') @@ -51,6 +51,6 @@ export async function callDeepgram(input, id) { } catch (error) { // Log any errors that occur during the transcription process console.error('Error processing the transcription:', error) - throw error // Re-throw the error for handling in the calling function + throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index aac16b5..cd92006 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -4,19 +4,14 @@ import { readFile, writeFile, access } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { basename, join } from 'node:path' -import '../types.js' const execPromise = promisify(exec) -/** - * Import custom types - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { TranscriptOption, ProcessingOptions, WhisperModelType } from '../types.js' */ /** * Define available Whisper models - * @type {Object.} + * @type {Record} */ const WHISPER_MODELS = { 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', @@ -31,13 +26,13 @@ const WHISPER_MODELS = { * Main function to handle transcription using Whisper. 
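 * Selects the Docker or non-Docker whisper.cpp path based on the provided options,
 * then converts the generated LRC file into a plain-text transcript with simplified timestamps.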
* @param {string} finalPath - The base path for the files. * @param {TranscriptOption} transcriptOpt - The transcription service to use. - * @param {ProcessingOptions} options - Additional options for processing. + * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ export async function callWhisper(finalPath, transcriptOpt, options) { try { - // Determine which Whisper model to use + /** @type {WhisperModelType} */ const whisperModel = options.whisper || options.whisperDocker || 'base' if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) @@ -54,8 +49,8 @@ export async function callWhisper(finalPath, transcriptOpt, options) { // Read, process, and format the generated LRC file const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') const txtContent = lrcContent.split('\n') - .filter(line => !line.startsWith('[by:whisper.cpp]')) - .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) + .filter((line) => !line.startsWith('[by:whisper.cpp]')) + .map((line) => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) .join('\n') // Write the formatted content to a text file @@ -72,7 +67,7 @@ export async function callWhisper(finalPath, transcriptOpt, options) { * Function to handle Whisper transcription using Docker. * @param {string} finalPath - The base path for the files. * @param {string} modelName - The model file name. - * @param {string} whisperModel - The Whisper model type. + * @param {WhisperModelType} whisperModel - The Whisper model type. * @returns {Promise} * @throws {Error} - If an error occurs during Docker transcription. */ @@ -125,7 +120,7 @@ async function callWhisperDocker(finalPath, modelName, whisperModel) { * Function to handle Whisper transcription without Docker. * @param {string} finalPath - The base path for the files. * @param {string} modelName - The model file name. - * @param {string} whisperModel - The Whisper model type. + * @param {WhisperModelType} whisperModel - The Whisper model type. * @returns {Promise} * @throws {Error} - If an error occurs during transcription. */ diff --git a/src/types.js b/src/types.js index 0ddd6db..3d60b4e 100644 --- a/src/types.js +++ b/src/types.js @@ -1,74 +1,230 @@ // src/types.js /** - * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptOption - * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'gemini'} LLMOption - * @typedef {function(string, string, any): Promise} LLMFunction - * @typedef {Object.} LLMFunctions - * - * @typedef {function(string, LLMOption, TranscriptOption, ProcessingOptions): Promise} HandlerFunction + * @file This file contains all the custom type definitions used across the Autoshow project. */ /** + * Represents the processing options passed through command-line arguments or interactive prompts. * @typedef {Object} ProcessingOptions - * @property {string[]} [prompt] - Specify prompt sections to include. * @property {string} [video] - URL of the YouTube video to process. * @property {string} [playlist] - URL of the YouTube playlist to process. - * @property {string} [urls] - File path containing URLs to process. - * @property {string} [file] - File path of the local audio/video file to process. + * @property {string} [urls] - File path containing a list of URLs to process. 
+ * @property {string} [file] - Local audio or video file path to process. * @property {string} [rss] - URL of the podcast RSS feed to process. - * @property {string[]} [item] - Specific items in the RSS feed to process. - * @property {string} [order='newest'] - Order for RSS feed processing ('newest' or 'oldest'). - * @property {number} [skip=0] - Number of items to skip when processing RSS feed. - * @property {string} [whisper] - Whisper model type for non-Docker version. - * @property {string} [whisperDocker] - Whisper model type for Docker version. - * @property {string} [chatgpt] - ChatGPT model to use for processing. - * @property {string} [claude] - Claude model to use for processing. - * @property {string} [cohere] - Cohere model to use for processing. - * @property {string} [mistral] - Mistral model to use for processing. - * @property {string} [octo] - Octo model to use for processing. - * @property {string} [llama] - Llama model to use for processing. - * @property {string} [gemini] - Gemini model to use for processing. - * @property {boolean} [deepgram=false] - Use Deepgram for transcription. - * @property {boolean} [assembly=false] - Use AssemblyAI for transcription. - * @property {boolean} [speakerLabels=false] - Use speaker labels for AssemblyAI transcription. - * @property {boolean} [noCleanUp=false] - Do not delete intermediary files after processing. - * @property {Object} [additionalProps] - Additional dynamic properties. + * @property {string[]} [item] - Specific items (audio URLs) from the RSS feed to process. + * @property {boolean} [noCleanUp] - Flag to indicate whether to keep temporary files after processing. + * @property {WhisperModelType} [whisper] - Whisper model type to use for transcription (e.g., 'tiny', 'base'). + * @property {WhisperModelType} [whisperDocker] - Whisper model type to use in Docker for transcription. + * @property {boolean} [deepgram] - Flag to use Deepgram for transcription. + * @property {boolean} [assembly] - Flag to use AssemblyAI for transcription. + * @property {boolean} [speakerLabels] - Flag to use speaker labels in AssemblyAI transcription. + * @property {string} [chatgpt] - ChatGPT model to use (e.g., 'GPT_4o_MINI'). + * @property {string} [claude] - Claude model to use (e.g., 'CLAUDE_3_SONNET'). + * @property {string} [cohere] - Cohere model to use (e.g., 'COMMAND_R_PLUS'). + * @property {string} [mistral] - Mistral model to use (e.g., 'MISTRAL_LARGE'). + * @property {string} [octo] - OctoAI model to use (e.g., 'LLAMA_3_1_8B'). + * @property {string} [llama] - Llama model to use for local inference (e.g., 'LLAMA_3_1_8B_Q4_MODEL'). + * @property {string} [gemini] - Gemini model to use (e.g., 'GEMINI_1_5_FLASH'). + * @property {string[]} [prompt] - Array of prompt sections to include (e.g., ['titles', 'summary']). + * @property {LLMOption} [llmOpt] - The selected LLM option. + * @property {TranscriptOption} [transcriptOpt] - The selected transcription option. + * @property {string} [llamaModel] - Specific Llama model to use. + * @property {number} [skip] - Number of items to skip in RSS feed processing. + * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). */ /** + * Represents the answers received from inquirer prompts in interactive mode. 
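+ * A minimal interactive run might yield, for example,
+ * { action: 'video', video: 'https://...', transcriptOpt: 'whisper', whisperModel: 'base' } (illustrative values only).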
* @typedef {Object} InquirerAnswers - * @property {string} [action] - * @property {string} [video] - * @property {string} [playlist] - * @property {string} [urls] - * @property {string} [file] - * @property {string} [rss] - * @property {boolean} [specifyItem] - * @property {string} [item] - * @property {LLMOption} [llmOpt] - * @property {string} [llamaModel] - * @property {TranscriptOption} [transcriptOpt] - * @property {boolean} [useDocker] - * @property {string} [whisperModel] - * @property {boolean} [speakerLabels] - * @property {string[]} [prompt] - * @property {boolean} [noCleanUp] + * @property {string} action - The action selected by the user (e.g., 'video', 'playlist'). + * @property {string} [video] - YouTube video URL provided by the user. + * @property {string} [playlist] - YouTube playlist URL provided by the user. + * @property {string} [urls] - File path containing URLs provided by the user. + * @property {string} [file] - Local audio/video file path provided by the user. + * @property {string} [rss] - RSS feed URL provided by the user. + * @property {boolean} [specifyItem] - Whether the user wants to specify specific RSS items. + * @property {string} [item] - Comma-separated audio URLs of specific RSS items. + * @property {LLMOption} [llmOpt] - LLM option selected by the user. + * @property {string} [llamaModel] - Specific Llama model selected by the user. + * @property {TranscriptOption} [transcriptOpt] - Transcription option selected by the user. + * @property {boolean} [useDocker] - Whether to use Docker for Whisper transcription. + * @property {WhisperModelType} [whisperModel] - Whisper model type selected by the user. + * @property {boolean} [speakerLabels] - Whether to use speaker labels in transcription. + * @property {string[]} [prompt] - Prompt sections selected by the user. + * @property {boolean} [noCleanUp] - Whether to keep temporary files after processing. + * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). + * @property {number} [skip] - Number of items to skip in RSS feed processing. */ /** + * Represents the structure of the inquirer prompt questions. + * @typedef {Object[]} InquirerQuestions + * @property {string} type - The type of the prompt (e.g., 'input', 'list', 'confirm', 'checkbox'). + * @property {string} name - The name of the answer property. + * @property {string} message - The message to display to the user. + * @property {Array|Function} [choices] - The choices available for selection (for 'list' and 'checkbox' types). + * @property {Function} [when] - A function to determine when to display the prompt. + * @property {Function} [validate] - A function to validate the user's input. + * @property {*} [default] - The default value for the prompt. + */ + +/** + * Represents a handler function for processing different actions (e.g., video, playlist). + * @callback HandlerFunction + * @param {string} input - The primary input (e.g., URL or file path) for processing. + * @param {LLMOption} [llmOpt] - The selected LLM option. + * @param {TranscriptOption} [transcriptOpt] - The selected transcription option. + * @param {ProcessingOptions} options - Additional processing options. + * @returns {Promise} - A promise that resolves when processing is complete. + */ + +/** + * Represents the data structure for markdown generation. * @typedef {Object} MarkdownData * @property {string} frontMatter - The front matter content for the markdown file. - * @property {string} finalPath - The base path for the files. 
- * @property {string} filename - The sanitized filename. + * @property {string} finalPath - The base file path (without extension) for the markdown file. + * @property {string} filename - The sanitized filename used for the markdown file. + */ + +/** + * Represents the metadata extracted from a YouTube video. + * @typedef {Object} VideoMetadata + * @property {string} formattedDate - The upload date in 'YYYY-MM-DD' format. + * @property {string} title - The title of the video. + * @property {string} thumbnail - The URL to the video's thumbnail image. + * @property {string} webpage_url - The URL to the video's webpage. + * @property {string} channel - The name of the channel that uploaded the video. + * @property {string} uploader_url - The URL to the uploader's channel page. */ /** + * Represents an item in an RSS feed. * @typedef {Object} RSSItem - * @property {string} publishDate - The publish date of the item. - * @property {string} title - The title of the item. - * @property {string} coverImage - The cover image URL of the item. - * @property {string} showLink - The show link of the item. - * @property {string} channel - The channel name. - * @property {string} channelURL - The channel URL. + * @property {string} publishDate - The publication date of the RSS item (e.g., '2024-09-24'). + * @property {string} title - The title of the RSS item. + * @property {string} coverImage - The URL to the cover image of the RSS item. + * @property {string} showLink - The URL to the show or episode. + * @property {string} channel - The name of the channel or podcast. + * @property {string} channelURL - The URL to the channel or podcast. + * @property {string} [description] - A brief description of the RSS item. + * @property {string} [audioURL] - The URL to the audio file of the RSS item. + */ + +/** + * Represents the options for RSS feed processing. + * @typedef {Object} RSSProcessingOptions + * @property {string} [order] - The order to process items ('newest' or 'oldest'). + * @property {number} [skip] - The number of items to skip. + */ + +/** + * Represents the options for downloading audio files. + * @typedef {Object} DownloadAudioOptions + * @property {string} [outputFormat] - The desired output audio format (e.g., 'wav'). + * @property {number} [sampleRate] - The sample rate for the audio file (e.g., 16000). + * @property {number} [channels] - The number of audio channels (e.g., 1 for mono). + */ + +/** + * Represents the supported file types for audio and video processing. + * @typedef {'wav' | 'mp3' | 'm4a' | 'aac' | 'ogg' | 'flac' | 'mp4' | 'mkv' | 'avi' | 'mov' | 'webm'} SupportedFileType + */ + +/** + * Represents the transcription services that can be used in the application. + * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptOption + * + * - `'whisper'`: Use Whisper.cpp for transcription. + * - `'whisperDocker'`: Use Whisper.cpp in a Docker container. + * - `'deepgram'`: Use Deepgram's transcription service. + * - `'assembly'`: Use AssemblyAI's transcription service. + */ + +/** + * Represents the options for transcription. + * @typedef {Object} TranscriptionOptions + * @property {boolean} [speakerLabels] - Whether to use speaker labels. + * @property {string} [language] - The language code for transcription (e.g., 'en'). + * @property {string} [model] - The model type to use for transcription. + */ + +/** + * Represents the available Whisper model types. 
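+ * Each value corresponds to a ggml model file fetched by whisper.cpp
+ * (for example, 'tiny' maps to ggml-tiny.bin in the WHISPER_MODELS map).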
+ * @typedef {'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large' | 'large-v1' | 'large-v2'} WhisperModelType + * + * - `'tiny'`: Smallest multilingual model. + * - `'tiny.en'`: Smallest English-only model. + * - `'base'`: Base multilingual model. + * - `'base.en'`: Base English-only model. + * - `'small'`: Small multilingual model. + * - `'small.en'`: Small English-only model. + * - `'medium'`: Medium multilingual model. + * - `'medium.en'`: Medium English-only model. + * - `'large'`: Largest multilingual model (same as 'large-v2'). + * - `'large-v1'`: Large multilingual model version 1. + * - `'large-v2'`: Large multilingual model version 2. + */ + +/** + * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output + * @typedef {Object} PromptSection + * @property {string} instruction - The instructions for the section. + * @property {string} example - An example output for the section. + */ + +/** + * Represents the options for Language Models (LLMs) that can be used in the application. + * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'gemini'} LLMOption + * + * - `'chatgpt'`: Use OpenAI's ChatGPT models. + * - `'claude'`: Use Anthropic's Claude models. + * - `'cohere'`: Use Cohere's language models. + * - `'mistral'`: Use Mistral AI's language models. + * - `'octo'`: Use OctoAI's language models. + * - `'llama'`: Use Llama models for local inference. + * - `'gemini'`: Use Google's Gemini models. + */ + +/** + * Represents the options for LLM processing. + * @typedef {Object} LLMOptions + * @property {string[]} [promptSections] - The sections to include in the prompt (e.g., ['titles', 'summary']). + * @property {string} [model] - The specific LLM model to use. + * @property {number} [temperature] - The temperature parameter for text generation. + * @property {number} [maxTokens] - The maximum number of tokens to generate. + */ + +/** + * Represents a function that calls an LLM for processing. + * @callback LLMFunction + * @param {string} promptAndTranscript - The combined prompt and transcript text to process. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {string} [model] - The specific model to use for the LLM (optional). + * @returns {Promise} - A promise that resolves when the LLM processing is complete. + */ + +/** + * Represents a mapping of LLM option keys to their corresponding functions. + * @typedef {Object.} LLMFunctions + * + * This ensures that only valid `LLMOption` values can be used as keys in the `llmFunctions` object. 
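+ * @example
+ * // Abbreviated sketch mirroring the llmFunctions map in src/utils/runLLM.js:
+ * // const llmFunctions = { llama: callLlama, chatgpt: callChatGPT, claude: callClaude }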
+ */ + +/** + * Define all available LLM models + * @typedef {'GPT_4o_MINI' | 'GPT_4o' | 'GPT_4_TURBO' | 'GPT_4'} ChatGPTModelType - Define available GPT models + * @typedef {'CLAUDE_3_5_SONNET' | 'CLAUDE_3_OPUS' | 'CLAUDE_3_SONNET' | 'CLAUDE_3_HAIKU'} ClaudeModelType - Define available Claude models + * @typedef {'COMMAND_R' | 'COMMAND_R_PLUS'} CohereModelType - Define available Cohere models + * @typedef {'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'} GeminiModelType - Define available Gemini models + * @typedef {'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'} MistralModelType - Define available Mistral AI models + * @typedef {'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'} OctoModelType - Define available OctoAI models + * @typedef {'LLAMA_3_1_8B_Q4_MODEL' | 'LLAMA_3_1_8B_Q6_MODEL' | 'GEMMA_2_2B_Q4_MODEL' | 'GEMMA_2_2B_Q6_MODEL' | 'TINY_LLAMA_1B_Q4_MODEL' | 'TINY_LLAMA_1B_Q6_MODEL'} LlamaModelType - Define local model configurations + */ + +/** + * Represents the function signature for cleaning up temporary files. + * @callback CleanUpFunction + * @param {string} id - The base filename (without extension) for the files to be cleaned up. + * @returns {Promise} - A promise that resolves when cleanup is complete. */ \ No newline at end of file diff --git a/src/utils/downloadAudio.js b/src/utils/downloadAudio.js index 5643cca..c644857 100644 --- a/src/utils/downloadAudio.js +++ b/src/utils/downloadAudio.js @@ -6,6 +6,8 @@ import { readFile, access } from 'node:fs/promises' import { fileTypeFromBuffer } from 'file-type' import ffmpeg from 'ffmpeg-static' +/** @import { SupportedFileType } from '../types.js' */ + const execFilePromise = promisify(execFile) const execPromise = promisify(exec) @@ -55,6 +57,7 @@ export async function downloadAudio(url, filename) { */ export async function downloadFileAudio(filePath, sanitizedFilename) { // Define supported audio and video formats + /** @type {Set} */ const supportedFormats = new Set([ 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm' ]) @@ -70,14 +73,14 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { // Determine the file type const fileType = await fileTypeFromBuffer(buffer) - if (!fileType || !supportedFormats.has(fileType.ext)) { + if (!fileType || !supportedFormats.has(/** @type {SupportedFileType} */ (fileType.ext))) { throw new Error( fileType ? `Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) } console.log(`Detected file type: ${fileType.ext}`) - const outputPath = `content/${sanitizedFilename}.wav` + const outputPath = `content/${sanitizedFilename}.wav` // If the file is not already a WAV, convert it if (fileType.ext !== 'wav') { await execPromise( diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.js index 5461f28..6962f23 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.js @@ -4,17 +4,12 @@ import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' -import '../types.js' + +/** @import { MarkdownData, RSSItem } from '../types.js' */ // Promisify the execFile function for use with async/await const execFilePromise = promisify(execFile) -/** - * Import custom types - * @typedef {MarkdownData} MarkdownData - * @typedef {RSSItem} RSSItem - */ - /** * Function to generate markdown for RSS feed items. 
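 * Builds a slugified, date-prefixed filename from the item's publish date and title,
 * then writes the front matter for the episode's markdown file.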
* @param {RSSItem} item - The RSS feed item object. @@ -32,7 +27,8 @@ export async function generateRSSMarkdown(item) { .trim() .replace(/[\s_]+/g, '-') .replace(/-+/g, '-') - .toLowerCase().slice(0, 200) + .toLowerCase() + .slice(0, 200) // Construct the filename, path, and front matter for the markdown file const filename = `${publishDate}-${sanitizedTitle}` diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 84f1a62..71efd3e 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -9,17 +9,9 @@ import { callCohere } from '../llms/cohere.js' import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' import { generatePrompt } from '../llms/prompt.js' -import '../types.js' -/** - * Import custom types - * @typedef {LLMOption} LLMOption - * @typedef {ProcessingOptions} ProcessingOptions - * @typedef {LLMFunction} LLMFunction - * @typedef {LLMFunctions} LLMFunctions - */ +/** @import { LLMOption, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' */ -// Object mapping LLM options to their respective functions /** @type {LLMFunctions} */ const llmFunctions = { llama: callLlama, @@ -38,6 +30,7 @@ const llmFunctions = { * @param {LLMOption} llmOpt - The selected Language Model option. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} + * @throws {Error} - If the LLM processing fails or an error occurs during execution. */ export async function runLLM(finalPath, frontMatter, llmOpt, options) { try { @@ -47,7 +40,9 @@ export async function runLLM(finalPath, frontMatter, llmOpt, options) { const promptAndTranscript = `${generatePrompt(options.prompt)}${transcript}` if (llmOpt) { - // Get the appropriate LLM function based on the option + /** Get the appropriate LLM function based on the option + * @type {LLMFunction} + */ const llmFunction = llmFunctions[llmOpt] if (!llmFunction) throw new Error(`Invalid LLM option: ${llmOpt}`) diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index 7218433..d5a2050 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -4,19 +4,14 @@ import { readFile, writeFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' -import '../types.js' -/** - * Import custom types - * @typedef {TranscriptOption} TranscriptOption - * @typedef {ProcessingOptions} ProcessingOptions - */ +/** @import { TranscriptOption, ProcessingOptions } from '../types.js' */ /** * Main function to run transcription. * @param {string} finalPath - The base path for the files. * @param {TranscriptOption} transcriptOpt - The transcription service to use. - * @param {ProcessingOptions} [options={}] - Additional options for processing. + * @param {ProcessingOptions} [options={}] - Additional processing options. * @param {string} [frontMatter=''] - Optional front matter content for the markdown file. * @returns {Promise} - Returns the final content including markdown and transcript. * @throws {Error} - If the transcription service fails or an error occurs during processing.
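 * @example
 * // Hypothetical call (the path and options shown are illustrative):
 * // const content = await runTranscription('content/2024-09-25-example-episode', 'whisper', { whisper: 'base' }, frontMatter)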