From 093b2f39bb6699c54a392f5b13668403b5d8673c Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 31 Oct 2024 02:38:52 -0500 Subject: [PATCH 1/3] add assembly types --- src/transcription/assembly.ts | 49 ++++++---- src/types.ts | 174 ++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 17 deletions(-) diff --git a/src/transcription/assembly.ts b/src/transcription/assembly.ts index f8f9327..74cab9f 100644 --- a/src/transcription/assembly.ts +++ b/src/transcription/assembly.ts @@ -1,9 +1,19 @@ +// src/transcription/assembly.ts + import { createReadStream } from 'node:fs' import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import fetch from 'node-fetch' import { l, wait, success, err } from '../globals.js' import type { ProcessingOptions } from '../types.js' +import type { + AssemblyAITranscriptionOptions, + AssemblyAIErrorResponse, + AssemblyAIUploadResponse, + AssemblyAITranscript, + AssemblyAIPollingResponse, + AssemblyAIUtterance, + AssemblyAIWord +} from '../types.js' const BASE_URL = 'https://api.assemblyai.com/v2' @@ -42,42 +52,45 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string 'Content-Type': 'application/octet-stream', }, body: fileStream, + duplex: 'half', }) if (!uploadResponse.ok) { - const errorData = await uploadResponse.json() + const errorData = await uploadResponse.json() as AssemblyAIErrorResponse throw new Error(`File upload failed: ${errorData.error || uploadResponse.statusText}`) } - const uploadData = await uploadResponse.json() - const upload_url: string = uploadData.upload_url + const uploadData = await uploadResponse.json() as AssemblyAIUploadResponse + const { upload_url } = uploadData if (!upload_url) { throw new Error('Upload URL not returned by AssemblyAI.') } l(success(' Audio file uploaded successfully.')) // Step 2: Request transcription + const transcriptionOptions: AssemblyAITranscriptionOptions = { + 
audio_url: upload_url, + speech_model: 'nano', + speaker_labels: speakerLabels || false + } + const response = await fetch(`${BASE_URL}/transcript`, { method: 'POST', headers, - body: JSON.stringify({ - audio_url: upload_url, - speech_model: 'nano', - speaker_labels: speakerLabels || false - }) + body: JSON.stringify(transcriptionOptions) }) if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`) } - const transcriptData = await response.json() + const transcriptData = await response.json() as AssemblyAITranscript // Step 3: Poll for completion - let transcript + let transcript: AssemblyAIPollingResponse while (true) { const pollingResponse = await fetch(`${BASE_URL}/transcript/${transcriptData.id}`, { headers }) - transcript = await pollingResponse.json() + transcript = await pollingResponse.json() as AssemblyAIPollingResponse if (transcript.status === 'completed' || transcript.status === 'error') { break @@ -86,7 +99,7 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string await new Promise(resolve => setTimeout(resolve, 3000)) } - if (transcript.status === 'error') { + if (transcript.status === 'error' || transcript.error) { throw new Error(`Transcription failed: ${transcript.error}`) } @@ -100,16 +113,17 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string } // Process the transcript based on whether utterances are available - if (transcript.utterances) { + if (transcript.utterances && transcript.utterances.length > 0) { // If utterances are available, format each with speaker labels if used - txtContent = transcript.utterances.map((utt: any) => + txtContent = transcript.utterances.map((utt: AssemblyAIUtterance) => `${speakerLabels ? 
`Speaker ${utt.speaker} ` : ''}(${formatTime(utt.start)}): ${utt.text}` ).join('\n') - } else if (transcript.words) { + } else if (transcript.words && transcript.words.length > 0) { // If only words are available, group them into lines with timestamps let currentLine = '' let currentTimestamp = formatTime(transcript.words[0].start) - transcript.words.forEach((word: any) => { + + transcript.words.forEach((word: AssemblyAIWord) => { if (currentLine.length + word.text.length > 80) { // Start a new line if the current line exceeds 80 characters txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` @@ -118,6 +132,7 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string } currentLine += `${word.text} ` }) + // Add the last line if there's any remaining text if (currentLine.length > 0) { txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` diff --git a/src/types.ts b/src/types.ts index 2c494ba..035e5bf 100644 --- a/src/types.ts +++ b/src/types.ts @@ -656,6 +656,180 @@ export type DeepgramResponse = { } } +// Assembly Request Types +export interface AssemblyAITranscriptionOptions { + audio_url: string + language_code?: string + speech_model?: 'default' | 'nano' + boost_param?: number + filter_profanity?: boolean + redact_pii?: boolean + redact_pii_audio?: boolean + redact_pii_audio_quality?: 'mp3' | 'wav' + redact_pii_policies?: Array< + | 'credit_card_number' + | 'bank_routing' + | 'credit_card_cvv' + | 'credit_card_expiration' + | 'date_of_birth' + | 'drivers_license' + | 'email_address' + | 'events' + | 'filename' + | 'gender_sexuality' + | 'healthcare_number' + | 'injury' + | 'ip_address' + | 'account_number' + | 'banking_information' + | 'blood_type' + | 'date' + | 'date_interval' + | 'drug' + | 'duration' + > + redact_pii_sub?: 'entity_name' | 'hash' + speaker_labels?: boolean + speakers_expected?: number + content_safety?: boolean + content_safety_labels?: boolean + iab_categories?: boolean + language_detection?: boolean 
+ punctuate?: boolean + format_text?: boolean + dual_channel?: boolean + speaker_boost?: boolean + speech_threshold?: number + throttled?: boolean + auto_chapters?: boolean + summarization?: boolean + summary_model?: string + summary_type?: string + custom_topics?: boolean + topics?: string[] + sentiment_analysis?: boolean + entity_detection?: boolean + auto_highlights?: boolean +} + +// Response Types +export interface AssemblyAIUploadResponse { + upload_url: string +} + +export interface AssemblyAITimestamp { + start: number + end: number +} + +export interface AssemblyAIWord extends AssemblyAITimestamp { + text: string + confidence: number + speaker?: string +} + +export interface AssemblyAIUtterance extends AssemblyAITimestamp { + text: string + speaker: string + confidence: number +} + +export interface AssemblyAIKeyPhrase extends AssemblyAITimestamp { + text: string + count: number + rank: number +} + +export interface AssemblyAIAutoHighlights { + status: 'success' | 'unavailable' + results: AssemblyAIKeyPhrase[] +} + +export interface AssemblyAIContentSafetyLabel extends AssemblyAITimestamp { + text: string + labels: Array<{ + text: string + confidence: number + severity: number + sentiment: { + text: string + confidence: number + } + }> + timestamp: AssemblyAITimestamp + confidence: number + speaker?: string +} + +export interface AssemblyAIContentSafety { + status: 'success' | 'unavailable' + results: AssemblyAIContentSafetyLabel[] + summary: { + status: 'success' | 'unavailable' + result: { + text: string + confidence: number + } + } +} + +export interface AssemblyAISentimentAnalysisResult extends AssemblyAITimestamp { + text: string + sentiment: 'POSITIVE' | 'NEUTRAL' | 'NEGATIVE' + confidence: number + speaker?: string +} + +export interface AssemblyAIEntity extends AssemblyAITimestamp { + text: string + entity_type: string + speaker?: string +} + +export interface AssemblyAIChapter extends AssemblyAITimestamp { + gist: string + headline: string + 
summary: string +} + +export interface AssemblyAITranscript { + id: string + status: 'queued' | 'processing' | 'completed' | 'error' + acoustic_model: string + audio_duration: number + audio_url: string + audio_start_from?: number + audio_end_at?: number + text: string + confidence: number + language_code: string + utterances?: AssemblyAIUtterance[] + words?: AssemblyAIWord[] + paragraphs?: Array<{ + text: string + start: number + end: number + }> + error?: string + + // Optional enhanced results based on enabled features + auto_highlights?: AssemblyAIAutoHighlights + content_safety?: AssemblyAIContentSafety + sentiment_analysis_results?: AssemblyAISentimentAnalysisResult[] + entities?: AssemblyAIEntity[] + chapters?: AssemblyAIChapter[] +} + +// Error Response Type +export interface AssemblyAIErrorResponse { + error: string + status?: number + message?: string +} + +// Status polling response type +export type AssemblyAIPollingResponse = Pick + /** * Function signature for cleaning up temporary files. * From 49d232d0d822a82a07c2584e936bbb054ffd8c9b Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 31 Oct 2024 02:39:30 -0500 Subject: [PATCH 2/3] refactor scripts --- docs/examples.md | 6 +- package.json | 30 ++--- scripts/cleanContent.ts | 4 +- src/llms/ollama.ts | 2 +- test/bench.test.ts | 250 ++++++++++++++++++++-------------------- typedoc.json | 2 +- 6 files changed, 148 insertions(+), 146 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index 10f8d9c..8c2dc07 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -648,14 +648,14 @@ npm run prune ### Bun ```bash -bun bun-as -- \ +npm run bun -- \ --video "https://www.youtube.com/watch?v=MORMZXEaONk" ``` ### Deno ```bash -deno task deno-as \ +npm run deno -- \ --video "https://www.youtube.com/watch?v=MORMZXEaONk" ``` @@ -667,7 +667,7 @@ Integration test. 
- Mostly uses transcripts of videos around one minute long and cheaper models when possible, so the total cost of running this for any given service should be at most only a few cents. ```bash -npm run test-all +npm run test-integrations ``` Local services test, only uses Whisper for transcription and Ollama for LLM operations. diff --git a/package.json b/package.json index c722f9e..2d93514 100644 --- a/package.json +++ b/package.json @@ -20,24 +20,26 @@ "autoshow": "./dist/autoshow.js" }, "scripts": { + "tsx:base": "tsx --env-file=.env --no-warnings", "setup": "bash ./scripts/setup.sh", "setup-python": "bash ./scripts/setup-python.sh", - "autoshow": "tsx --env-file=.env --no-warnings src/autoshow.ts", - "as": "tsx --env-file=.env --no-warnings src/autoshow.ts", + "autoshow": "npm run tsx:base -- src/autoshow.ts", + "as": "npm run tsx:base -- src/autoshow.ts", + "v": "npm run tsx:base -- src/autoshow.ts --video", + "u": "npm run tsx:base -- src/autoshow.ts --urls", + "p": "npm run tsx:base -- src/autoshow.ts --playlist", + "f": "npm run tsx:base -- src/autoshow.ts --file", + "r": "npm run tsx:base -- src/autoshow.ts --rss", + "rss-info": "npm run tsx:base -- src/autoshow.ts --info --rss", + "last2": "npm run tsx:base -- src/autoshow.ts --last 2 --rss", + "last3": "npm run tsx:base -- src/autoshow.ts --last 3 --rss", "docker": "docker compose run --remove-orphans --rm autoshow --whisperDocker", "docker-up": "docker compose up --build -d --remove-orphans --no-start", "ds": "docker compose images && docker compose ls", "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af", - "v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --video", - "u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --urls", - "p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --playlist", - "f": "tsx --env-file=.env --no-warnings 
src/autoshow.ts --whisper large-v3-turbo --file", - "r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --rss", - "last2": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 2 --rss", - "last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 3 --rss", - "serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts", - "fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts", - "fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts", + "serve": "npm run tsx:base -- --watch packages/server/index.ts", + "fetch-local": "npm run tsx:base -- packages/server/tests/fetch-local.ts", + "fetch-all": "npm run tsx:base -- packages/server/tests/fetch-all.ts", "t": "npm run test-local", "bench": "tsx --test test/bench.test.ts", "test-bench": "tsx --test test/bench.test.ts", @@ -45,8 +47,8 @@ "test-docker": "tsx --test test/docker.test.ts", "test-integrations": "tsx --test test/integrations.test.ts", "clean": "tsx scripts/cleanContent.ts", - "bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts", - "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts" + "bun": "bun --env-file=.env --no-warnings src/autoshow.ts", + "deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/autoshow.ts" }, "dependencies": { "@anthropic-ai/sdk": "0.30.1", diff --git a/scripts/cleanContent.ts b/scripts/cleanContent.ts index 5a06ce8..3640fe8 100644 --- a/scripts/cleanContent.ts +++ b/scripts/cleanContent.ts @@ -1,7 +1,7 @@ // scripts/cleanContent.ts -import { exec } from 'child_process' -import { promisify } from 'util' +import { exec } from 'node:child_process' +import { promisify } from 'node:util' import { l, err } from '../src/globals.js' const execAsync = promisify(exec) diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts 
index c2d1b9a..cf92c53 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OLLAMA_MODELS, l, err, wait } from '../globals.js' -import { spawn } from 'child_process' +import { spawn } from 'node:child_process' import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } from '../types.js' /** diff --git a/test/bench.test.ts b/test/bench.test.ts index 1352d64..37bc0de 100644 --- a/test/bench.test.ts +++ b/test/bench.test.ts @@ -18,141 +18,141 @@ const commands: Command[] = [ expectedFile: 'audio-prompt.md', newName: '01_TINY_WHISPERCPP.md' }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper base', - // expectedFile: 'audio-prompt.md', - // newName: '02_BASE_WHISPERCPP.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper small', - // expectedFile: 'audio-prompt.md', - // newName: '03_SMALL_WHISPERCPP.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper medium', - // expectedFile: 'audio-prompt.md', - // newName: '04_MEDIUM_WHISPERCPP.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v1', - // expectedFile: 'audio-prompt.md', - // newName: '05_LARGE_V1_WHISPERCPP.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v2', - // expectedFile: 'audio-prompt.md', - // newName: '06_LARGE_V2_WHISPERCPP.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v3-turbo', - // expectedFile: 'audio-prompt.md', - // newName: '07_LARGE_V3_TURBO_WHISPERCPP.md' - // }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisper base', + expectedFile: 'audio-prompt.md', + newName: '02_BASE_WHISPERCPP.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisper small', + expectedFile: 'audio-prompt.md', + newName: '03_SMALL_WHISPERCPP.md' + }, + { + cmd: 'npm run as -- 
--file "content/audio.mp3" --whisper medium', + expectedFile: 'audio-prompt.md', + newName: '04_MEDIUM_WHISPERCPP.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v1', + expectedFile: 'audio-prompt.md', + newName: '05_LARGE_V1_WHISPERCPP.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v2', + expectedFile: 'audio-prompt.md', + newName: '06_LARGE_V2_WHISPERCPP.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisper large-v3-turbo', + expectedFile: 'audio-prompt.md', + newName: '07_LARGE_V3_TURBO_WHISPERCPP.md' + }, { cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker tiny', expectedFile: 'audio-prompt.md', newName: '08_TINY_WHISPERCPP_DOCKER.md' }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker base', - // expectedFile: 'audio-prompt.md', - // newName: '09_BASE_WHISPERCPP_DOCKER.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker small', - // expectedFile: 'audio-prompt.md', - // newName: '10_SMALL_WHISPERCPP_DOCKER.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker medium', - // expectedFile: 'audio-prompt.md', - // newName: '11_MEDIUM_WHISPERCPP_DOCKER.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v1', - // expectedFile: 'audio-prompt.md', - // newName: '12_LARGE_V1_WHISPERCPP_DOCKER.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v2', - // expectedFile: 'audio-prompt.md', - // newName: '13_LARGE_V2_WHISPERCPP_DOCKER.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v3-turbo', - // expectedFile: 'audio-prompt.md', - // newName: '14_LARGE_V3_TURBO_WHISPERCPP_DOCKER.md' - // }, { - cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization tiny', + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker base', expectedFile: 
'audio-prompt.md', - newName: '15_TINY_DIARIZATION.md' - }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization base', - // expectedFile: 'audio-prompt.md', - // newName: '16_BASE_DIARIZATION.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization small', - // expectedFile: 'audio-prompt.md', - // newName: '17_SMALL_DIARIZATION.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization medium', - // expectedFile: 'audio-prompt.md', - // newName: '18_MEDIUM_DIARIZATION.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v1', - // expectedFile: 'audio-prompt.md', - // newName: '19_LARGE_V1_DIARIZATION.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v2', - // expectedFile: 'audio-prompt.md', - // newName: '20_LARGE_V2_DIARIZATION.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v3-turbo', - // expectedFile: 'audio-prompt.md', - // newName: '21_LARGE_V3_TURBO_DIARIZATION.md' - // }, + newName: '09_BASE_WHISPERCPP_DOCKER.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker small', + expectedFile: 'audio-prompt.md', + newName: '10_SMALL_WHISPERCPP_DOCKER.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker medium', + expectedFile: 'audio-prompt.md', + newName: '11_MEDIUM_WHISPERCPP_DOCKER.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v1', + expectedFile: 'audio-prompt.md', + newName: '12_LARGE_V1_WHISPERCPP_DOCKER.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v2', + expectedFile: 'audio-prompt.md', + newName: '13_LARGE_V2_WHISPERCPP_DOCKER.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDocker large-v3-turbo', + expectedFile: 'audio-prompt.md', + newName: 
'14_LARGE_V3_TURBO_WHISPERCPP_DOCKER.md' + }, { cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython tiny', expectedFile: 'audio-prompt.md', - newName: '22_TINY_PYTHON.md' - }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython base', - // expectedFile: 'audio-prompt.md', - // newName: '23_BASE_PYTHON.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython small', - // expectedFile: 'audio-prompt.md', - // newName: '24_SMALL_PYTHON.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython medium', - // expectedFile: 'audio-prompt.md', - // newName: '25_MEDIUM_PYTHON.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v1', - // expectedFile: 'audio-prompt.md', - // newName: '26_LARGE_V1_PYTHON.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v2', - // expectedFile: 'audio-prompt.md', - // newName: '27_LARGE_V2_PYTHON.md' - // }, - // { - // cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v3-turbo', - // expectedFile: 'audio-prompt.md', - // newName: '28_LARGE_V3_TURBO_PYTHON.md' - // } + newName: '15_TINY_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython base', + expectedFile: 'audio-prompt.md', + newName: '16_BASE_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython small', + expectedFile: 'audio-prompt.md', + newName: '17_SMALL_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython medium', + expectedFile: 'audio-prompt.md', + newName: '18_MEDIUM_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v1', + expectedFile: 'audio-prompt.md', + newName: '19_LARGE_V1_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v2', + expectedFile: 'audio-prompt.md', + newName: '20_LARGE_V2_PYTHON.md' + }, + { + 
cmd: 'npm run as -- --file "content/audio.mp3" --whisperPython large-v3-turbo', + expectedFile: 'audio-prompt.md', + newName: '21_LARGE_V3_TURBO_PYTHON.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization tiny', + expectedFile: 'audio-prompt.md', + newName: '22_TINY_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization base', + expectedFile: 'audio-prompt.md', + newName: '23_BASE_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization small', + expectedFile: 'audio-prompt.md', + newName: '24_SMALL_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization medium', + expectedFile: 'audio-prompt.md', + newName: '25_MEDIUM_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v1', + expectedFile: 'audio-prompt.md', + newName: '26_LARGE_V1_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v2', + expectedFile: 'audio-prompt.md', + newName: '27_LARGE_V2_DIARIZATION.md' + }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --whisperDiarization large-v3-turbo', + expectedFile: 'audio-prompt.md', + newName: '28_LARGE_V3_TURBO_DIARIZATION.md' + }, ] test('Autoshow Command Tests', async (t) => { diff --git a/typedoc.json b/typedoc.json index 796849d..707c75a 100644 --- a/typedoc.json +++ b/typedoc.json @@ -1,6 +1,6 @@ { "$schema": "https://typedoc.org/schema.json", - "entryPoints": ["src/types.ts", "src/autoshow.ts"], + "entryPoints": ["src/*.ts"], "out": "out", "tsconfig": "tsconfig.json", "excludePrivate": true, From d036493e1464c35dd0e38d2367331adcfb2b80d6 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 31 Oct 2024 14:13:28 -0500 Subject: [PATCH 3/3] add channel option --- docs/examples.md | 7 ++ package.json | 2 + src/autoshow.ts | 13 +- src/commands/processChannel.ts | 147 
++++++++++++++++++++++ src/commands/processPlaylist.ts | 102 ++++++++++++---- src/commands/processURLs.ts | 110 ++++++++++++----- src/globals.ts | 2 +- src/types.ts | 187 ++++++++++++++++++++++++++++ src/utils/downloadAudio.ts | 9 +- src/utils/extractVideoMetadata.ts | 97 --------------- src/utils/generateMarkdown.ts | 195 ++++++++++++++++++------------ 11 files changed, 631 insertions(+), 240 deletions(-) create mode 100644 src/commands/processChannel.ts delete mode 100644 src/utils/extractVideoMetadata.ts diff --git a/docs/examples.md b/docs/examples.md index 8c2dc07..e2be889 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -60,6 +60,13 @@ npm run as -- \ --info ``` +### Process All Videos from a YouTube Channel + +```bash +npm run as -- \ + --channel "https://www.youtube.com/@ajcwebdev" +``` + ### Process Multiple Videos Specified in a URLs File Run on an arbitrary list of URLs in `example-urls.md`. diff --git a/package.json b/package.json index 2d93514..eddfb4f 100644 --- a/package.json +++ b/package.json @@ -27,10 +27,12 @@ "as": "npm run tsx:base -- src/autoshow.ts", "v": "npm run tsx:base -- src/autoshow.ts --video", "u": "npm run tsx:base -- src/autoshow.ts --urls", + "urls": "npm run tsx:base -- src/autoshow.ts --urls content/urls.md", "p": "npm run tsx:base -- src/autoshow.ts --playlist", "f": "npm run tsx:base -- src/autoshow.ts --file", "r": "npm run tsx:base -- src/autoshow.ts --rss", "rss-info": "npm run tsx:base -- src/autoshow.ts --info --rss", + "info": "npm run tsx:base -- src/autoshow.ts --info", "last2": "npm run tsx:base -- src/autoshow.ts --last 2 --rss", "last3": "npm run tsx:base -- src/autoshow.ts --last 3 --rss", "docker": "docker compose run --remove-orphans --rm autoshow --whisperDocker", diff --git a/src/autoshow.ts b/src/autoshow.ts index f423775..ccc9f52 100644 --- a/src/autoshow.ts +++ b/src/autoshow.ts @@ -15,6 +15,7 @@ import { Command } from 'commander' import { handleInteractivePrompt } from './interactive.js' import { 
processVideo } from './commands/processVideo.js' import { processPlaylist } from './commands/processPlaylist.js' +import { processChannel } from './commands/processChannel.js' import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' @@ -38,6 +39,7 @@ program // Input source options .option('-v, --video ', 'Process a single YouTube video') .option('-p, --playlist ', 'Process all videos in a YouTube playlist') + .option('-c, --channel ', 'Process all videos in a YouTube channel') .option('-u, --urls ', 'Process YouTube videos from a list of URLs in a file') .option('-f, --file ', 'Process a local audio or video file') .option('-r, --rss ', 'Process a podcast RSS feed') @@ -46,7 +48,7 @@ program .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)') .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt) .option('--last ', 'Number of most recent items to process (overrides --order and --skip)', parseInt) - .option('--info', 'Generate JSON file with RSS feed information instead of processing items') + .option('--info', 'Generate JSON file with RSS feed or channel information instead of processing items') // Transcription service options .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification') .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification') @@ -56,15 +58,15 @@ program .option('--assembly', 'Use AssemblyAI for transcription') .option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription') // LLM service options + .option('--ollama [model]', 'Use Ollama for processing with optional model specification') .option('--chatgpt [model]', 'Use ChatGPT for processing with optional model specification') .option('--claude [model]', 'Use Claude for processing with optional model specification') + 
.option('--gemini [model]', 'Use Gemini for processing with optional model specification') .option('--cohere [model]', 'Use Cohere for processing with optional model specification') .option('--mistral [model]', 'Use Mistral for processing') .option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification') .option('--together [model]', 'Use Together AI for processing with optional model specification') .option('--groq [model]', 'Use Groq for processing with optional model specification') - .option('--ollama [model]', 'Use Ollama for processing with optional model specification') - .option('--gemini [model]', 'Use Gemini for processing with optional model specification') // Utility options .option('--prompt ', 'Specify prompt sections to include') .option('--noCleanUp', 'Do not delete intermediary files after processing') @@ -75,6 +77,7 @@ program Examples: $ autoshow --video "https://www.youtube.com/watch?v=..." $ autoshow --playlist "https://www.youtube.com/playlist?list=..." + $ autoshow --channel "https://www.youtube.com/channel/..." 
$ autoshow --file "content/audio.mp3" $ autoshow --rss "https://feeds.transistor.fm/fsjam-podcast/" @@ -99,6 +102,7 @@ program.action(async (options: ProcessingOptions) => { const PROCESS_HANDLERS: Record = { video: processVideo, playlist: processPlaylist, + channel: processChannel, urls: processURLs, file: processFile, rss: processRSS, @@ -142,20 +146,17 @@ program.action(async (options: ProcessingOptions) => { llmServices, transcriptServices ) - // Log success message l(final(`\n================================================================================================`)) l(final(` ${action} Processing Completed Successfully.`)) l(final(`================================================================================================\n`)) exit(0) } catch (error) { - // Log error and exit if processing fails err(`Error processing ${action}:`, (error as Error).message) exit(1) } } }) -// Set up error handling for unknown commands program.on('command:*', function () { err(`Error: Invalid command '${program.args.join(' ')}'. Use --help to see available commands.`) exit(1) diff --git a/src/commands/processChannel.ts b/src/commands/processChannel.ts new file mode 100644 index 0000000..e7dabf1 --- /dev/null +++ b/src/commands/processChannel.ts @@ -0,0 +1,147 @@ +// src/commands/processChannel.ts + +/** + * @file Processes an entire YouTube channel, handling metadata extraction and individual video processing. + * @packageDocumentation + */ + +import { writeFile } from 'node:fs/promises' +import { processVideo } from './processVideo.js' +import { l, err, opts, success, execFilePromise } from '../globals.js' +import type { + LLMServices, TranscriptServices, ProcessingOptions, VideoMetadata, +} from '../types.js' + +/** + * Processes an entire YouTube channel by: + * 1. Fetching all video URLs from the channel using yt-dlp. + * 2. Optionally extracting metadata for all videos. + * 3. Processing each video sequentially with error handling. 
+ * + * The function continues processing remaining videos even if individual videos fail. + * + * @param options - Configuration options for processing. + * @param channelUrl - URL of the YouTube channel to process. + * @param llmServices - Optional language model service for transcript processing. + * @param transcriptServices - Optional transcription service for audio conversion. + * @throws Will terminate the process with exit code 1 if the channel itself cannot be processed. + * @returns Promise that resolves when all videos have been processed. + */ +export async function processChannel( + options: ProcessingOptions, + channelUrl: string, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { + // Log the processing parameters for debugging purposes + l(opts('Parameters passed to processChannel:\n')) + l(opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}`)) + + try { + // Extract all video URLs from the channel using yt-dlp + const { stdout, stderr } = await execFilePromise('yt-dlp', [ + '--flat-playlist', + '--print', 'url', + '--no-warnings', + channelUrl, + ]) + + // Log any warnings from yt-dlp + if (stderr) { + err(`yt-dlp warnings: ${stderr}`) + } + + // Convert stdout into array of video URLs, removing empty entries + const urls = stdout.trim().split('\n').filter(Boolean) + + // Exit if no videos were found in the channel + if (urls.length === 0) { + err('Error: No videos found in the channel.') + process.exit(1) + } + + l(opts(`\nFound ${urls.length} videos in the channel...`)) + + // If the --info option is provided, extract metadata for all videos + if (options.info) { + // Collect metadata for all videos in parallel + const metadataList = await Promise.all( + urls.map(async (url) => { + try { + // Execute yt-dlp command to extract metadata + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + 
'--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + url, + ]) + + // Split the output into individual metadata fields + const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout + .trim() + .split('\n') + + // Validate that all required metadata fields are present + if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + + // Return the metadata object + return { + showLink, + channel, + channelURL, + title, + description: '', + publishDate, + coverImage, + } as VideoMetadata + } catch (error) { + // Log error but return null to filter out failed extractions + err( + `Error extracting metadata for ${url}: ${ + error instanceof Error ? error.message : String(error) + }` + ) + return null + } + }) + ) + + // Filter out any null results due to errors + const validMetadata = metadataList.filter( + (metadata): metadata is VideoMetadata => metadata !== null + ) + + // Save metadata to a JSON file + const jsonContent = JSON.stringify(validMetadata, null, 2) + const jsonFilePath = 'content/channel_info.json' + await writeFile(jsonFilePath, jsonContent) + l(success(`Channel information saved to: ${jsonFilePath}`)) + return + } + + // Process each video sequentially, with error handling for individual videos + for (const [index, url] of urls.entries()) { + // Visual separator for each video in the console + l(opts(`\n================================================================================================`)) + l(opts(` Processing video ${index + 1}/${urls.length}: ${url}`)) + l(opts(`================================================================================================\n`)) + try { + // Process the video using the existing processVideo function + await processVideo(options, url, llmServices, transcriptServices) + } catch (error) { + // Log error but continue 
processing remaining videos + err(`Error processing video ${url}: ${(error as Error).message}`) + } + } + } catch (error) { + // Handle fatal errors that prevent channel processing + err(`Error processing channel: ${(error as Error).message}`) + process.exit(1) + } +} diff --git a/src/commands/processPlaylist.ts b/src/commands/processPlaylist.ts index 81609e0..59c82b2 100644 --- a/src/commands/processPlaylist.ts +++ b/src/commands/processPlaylist.ts @@ -1,33 +1,31 @@ // src/commands/processPlaylist.ts /** - * @file Process all videos from a YouTube playlist, handling metadata extraction and individual video processing. + * @file Processes all videos from a YouTube playlist, handling metadata extraction and individual video processing. * @packageDocumentation */ import { writeFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' -import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' import { l, err, opts, success, execFilePromise } from '../globals.js' -import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' +import type { + LLMServices, TranscriptServices, ProcessingOptions, VideoMetadata, +} from '../types.js' /** * Processes an entire YouTube playlist by: - * 1. Validating system dependencies - * 2. Fetching all video URLs from the playlist using yt-dlp - * 3. Extracting metadata for each video - * 4. Either: - * a. Generating a JSON file with playlist information (if --info option is used) - * b. Processing each video sequentially with error handling - * + * 1. Fetching all video URLs from the playlist using yt-dlp. + * 2. Optionally extracting metadata for all videos. + * 3. Processing each video sequentially with error handling. + * * The function continues processing remaining videos even if individual videos fail. 
- * - * @param options - Configuration options for processing - * @param playlistUrl - URL of the YouTube playlist to process - * @param llmServices - Optional language model service for transcript processing - * @param transcriptServices - Optional transcription service for audio conversion - * @throws Will terminate the process with exit code 1 if the playlist itself cannot be processed - * @returns Promise that resolves when all videos have been processed or JSON info has been saved + * + * @param options - Configuration options for processing. + * @param playlistUrl - URL of the YouTube playlist to process. + * @param llmServices - Optional language model service for transcript processing. + * @param transcriptServices - Optional transcription service for audio conversion. + * @throws Will terminate the process with exit code 1 if the playlist itself cannot be processed. + * @returns Promise that resolves when all videos have been processed. */ export async function processPlaylist( options: ProcessingOptions, @@ -38,38 +36,95 @@ export async function processPlaylist( // Log the processing parameters for debugging purposes l(opts('Parameters passed to processPlaylist:\n')) l(opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}`)) + try { // Extract all video URLs from the playlist using yt-dlp const { stdout, stderr } = await execFilePromise('yt-dlp', [ '--flat-playlist', '--print', 'url', '--no-warnings', - playlistUrl + playlistUrl, ]) + // Log any warnings from yt-dlp if (stderr) { err(`yt-dlp warnings: ${stderr}`) } + // Convert stdout into array of video URLs, removing empty entries const urls = stdout.trim().split('\n').filter(Boolean) + // Exit if no videos were found in the playlist if (urls.length === 0) { err('Error: No videos found in the playlist.') process.exit(1) } + l(opts(`\nFound ${urls.length} videos in the playlist...`)) - // Collect metadata for all videos in parallel - const metadataPromises = 
urls.map(extractVideoMetadata) - const metadataList = await Promise.all(metadataPromises) - const validMetadata = metadataList.filter(Boolean) - // Handle --info option: save metadata to JSON and exit + + // If the --info option is provided, extract metadata for all videos if (options.info) { + // Collect metadata for all videos in parallel + const metadataList = await Promise.all( + urls.map(async (url) => { + try { + // Execute yt-dlp command to extract metadata + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + url, + ]) + + // Split the output into individual metadata fields + const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout + .trim() + .split('\n') + + // Validate that all required metadata fields are present + if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + + // Return the metadata object + return { + showLink, + channel, + channelURL, + title, + description: '', + publishDate, + coverImage, + } as VideoMetadata + } catch (error) { + // Log error but return null to filter out failed extractions + err( + `Error extracting metadata for ${url}: ${ + error instanceof Error ? 
error.message : String(error) + }` + ) + return null + } + }) + ) + + // Filter out any null results due to errors + const validMetadata = metadataList.filter( + (metadata): metadata is VideoMetadata => metadata !== null + ) + + // Save metadata to a JSON file const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/playlist_info.json' await writeFile(jsonFilePath, jsonContent) l(success(`Playlist information saved to: ${jsonFilePath}`)) return } + // Process each video sequentially, with error handling for individual videos for (const [index, url] of urls.entries()) { // Visual separator for each video in the console @@ -77,6 +132,7 @@ export async function processPlaylist( l(opts(` Processing video ${index + 1}/${urls.length}: ${url}`)) l(opts(`================================================================================================\n`)) try { + // Process the video using the existing processVideo function await processVideo(options, url, llmServices, transcriptServices) } catch (error) { // Log error but continue processing remaining videos diff --git a/src/commands/processURLs.ts b/src/commands/processURLs.ts index e56875b..666bc35 100644 --- a/src/commands/processURLs.ts +++ b/src/commands/processURLs.ts @@ -1,35 +1,31 @@ // src/commands/processURLs.ts /** - * @file Process multiple YouTube videos from a list of URLs stored in a file. + * @file Processes multiple YouTube videos from a list of URLs stored in a file. 
* @packageDocumentation */ import { readFile, writeFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' -import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' -import { l, err, wait, opts } from '../globals.js' -import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' +import { l, err, wait, opts, execFilePromise } from '../globals.js' +import type { + LLMServices, TranscriptServices, ProcessingOptions, VideoMetadata, +} from '../types.js' /** * Processes multiple YouTube videos from a file containing URLs by: - * 1. Validating system dependencies - * 2. Reading and parsing URLs from the input file - * - Skips empty lines and comments (lines starting with #) - * 3. Extracting metadata for all videos - * 4. Either: - * a. Generating a JSON file with video information (if --info option is used) - * b. Processing each video sequentially with error handling - * - * Similar to processPlaylist, this function continues processing - * remaining URLs even if individual videos fail. - * - * @param options - Configuration options for processing - * @param filePath - Path to the file containing video URLs (one per line) - * @param llmServices - Optional language model service for transcript processing - * @param transcriptServices - Optional transcription service for audio conversion - * @throws Will terminate the process with exit code 1 if the file cannot be read or contains no valid URLs - * @returns Promise that resolves when all videos have been processed or JSON info has been saved + * 1. Reading and parsing URLs from the input file. + * 2. Optionally extracting metadata for all videos. + * 3. Processing each video sequentially with error handling. + * + * The function continues processing remaining URLs even if individual videos fail. + * + * @param options - Configuration options for processing. + * @param filePath - Path to the file containing video URLs (one per line). 
+ * @param llmServices - Optional language model service for transcript processing. + * @param transcriptServices - Optional transcription service for audio conversion. + * @throws Will terminate the process with exit code 1 if the file cannot be read or contains no valid URLs. + * @returns Promise that resolves when all videos have been processed or JSON info has been saved. */ export async function processURLs( options: ProcessingOptions, @@ -44,27 +40,82 @@ export async function processURLs( try { // Read the file and extract valid URLs const content = await readFile(filePath, 'utf8') - const urls = content.split('\n') - .map(line => line.trim()) - .filter(line => line && !line.startsWith('#')) + const urls = content + .split('\n') + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')) + // Exit if no valid URLs were found in the file if (urls.length === 0) { err('Error: No URLs found in the file.') process.exit(1) } + l(opts(`\nFound ${urls.length} URLs in the file...`)) - // Collect metadata for all videos in parallel - const metadataPromises = urls.map(extractVideoMetadata) - const metadataList = await Promise.all(metadataPromises) - const validMetadata = metadataList.filter(Boolean) - // Handle --info option: save metadata to JSON and exit + + // If the --info option is provided, extract metadata for all videos if (options.info) { + // Collect metadata for all videos in parallel + const metadataList = await Promise.all( + urls.map(async (url) => { + try { + // Execute yt-dlp command to extract metadata + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + url, + ]) + + // Split the output into individual metadata fields + const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout + .trim() + 
.split('\n') + + // Validate that all required metadata fields are present + if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + + // Return the metadata object + return { + showLink, + channel, + channelURL, + title, + description: '', + publishDate, + coverImage, + } as VideoMetadata + } catch (error) { + // Log error but return null to filter out failed extractions + err( + `Error extracting metadata for ${url}: ${ + error instanceof Error ? error.message : String(error) + }` + ) + return null + } + }) + ) + + // Filter out any null results due to errors + const validMetadata = metadataList.filter( + (metadata): metadata is VideoMetadata => metadata !== null + ) + + // Save metadata to a JSON file const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/urls_info.json' await writeFile(jsonFilePath, jsonContent) l(wait(`Video information saved to: ${jsonFilePath}`)) return } + // Process each URL sequentially, with error handling for individual videos for (const [index, url] of urls.entries()) { // Visual separator for each video in the console @@ -72,6 +123,7 @@ export async function processURLs( l(opts(` Processing URL ${index + 1}/${urls.length}: ${url}`)) l(opts(`================================================================================================\n`)) try { + // Process the video using the existing processVideo function await processVideo(options, url, llmServices, transcriptServices) } catch (error) { // Log error but continue processing remaining URLs diff --git a/src/globals.ts b/src/globals.ts index c79f8fb..dd90586 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -77,7 +77,7 @@ export const err: typeof console.error = console.error * Available action options for content processing * @type {string[]} */ -export const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] +export const ACTION_OPTIONS = 
['video', 'playlist', 'channel', 'urls', 'file', 'rss'] /** * Available LLM service options diff --git a/src/types.ts b/src/types.ts index 035e5bf..2fa3dd7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -12,64 +12,97 @@ export type ProcessingOptions = { /** URL of the YouTube video to process. */ video?: string + /** URL of the YouTube playlist to process. */ playlist?: string + + /** URL of the YouTube channel to process. */ + channel?: string + /** File path containing a list of URLs to process. */ urls?: string + /** Local audio or video file path to process. */ file?: string + /** URL of the podcast RSS feed to process. */ rss?: string + /** Specific items (audio URLs) from the RSS feed to process. */ item?: string[] + /** Flag to generate JSON file with RSS feed information instead of processing items. */ info?: boolean + /** Flag to indicate whether to keep temporary files after processing. */ noCleanUp?: boolean + /** The Whisper model to use (e.g., 'tiny', 'base'). */ whisper?: WhisperModelType + /** The Whisper Python model to use (e.g., 'tiny', 'base'). */ whisperPython?: WhisperModelType + /** The Whisper Diarization model to use (e.g., 'tiny', 'base'). */ whisperDiarization?: WhisperModelType + /** The Whisper model to use with Docker (e.g., 'tiny', 'base'). */ whisperDocker?: WhisperModelType + /** Flag to use Deepgram for transcription. */ deepgram?: boolean + /** Flag to use AssemblyAI for transcription. */ assembly?: boolean + /** Flag to use speaker labels in AssemblyAI transcription. */ speakerLabels?: boolean + /** ChatGPT model to use (e.g., 'GPT_4o_MINI'). */ chatgpt?: string + /** Claude model to use (e.g., 'CLAUDE_3_SONNET'). */ claude?: string + /** Cohere model to use (e.g., 'COMMAND_R_PLUS'). */ cohere?: string + /** Mistral model to use (e.g., 'MISTRAL_LARGE'). */ mistral?: string + /** Fireworks model to use (e.g., ''). */ fireworks?: string + /** Together model to use (e.g., ''). */ together?: string + /** Groq model to use (e.g., ''). 
*/ groq?: string + /** Ollama model to use for local inference (e.g., 'LLAMA_3_2_1B'). */ ollama?: string + /** Gemini model to use (e.g., 'GEMINI_1_5_FLASH'). */ gemini?: string + /** Array of prompt sections to include (e.g., ['titles', 'summary']). */ prompt?: string[] + /** The selected LLM option. */ llmServices?: LLMServices + /** The selected transcription option. */ transcriptServices?: TranscriptServices + /** Number of items to skip in RSS feed processing. */ skip?: number + /** Order in which to process RSS feed items ('newest' or 'oldest'). */ order?: string + /** Number of most recent items to process (overrides --order and --skip). */ last?: number + /** Whether to run in interactive mode. */ interactive?: boolean } @@ -81,42 +114,61 @@ export type ProcessingOptions = { export type InquirerAnswers = { /** The action selected by the user (e.g., 'video', 'playlist'). */ action?: string + /** YouTube video URL provided by the user. */ video?: string + /** YouTube playlist URL provided by the user. */ playlist?: string + /** File path containing URLs provided by the user. */ urls?: string + /** Local audio/video file path provided by the user. */ file?: string + /** RSS feed URL provided by the user. */ rss?: string + /** Whether the user wants to specify specific RSS items. */ specifyItem?: boolean + /** Comma-separated audio URLs of specific RSS items. */ item?: string | string[] + /** Whether to generate JSON file with RSS feed information instead of processing items. */ info?: boolean + /** Number of items to skip in RSS feed processing. */ skip?: number + /** Number of most recent items to process (overrides order and skip). */ last?: number + /** Order in which to process RSS feed items ('newest' or 'oldest'). */ order?: string + /** LLM option selected by the user. */ llmServices?: LLMServices + /** Specific LLM model selected by the user. */ llmModel?: string + /** Transcription option selected by the user. 
*/ transcriptServices?: TranscriptServices + /** Whisper model type selected by the user. */ whisperModel?: WhisperModelType + /** Whether to use speaker labels in transcription. */ speakerLabels?: boolean + /** Prompt sections selected by the user. */ prompt?: string[] + /** Whether to keep temporary files after processing. */ noCleanUp?: boolean + /** Whether to proceed with the action. */ confirmAction?: boolean } @@ -127,16 +179,22 @@ export type InquirerAnswers = { export type InquirerQuestions = Array<{ /** The type of the prompt (e.g., 'input', 'list', 'confirm', 'checkbox'). */ type: string + /** The name of the answer property. */ name: string + /** The message to display to the user. */ message: string + /** The choices available for selection (for 'list' and 'checkbox' types). */ choices?: Array | (() => Array) + /** A function to determine when to display the prompt. */ when?: () => boolean + /** A function to validate the user's input. */ validate?: (input: any) => boolean | string + /** The default value for the prompt. */ default?: any }> @@ -164,8 +222,10 @@ export type HandlerFunction = ( export type MarkdownData = { /** The front matter content for the markdown file. */ frontMatter: string + /** The base file path (without extension) for the markdown file. */ finalPath: string + /** The sanitized filename used for the markdown file. */ filename: string } @@ -176,16 +236,22 @@ export type MarkdownData = { export type VideoMetadata = { /** The URL to the video's webpage. */ showLink: string + /** The name of the channel that uploaded the video. */ channel: string + /** The URL to the uploader's channel page. */ channelURL: string + /** The title of the video. */ title: string + /** The description of the video. */ description: string + /** The upload date in 'YYYY-MM-DD' format. */ publishDate: string + /** The URL to the video's thumbnail image. 
*/ coverImage: string } @@ -197,18 +263,25 @@ export type VideoMetadata = { export type RSSItem = { /** The publication date of the RSS item (e.g., '2024-09-24'). */ publishDate: string + /** The title of the RSS item. */ title: string + /** The URL to the cover image of the RSS item. */ coverImage: string + /** The URL to the show or episode. */ showLink: string + /** The name of the channel or podcast. */ channel: string + /** The URL to the channel or podcast. */ channelURL: string + /** A brief description of the RSS item. */ description?: string + /** The URL to the audio file of the RSS item. */ audioURL?: string } @@ -219,6 +292,7 @@ export type RSSItem = { export type RSSOptions = { /** The order to process items ('newest' or 'oldest'). */ order?: string + /** The number of items to skip. */ skip?: number } @@ -230,8 +304,10 @@ export type RSSOptions = { export type DownloadAudioOptions = { /** The desired output audio format (e.g., 'wav'). */ outputFormat?: string + /** The sample rate for the audio file (e.g., 16000). */ sampleRate?: number + /** The number of audio channels (e.g., 1 for mono). */ channels?: number } @@ -264,6 +340,7 @@ export type WhisperTranscriptServices = 'whisper' | 'whisperDocker' | 'whisperPy export type PromptSection = { /** The instructions for the section. */ instruction: string + /** An example output for the section. */ example: string } @@ -279,10 +356,13 @@ export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'ollama' export type LLMOptions = { /** The sections to include in the prompt (e.g., ['titles', 'summary']). */ promptSections?: string[] + /** The specific LLM model to use. */ model?: string + /** The temperature parameter for text generation. */ temperature?: number + /** The maximum number of tokens to generate. 
*/ maxTokens?: number } @@ -360,36 +440,52 @@ export type OllamaModelType = 'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | ' export type FireworksResponse = { /** Unique identifier for the response */ id: string + /** Type of object */ object: string + /** Timestamp of creation */ created: number + /** Model used for generation */ model: string + /** Input prompts */ prompt: any[] + /** Array of completion choices */ choices: { + /** Reason for completion finish */ finish_reason: string + /** Index of the choice */ index: number + /** Message content and metadata */ message: { + /** Role of the message author */ role: string + /** Generated content */ content: string + /** Tool calls made during generation */ tool_calls: { + /** Tool call identifier */ id: string + /** Type of tool call */ type: string + /** Function call details */ function: { + /** Name of the function called */ name: string + /** Arguments passed to the function */ arguments: string } @@ -398,10 +494,13 @@ export type FireworksResponse = { }[] /** Token usage statistics */ usage: { + /** Number of tokens in the prompt */ prompt_tokens: number + /** Number of tokens in the completion */ completion_tokens: number + /** Total tokens used */ total_tokens: number } @@ -413,42 +512,61 @@ export type FireworksResponse = { export type TogetherResponse = { /** Unique identifier for the response */ id: string + /** Type of object */ object: string + /** Timestamp of creation */ created: number + /** Model used for generation */ model: string + /** Input prompts */ prompt: any[] + /** Array of completion choices */ choices: { + /** Generated text */ text: string + /** Reason for completion finish */ finish_reason: string + /** Random seed used */ seed: number + /** Choice index */ index: number + /** Message content and metadata */ message: { + /** Role of the message author */ role: string + /** Generated content */ content: string + /** Tool calls made during generation */ tool_calls: { + /** Index of 
the tool call */ index: number + /** Tool call identifier */ id: string + /** Type of tool call */ type: string + /** Function call details */ function: { + /** Name of the function called */ name: string + /** Arguments passed to the function */ arguments: string } @@ -456,20 +574,26 @@ export type TogetherResponse = { } /** Log probability information */ logprobs: { + /** Array of token IDs */ token_ids: number[] + /** Array of tokens */ tokens: string[] + /** Log probabilities for tokens */ token_logprobs: number[] } }[] /** Token usage statistics */ usage: { + /** Number of tokens in the prompt */ prompt_tokens: number + /** Number of tokens in the completion */ completion_tokens: number + /** Total tokens used */ total_tokens: number } @@ -481,35 +605,50 @@ export type TogetherResponse = { export type GroqChatCompletionResponse = { /** Unique identifier for the response */ id: string + /** Type of object */ object: string + /** Timestamp of creation */ created: number + /** Model used for generation */ model: string + /** System fingerprint */ system_fingerprint: string | null + /** Array of completion choices */ choices: { + /** Choice index */ index: number + /** Message content and metadata */ message: { + /** Role of the message author */ role: 'assistant' | 'user' | 'system' + /** Generated content */ content: string } + /** Reason for completion finish */ finish_reason: string + /** Optional log probability information */ logprobs?: { + /** Array of tokens */ tokens: string[] + /** Log probabilities for tokens */ token_logprobs: number[] + /** Top log probabilities */ top_logprobs: Record[] + /** Text offsets for tokens */ text_offset: number[] } | null @@ -518,14 +657,19 @@ export type GroqChatCompletionResponse = { usage?: { /** Number of tokens in the prompt */ prompt_tokens: number + /** Number of tokens in the completion */ completion_tokens: number + /** Total tokens used */ total_tokens: number + /** Optional timing for prompt processing */ 
prompt_time?: number + /** Optional timing for completion generation */ completion_time?: number + /** Optional total processing time */ total_time?: number } @@ -537,29 +681,40 @@ export type GroqChatCompletionResponse = { export type OllamaResponse = { /** Model used for generation */ model: string + /** Timestamp of creation */ created_at: string + /** Message content and metadata */ message: { + /** Role of the message author */ role: string + /** Generated content */ content: string } /** Reason for completion */ done_reason: string + /** Whether generation is complete */ done: boolean + /** Total processing duration */ total_duration: number + /** Model loading duration */ load_duration: number + /** Number of prompt evaluations */ prompt_eval_count: number + /** Duration of prompt evaluation */ prompt_eval_duration: number + /** Number of evaluations */ eval_count: number + /** Duration of evaluation */ eval_duration: number } @@ -570,28 +725,40 @@ export type OllamaResponse = { export type OllamaTagsResponse = { /** Array of available models */ models: Array<{ + /** Model name */ name: string + /** Base model identifier */ model: string + /** Last modification timestamp */ modified_at: string + /** Model size in bytes */ size: number + /** Model digest */ digest: string + /** Model details */ details: { + /** Parent model identifier */ parent_model: string + /** Model format */ format: string + /** Model family */ family: string + /** Array of model families */ families: string[] + /** Model parameter size */ parameter_size: string + /** Quantization level */ quantization_level: string } @@ -604,27 +771,38 @@ export type OllamaTagsResponse = { export type DeepgramResponse = { /** Metadata about the transcription */ metadata: { + /** Transaction key */ transaction_key: string + /** Request identifier */ request_id: string + /** SHA256 hash */ sha256: string + /** Creation timestamp */ created: string + /** Audio duration */ duration: number + /** Number of 
audio channels */ channels: number + /** Array of models used */ models: string[] + /** Information about models used */ model_info: { [key: string]: { + /** Model name */ name: string + /** Model version */ version: string + /** Model architecture */ arch: string } @@ -632,22 +810,31 @@ export type DeepgramResponse = { } /** Transcription results */ results: { + /** Array of channel results */ channels: Array<{ + /** Array of alternative transcriptions */ alternatives: Array<{ + /** Transcribed text */ transcript: string + /** Confidence score */ confidence: number + /** Array of word-level details */ words: Array<{ + /** Individual word */ word: string + /** Start time */ start: number + /** End time */ end: number + /** Word-level confidence */ confidence: number }> diff --git a/src/utils/downloadAudio.ts b/src/utils/downloadAudio.ts index 20b73fc..3f0e3a9 100644 --- a/src/utils/downloadAudio.ts +++ b/src/utils/downloadAudio.ts @@ -14,7 +14,6 @@ import { fileTypeFromBuffer } from 'file-type' import { l, err, step, success, wait } from '../globals.js' import type { SupportedFileType, ProcessingOptions } from '../types.js' -// Promisify node:child_process functions for async/await usage const execFilePromise = promisify(execFile) const execPromise = promisify(exec) @@ -84,7 +83,7 @@ export async function downloadAudio( const outputPath = `${finalPath}.wav` // Handle online content (YouTube, RSS feeds, etc.) - if (options.video || options.playlist || options.urls || options.rss) { + if (options.video || options.playlist || options.urls || options.rss || options.channel) { l(step('\nStep 2 - Downloading URL audio...\n')) try { // Download and convert audio using yt-dlp @@ -141,11 +140,7 @@ export async function downloadAudio( ) l(success(` File converted to WAV format successfully:\n - ${outputPath}`)) } catch (error) { - err( - `Error processing local file: ${ - error instanceof Error ? 
(error as Error).message : String(error) - }` - ) + err(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } diff --git a/src/utils/extractVideoMetadata.ts b/src/utils/extractVideoMetadata.ts deleted file mode 100644 index 2d54c1d..0000000 --- a/src/utils/extractVideoMetadata.ts +++ /dev/null @@ -1,97 +0,0 @@ -/** - * @file Utility for extracting metadata from YouTube videos using yt-dlp. - * Provides functionality to retrieve essential video information such as title, - * channel, publish date, and thumbnail URL. - * @packageDocumentation - */ - -import { execFile } from 'node:child_process' -import { promisify } from 'node:util' -import { err } from '../globals.js' -import type { VideoMetadata } from '../types.js' - -// Promisify execFile for async/await usage with yt-dlp -const execFilePromise = promisify(execFile) - -/** - * Extracts metadata for a single video URL using yt-dlp. - * - * This function performs the following steps: - * 1. Verifies yt-dlp is installed - * 2. Executes yt-dlp with specific format strings to extract metadata - * 3. Parses the output into structured video metadata - * 4. Validates that all required metadata fields are present - * - * @param {string} url - The URL of the video to extract metadata from. - * Supports YouTube and other platforms compatible with yt-dlp. 
- * - * @returns {Promise} A promise that resolves to an object containing: - * - showLink: Direct URL to the video - * - channel: Name of the channel that published the video - * - channelURL: URL to the channel's page - * - title: Title of the video - * - description: Video description (currently returned empty) - * - publishDate: Publication date in YYYY-MM-DD format - * - coverImage: URL to the video's thumbnail - * - * @throws {Error} If: - * - yt-dlp is not installed - * - The video URL is invalid - * - Any required metadata field is missing - * - The yt-dlp command fails - * - * @example - * try { - * const metadata = await extractVideoMetadata('https://www.youtube.com/watch?v=...') - * l(metadata.title) // Video title - * l(metadata.publishDate) // YYYY-MM-DD - * } catch (error) { - * err('Failed to extract video metadata:', error) - * } - */ -export async function extractVideoMetadata(url: string): Promise { - try { - // Execute yt-dlp with format strings to extract specific metadata fields - const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', // Ensure safe filenames - '--print', '%(webpage_url)s', // Direct link to video - '--print', '%(channel)s', // Channel name - '--print', '%(uploader_url)s', // Channel URL - '--print', '%(title)s', // Video title - '--print', '%(upload_date>%Y-%m-%d)s', // Formatted upload date - '--print', '%(thumbnail)s', // Thumbnail URL - url, - ]) - - // Split stdout into individual metadata fields - const [ - showLink, channel, channelURL, title, publishDate, coverImage - ] = stdout.trim().split('\n') - - // Validate that all required metadata fields are present - if ( - !showLink || !channel || !channelURL || !title || !publishDate || !coverImage - ) { - throw new Error('Incomplete metadata received from yt-dlp.') - } - - // Return structured video metadata - return { - showLink, // Direct URL to the video - channel, // Channel name - channelURL, // Channel page URL - title, // Video title - 
description: '', // Empty description to fill in with LLM output - publishDate, // Publication date (YYYY-MM-DD) - coverImage, // Thumbnail URL - } - } catch (error) { - // Enhanced error handling with type checking - err( - `Error extracting metadata for ${url}: ${ - error instanceof Error ? (error as Error).message : String(error) - }` - ) - throw error // Re-throw to allow handling by caller - } -} \ No newline at end of file diff --git a/src/utils/generateMarkdown.ts b/src/utils/generateMarkdown.ts index 99f7259..107a176 100644 --- a/src/utils/generateMarkdown.ts +++ b/src/utils/generateMarkdown.ts @@ -6,14 +6,15 @@ * @packageDocumentation */ -import { execFile } from 'node:child_process' -import { promisify } from 'node:util' -import { writeFile } from 'node:fs/promises' -import { basename, extname } from 'node:path' -import { l, dim, step, success } from '../globals.js' -import type { MarkdownData, ProcessingOptions, RSSItem } from '../types.js' - -// Promisify the execFile function for use with async/await +// Import necessary modules and functions +import { execFile } from 'node:child_process' // Used to execute external commands +import { promisify } from 'node:util' // Used to convert callback-based functions to promises +import { writeFile } from 'node:fs/promises' // Used to write files using promises +import { basename, extname } from 'node:path' // Used to manipulate file paths +import { l, dim, step, success, err } from '../globals.js' // Custom logging functions +import type { MarkdownData, ProcessingOptions, RSSItem } from '../types.js' // Type definitions + +// Convert execFile to return a promise, allowing us to use async/await const execFilePromise = promisify(execFile) /** @@ -78,122 +79,162 @@ export async function generateMarkdown( */ function sanitizeTitle(title: string): string { return title - .replace(/[^\w\s-]/g, '') // Remove all non-word chars except spaces and hyphens - .trim() // Remove leading and trailing whitespace - 
.replace(/[\s_]+/g, '-') // Replace spaces and underscores with a single hyphen - .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen - .toLowerCase() // Convert to lowercase - .slice(0, 200) // Limit to 200 characters + .replace(/[^\w\s-]/g, '') // Remove all non-word characters except spaces and hyphens + .trim() // Remove leading and trailing whitespace + .replace(/[\s_]+/g, '-') // Replace spaces and underscores with hyphens + .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen + .toLowerCase() // Convert to lowercase + .slice(0, 200) // Limit the length to 200 characters } - // Declare variables to store generated content - let frontMatter: string[] - let finalPath: string - let filename: string + // Initialize variables for front matter content, final file path, and sanitized filename + let frontMatter: string[] // Array to hold front matter lines + let finalPath: string // The path where the markdown file will be saved + let filename: string // The sanitized filename - // Handle different content types using a switch statement + // Determine which processing option is selected switch (true) { + // If any of these options are true, process as a video case !!options.video: case !!options.playlist: case !!options.urls: - // Extract video metadata using yt-dlp - const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', - '--print', '%(upload_date>%Y-%m-%d)s', // Format: YYYY-MM-DD - '--print', '%(title)s', - '--print', '%(thumbnail)s', - '--print', '%(webpage_url)s', - '--print', '%(channel)s', - '--print', '%(uploader_url)s', - input as string, - ]) - - // Parse the metadata output into individual fields - const [ - formattedDate, videoTitle, thumbnail, webpage_url, videoChannel, uploader_url - ] = stdout.trim().split('\n') - - // Generate filename using date and sanitized title - filename = `${formattedDate}-${sanitizeTitle(videoTitle)}` - finalPath = `content/${filename}` - - // Create video-specific
front matter - frontMatter = [ - '---', - `showLink: "${webpage_url}"`, - `channel: "${videoChannel}"`, - `channelURL: "${uploader_url}"`, - `title: "${videoTitle}"`, - `description: ""`, - `publishDate: "${formattedDate}"`, - `coverImage: "${thumbnail}"`, - '---\n', - ] + case !!options.channel: + try { + // Execute yt-dlp command to extract metadata + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', // Restrict filenames to ASCII characters + '--print', '%(webpage_url)s', // Print the webpage URL + '--print', '%(channel)s', // Print the channel name + '--print', '%(uploader_url)s', // Print the uploader's URL + '--print', '%(title)s', // Print the video title + '--print', '%(upload_date>%Y-%m-%d)s', // Print the upload date in YYYY-MM-DD format + '--print', '%(thumbnail)s', // Print the thumbnail URL + input as string, // The video URL provided as input + ]) + + // Split the output into individual metadata fields + const [ + showLink, // The video URL + videoChannel, // The channel name + uploader_url, // The uploader's URL + videoTitle, // The video title + formattedDate, // The upload date + thumbnail, // The thumbnail URL + ] = stdout.trim().split('\n') + + // Validate that all required metadata fields are present + if ( + !showLink || + !videoChannel || + !uploader_url || + !videoTitle || + !formattedDate || + !thumbnail + ) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + + // Generate the sanitized filename using the upload date and video title + filename = `${formattedDate}-${sanitizeTitle(videoTitle)}` + // Define the final path where the markdown file will be saved + finalPath = `content/${filename}` + + // Construct the front matter content as an array of strings + frontMatter = [ + '---', + `showLink: "${showLink}"`, // The video URL + `channel: "${videoChannel}"`, // The channel name + `channelURL: "${uploader_url}"`, // The uploader's URL + `title: "${videoTitle}"`, // The video title +
`description: ""`, // Placeholder for description + `publishDate: "${formattedDate}"`, // The upload date + `coverImage: "${thumbnail}"`, // The thumbnail URL + '---\n', + ] + } catch (error) { + // Log the error and rethrow it for upstream handling + err(`Error extracting metadata for ${input}: ${error instanceof Error ? error.message : String(error)}`) + throw error + } break + // If the file option is selected case !!options.file: - // Extract and process local file information + // Get the original filename from the input path const originalFilename = basename(input as string) + // Remove the file extension to get the filename without extension const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') - // Generate sanitized filename + // Sanitize the filename to make it safe for use in paths filename = sanitizeTitle(filenameWithoutExt) + // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Create file-specific front matter with minimal metadata + // Construct the front matter content for a file frontMatter = [ '---', - `showLink: "${originalFilename}"`, - `channel: ""`, - `channelURL: ""`, - `title: "${originalFilename}"`, - `description: ""`, - `publishDate: ""`, - `coverImage: ""`, + `showLink: "${originalFilename}"`, // The original filename + `channel: ""`, // Empty channel field + `channelURL: ""`, // Empty channel URL field + `title: "${originalFilename}"`, // Use the original filename as the title + `description: ""`, // Placeholder for description + `publishDate: ""`, // Empty publish date + `coverImage: ""`, // Empty cover image '---\n', ] break + // If the RSS option is selected case !!options.rss: - // Process RSS feed item + // Cast the input to an RSSItem type const item = input as RSSItem + // Destructure necessary fields from the RSS item const { - publishDate, title: rssTitle, coverImage, showLink, channel: rssChannel, channelURL + publishDate, // Publication date +
title: rssTitle, // Title of the RSS item + coverImage, // Cover image URL + showLink, // Link to the content + channel: rssChannel, // Channel name + channelURL, // Channel URL + } = item - // Generate filename using date and sanitized title + // Generate the sanitized filename using the publish date and title filename = `${publishDate}-${sanitizeTitle(rssTitle)}` + // Define the final path where the markdown file will be saved finalPath = `content/${filename}` - // Create RSS-specific front matter + // Construct the front matter content for an RSS item frontMatter = [ '---', - `showLink: "${showLink}"`, - `channel: "${rssChannel}"`, - `channelURL: "${channelURL}"`, - `title: "${rssTitle}"`, - `description: ""`, - `publishDate: "${publishDate}"`, - `coverImage: "${coverImage}"`, + `showLink: "${showLink}"`, // Link to the content + `channel: "${rssChannel}"`, // Channel name + `channelURL: "${channelURL}"`, // Channel URL + `title: "${rssTitle}"`, // Title of the RSS item + `description: ""`, // Placeholder for description + `publishDate: "${publishDate}"`, // Publication date + `coverImage: "${coverImage}"`, // Cover image URL '---\n', ] break + // If no valid option is provided, throw an error default: throw new Error('Invalid option provided for markdown generation.') } - // Join front matter array into a single string + // Join the front matter array into a single string with newline separators const frontMatterContent = frontMatter.join('\n') - // Write the front matter content to a markdown file + // Write the front matter content to a markdown file at the specified path await writeFile(`${finalPath}.md`, frontMatterContent) - // Log the generated content and success message + // Log the front matter content in dimmed text l(dim(frontMatterContent)) + // Log the current step in the process l(step('\nStep 1 - Generating markdown...\n')) + // Log a success message indicating where the file was saved l(success(` Front matter successfully created and saved:\n -
${finalPath}.md`)) - // Return the generated markdown data for further processing + // Return an object containing the front matter, final path, and filename return { frontMatter: frontMatterContent, finalPath, filename } } \ No newline at end of file