diff --git a/docker-compose.yml b/docker-compose.yml index fa6c49e..69aa23b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,7 +11,10 @@ services: - /var/run/docker.sock:/var/run/docker.sock depends_on: - whisper - # - llama + - ollama + environment: + - OLLAMA_HOST=localhost + - OLLAMA_PORT=11434 whisper: build: context: ./whisper.cpp @@ -22,15 +25,9 @@ services: command: tail -f /dev/null tty: true stdin_open: true - # llama: - # build: - # context: ./llama.cpp - # dockerfile: Dockerfile - # volumes: - # - ./content:/app/content - # command: tail -f /dev/null - # tty: true - # stdin_open: true + ollama: + image: ollama/ollama + ports: + - "11434:11434" volumes: - whisper: - # llama: \ No newline at end of file + whisper: \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index 6bcad4a..b02c500 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -17,11 +17,12 @@ - [OctoAI's Models](#octoais-models) - [Llama.cpp](#llamacpp) - [Transcription Options](#transcription-options) + - [Whisper.cpp](#whispercpp) - [Deepgram](#deepgram) - [Assembly](#assembly) - - [Whisper.cpp](#whispercpp) -- [Docker Compose](#docker-compose) -- [Alternative JavaScript Runtimes](#alternative-javascript-runtimes) +- [Prompt Options](#prompt-options) +- [Alternative Runtimes](#alternative-runtimes) + - [Docker Compose](#docker-compose) - [Deno](#deno) - [Bun](#bun) - [Makeshift Test Suite](#makeshift-test-suite) @@ -98,6 +99,12 @@ npm run as -- \ --prompt titles summary longChapters takeaways questions ``` +Run on a podcast RSS feed and generate JSON info file with markdown metadata of each item: + +```bash +npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info +``` + ## Language Model (LLM) Options Create a `.env` file and set API key as demonstrated in `.env.example` for either: @@ -226,26 +233,6 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama ## Transcription Options -Create a `.env` file and set API key as demonstrated in `.env.example` for `DEEPGRAM_API_KEY` or `ASSEMBLY_API_KEY`. - -### Deepgram - -```bash -npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram -``` - -### Assembly - -```bash -npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly -``` - -Include speaker labels and number of speakers: - -```bash -npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels -``` - ### Whisper.cpp If neither the `--deepgram` or `--assembly` option is included for transcription, `autoshow` will default to running the largest Whisper.cpp model. To configure the size of the Whisper model, use the `--model` option and select one of the following: @@ -273,15 +260,27 @@ Run `whisper.cpp` in a Docker container with `--whisperDocker`: npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base ``` -## Docker Compose +### Deepgram -This will run both `whisper.cpp` and the AutoShow Commander CLI in their own Docker containers. +Create a `.env` file and set API key as demonstrated in `.env.example` for `DEEPGRAM_API_KEY`. ```bash -docker-compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram ``` -Currently working on the `llama.cpp` Docker integration so the entire project can be encapsulated in one local Docker Compose file. 
+### Assembly + +Create a `.env` file and set API key as demonstrated in `.env.example` for `ASSEMBLY_API_KEY`. + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly +``` + +Include speaker labels and number of speakers: + +```bash +npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels +``` ## Prompt Options @@ -339,7 +338,17 @@ Include all prompt options: npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary longChapters takeaways questions ``` -## Alternative JavaScript Runtimes +## Alternative Runtimes + +### Docker Compose + +This will run both `whisper.cpp` and the AutoShow Commander CLI in their own Docker containers. + +```bash +docker-compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base +``` + +Currently working on the `llama.cpp` Docker integration so the entire project can be encapsulated in one local Docker Compose file. ### Bun diff --git a/docs/server.md b/docs/server.md index 574f92e..01aa36a 100644 --- a/docs/server.md +++ b/docs/server.md @@ -3,7 +3,7 @@ This is currently a very simple proof-of-concept that only implements the most basic Autoshow command for [processing a single video file from a YouTube URL](/docs/examples.md#process-single-video-or-audio-file): ```bash -npm run autoshow -- --video "https://www.youtube.com/watch?v=jKB0EltG9Jo" +npm run autoshow -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" ``` See the [server section of the roadmap](/docs/readmap.md#server) for more information about future development on the server implementation. @@ -25,119 +25,337 @@ Version 20 enters its maintenance period in October 2024 and end-of-life in Apri -## Video Endpoint +## Process Endpoints + +### Video Endpoint Once the server is running, send a `POST` request to `http://localhost:3000/video` containing a JSON object with the YouTube URL: ```bash -curl -X POST http://localhost:3000/video \ - -H "Content-Type: application/json" \ - -d '{ - "youtubeUrl": "https://www.youtube.com/watch?v=jKB0EltG9Jo" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk" +}' http://localhost:3000/video ``` -Configure `model`. + + +Use LLM. ```bash -curl -X POST http://localhost:3000/video \ - -H "Content-Type: application/json" \ - -d '{ - "youtubeUrl": "https://www.youtube.com/watch?v=jKB0EltG9Jo", - "whisperModel": "tiny" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/video ``` -Use LLM. 
+### Playlist Endpoint + +```bash +curl --json '{ + "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" +}' http://localhost:3000/playlist +``` + +```bash +curl --json '{ + "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr", + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/playlist +``` + +### URLs Endpoint + +```bash +curl --json '{ + "filePath": "content/example-urls.md" +}' http://localhost:3000/urls +``` + +```bash +curl --json '{ + "filePath": "content/example-urls.md", + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/urls +``` + +```bash +curl --json '{ + "filePath": "content/example-urls.md", + "prompts": ["titles", "mediumChapters"], + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/urls +``` + +### File Endpoint + +```bash +curl --json '{ + "filePath": "content/audio.mp3" +}' http://localhost:3000/file +``` + +```bash +curl --json '{ + "filePath": "content/audio.mp3", + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/file +``` + +```bash +curl --json '{ + "filePath": "content/audio.mp3", + "prompts": ["titles"], + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/file +``` + +### RSS Endpoint + +```bash +curl --json '{ + "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/" +}' http://localhost:3000/rss +``` + +```bash +curl --json '{ + "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/", + "whisperModel": "tiny", + "llm": "llama", + "order": "newest", + "skip": 0 +}' http://localhost:3000/rss +``` + +```bash +curl --json '{ + "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/", + "order": "newest", + "skip": 94, + "whisperModel": "tiny" +}' http://localhost:3000/rss +``` + +```bash +curl --json '{ + "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/", + "order": "oldest", + "skip": 94, + "whisperModel": "tiny" +}' http://localhost:3000/rss +``` + +## Language Model (LLM) Options + +### ChatGPT + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "chatgpt" +}' http://localhost:3000/video +``` ```bash -curl -X POST http://localhost:3000/video \ - -H "Content-Type: application/json" \ - -d '{ - "youtubeUrl": "https://www.youtube.com/watch?v=jKB0EltG9Jo", - "whisperModel": "tiny", - "llm": "llama" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "chatgpt", + "llmModel": "GPT_4o_MINI" +}' http://localhost:3000/video ``` -## Playlist Endpoint +### Claude ```bash -curl -X POST http://localhost:3000/playlist \ - -H "Content-Type: application/json" \ - -d '{ - "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXMh4DQBigyvHSRTf2CSj129" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "claude" +}' http://localhost:3000/video ``` ```bash -curl -X POST http://localhost:3000/playlist \ - -H "Content-Type: application/json" \ - -d '{ - "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXMh4DQBigyvHSRTf2CSj129", - "whisperModel": "tiny", - "llm": "llama" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "claude", + "llmModel": "CLAUDE_3_SONNET" +}' http://localhost:3000/video ``` -## URLs Endpoint +### Gemini ```bash -curl -X POST http://localhost:3000/urls \ - -H "Content-Type: application/json" \ - -d '{ - "filePath": "content/urls.md" - }' +curl --json '{ + "youtubeUrl": 
"https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "gemini" +}' http://localhost:3000/video ``` ```bash -curl -X POST http://localhost:3000/urls \ - -H "Content-Type: application/json" \ - -d '{ - "filePath": "content/urls.md", - "whisperModel": "tiny", - "llm": "llama" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "gemini", + "llmModel": "GEMINI_1_5_FLASH" +}' http://localhost:3000/video ``` -## File Endpoint +### Cohere ```bash -curl -X POST http://localhost:3000/file \ - -H "Content-Type: application/json" \ - -d '{ - "filePath": "content/audio.mp3" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "cohere" +}' http://localhost:3000/video ``` ```bash -curl -X POST http://localhost:3000/file \ - -H "Content-Type: application/json" \ - -d '{ - "filePath": "content/audio.mp3", - "whisperModel": "tiny", - "llm": "llama" - }' +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "cohere", + "llmModel": "COMMAND_R_PLUS" +}' http://localhost:3000/video ``` -## RSS Endpoint +### Mistral + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "mistral" +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "mistral", + "llmModel": "MIXTRAL_8x7b" +}' http://localhost:3000/video +``` + +### Octo + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "octo" +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "llm": "octo", + "llmModel": "LLAMA_3_1_8B" +}' http://localhost:3000/video +``` + +## Transcription Options + +### Whisper.cpp + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "whisperModel": "tiny" +}' http://localhost:3000/video +``` + +### Deepgram + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "transcriptionService": "deepgram" +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "transcriptionService": "deepgram", + "llm": "llama" +}' http://localhost:3000/video +``` + +### Assembly + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "transcriptionService": "assembly" +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "transcriptionService": "assembly", + "llm": "llama" +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", + "transcriptionService": "assembly", + "speakerLabels": true +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", + "transcriptionService": "assembly", + "speakerLabels": true, + "llm": "llama" +}' http://localhost:3000/video +``` + +## Prompt Options + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "prompts": ["titles", "mediumChapters"] +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", + "prompts": ["titles", "summary", "shortChapters", "takeaways", "questions"] +}' http://localhost:3000/video +``` + +```bash +curl --json '{ + "youtubeUrl": 
"https://www.youtube.com/watch?v=MORMZXEaONk", + "prompts": ["titles", "summary", "shortChapters", "takeaways", "questions"], + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/video +``` ```bash -curl -X POST http://localhost:3000/rss \ - -H "Content-Type: application/json" \ - -d '{ - "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/" - }' +curl --json '{ + "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr", + "prompts": ["titles", "mediumChapters"], + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/playlist ``` ```bash -curl -X POST http://localhost:3000/rss \ - -H "Content-Type: application/json" \ - -d '{ - "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/", - "whisperModel": "tiny", - "llm": "llama", - "order": "newest", - "skip": 0 - }' +curl --json '{ + "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr", + "prompts": ["titles", "mediumChapters"], + "whisperModel": "tiny", + "llm": "llama" +}' http://localhost:3000/playlist ``` \ No newline at end of file diff --git a/package.json b/package.json index 72e6031..03d4ce7 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,9 @@ "bun-as": "bun --env-file=.env --no-warnings src/autoshow.js", "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.js", "v": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --video", + "u": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --urls", + "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --playlist", + "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --file", "serve": "node --env-file=.env --no-warnings --watch server/index.js", "fetch": "node --env-file=.env --no-warnings server/fetch.js", "test-local": "node --test test/local.test.js", @@ -42,6 +45,7 @@ "file-type": "^19.4.1", "inquirer": "^10.2.2", "node-llama-cpp": "^3.0.0-beta.44", + "ollama": "^0.5.9", "openai": "^4.55.7" } } diff --git a/src/autoshow.js b/src/autoshow.js index a5189b7..80e8533 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -9,7 +9,7 @@ */ import { Command } from 'commander' -import inquirer from 'inquirer' +import { handleInteractivePrompt } from './inquirer.js' import { processVideo } from './commands/processVideo.js' import { processPlaylist } from './commands/processPlaylist.js' import { processURLs } from './commands/processURLs.js' @@ -17,7 +17,7 @@ import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv } from 'node:process' -/** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, HandlerFunction, LLMOption, TranscriptOption, WhisperModelType } from './types.js' */ +/** @import { ProcessingOptions, HandlerFunction, LLMOption, TranscriptOption } from './types.js' */ // Initialize the command-line interface const program = new Command() @@ -35,6 +35,7 @@ program .option('--item ', 'Process specific items in the RSS feed by providing their audio URLs') .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)', 'newest') .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt, 0) + .option('--info', 'Generate JSON file with RSS feed information instead of processing items') .option('--whisper [modelType]', 'Use Whisper.cpp for transcription (non-Docker version)') .option('--whisperDocker [modelType]', 'Use Whisper.cpp for transcription 
(Docker version)') .option('--deepgram', 'Use Deepgram for transcription') @@ -46,194 +47,10 @@ program .option('--mistral [model]', 'Use Mistral for processing') .option('--octo [model]', 'Use Octo for processing') .option('--llama [model]', 'Use Node Llama for processing with optional model specification') + .option('--ollama [model]', 'Use Ollama for processing with optional model specification') .option('--gemini [model]', 'Use Gemini for processing with optional model specification') .option('--noCleanUp', 'Do not delete intermediary files after processing') -// Interactive prompts using inquirer -/** @type {InquirerQuestions} */ -const INQUIRER_PROMPT = [ - { - type: 'list', - name: 'action', - message: 'What would you like to process?', - choices: [ - { name: 'Single YouTube Video', value: 'video' }, - { name: 'YouTube Playlist', value: 'playlist' }, - { name: 'List of URLs from File', value: 'urls' }, - { name: 'Local Audio/Video File', value: 'file' }, - { name: 'Podcast RSS Feed', value: 'rss' }, - ], - }, - { - type: 'input', - name: 'video', - message: 'Enter the YouTube video URL:', - when: (answers) => answers.action === 'video', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'input', - name: 'playlist', - message: 'Enter the YouTube playlist URL:', - when: (answers) => answers.action === 'playlist', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'input', - name: 'urls', - message: 'Enter the file path containing URLs:', - when: (answers) => answers.action === 'urls', - validate: (input) => (input ? true : 'Please enter a valid file path.'), - }, - { - type: 'input', - name: 'file', - message: 'Enter the local audio/video file path:', - when: (answers) => answers.action === 'file', - validate: (input) => (input ? true : 'Please enter a valid file path.'), - }, - { - type: 'input', - name: 'rss', - message: 'Enter the podcast RSS feed URL:', - when: (answers) => answers.action === 'rss', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'confirm', - name: 'specifyItem', - message: 'Do you want to process specific episodes by providing their audio URLs?', - when: (answers) => answers.action === 'rss', - default: false, - }, - { - type: 'input', - name: 'item', - message: 'Enter the audio URLs of the episodes (separated by commas):', - when: (answers) => answers.action === 'rss' && answers.specifyItem, - validate: (input) => (input ? 
true : 'Please enter at least one valid audio URL.'), - }, - { - type: 'list', - name: 'llmOpt', - message: 'Select the Language Model (LLM) you want to use:', - choices: [ - { name: 'OpenAI ChatGPT', value: 'chatgpt' }, - { name: 'Anthropic Claude', value: 'claude' }, - { name: 'Cohere', value: 'cohere' }, - { name: 'Mistral', value: 'mistral' }, - { name: 'OctoAI', value: 'octo' }, - { name: 'node-llama-cpp (local inference)', value: 'llama' }, - { name: 'Google Gemini', value: 'gemini' }, - { name: 'Skip LLM Processing', value: null }, - ], - }, - { - type: 'list', - name: 'llamaModel', - message: 'Select the LLAMA model you want to use:', - choices: [ - { name: 'LLAMA 3 8B Q4 Model', value: 'LLAMA_3_1_8B_Q4_MODEL' }, - { name: 'LLAMA 3 8B Q6 Model', value: 'LLAMA_3_1_8B_Q6_MODEL' }, - { name: 'GEMMA 2 2B Q4 Model', value: 'GEMMA_2_2B_Q4_MODEL' }, - { name: 'GEMMA 2 2B Q6 Model', value: 'GEMMA_2_2B_Q6_MODEL' }, - { name: 'TINY LLAMA 1B Q4 Model', value: 'TINY_LLAMA_1B_Q4_MODEL' }, - { name: 'TINY LLAMA 1B Q6 Model', value: 'TINY_LLAMA_1B_Q6_MODEL' }, - ], - when: (answers) => answers.llmOpt === 'llama', - }, - { - type: 'list', - name: 'transcriptOpt', - message: 'Select the transcription service you want to use:', - choices: [ - { name: 'Whisper.cpp', value: 'whisper' }, - { name: 'Deepgram', value: 'deepgram' }, - { name: 'AssemblyAI', value: 'assembly' }, - ], - }, - { - type: 'confirm', - name: 'useDocker', - message: 'Do you want to run Whisper.cpp in a Docker container?', - when: (answers) => answers.transcriptOpt === 'whisper', - default: false, - }, - { - type: 'list', - name: 'whisperModel', - message: 'Select the Whisper model type:', - choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2'], - when: (answers) => answers.transcriptOpt === 'whisper', - default: 'large', - }, - { - type: 'confirm', - name: 'speakerLabels', - message: 'Do you want to use speaker labels?', - when: (answers) => answers.transcriptOpt === 'assembly', - default: false, - }, - { - type: 'checkbox', - name: 'prompt', - message: 'Select the prompt sections to include:', - choices: [ - { name: 'Titles', value: 'titles' }, - { name: 'Summary', value: 'summary' }, - { name: 'Short Chapters', value: 'shortChapters' }, - { name: 'Medium Chapters', value: 'mediumChapters' }, - { name: 'Long Chapters', value: 'longChapters' }, - { name: 'Key Takeaways', value: 'takeaways' }, - { name: 'Questions', value: 'questions' }, - ], - default: ['summary', 'longChapters'], - }, - { - type: 'confirm', - name: 'noCleanUp', - message: 'Do you want to keep intermediary files after processing?', - default: false, - }, -] - -/** - * Prompts the user for input if no command-line options are provided. - * @param {ProcessingOptions} options - The initial command-line options. - * @returns {Promise} - The updated options after user input. - */ -async function handleInteractivePrompt(options) { - /** @type {InquirerAnswers} */ - const answers = await inquirer.prompt(INQUIRER_PROMPT) - options = { - ...options, - ...answers, - } - - // Handle LLM options - if (answers.llmOpt) { - options[answers.llmOpt] = answers.llmOpt === 'llama' ? 
answers.llamaModel : true - } - - // Handle transcription options - if (answers.transcriptOpt === 'whisper') { - if (answers.useDocker) { - options.whisperDocker = /** @type {WhisperModelType} */ (answers.whisperModel) - } else { - options.whisper = /** @type {WhisperModelType} */ (answers.whisperModel) - } - } else { - options[answers.transcriptOpt] = true - } - - // Handle 'item' for RSS feed - if (answers.item && typeof answers.item === 'string') { - options.item = answers.item.split(',').map((url) => url.trim()) - } - - return options -} - /** * Main action for the program. * @param {ProcessingOptions} options - The command-line options provided by the user. @@ -242,12 +59,12 @@ async function handleInteractivePrompt(options) { program.action(async (options) => { console.log(`Options received:\n`) console.log(options) + const { video, playlist, urls, file, rss } = options // Check if no input options are provided and if so, prompt the user interactively - const noInputOptions = !options.video && !options.playlist && !options.urls && !options.file && !options.rss - if (noInputOptions) { - options = await handleInteractivePrompt(options) - } + options = [video, playlist, urls, file, rss].every(opt => !opt) + ? await handleInteractivePrompt(options) + : options // Ensure options.item is an array if provided via command line if (options.item && !Array.isArray(options.item)) { @@ -270,17 +87,17 @@ program.action(async (options) => { * Determine the selected LLM option * @type {LLMOption | undefined} */ - const llmOpt = /** @type {LLMOption | undefined} */ (['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'gemini'].find( - (option) => options[option] - )) + const llmOpt = /** @type {LLMOption | undefined} */ ([ + 'chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini' + ].find((option) => options[option])) /** * Determine the transcription service to use * @type {TranscriptOption | undefined} */ - const transcriptOpt = /** @type {TranscriptOption | undefined} */ (['whisper', 'whisperDocker', 'deepgram', 'assembly'].find( - (option) => options[option] - )) + const transcriptOpt = /** @type {TranscriptOption | undefined} */ ([ + 'whisper', 'whisperDocker', 'deepgram', 'assembly' + ].find((option) => options[option])) // Execute the appropriate handler based on the action for (const [key, handler] of Object.entries(handlers)) { diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index 489df70..b014092 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { processVideo } from './processVideo.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' +import { extractVideoMetadata } from '../utils/generateMarkdown.js' /** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ @@ -39,6 +40,20 @@ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, option const urls = stdout.trim().split('\n').filter(Boolean) console.log(`Found ${urls.length} videos in the playlist`) + // Extract metadata for all videos + const metadataPromises = urls.map(extractVideoMetadata) + const metadataList = await Promise.all(metadataPromises) + const validMetadata = metadataList.filter(Boolean) + + // Generate JSON file with playlist information + if (options.info) { + const jsonContent = JSON.stringify(validMetadata, null, 2) + const jsonFilePath = 'content/playlist_info.json' + await 
writeFile(jsonFilePath, jsonContent) + console.log(`Playlist information saved to: ${jsonFilePath}`) + return + } + // Write the URLs to a file for reference try { await writeFile('content/urls.md', urls.join('\n')) diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index 03ab6fd..04c4cf8 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -5,6 +5,7 @@ * processing specific episodes based on user input. It supports processing multiple specific items or the entire feed. */ +import { writeFile } from 'node:fs/promises' import { XMLParser } from 'fast-xml-parser' import { generateRSSMarkdown } from '../utils/generateMarkdown.js' import { downloadAudio } from '../utils/downloadAudio.js' @@ -144,10 +145,20 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { channel: channelTitle, channelURL: channelLink, title: item.title, + description: "", // Initialize description as empty string publishDate: dateFormatter.format(new Date(item.pubDate)), coverImage: item['itunes:image']?.href || channelImage || '', })) + // Generate JSON file with RSS feed information + if (options.info) { + const jsonContent = JSON.stringify(items, null, 2) + const jsonFilePath = 'content/rss_info.json' + await writeFile(jsonFilePath, jsonContent) + console.log(`RSS feed information saved to: ${jsonFilePath}`) + return + } + let itemsToProcess = [] if (options.item && options.item.length > 0) { // Find the items matching the provided audio URLs diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index 7c2f732..5ccb48a 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -1,8 +1,9 @@ // src/commands/processURLs.js -import { readFile } from 'node:fs/promises' -import { processVideo } from './processVideo.js' +import { readFile, writeFile } from 'node:fs/promises' import { resolve } from 'node:path' +import { processVideo } from './processVideo.js' +import { extractVideoMetadata } from '../utils/generateMarkdown.js' /** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ @@ -29,6 +30,20 @@ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { // Log the number of URLs found console.log(`Found ${urls.length} URLs in the file`) + // Extract metadata for all videos + const metadataPromises = urls.map(extractVideoMetadata) + const metadataList = await Promise.all(metadataPromises) + const validMetadata = metadataList.filter(Boolean) + + // Generate JSON file with video information if --info option is used + if (options.info) { + const jsonContent = JSON.stringify(validMetadata, null, 2) + const jsonFilePath = 'content/urls_info.json' + await writeFile(jsonFilePath, jsonContent) + console.log(`Video information saved to: ${jsonFilePath}`) + return + } + // Process each URL for (const [index, url] of urls.entries()) { console.log(`Processing URL ${index + 1}/${urls.length}: ${url}`) diff --git a/src/inquirer.js b/src/inquirer.js new file mode 100644 index 0000000..c5c0f4c --- /dev/null +++ b/src/inquirer.js @@ -0,0 +1,188 @@ +// src/inquirer.js + +import inquirer from 'inquirer' + +/** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, WhisperModelType } from './types.js' */ + +// Interactive prompts using inquirer +/** @type {InquirerQuestions} */ +const INQUIRER_PROMPT = [ + { + type: 'list', + name: 'action', + message: 'What would you like to process?', + choices: [ + { name: 'Single YouTube Video', value: 'video' }, + { name: 'YouTube Playlist', value: 
'playlist' }, + { name: 'List of URLs from File', value: 'urls' }, + { name: 'Local Audio/Video File', value: 'file' }, + { name: 'Podcast RSS Feed', value: 'rss' }, + ], + }, + { + type: 'input', + name: 'video', + message: 'Enter the YouTube video URL:', + when: (answers) => answers.action === 'video', + validate: (input) => (input ? true : 'Please enter a valid URL.'), + }, + { + type: 'input', + name: 'playlist', + message: 'Enter the YouTube playlist URL:', + when: (answers) => answers.action === 'playlist', + validate: (input) => (input ? true : 'Please enter a valid URL.'), + }, + { + type: 'input', + name: 'urls', + message: 'Enter the file path containing URLs:', + when: (answers) => answers.action === 'urls', + validate: (input) => (input ? true : 'Please enter a valid file path.'), + }, + { + type: 'input', + name: 'file', + message: 'Enter the local audio/video file path:', + when: (answers) => answers.action === 'file', + validate: (input) => (input ? true : 'Please enter a valid file path.'), + }, + { + type: 'input', + name: 'rss', + message: 'Enter the podcast RSS feed URL:', + when: (answers) => answers.action === 'rss', + validate: (input) => (input ? true : 'Please enter a valid URL.'), + }, + { + type: 'confirm', + name: 'specifyItem', + message: 'Do you want to process specific episodes by providing their audio URLs?', + when: (answers) => answers.action === 'rss', + default: false, + }, + { + type: 'input', + name: 'item', + message: 'Enter the audio URLs of the episodes (separated by commas):', + when: (answers) => answers.action === 'rss' && answers.specifyItem, + validate: (input) => (input ? true : 'Please enter at least one valid audio URL.'), + }, + { + type: 'list', + name: 'llmOpt', + message: 'Select the Language Model (LLM) you want to use:', + choices: [ + { name: 'OpenAI ChatGPT', value: 'chatgpt' }, + { name: 'Anthropic Claude', value: 'claude' }, + { name: 'Cohere', value: 'cohere' }, + { name: 'Mistral', value: 'mistral' }, + { name: 'OctoAI', value: 'octo' }, + { name: 'node-llama-cpp (local inference)', value: 'llama' }, + { name: 'Google Gemini', value: 'gemini' }, + { name: 'Skip LLM Processing', value: null }, + ], + }, + { + type: 'list', + name: 'llamaModel', + message: 'Select the LLAMA model you want to use:', + choices: [ + { name: 'LLAMA 3 8B Q4 Model', value: 'LLAMA_3_1_8B_Q4_MODEL' }, + { name: 'LLAMA 3 8B Q6 Model', value: 'LLAMA_3_1_8B_Q6_MODEL' }, + { name: 'GEMMA 2 2B Q4 Model', value: 'GEMMA_2_2B_Q4_MODEL' }, + { name: 'GEMMA 2 2B Q6 Model', value: 'GEMMA_2_2B_Q6_MODEL' }, + ], + when: (answers) => answers.llmOpt === 'llama', + }, + { + type: 'list', + name: 'transcriptOpt', + message: 'Select the transcription service you want to use:', + choices: [ + { name: 'Whisper.cpp', value: 'whisper' }, + { name: 'Deepgram', value: 'deepgram' }, + { name: 'AssemblyAI', value: 'assembly' }, + ], + }, + { + type: 'confirm', + name: 'useDocker', + message: 'Do you want to run Whisper.cpp in a Docker container?', + when: (answers) => answers.transcriptOpt === 'whisper', + default: false, + }, + { + type: 'list', + name: 'whisperModel', + message: 'Select the Whisper model type:', + choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2'], + when: (answers) => answers.transcriptOpt === 'whisper', + default: 'large', + }, + { + type: 'confirm', + name: 'speakerLabels', + message: 'Do you want to use speaker labels?', + when: (answers) => answers.transcriptOpt === 'assembly', + 
default: false, + }, + { + type: 'checkbox', + name: 'prompt', + message: 'Select the prompt sections to include:', + choices: [ + { name: 'Titles', value: 'titles' }, + { name: 'Summary', value: 'summary' }, + { name: 'Short Chapters', value: 'shortChapters' }, + { name: 'Medium Chapters', value: 'mediumChapters' }, + { name: 'Long Chapters', value: 'longChapters' }, + { name: 'Key Takeaways', value: 'takeaways' }, + { name: 'Questions', value: 'questions' }, + ], + default: ['summary', 'longChapters'], + }, + { + type: 'confirm', + name: 'noCleanUp', + message: 'Do you want to keep intermediary files after processing?', + default: false, + }, +] + +/** + * Prompts the user for input if no command-line options are provided. + * @param {ProcessingOptions} options - The initial command-line options. + * @returns {Promise} - The updated options after user input. + */ +export async function handleInteractivePrompt(options) { + /** @type {InquirerAnswers} */ + const answers = await inquirer.prompt(INQUIRER_PROMPT) + options = { + ...options, + ...answers, + } + + // Handle LLM options + if (answers.llmOpt) { + options[answers.llmOpt] = answers.llmOpt === 'llama' ? answers.llamaModel : true + } + + // Handle transcription options + if (answers.transcriptOpt === 'whisper') { + if (answers.useDocker) { + options.whisperDocker = /** @type {WhisperModelType} */ (answers.whisperModel) + } else { + options.whisper = /** @type {WhisperModelType} */ (answers.whisperModel) + } + } else { + options[answers.transcriptOpt] = true + } + + // Handle 'item' for RSS feed + if (answers.item && typeof answers.item === 'string') { + options.item = answers.item.split(',').map((url) => url.trim()) + } + + return options +} \ No newline at end of file diff --git a/src/llms/llama.js b/src/llms/llama.js index a58d06c..578cfc2 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -30,14 +30,6 @@ const localModels = { GEMMA_2_2B_Q6_MODEL: { filename: "gemma-2-2b-it-Q6_K.gguf", url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" - }, - TINY_LLAMA_1B_Q4_MODEL: { - filename: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", - url: "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" - }, - TINY_LLAMA_1B_Q6_MODEL: { - filename: "tinyllama-1.1b-chat-v1.0.Q6_K.gguf", - url: "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q6_K.gguf" } } diff --git a/src/llms/ollama.js b/src/llms/ollama.js new file mode 100644 index 0000000..1bed42d --- /dev/null +++ b/src/llms/ollama.js @@ -0,0 +1,71 @@ +// src/llms/ollama.js + +import { writeFile } from 'node:fs/promises' +import ollama from 'ollama' + +/** @import { LLMFunction, LlamaModelType } from '../types.js' */ + +/** + * Map of model identifiers to their corresponding names in Ollama + * @type {Record} + */ +const ollamaModels = { + LLAMA_3_2_1B_MODEL: 'llama3.2:1b', + LLAMA_3_2_3B_MODEL: 'llama3.2:3b', + LLAMA_3_1_8B_MODEL: 'llama3.1:8b', + GEMMA_2_2B_MODEL: 'gemma2:2b', + GEMMA_2_9B_MODEL: 'gemma2:9b', + PHI_3_5_MODEL: 'phi3.5:3.8b', + QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', + QWEN_2_5_3B_MODEL: 'qwen2.5:3b', + QWEN_2_5_7B_MODEL: 'qwen2.5:7b', +} + +/** @type {LLMFunction} */ +/** + * Main function to call the Llama model using the Ollama library. + * This function checks if the model is available, pulls it if necessary, + * and then proceeds with the chat. 
+ * @param {string} promptAndTranscript - The combined prompt and transcript content. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. + * @returns {Promise} + * @throws {Error} - If an error occurs during processing. + */ +export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { + try { + // Map the model name to the Ollama model identifier + const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' + + // Check if the model is available + const models = await ollama.list() + const isAvailable = models.models.some(model => model.name === ollamaModelName) + + // If the model is not available, pull it + if (!isAvailable) { + console.log(`Model ${ollamaModelName} not found. Pulling it now...`) + const pullStream = await ollama.pull({ model: ollamaModelName, stream: true }) + for await (const part of pullStream) { + console.log(`Pulling ${ollamaModelName}: ${part.status}`) + } + console.log(`Model ${ollamaModelName} successfully pulled.`) + } + + // Call the Ollama chat API + const response = await ollama.chat({ + model: ollamaModelName, + messages: [{ role: 'user', content: promptAndTranscript }], + }) + + // Extract the assistant's reply + const assistantReply = response.message.content + + // Write the response to the output file + await writeFile(tempPath, assistantReply) + console.log(`\nTranscript saved to:\n - ${tempPath}`) + console.log(`\nModel used:\n - ${ollamaModelName}\n`) + } catch (error) { + console.error('Error in callLlama:', error) + throw error + } +} \ No newline at end of file diff --git a/src/llms/ollamaClient.js b/src/llms/ollamaClient.js new file mode 100644 index 0000000..64037c5 --- /dev/null +++ b/src/llms/ollamaClient.js @@ -0,0 +1,92 @@ +// src/llms/ollamaClient.js + +import { writeFile } from 'node:fs/promises' +import { Ollama } from 'ollama' + +/** @import { LLMFunction, LlamaModelType } from '../types.js' */ + +/** + * Map of model identifiers to their corresponding names in Ollama + * @type {Record} + */ +const ollamaModels = { + LLAMA_3_2_1B_MODEL: 'llama3.2:1b', + LLAMA_3_2_3B_MODEL: 'llama3.2:3b', + LLAMA_3_1_8B_MODEL: 'llama3.1:8b', + GEMMA_2_2B_MODEL: 'gemma2:2b', + GEMMA_2_9B_MODEL: 'gemma2:9b', + PHI_3_5_MODEL: 'phi3.5:3.8b', + QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', + QWEN_2_5_3B_MODEL: 'qwen2.5:3b', + QWEN_2_5_7B_MODEL: 'qwen2.5:7b', +} + +/** + * Main function to call the Llama model using the Ollama library. + * This function initializes the client, checks if the model is available, + * pulls it if necessary, and then proceeds with the chat. + * @type {LLMFunction} + * @param {string} promptAndTranscript - The combined prompt and transcript content. + * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. + * @returns {Promise} + * @throws {Error} - If an error occurs during processing. 
+ */ +export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { + const ollamaHost = process.env.OLLAMA_HOST || '127.0.0.1' + const ollamaPort = process.env.OLLAMA_PORT || 11434 + const baseUrl = `http://${ollamaHost}:${ollamaPort}` + + console.log(`Initializing Ollama client with base URL: ${baseUrl}`) + const ollamaClient = new Ollama({ host: baseUrl }) + + try { + // Test connection to Ollama server + console.log('Testing connection to Ollama server...') + await ollamaClient.list() + console.log('Successfully connected to Ollama server.') + + // Map the model name to the Ollama model identifier + const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' + console.log(`Using Ollama model: ${ollamaModelName}`) + + // Check if the model is available + console.log('Checking model availability...') + const models = await ollamaClient.list() + const isAvailable = models.models.some(model => model.name === ollamaModelName) + + // If the model is not available, pull it + if (!isAvailable) { + console.log(`Model ${ollamaModelName} not found. Pulling it now...`) + const pullStream = await ollamaClient.pull({ model: ollamaModelName, stream: true }) + for await (const part of pullStream) { + console.log(`Pulling ${ollamaModelName}: ${part.status}`) + } + console.log(`Model ${ollamaModelName} successfully pulled.`) + } else { + console.log(`Model ${ollamaModelName} is available.`) + } + + // Call the Ollama chat API + console.log('Sending request to Ollama chat API...') + const response = await ollamaClient.chat({ + model: ollamaModelName, + messages: [{ role: 'user', content: promptAndTranscript }], + }) + + // Extract the assistant's reply + const assistantReply = response.message.content + + // Write the response to the output file + console.log(`Writing response to file: ${tempPath}`) + await writeFile(tempPath, assistantReply) + console.log(`\nTranscript saved to:\n - ${tempPath}`) + console.log(`\nModel used:\n - ${ollamaModelName}\n`) + } catch (error) { + console.error('Error in callOllama:', error) + if (error.code === 'ECONNREFUSED') { + console.error(`Failed to connect to Ollama server at ${baseUrl}. Please ensure it's running and accessible.`) + } + throw error + } +} \ No newline at end of file diff --git a/src/llms/ollamaOAI.js b/src/llms/ollamaOAI.js new file mode 100644 index 0000000..1ed3dfb --- /dev/null +++ b/src/llms/ollamaOAI.js @@ -0,0 +1,96 @@ +// src/llms/ollamaOAI.js + +import { writeFile } from 'node:fs/promises' +import OpenAI from 'openai' + +/** @import { LLMFunction, LlamaModelType } from '../types.js' */ + +/** + * Map of model identifiers to their corresponding names in Ollama + * @type {Record} + */ +const ollamaModels = { + LLAMA_3_2_1B_MODEL: 'llama3.2:1b', + LLAMA_3_2_3B_MODEL: 'llama3.2:3b', + LLAMA_3_1_8B_MODEL: 'llama3.1:8b', + GEMMA_2_2B_MODEL: 'gemma2:2b', + GEMMA_2_9B_MODEL: 'gemma2:9b', + PHI_3_5_MODEL: 'phi3.5:3.8b', + QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', + QWEN_2_5_3B_MODEL: 'qwen2.5:3b', + QWEN_2_5_7B_MODEL: 'qwen2.5:7b', +} + +const ollamaBaseUrl = 'http://localhost:11434' +const openai = new OpenAI({ + baseURL: `${ollamaBaseUrl}/v1/`, + apiKey: 'ollama', // required but ignored +}) + +/** @type {LLMFunction} */ +/** + * Main function to call the Llama model using the Ollama OpenAI compatibility layer. + * This function checks if the model is available, pulls it if necessary, + * and then proceeds with the chat. + * @param {string} promptAndTranscript - The combined prompt and transcript content. 
+ * @param {string} tempPath - The temporary file path to write the LLM output. + * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. + * @returns {Promise} + * @throws {Error} - If an error occurs during processing. + */ +export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { + try { + // Map the model name to the Ollama model identifier + const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' + + // Check if the model is available + const modelsResponse = await fetch(`${ollamaBaseUrl}/api/tags`) + const models = await modelsResponse.json() + const isAvailable = models.models.some(model => model.name === ollamaModelName) + + // If the model is not available, pull it + if (!isAvailable) { + console.log(`Model ${ollamaModelName} not found. Pulling it now...`) + const pullResponse = await fetch(`${ollamaBaseUrl}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: ollamaModelName }), + }) + + const reader = pullResponse.body.getReader() + const decoder = new TextDecoder() + + while (true) { + const { done, value } = await reader.read() + if (done) break + const chunk = decoder.decode(value) + const lines = chunk.split('\n') + for (const line of lines) { + if (line.trim()) { + const data = JSON.parse(line) + console.log(`Pulling ${ollamaModelName}: ${data.status}`) + } + } + } + + console.log(`Model ${ollamaModelName} successfully pulled.`) + } + + // Call the Ollama chat API using OpenAI client + const response = await openai.chat.completions.create({ + model: ollamaModelName, + messages: [{ role: 'user', content: promptAndTranscript }], + }) + + // Extract the assistant's reply + const assistantReply = response.choices[0].message.content + + // Write the response to the output file + await writeFile(tempPath, assistantReply) + console.log(`\nTranscript saved to:\n - ${tempPath}`) + console.log(`\nModel used:\n - ${ollamaModelName}\n`) + } catch (error) { + console.error('Error in callOllama:', error) + throw error + } +} \ No newline at end of file diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index 80bf373..aa444c6 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -6,16 +6,6 @@ import { AssemblyAI } from 'assemblyai' /** @import { TranscriptOption, ProcessingOptions } from '../types.js' */ -/** - * Check if the ASSEMBLY_API_KEY environment variable is set - */ -if (!env.ASSEMBLY_API_KEY) { - throw new Error('ASSEMBLY_API_KEY environment variable is not set.') -} - -// Initialize the AssemblyAI client with API key from environment variables -const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) - /** * Main function to handle transcription using AssemblyAI. * @param {string} finalPath - The identifier used for naming output files. @@ -25,6 +15,14 @@ const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) * @throws {Error} - If an error occurs during transcription. 
*/ export async function callAssembly(finalPath, transcriptOpt, options) { + // Check if the ASSEMBLY_API_KEY environment variable is set + if (!env.ASSEMBLY_API_KEY) { + throw new Error('ASSEMBLY_API_KEY environment variable is not set.') + } + + // Initialize the AssemblyAI client with API key from environment variables + const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) + try { const { speakerLabels } = options console.log(`Parameters passed to callAssembly:`) diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js index 3246cce..9a6ff3d 100644 --- a/src/transcription/deepgram.js +++ b/src/transcription/deepgram.js @@ -4,16 +4,6 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' import { createClient } from '@deepgram/sdk' -/** - * Check if the DEEPGRAM_API_KEY environment variable is set - */ -if (!env.DEEPGRAM_API_KEY) { - throw new Error('DEEPGRAM_API_KEY environment variable is not set.') -} - -// Initialize the Deepgram client with the API key from environment variables -const deepgram = createClient(env.DEEPGRAM_API_KEY) - /** * Main function to handle transcription using Deepgram. * @param {string} input - The URL or local file path of the audio to transcribe. @@ -22,6 +12,14 @@ const deepgram = createClient(env.DEEPGRAM_API_KEY) * @throws {Error} - If an error occurs during transcription. */ export async function callDeepgram(input, id) { + // Check if the DEEPGRAM_API_KEY environment variable is set + if (!env.DEEPGRAM_API_KEY) { + throw new Error('DEEPGRAM_API_KEY environment variable is not set.') + } + + // Initialize the Deepgram client with the API key from environment variables + const deepgram = createClient(env.DEEPGRAM_API_KEY) + // Check if the input is a URL or a local file const isUrl = input.startsWith('http://') || input.startsWith('https://') diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index cd92006..0a1ffdf 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -128,19 +128,30 @@ async function callWhisperMain(finalPath, modelName, whisperModel) { const modelPath = `./whisper.cpp/models/${modelName}` try { + // Check if whisper.cpp directory exists, clone and build if not + await access('./whisper.cpp').catch(async () => { + console.log('\nwhisper.cpp directory not found. 
Cloning and building whisper.cpp...') + // Clone the whisper.cpp repository + await execPromise(`git clone https://github.com/ggerganov/whisper.cpp.git`) + // Build the project + await execPromise(`make -C whisper.cpp`) + // Copy the Dockerfile (adjust the path as needed) + await execPromise(`cp .github/whisper.Dockerfile whisper.cpp/Dockerfile`) + console.log('whisper.cpp cloned and built successfully.') + }) + console.log('\nwhisper.cpp directory found.') + // Check if the model exists locally, download if not - try { - await access(modelPath) - console.log(`\nWhisper.cpp ${whisperModel} model found:`) - console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`) - } catch { + await access(modelPath).catch(async () => { console.log(`\nWhisper.cpp ${whisperModel} model not found:`) console.log(` - ${modelName} model selected\n - Model downloading to ${modelPath}`) await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) console.log(` - Model downloaded successfully`) - } - - // Execute Whisper transcription + }) + console.log(`\nWhisper.cpp ${whisperModel} model found:`) + console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`) + + // Proceed with transcription await execPromise( `./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc` ) @@ -148,5 +159,5 @@ async function callWhisperMain(finalPath, modelName, whisperModel) { } catch (error) { console.error('Error in callWhisperMain:', error) throw error - } + } } \ No newline at end of file diff --git a/src/types.js b/src/types.js index 3d60b4e..9d30d56 100644 --- a/src/types.js +++ b/src/types.js @@ -13,6 +13,7 @@ * @property {string} [file] - Local audio or video file path to process. * @property {string} [rss] - URL of the podcast RSS feed to process. * @property {string[]} [item] - Specific items (audio URLs) from the RSS feed to process. + * @property {boolean} [info] - Flag to generate JSON file with RSS feed information instead of processing items * @property {boolean} [noCleanUp] - Flag to indicate whether to keep temporary files after processing. * @property {WhisperModelType} [whisper] - Whisper model type to use for transcription (e.g., 'tiny', 'base'). * @property {WhisperModelType} [whisperDocker] - Whisper model type to use in Docker for transcription. @@ -90,12 +91,13 @@ /** * Represents the metadata extracted from a YouTube video. * @typedef {Object} VideoMetadata - * @property {string} formattedDate - The upload date in 'YYYY-MM-DD' format. - * @property {string} title - The title of the video. - * @property {string} thumbnail - The URL to the video's thumbnail image. - * @property {string} webpage_url - The URL to the video's webpage. + * @property {string} showLink - The URL to the video's webpage. * @property {string} channel - The name of the channel that uploaded the video. - * @property {string} uploader_url - The URL to the uploader's channel page. + * @property {string} channelURL - The URL to the uploader's channel page. + * @property {string} title - The title of the video. + * @property {string} description - The description of the video (empty string in this case). + * @property {string} publishDate - The upload date in 'YYYY-MM-DD' format. + * @property {string} coverImage - The URL to the video's thumbnail image. 
*/ /** diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.js index 6962f23..9b1a48c 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.js @@ -5,11 +5,46 @@ import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' -/** @import { MarkdownData, RSSItem } from '../types.js' */ +/** @import { MarkdownData, RSSItem, VideoMetadata } from '../types.js' */ // Promisify the execFile function for use with async/await const execFilePromise = promisify(execFile) +/** + * Extract metadata for a single video URL. + * @param {string} url - The URL of the video. + * @returns {Promise} - The video metadata. + */ +export async function extractVideoMetadata(url) { + try { + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + url + ]) + + const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout.trim().split('\n') + + return { + showLink, + channel, + channelURL, + title, + description: "", + publishDate, + coverImage + } + } catch (error) { + console.error(`Error extracting metadata for ${url}:`, error) + return null + } +} + /** * Function to generate markdown for RSS feed items. * @param {RSSItem} item - The RSS feed item object. diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 71efd3e..7bb3711 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -2,6 +2,7 @@ import { readFile, writeFile, unlink } from 'node:fs/promises' import { callLlama } from '../llms/llama.js' +import { callOllama } from '../llms/ollama.js' import { callChatGPT } from '../llms/chatgpt.js' import { callClaude } from '../llms/claude.js' import { callGemini } from '../llms/gemini.js' @@ -15,6 +16,7 @@ import { generatePrompt } from '../llms/prompt.js' /** @type {LLMFunctions} */ const llmFunctions = { llama: callLlama, + ollama: callOllama, chatgpt: callChatGPT, claude: callClaude, gemini: callGemini,
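
With `callOllama` wired into `llmFunctions`, the new `--ollama [model]` Commander flag selects local Ollama inference the same way the existing `--llama` flag selects node-llama-cpp. A minimal sketch of invoking it from the CLI, assuming the model key is forwarded to `callOllama` just as the `--llama` key is, and that an Ollama server is reachable on the default port 11434:

```bash
# Sketch: process a single video and hand the transcript to the Ollama backend.
# LLAMA_3_2_1B_MODEL maps to "llama3.2:1b" in src/llms/ollama.js and is pulled automatically if missing.
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama LLAMA_3_2_1B_MODEL
```

Omitting the model argument (`--ollama` alone) falls back to the default `llama3.2:1b` mapping in `ollamaModels`.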