From 4c55549d7de63737ff9c2ad9bee93a7aa9e2dec8 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:50:21 -0500 Subject: [PATCH 1/9] create server test files --- .env.example | 4 +- package.json | 3 +- packages/server/fetch.js | 32 -- packages/server/index.js | 4 +- packages/server/routes/file.js | 8 +- packages/server/routes/playlist.js | 8 +- packages/server/routes/rss.js | 8 +- packages/server/routes/urls.js | 8 +- packages/server/routes/video.js | 8 +- packages/server/tests/fetch-all.js | 423 +++++++++++++++++++++++++++ packages/server/tests/fetch-local.js | 268 +++++++++++++++++ 11 files changed, 716 insertions(+), 58 deletions(-) delete mode 100644 packages/server/fetch.js create mode 100644 packages/server/tests/fetch-all.js create mode 100644 packages/server/tests/fetch-local.js diff --git a/.env.example b/.env.example index bd1f290..3e7a254 100644 --- a/.env.example +++ b/.env.example @@ -1,8 +1,6 @@ -LLAMA_MODEL="gemma-2-2b-it-IQ4_XS.gguf" -HUGGING_FACE_URL="https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF" - OPENAI_API_KEY="" ANTHROPIC_API_KEY="" +GEMINI_API_KEY="" COHERE_API_KEY="" MISTRAL_API_KEY="" OCTOAI_API_KEY="" diff --git a/package.json b/package.json index 9c8316e..7e5ff3c 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,8 @@ "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --playlist", "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --file", "serve": "node --env-file=.env --no-warnings --watch packages/server/index.js", - "fetch": "node --env-file=.env --no-warnings packages/server/fetch.js", + "fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", + "fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", "test-local": "node --test test/local.test.js", "test-all": "node --test test/all.test.js" }, diff --git a/packages/server/fetch.js b/packages/server/fetch.js deleted file mode 100644 index 9caef82..0000000 --- a/packages/server/fetch.js +++ /dev/null @@ -1,32 +0,0 @@ -// server/fetch.js - -const BASE_URL = 'http://localhost:3000' -const VIDEO_ROUTE = 'video' - -const data = { - youtubeUrl: 'https://www.youtube.com/watch?v=jKB0EltG9Jo', - whisperModel: 'tiny', - llm: 'llama' -} - -const fetchVideo = async () => { - try { - const response = await fetch(`${BASE_URL}/${VIDEO_ROUTE}`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json' - }, - body: JSON.stringify(data) - }) - console.log('Fetch response status:', response.status) - if (!response.ok) { - throw new Error(`HTTP error! 
status: ${response.status}`) - } - const result = await response.json() - console.log(result) - } catch (error) { - console.error('Error:', error) - } -} - -fetchVideo() \ No newline at end of file diff --git a/packages/server/index.js b/packages/server/index.js index f95d62a..42c869a 100644 --- a/packages/server/index.js +++ b/packages/server/index.js @@ -26,7 +26,7 @@ async function start() { // Log each incoming request fastify.addHook('onRequest', async (request, reply) => { console.log( - `[${new Date().toISOString()}] Received ${request.method} request for ${request.url}` + `\n[${new Date().toISOString()}] Received ${request.method} request for ${request.url}\n` ) }) @@ -39,7 +39,7 @@ async function start() { try { await fastify.listen({ port }) - console.log(`Server running at http://localhost:${port}`) + console.log(`\nServer running at http://localhost:${port}\n`) } catch (err) { fastify.log.error(err) process.exit(1) diff --git a/packages/server/routes/file.js b/packages/server/routes/file.js index b7eb045..3097765 100644 --- a/packages/server/routes/file.js +++ b/packages/server/routes/file.js @@ -5,12 +5,12 @@ import { reqToOpts } from '../utils/reqToOpts.js' // Handler for /file route const handleFileRequest = async (request, reply) => { - console.log('Entered handleFileRequest') + console.log('\nEntered handleFileRequest') try { // Access parsed request body const requestData = request.body - console.log('Parsed request body:', requestData) + console.log('\nParsed request body:', requestData) // Extract file path const { filePath } = requestData @@ -23,11 +23,11 @@ const handleFileRequest = async (request, reply) => { // Map request data to processing options const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('Calling processFile with params:', { filePath, llmOpt, transcriptOpt, options }) + console.log('\nCalling processFile with params:', { filePath, llmOpt, transcriptOpt, options }) await processFile(filePath, llmOpt, transcriptOpt, options) - console.log('processFile completed successfully') + console.log('\nprocessFile completed successfully') reply.send({ message: 'File processed successfully.' 
}) } catch (error) { console.error('Error processing file:', error) diff --git a/packages/server/routes/playlist.js b/packages/server/routes/playlist.js index bfc6088..a7e6de6 100644 --- a/packages/server/routes/playlist.js +++ b/packages/server/routes/playlist.js @@ -5,12 +5,12 @@ import { reqToOpts } from '../utils/reqToOpts.js' // Handler for /playlist route const handlePlaylistRequest = async (request, reply) => { - console.log('Entered handlePlaylistRequest') + console.log('\nEntered handlePlaylistRequest') try { // Access parsed request body const requestData = request.body - console.log('Parsed request body:', requestData) + console.log('\nParsed request body:', requestData) // Extract playlist URL const { playlistUrl } = requestData @@ -23,11 +23,11 @@ const handlePlaylistRequest = async (request, reply) => { // Map request data to processing options const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('Calling processPlaylist with params:', { playlistUrl, llmOpt, transcriptOpt, options }) + console.log('\nCalling processPlaylist with params:', { playlistUrl, llmOpt, transcriptOpt, options }) await processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) - console.log('processPlaylist completed successfully') + console.log('\nprocessPlaylist completed successfully') reply.send({ message: 'Playlist processed successfully.' }) } catch (error) { console.error('Error processing playlist:', error) diff --git a/packages/server/routes/rss.js b/packages/server/routes/rss.js index 5792dcc..f6f8f07 100644 --- a/packages/server/routes/rss.js +++ b/packages/server/routes/rss.js @@ -5,12 +5,12 @@ import { reqToOpts } from '../utils/reqToOpts.js' // Handler for /rss route const handleRSSRequest = async (request, reply) => { - console.log('Entered handleRSSRequest') + console.log('\nEntered handleRSSRequest') try { // Access parsed request body const requestData = request.body - console.log('Parsed request body:', requestData) + console.log('\nParsed request body:', requestData) // Extract RSS URL const { rssUrl } = requestData @@ -23,11 +23,11 @@ const handleRSSRequest = async (request, reply) => { // Map request data to processing options const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('Calling processRSS with params:', { rssUrl, llmOpt, transcriptOpt, options }) + console.log('\nCalling processRSS with params:', { rssUrl, llmOpt, transcriptOpt, options }) await processRSS(rssUrl, llmOpt, transcriptOpt, options) - console.log('processRSS completed successfully') + console.log('\nprocessRSS completed successfully') reply.send({ message: 'RSS feed processed successfully.' 
}) } catch (error) { console.error('Error processing RSS request:', error) diff --git a/packages/server/routes/urls.js b/packages/server/routes/urls.js index 82ae3d0..d0fec32 100644 --- a/packages/server/routes/urls.js +++ b/packages/server/routes/urls.js @@ -5,12 +5,12 @@ import { reqToOpts } from '../utils/reqToOpts.js' // Handler for /urls route const handleURLsRequest = async (request, reply) => { - console.log('Entered handleURLsRequest') + console.log('\nEntered handleURLsRequest') try { // Access parsed request body const requestData = request.body - console.log('Parsed request body:', requestData) + console.log('\nParsed request body:', requestData) // Extract file path const { filePath } = requestData @@ -23,11 +23,11 @@ const handleURLsRequest = async (request, reply) => { // Map request data to processing options const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('Calling processURLs with params:', { filePath, llmOpt, transcriptOpt, options }) + console.log('\nCalling processURLs with params:', { filePath, llmOpt, transcriptOpt, options }) await processURLs(filePath, llmOpt, transcriptOpt, options) - console.log('processURLs completed successfully') + console.log('\nprocessURLs completed successfully') reply.send({ message: 'URLs processed successfully.' }) } catch (error) { console.error('Error processing URLs:', error) diff --git a/packages/server/routes/video.js b/packages/server/routes/video.js index d3d0afc..2d6f7d5 100644 --- a/packages/server/routes/video.js +++ b/packages/server/routes/video.js @@ -5,12 +5,12 @@ import { reqToOpts } from '../utils/reqToOpts.js' // Handler for /video route const handleVideoRequest = async (request, reply) => { - console.log('Entered handleVideoRequest') + console.log('\nEntered handleVideoRequest\n') try { // Access parsed request body const requestData = request.body - console.log('Parsed request body:', requestData) + console.log('\nParsed request body:', requestData) // Extract YouTube URL const { youtubeUrl } = requestData @@ -23,11 +23,11 @@ const handleVideoRequest = async (request, reply) => { // Map request data to processing options const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('Calling processVideo with params:', { youtubeUrl, llmOpt, transcriptOpt, options }) + console.log('\nCalling processVideo with params:', { youtubeUrl, llmOpt, transcriptOpt, options }) await processVideo(youtubeUrl, llmOpt, transcriptOpt, options) - console.log('processVideo completed successfully') + console.log('\nprocessVideo completed successfully') reply.send({ message: 'Video processed successfully.' 
}) } catch (error) { console.error('Error processing video:', error) diff --git a/packages/server/tests/fetch-all.js b/packages/server/tests/fetch-all.js new file mode 100644 index 0000000..46ae375 --- /dev/null +++ b/packages/server/tests/fetch-all.js @@ -0,0 +1,423 @@ +// server/fetch.js + +import fs from 'fs/promises' +import path from 'path' + +const BASE_URL = 'http://localhost:3000' +const OUTPUT_DIR = 'content' + +const requests = [ + // Playlist Endpoint Requests + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + }, + endpoint: '/playlist', + outputFiles: ['FILE_01A.md', 'FILE_01B.md'], + }, + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + whisperModel: 'tiny', + }, + endpoint: '/playlist', + outputFiles: ['FILE_02A.md', 'FILE_02B.md'], + }, + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/playlist', + outputFiles: ['FILE_03A.md', 'FILE_03B.md'], + }, + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + prompts: ['titles', 'mediumChapters'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/playlist', + outputFiles: ['FILE_04A.md', 'FILE_04B.md'], + }, + // URLs Endpoint Requests + { + data: { + filePath: 'content/example-urls.md', + }, + endpoint: '/urls', + outputFiles: ['FILE_05A.md', 'FILE_05B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + whisperModel: 'tiny', + }, + endpoint: '/urls', + outputFiles: ['FILE_06A.md', 'FILE_06B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/urls', + outputFiles: ['FILE_07A.md', 'FILE_07B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + prompts: ['titles', 'mediumChapters'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/urls', + outputFiles: ['FILE_08A.md', 'FILE_08B.md'], + }, + // File Endpoint Requests + { + data: { + filePath: 'content/audio.mp3', + }, + endpoint: '/file', + outputFiles: ['FILE_09.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + whisperModel: 'tiny', + }, + endpoint: '/file', + outputFiles: ['FILE_10.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/file', + outputFiles: ['FILE_11.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + prompts: ['titles'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/file', + outputFiles: ['FILE_12.md'], + }, + // RSS Endpoint Requests + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + }, + endpoint: '/rss', + outputFiles: ['FILE_13.md'], + }, + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + whisperModel: 'tiny', + }, + endpoint: '/rss', + outputFiles: ['FILE_14.md'], + }, + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/rss', + outputFiles: ['FILE_15.md'], + }, + { + data: { + rssUrl: 'https://feeds.transistor.fm/fsjam-podcast/', + whisperModel: 'tiny', + order: 'newest', + skip: 94, + }, + endpoint: '/rss', + outputFiles: ['FILE_16.md'], + }, + { + data: { + rssUrl: 'https://feeds.transistor.fm/fsjam-podcast/', + whisperModel: 'tiny', + order: 'oldest', + skip: 94, + }, + endpoint: '/rss', + outputFiles: ['FILE_17.md'], + }, + // Video Endpoint Requests + { + data: { + youtubeUrl: 
'https://www.youtube.com/watch?v=MORMZXEaONk', + }, + endpoint: '/video', + outputFiles: ['FILE_18.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_19.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'chatgpt', + }, + endpoint: '/video', + outputFiles: ['FILE_20.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'chatgpt', + llmModel: 'GPT_4o_MINI', + }, + endpoint: '/video', + outputFiles: ['FILE_21.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'claude', + }, + endpoint: '/video', + outputFiles: ['FILE_22.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'claude', + llmModel: 'CLAUDE_3_SONNET', + }, + endpoint: '/video', + outputFiles: ['FILE_23.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'gemini', + }, + endpoint: '/video', + outputFiles: ['FILE_24.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'gemini', + llmModel: 'GEMINI_1_5_FLASH', + }, + endpoint: '/video', + outputFiles: ['FILE_25.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'cohere', + }, + endpoint: '/video', + outputFiles: ['FILE_26.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'cohere', + llmModel: 'COMMAND_R_PLUS', + }, + endpoint: '/video', + outputFiles: ['FILE_27.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'mistral', + }, + endpoint: '/video', + outputFiles: ['FILE_28.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'mistral', + llmModel: 'MIXTRAL_8x7b', + }, + endpoint: '/video', + outputFiles: ['FILE_29.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'octo', + }, + endpoint: '/video', + outputFiles: ['FILE_30.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + llm: 'octo', + llmModel: 'LLAMA_3_1_8B', + }, + endpoint: '/video', + outputFiles: ['FILE_31.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + whisperModel: 'tiny', + }, + endpoint: '/video', + outputFiles: ['FILE_32.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + transcriptService: 'deepgram', + }, + endpoint: '/video', + outputFiles: ['FILE_33.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + transcriptService: 'deepgram', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_34.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + transcriptService: 'assembly', + }, + endpoint: '/video', + outputFiles: ['FILE_35.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + transcriptService: 'assembly', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_36.md'], + }, + { + data: { + youtubeUrl: 'https://ajc.pics/audio/fsjam-short.mp3', + transcriptService: 'assembly', + speakerLabels: true, + }, + endpoint: '/video', + outputFiles: ['FILE_37.md'], + }, + { + data: { + youtubeUrl: 'https://ajc.pics/audio/fsjam-short.mp3', + transcriptService: 'assembly', + speakerLabels: true, + llm: 'llama', + }, + endpoint: 
'/video', + outputFiles: ['FILE_38.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'mediumChapters'], + }, + endpoint: '/video', + outputFiles: ['FILE_39.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'summary', 'shortChapters', 'takeaways', 'questions'], + }, + endpoint: '/video', + outputFiles: ['FILE_40.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'summary', 'shortChapters', 'takeaways', 'questions'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_41.md'], + }, +] + +const fetchRequest = async (request, index) => { + try { + // Get list of files before the request + const filesBefore = await fs.readdir(OUTPUT_DIR) + + const response = await fetch(`${BASE_URL}${request.endpoint}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(request.data), + }) + console.log(`\nRequest ${index + 1} response status:`, response.status) + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`) + } + const result = await response.json() + console.log(`Request ${index + 1} result: ${result.message}`) + + // Wait briefly to ensure files are written + await new Promise((resolve) => setTimeout(resolve, 1000)) + + // Get list of files after the request + const filesAfter = await fs.readdir(OUTPUT_DIR) + + // Identify new files + const newFiles = filesAfter.filter((f) => !filesBefore.includes(f)) + + // Sort new files to ensure consistent ordering + newFiles.sort() + + const outputFiles = request.outputFiles + + if (newFiles.length > 0) { + for (let i = 0; i < newFiles.length; i++) { + const oldFilePath = path.join(OUTPUT_DIR, newFiles[i]) + const newFileName = outputFiles[i] + const newFilePath = path.join(OUTPUT_DIR, newFileName) + await fs.rename(oldFilePath, newFilePath) + console.log(`\nFile renamed:\n - Old: ${oldFilePath}\n - New: ${newFilePath}`) + } + } else { + console.log('No new files to rename for this request.') + } + } catch (error) { + console.error(`Error in request ${index + 1}:`, error) + } +} + +const runAllRequests = async () => { + for (let i = 0; i < requests.length; i++) { + await fetchRequest(requests[i], i) + } +} + +runAllRequests() \ No newline at end of file diff --git a/packages/server/tests/fetch-local.js b/packages/server/tests/fetch-local.js new file mode 100644 index 0000000..37c1fdc --- /dev/null +++ b/packages/server/tests/fetch-local.js @@ -0,0 +1,268 @@ +// packages/server/tests/fetch-local.js + +import fs from 'fs/promises' +import path from 'path' + +const BASE_URL = 'http://localhost:3000' +const OUTPUT_DIR = 'content' + +const requests = [ + // Playlist Endpoint Requests + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + }, + endpoint: '/playlist', + outputFiles: ['FILE_01A.md', 'FILE_01B.md'], + }, + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + whisperModel: 'tiny', + }, + endpoint: '/playlist', + outputFiles: ['FILE_02A.md', 'FILE_02B.md'], + }, + { + data: { + playlistUrl: 'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/playlist', + outputFiles: ['FILE_03A.md', 'FILE_03B.md'], + }, + { + data: { + playlistUrl: 
'https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr', + prompts: ['titles', 'mediumChapters'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/playlist', + outputFiles: ['FILE_04A.md', 'FILE_04B.md'], + }, + // URLs Endpoint Requests + { + data: { + filePath: 'content/example-urls.md', + }, + endpoint: '/urls', + outputFiles: ['FILE_05A.md', 'FILE_05B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + whisperModel: 'tiny', + }, + endpoint: '/urls', + outputFiles: ['FILE_06A.md', 'FILE_06B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/urls', + outputFiles: ['FILE_07A.md', 'FILE_07B.md'], + }, + { + data: { + filePath: 'content/example-urls.md', + prompts: ['titles', 'mediumChapters'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/urls', + outputFiles: ['FILE_08A.md', 'FILE_08B.md'], + }, + // File Endpoint Requests + { + data: { + filePath: 'content/audio.mp3', + }, + endpoint: '/file', + outputFiles: ['FILE_09.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + whisperModel: 'tiny', + }, + endpoint: '/file', + outputFiles: ['FILE_10.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/file', + outputFiles: ['FILE_11.md'], + }, + { + data: { + filePath: 'content/audio.mp3', + prompts: ['titles'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/file', + outputFiles: ['FILE_12.md'], + }, + // RSS Endpoint Requests + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + }, + endpoint: '/rss', + outputFiles: ['FILE_13.md'], + }, + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + whisperModel: 'tiny', + }, + endpoint: '/rss', + outputFiles: ['FILE_14.md'], + }, + { + data: { + rssUrl: 'https://ajcwebdev.substack.com/feed/', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/rss', + outputFiles: ['FILE_15.md'], + }, + { + data: { + rssUrl: 'https://feeds.transistor.fm/fsjam-podcast/', + whisperModel: 'tiny', + order: 'newest', + skip: 94, + }, + endpoint: '/rss', + outputFiles: ['FILE_16.md'], + }, + { + data: { + rssUrl: 'https://feeds.transistor.fm/fsjam-podcast/', + whisperModel: 'tiny', + order: 'oldest', + skip: 94, + }, + endpoint: '/rss', + outputFiles: ['FILE_17.md'], + }, + // Video Endpoint Requests + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + }, + endpoint: '/video', + outputFiles: ['FILE_18.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_19.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + whisperModel: 'tiny', + }, + endpoint: '/video', + outputFiles: ['FILE_20.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'mediumChapters'], + }, + endpoint: '/video', + outputFiles: ['FILE_21.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'summary', 'shortChapters', 'takeaways', 'questions'], + }, + endpoint: '/video', + outputFiles: ['FILE_22.md'], + }, + { + data: { + youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', + prompts: ['titles', 'summary', 'shortChapters', 'takeaways', 'questions'], + whisperModel: 'tiny', + llm: 'llama', + }, + endpoint: '/video', + outputFiles: ['FILE_23.md'], + }, +] + +const fetchRequest = async 
(request, index) => { + try { + // Get list of files before the request + const filesBefore = await fs.readdir(OUTPUT_DIR) + + const response = await fetch(`${BASE_URL}${request.endpoint}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(request.data), + }) + console.log(`\nRequest ${index + 1} response status:`, response.status) + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`) + } + const result = await response.json() + console.log(`Request ${index + 1} result: ${result.message}`) + + // Wait briefly to ensure files are written + await new Promise((resolve) => setTimeout(resolve, 1000)) + + // Get list of files after the request + const filesAfter = await fs.readdir(OUTPUT_DIR) + + // Identify new files + const newFiles = filesAfter.filter((f) => !filesBefore.includes(f)) + + // Sort new files to ensure consistent ordering + newFiles.sort() + + const outputFiles = request.outputFiles + + if (newFiles.length > 0) { + for (let i = 0; i < newFiles.length; i++) { + const oldFilePath = path.join(OUTPUT_DIR, newFiles[i]) + const newFileName = outputFiles[i] + const newFilePath = path.join(OUTPUT_DIR, newFileName) + await fs.rename(oldFilePath, newFilePath) + console.log(`\nFile renamed:\n - Old: ${oldFilePath}\n - New: ${newFilePath}`) + } + } else { + console.log('No new files to rename for this request.') + } + } catch (error) { + console.error(`Error in request ${index + 1}:`, error) + } +} + +const runAllRequests = async () => { + for (let i = 0; i < requests.length; i++) { + await fetchRequest(requests[i], i) + } +} + +runAllRequests() \ No newline at end of file From 6af88affbe414903eb0f03b3e0ecce62d8bbb0c7 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 4 Oct 2024 01:17:20 -0500 Subject: [PATCH 2/9] remove ollama js --- docker-compose.yml | 12 +++++++++- docs/examples.md | 9 ++++++- src/autoshow.js | 4 ++-- src/llms/ollama.js | 58 +++++++++++++++++++++------------------------- src/types.js | 3 ++- test/local.test.js | 43 +++++++++++++++++++--------------- 6 files changed, 73 insertions(+), 56 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 12c9705..d365591 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,8 @@ services: environment: - OLLAMA_HOST=ollama - OLLAMA_PORT=11434 + networks: + - autoshownet whisper: build: context: ./whisper.cpp @@ -25,8 +27,11 @@ services: command: tail -f /dev/null tty: true stdin_open: true + networks: + - autoshownet ollama: image: ollama/ollama + command: ["ollama", "serve", "--address", "0.0.0.0"] # Listen on all interfaces ports: - "11434:11434" volumes: @@ -35,4 +40,9 @@ services: test: ["CMD", "curl", "-f", "http://localhost:11434/healthz"] interval: 10s timeout: 5s - retries: 5 \ No newline at end of file + retries: 5 + networks: + - autoshownet +networks: + autoshownet: + driver: bridge \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index f3989f1..f244642 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -243,6 +243,12 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZAR npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama ``` +### Ollama + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama +``` + ## Transcription Options ### Whisper.cpp @@ -357,7 +363,7 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt tit This 
will run both `whisper.cpp` and the AutoShow Commander CLI in their own Docker containers. ```bash -docker-compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base +docker compose run autoshow --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker base ``` Currently working on the `llama.cpp` Docker integration so the entire project can be encapsulated in one local Docker Compose file. @@ -426,6 +432,7 @@ export MD="LLM.md" && export COMMANDS="src/commands" && export UTILS="src/utils" echo ''$CLOSE'### Call Cohere'$OPEN'' >> $MD && cat $LLMS/cohere.js >> $MD && \ echo ''$CLOSE'### Call Gemini'$OPEN'' >> $MD && cat $LLMS/gemini.js >> $MD && \ echo ''$CLOSE'### Call Llama.cpp'$OPEN'' >> $MD && cat $LLMS/llama.js >> $MD && \ + echo ''$CLOSE'### Call Ollama'$OPEN'' >> $MD && cat $LLMS/ollama.js >> $MD && \ echo ''$CLOSE'### Call Mistral'$OPEN'' >> $MD && cat $LLMS/mistral.js >> $MD && \ echo ''$CLOSE'### Call Octo'$OPEN'' >> $MD && cat $LLMS/octo.js >> $MD && \ echo ''$CLOSE'## Docker Files\n\n```Dockerfile' >> $MD && cat .github/whisper.Dockerfile >> $MD && \ diff --git a/src/autoshow.js b/src/autoshow.js index 41697e9..48a5728 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -39,8 +39,8 @@ program .option('-f, --file ', 'Process a local audio or video file') .option('-r, --rss ', 'Process a podcast RSS feed') .option('--item ', 'Process specific items in the RSS feed by providing their audio URLs') - .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)', 'newest') - .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt, 0) + .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)') + .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt) .option('--info', 'Generate JSON file with RSS feed information instead of processing items') .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification') .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification') diff --git a/src/llms/ollama.js b/src/llms/ollama.js index 290f58b..6a777d3 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.js @@ -1,7 +1,6 @@ // src/llms/ollama.js import { writeFile } from 'node:fs/promises' -import { Ollama } from 'ollama' /** @import { LLMFunction, LlamaModelType } from '../types.js' */ @@ -21,11 +20,11 @@ const ollamaModels = { QWEN_2_5_7B_MODEL: 'qwen2.5:7b', } -/** @type {LLMFunction} */ /** - * Main function to call the Llama model using the Ollama library. + * Main function to call the Llama model using the Ollama REST API. * This function checks if the model is available, pulls it if necessary, * and then proceeds with the chat. + * @type {LLMFunction} * @param {string} promptAndTranscript - The combined prompt and transcript content. * @param {string} tempPath - The temporary file path to write the LLM output. * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. 
@@ -38,47 +37,42 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' // Get host and port from environment variables or use defaults - const ollamaHost = process.env.OLLAMA_HOST || 'localhost' + const ollamaHost = process.env.OLLAMA_HOST || 'ollama' const ollamaPort = process.env.OLLAMA_PORT || '11434' const baseUrl = `http://${ollamaHost}:${ollamaPort}` - - // Create a new OllamaClient with the baseUrl - const client = new Ollama({ baseUrl }) + console.log(` - Using Ollama model: ${ollamaModelName} at ${baseUrl}`) - - // Check if the model is available - const models = await client.list() - const isAvailable = models.models.some(model => model.name === ollamaModelName) - - // If the model is not available, pull it - if (!isAvailable) { - console.log(`Model ${ollamaModelName} not found. Pulling it now...`) - try { - const pullStream = await client.pull({ model: ollamaModelName, stream: true }) - for await (const part of pullStream) { - console.log(`Pulling ${ollamaModelName}: ${part.status}`) - } - console.log(`Model ${ollamaModelName} successfully pulled.`) - } catch (pullError) { - console.error(`Error pulling model ${ollamaModelName}: ${pullError.message}`) - throw pullError - } - } - + // Call the Ollama chat API - const response = await client.chat({ - model: ollamaModelName, - messages: [{ role: 'user', content: promptAndTranscript }], + console.log(` - Sending chat request to Ollama...`) + const response = await fetch(`${baseUrl}/api/chat`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: ollamaModelName, + messages: [{ role: 'user', content: promptAndTranscript }], + stream: false, + }), }) - // Extract the assistant's reply - const assistantReply = response.message.content + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`) + } + const data = await response.json() + + // Extract the assistant's reply + const assistantReply = data.message.content + console.log(` - Received response from Ollama.`) + // Write the response to the output file await writeFile(tempPath, assistantReply) console.log(`\nResponse saved to ${tempPath}`) } catch (error) { console.error(`Error in callOllama: ${error.message}`) + console.error(`Stack Trace: ${error.stack}`) throw error } } \ No newline at end of file diff --git a/src/types.js b/src/types.js index 1323706..8f1a0e7 100644 --- a/src/types.js +++ b/src/types.js @@ -178,7 +178,7 @@ /** * Represents the options for Language Models (LLMs) that can be used in the application. - * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'gemini'} LLMOption + * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini'} LLMOption * * - `'chatgpt'`: Use OpenAI's ChatGPT models. * - `'claude'`: Use Anthropic's Claude models. @@ -186,6 +186,7 @@ * - `'mistral'`: Use Mistral AI's language models. * - `'octo'`: Use OctoAI's language models. * - `'llama'`: Use Llama models for local inference. + * - `'ollama'`: Use Ollama for processing. * - `'gemini'`: Use Google's Gemini models. 
*/ diff --git a/test/local.test.js b/test/local.test.js index 5093926..67e77f7 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -10,85 +10,90 @@ const commands = [ { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '01---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: 'FILE_01.md' }, { cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02---2024-09-24-ep1-fsjam-podcast-prompt.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '03---2024-09-24-ep0-fsjam-podcast-prompt.md' } + { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: 'FILE_02A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: 'FILE_02B.md' } ] }, { cmd: 'npm run as -- --urls "content/example-urls.md"', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04---2024-09-24-ep1-fsjam-podcast-prompt.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '05---2024-09-24-ep0-fsjam-podcast-prompt.md' } + { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: 'FILE_03A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: 'FILE_03B.md' } ] }, { cmd: 'npm run as -- --file "content/audio.mp3"', expectedFile: 'audio-prompt.md', - newName: '06---audio-prompt.md' + newName: 'FILE_04.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '07---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' + newName: 'FILE_05.md' + }, + { + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama', + expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', + newName: 'FILE_06.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '08---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: 'FILE_07.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '09---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: 'FILE_08.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary mediumChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '10---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: 'FILE_09.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '11---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' + newName: 'FILE_10.md' }, { cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '12---2024-09-24-ep1-fsjam-podcast-llama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '13---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' } + { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_11A.md' }, + { file: 
'2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_11B.md' } ] }, { cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '14---2024-09-24-ep1-fsjam-podcast-llama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '15---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' } + { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_12A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_12B.md' } ] }, { cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', expectedFile: 'audio-llama-shownotes.md', - newName: '16---audio-llama-shownotes.md' + newName: 'FILE_13.md' }, { cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: '17---2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md' + newName: 'FILE_14.md' }, { cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order newest --skip 94 --whisper tiny', expectedFile: '2020-10-27-episode-0-the-fullstack-jamstack-podcast-with-anthony-campolo-and-christopher-burns-prompt.md', - newName: '18---2020-10-27-episode-0-the-fullstack-jamstack-podcast-with-anthony-campolo-and-christopher-burns-prompt.md' + newName: 'FILE_15.md' }, { cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order oldest --skip 94 --whisper tiny', expectedFile: '2023-06-28-episode-94-clerk-with-james-perkins-prompt.md', - newName: '19---2023-06-28-episode-94-clerk-with-james-perkins-prompt.md' + newName: 'FILE_16.md' } ] From 8bb54996b81296bab42c7e1522763d78f7752683 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 4 Oct 2024 03:59:59 -0500 Subject: [PATCH 3/9] refactor --- docs/server.md | 12 +- package.json | 8 +- packages/server/README.md | 12 +- packages/server/routes/file.js | 6 +- packages/server/routes/playlist.js | 6 +- packages/server/routes/rss.js | 6 +- packages/server/routes/urls.js | 6 +- packages/server/routes/video.js | 6 +- packages/server/tests/fetch-all.js | 12 +- packages/server/utils/reqToOpts.js | 24 ++-- src/autoshow.js | 87 ++++--------- src/commands/processFile.js | 12 +- src/commands/processPlaylist.js | 10 +- src/commands/processRSS.js | 20 +-- src/commands/processURLs.js | 10 +- src/commands/processVideo.js | 12 +- src/inquirer.js | 44 +++---- src/llms/chatgpt.js | 14 +- src/llms/claude.js | 14 +- src/llms/cohere.js | 12 +- src/llms/gemini.js | 13 +- src/llms/llama.js | 40 +----- src/llms/mistral.js | 14 +- src/llms/octo.js | 17 +-- src/llms/ollama.js | 40 ++---- src/transcription/assembly.js | 8 +- src/transcription/whisper.js | 134 ++++--------------- src/transcription/whisperDocker.js | 64 +++++++++ src/types.js | 201 ++++++++++++++++++++++------- src/utils/runLLM.js | 46 +++---- src/utils/runTranscription.js | 27 ++-- test/all.test.js | 5 + test/local.test.js | 7 +- 33 files changed, 448 insertions(+), 501 deletions(-) create mode 100644 src/transcription/whisperDocker.js diff --git a/docs/server.md b/docs/server.md index abc9a04..15409f6 100644 --- a/docs/server.md +++ b/docs/server.md @@ -269,14 +269,14 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "deepgram" + "transcriptServices": "deepgram" }' http://localhost:3000/video ``` 
```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "deepgram", + "transcriptServices": "deepgram", "llm": "llama" }' http://localhost:3000/video ``` @@ -286,14 +286,14 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "assembly" + "transcriptServices": "assembly" }' http://localhost:3000/video ``` ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "assembly", + "transcriptServices": "assembly", "llm": "llama" }' http://localhost:3000/video ``` @@ -301,7 +301,7 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - "transcriptService": "assembly", + "transcriptServices": "assembly", "speakerLabels": true }' http://localhost:3000/video ``` @@ -309,7 +309,7 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - "transcriptService": "assembly", + "transcriptServices": "assembly", "speakerLabels": true, "llm": "llama" }' http://localhost:3000/video diff --git a/package.json b/package.json index 7e5ff3c..c31d35d 100644 --- a/package.json +++ b/package.json @@ -23,10 +23,10 @@ "as": "node --env-file=.env --no-warnings src/autoshow.js", "bun-as": "bun --env-file=.env --no-warnings src/autoshow.js", "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.js", - "v": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --video", - "u": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --urls", - "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --playlist", - "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large --file", + "v": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --video", + "u": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --urls", + "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --playlist", + "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --file", "serve": "node --env-file=.env --no-warnings --watch packages/server/index.js", "fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", "fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", diff --git a/packages/server/README.md b/packages/server/README.md index 47fb1d2..5652a9b 100644 --- a/packages/server/README.md +++ b/packages/server/README.md @@ -207,35 +207,35 @@ const TEST_REQ_32 = { const TEST_REQ_33 = { "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "deepgram" + "transcriptServices": "deepgram" } const TEST_REQ_34 = { "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "deepgram", + "transcriptServices": "deepgram", "llm": "llama" } const TEST_REQ_35 = { "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "assembly" + "transcriptServices": "assembly" } const TEST_REQ_36 = { "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptService": "assembly", + "transcriptServices": "assembly", "llm": "llama" } const TEST_REQ_37 = { "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - "transcriptService": "assembly", + "transcriptServices": "assembly", "speakerLabels": true } const TEST_REQ_38 = { "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - 
"transcriptService": "assembly", + "transcriptServices": "assembly", "speakerLabels": true, "llm": "llama" } diff --git a/packages/server/routes/file.js b/packages/server/routes/file.js index 3097765..b7a44f7 100644 --- a/packages/server/routes/file.js +++ b/packages/server/routes/file.js @@ -22,10 +22,10 @@ const handleFileRequest = async (request, reply) => { } // Map request data to processing options - const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('\nCalling processFile with params:', { filePath, llmOpt, transcriptOpt, options }) + const { options, llmServices, transcriptServices } = reqToOpts(requestData) + console.log('\nCalling processFile with params:', { filePath, llmServices, transcriptServices, options }) - await processFile(filePath, llmOpt, transcriptOpt, options) + await processFile(filePath, llmServices, transcriptServices, options) console.log('\nprocessFile completed successfully') reply.send({ message: 'File processed successfully.' }) diff --git a/packages/server/routes/playlist.js b/packages/server/routes/playlist.js index a7e6de6..e1c32dc 100644 --- a/packages/server/routes/playlist.js +++ b/packages/server/routes/playlist.js @@ -22,10 +22,10 @@ const handlePlaylistRequest = async (request, reply) => { } // Map request data to processing options - const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('\nCalling processPlaylist with params:', { playlistUrl, llmOpt, transcriptOpt, options }) + const { options, llmServices, transcriptServices } = reqToOpts(requestData) + console.log('\nCalling processPlaylist with params:', { playlistUrl, llmServices, transcriptServices, options }) - await processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) + await processPlaylist(playlistUrl, llmServices, transcriptServices, options) console.log('\nprocessPlaylist completed successfully') reply.send({ message: 'Playlist processed successfully.' }) diff --git a/packages/server/routes/rss.js b/packages/server/routes/rss.js index f6f8f07..4e28776 100644 --- a/packages/server/routes/rss.js +++ b/packages/server/routes/rss.js @@ -22,10 +22,10 @@ const handleRSSRequest = async (request, reply) => { } // Map request data to processing options - const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('\nCalling processRSS with params:', { rssUrl, llmOpt, transcriptOpt, options }) + const { options, llmServices, transcriptServices } = reqToOpts(requestData) + console.log('\nCalling processRSS with params:', { rssUrl, llmServices, transcriptServices, options }) - await processRSS(rssUrl, llmOpt, transcriptOpt, options) + await processRSS(rssUrl, llmServices, transcriptServices, options) console.log('\nprocessRSS completed successfully') reply.send({ message: 'RSS feed processed successfully.' 
}) diff --git a/packages/server/routes/urls.js b/packages/server/routes/urls.js index d0fec32..45e49ef 100644 --- a/packages/server/routes/urls.js +++ b/packages/server/routes/urls.js @@ -22,10 +22,10 @@ const handleURLsRequest = async (request, reply) => { } // Map request data to processing options - const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('\nCalling processURLs with params:', { filePath, llmOpt, transcriptOpt, options }) + const { options, llmServices, transcriptServices } = reqToOpts(requestData) + console.log('\nCalling processURLs with params:', { filePath, llmServices, transcriptServices, options }) - await processURLs(filePath, llmOpt, transcriptOpt, options) + await processURLs(filePath, llmServices, transcriptServices, options) console.log('\nprocessURLs completed successfully') reply.send({ message: 'URLs processed successfully.' }) diff --git a/packages/server/routes/video.js b/packages/server/routes/video.js index 2d6f7d5..9cc892f 100644 --- a/packages/server/routes/video.js +++ b/packages/server/routes/video.js @@ -22,10 +22,10 @@ const handleVideoRequest = async (request, reply) => { } // Map request data to processing options - const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) - console.log('\nCalling processVideo with params:', { youtubeUrl, llmOpt, transcriptOpt, options }) + const { options, llmServices, transcriptServices } = reqToOpts(requestData) + console.log('\nCalling processVideo with params:', { youtubeUrl, llmServices, transcriptServices, options }) - await processVideo(youtubeUrl, llmOpt, transcriptOpt, options) + await processVideo(youtubeUrl, llmServices, transcriptServices, options) console.log('\nprocessVideo completed successfully') reply.send({ message: 'Video processed successfully.' 
}) diff --git a/packages/server/tests/fetch-all.js b/packages/server/tests/fetch-all.js index 46ae375..d591a93 100644 --- a/packages/server/tests/fetch-all.js +++ b/packages/server/tests/fetch-all.js @@ -287,7 +287,7 @@ const requests = [ { data: { youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', - transcriptService: 'deepgram', + transcriptServices: 'deepgram', }, endpoint: '/video', outputFiles: ['FILE_33.md'], @@ -295,7 +295,7 @@ const requests = [ { data: { youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', - transcriptService: 'deepgram', + transcriptServices: 'deepgram', llm: 'llama', }, endpoint: '/video', @@ -304,7 +304,7 @@ const requests = [ { data: { youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', - transcriptService: 'assembly', + transcriptServices: 'assembly', }, endpoint: '/video', outputFiles: ['FILE_35.md'], @@ -312,7 +312,7 @@ const requests = [ { data: { youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', - transcriptService: 'assembly', + transcriptServices: 'assembly', llm: 'llama', }, endpoint: '/video', @@ -321,7 +321,7 @@ const requests = [ { data: { youtubeUrl: 'https://ajc.pics/audio/fsjam-short.mp3', - transcriptService: 'assembly', + transcriptServices: 'assembly', speakerLabels: true, }, endpoint: '/video', @@ -330,7 +330,7 @@ const requests = [ { data: { youtubeUrl: 'https://ajc.pics/audio/fsjam-short.mp3', - transcriptService: 'assembly', + transcriptServices: 'assembly', speakerLabels: true, llm: 'llama', }, diff --git a/packages/server/utils/reqToOpts.js b/packages/server/utils/reqToOpts.js index be9f654..ee7e42f 100644 --- a/packages/server/utils/reqToOpts.js +++ b/packages/server/utils/reqToOpts.js @@ -3,7 +3,7 @@ // Function to map request data to processing options function reqToOpts(requestData) { // Define possible options - const [llmOptions, transcriptOptions, otherOptions] = [ + const [llmServices, transcriptServices, otherOptions] = [ // List of supported LLM options ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'], // List of supported transcript services @@ -14,33 +14,29 @@ function reqToOpts(requestData) { // Initialize options object const options = {} - // Initialize llm option - let llmOpt = null - // Initialize transcript option - let transcriptOpt = null // Check if LLM is provided and valid - if (requestData.llm && llmOptions.includes(requestData.llm)) { - // Set llmOpt - llmOpt = requestData.llm + if (requestData.llm && llmServices.includes(requestData.llm)) { + // Set llmServices + llmServices = requestData.llm // Set LLM model or true - options[llmOpt] = requestData.llmModel || true + options[llmServices] = requestData.llmModel || true } // Determine transcript service and default to 'whisper' if not specified - transcriptOpt = transcriptOptions.includes(requestData.transcriptService) - ? requestData.transcriptService + transcriptServices = transcriptServices.includes(requestData.transcriptServices) + ? 
requestData.transcriptServices : 'whisper' // Set transcript options - if (transcriptOpt === 'whisper') { + if (transcriptServices === 'whisper') { // Set whisper model options.whisperModel = requestData.whisperModel || 'base' // Enable whisper option options.whisper = options.whisperModel } else { // Enable selected transcript service - options[transcriptOpt] = true + options[transcriptServices] = true } // Map other options from request data @@ -52,7 +48,7 @@ function reqToOpts(requestData) { } // Return mapped options - return { options, llmOpt, transcriptOpt } + return { options, llmServices, transcriptServices } } export { reqToOpts } \ No newline at end of file diff --git a/src/autoshow.js b/src/autoshow.js index 48a5728..c222d04 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -21,7 +21,7 @@ import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv, exit } from 'node:process' -/** @import { ProcessingOptions, HandlerFunction, LLMOption, TranscriptOption } from './types.js' */ +/** @import { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' */ // Initialize the command-line interface const program = new Command() @@ -79,6 +79,22 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues program.action(async (options) => { console.log(`Options received: ${JSON.stringify(options, null, 2)}`) + /** + * Map actions to their respective handler functions + * @type {Object.} + */ + const PROCESS_HANDLERS = { + video: processVideo, + playlist: processPlaylist, + urls: processURLs, + file: processFile, + rss: processRSS, + } + + const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] + const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] + const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] + // Determine if no action options were provided const { video, playlist, urls, file, rss, interactive } = options const noActionProvided = [video, playlist, urls, file, rss].every((opt) => !opt) @@ -87,8 +103,7 @@ program.action(async (options) => { if (interactive) { options = await handleInteractivePrompt(options) } else if (noActionProvided) { - console.error('Error: No input provided. Please specify an option. Use --help to see available options.') - program.help({ error: true }) + options = await handleInteractivePrompt(options) } // Ensure options.item is an array if provided via command line @@ -96,79 +111,31 @@ program.action(async (options) => { options.item = [options.item] } - /** - * Map actions to their respective handler functions - * @type {Object.} - */ - const handlers = { - video: processVideo, - playlist: processPlaylist, - urls: processURLs, - file: processFile, - rss: processRSS, - } - - // Count the number of action options provided - const actionOptions = ['video', 'playlist', 'urls', 'file', 'rss'] - const actionsProvided = actionOptions.filter((opt) => options[opt]) - - // If more than one action option is provided, show an error + const actionsProvided = ACTION_OPTIONS.filter((opt) => options[opt]) if (actionsProvided.length > 1) { - console.error(`Error: Multiple input options provided (${actionsProvided.join( - ', ' - )}). Please specify only one input option.` - ) + console.error(`Error: Multiple input options provided (${actionsProvided.join(', ')}). 
Please specify only one input option.`) exit(1) } - /** - * Determine the selected LLM option - * @type {LLMOption | undefined} - */ - const llmOptions = [ - 'chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini', - ] - const selectedLLMs = llmOptions.filter((opt) => options[opt]) + const selectedLLMs = LLM_OPTIONS.filter((opt) => options[opt]) if (selectedLLMs.length > 1) { - console.error(`Error: Multiple LLM options provided (${selectedLLMs.join( - ', ' - )}). Please specify only one LLM option.` - ) + console.error(`Error: Multiple LLM options provided (${selectedLLMs.join(', ')}). Please specify only one LLM option.`) exit(1) } - const llmOpt = /** @type {LLMOption | undefined} */ (selectedLLMs[0]) + const llmServices = /** @type {LLMServices | undefined} */ (selectedLLMs[0]) - /** - * Determine the transcription service to use - * @type {TranscriptOption | undefined} - */ - const transcriptOptions = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] - const selectedTranscripts = transcriptOptions.filter((opt) => options[opt]) + const selectedTranscripts = TRANSCRIPT_OPTIONS.filter((opt) => options[opt]) if (selectedTranscripts.length > 1) { console.error(`Error: Multiple transcription options provided (${selectedTranscripts.join(', ')}). Please specify only one transcription option.`) exit(1) } - let transcriptOpt = /** @type {TranscriptOption | undefined} */ (selectedTranscripts[0]) - - // Standardize the transcription option names - if (transcriptOpt === 'whisper-docker') { - transcriptOpt = 'whisperDocker' - } else if (transcriptOpt === 'whisper') { - transcriptOpt = 'whisper' - } - - // Extract the Whisper model if using Whisper transcription - let whisperModel - if (transcriptOpt === 'whisper' || transcriptOpt === 'whisperDocker') { - whisperModel = options[transcriptOpt] || 'base' // Default to 'base' if no model specified - options.whisperModel = whisperModel // Add this line - } + const transcriptServices = /** @type {TranscriptServices | undefined} */ (selectedTranscripts[0]) // Execute the appropriate handler based on the action - for (const [key, handler] of Object.entries(handlers)) { + for (const [key, handler] of Object.entries(PROCESS_HANDLERS)) { if (options[key]) { try { - await handler(options[key], llmOpt, transcriptOpt, options) + await handler(options[key], llmServices, transcriptServices, options) exit(0) // Successful execution } catch (error) { console.error(`Error processing ${key}:`, error.message) diff --git a/src/commands/processFile.js b/src/commands/processFile.js index 3616328..39ea8fe 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -6,17 +6,17 @@ import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to process a local audio or video file. * @param {string} filePath - The path to the local file to process. - * @param {LLMOption} [llmOpt] - The selected Language Model option. - * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ -export async function processFile(filePath, llmOpt, transcriptOpt, options) { +export async function processFile(filePath, llmServices, transcriptServices, options) { try { // Generate markdown for the file const { frontMatter, finalPath, filename } = await generateFileMarkdown(filePath) @@ -25,10 +25,10 @@ export async function processFile(filePath, llmOpt, transcriptOpt, options) { await downloadFileAudio(filePath, filename) // Run transcription on the file - await runTranscription(finalPath, transcriptOpt, options, frontMatter) + await runTranscription(finalPath, transcriptServices, options, frontMatter) // Process the transcript with the selected Language Model - await runLLM(finalPath, frontMatter, llmOpt, options) + await runLLM(finalPath, frontMatter, llmServices, options) // Clean up temporary files if the noCleanUp option is not set if (!options.noCleanUp) { diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index 1b427f9..9554a09 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -7,19 +7,19 @@ import { promisify } from 'node:util' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' -/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ const execFilePromise = promisify(execFile) /** * Main function to process a YouTube playlist. * @param {string} playlistUrl - The URL of the YouTube playlist to process. - * @param {LLMOption} [llmOpt] - The selected Language Model option. - * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ -export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) { +export async function processPlaylist(playlistUrl, llmServices, transcriptServices, options) { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -63,7 +63,7 @@ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, option for (const [index, url] of urls.entries()) { console.log(`\nProcessing video ${index + 1}/${urls.length}: ${url}`) try { - await processVideo(url, llmOpt, transcriptOpt, options) + await processVideo(url, llmServices, transcriptServices, options) } catch (error) { console.error(`Error processing video ${url}: ${error.message}`) // Continue processing the next video diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index 36c204c..f042af5 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -8,7 +8,7 @@ import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -/** @import { LLMOption, TranscriptOption, ProcessingOptions, RSSItem } from '../types.js' */ +/** @import { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' */ // Initialize XML parser with specific options const parser = new XMLParser({ @@ -20,12 +20,12 @@ const parser = new XMLParser({ /** * Process a single item from the RSS feed. * @param {RSSItem} item - The item to process. 
- * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. - * @param {LLMOption} [llmOpt] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ -async function processItem(item, transcriptOpt, llmOpt, options) { +async function processItem(item, transcriptServices, llmServices, options) { try { // Generate markdown for the item const { frontMatter, finalPath, filename } = await generateRSSMarkdown(item) @@ -34,10 +34,10 @@ async function processItem(item, transcriptOpt, llmOpt, options) { await downloadAudio(item.showLink, filename) // Run transcription - await runTranscription(finalPath, transcriptOpt, options, frontMatter) + await runTranscription(finalPath, transcriptServices, options, frontMatter) // Process with Language Model - await runLLM(finalPath, frontMatter, llmOpt, options) + await runLLM(finalPath, frontMatter, llmServices, options) // Clean up temporary files if necessary if (!options.noCleanUp) { @@ -54,12 +54,12 @@ async function processItem(item, transcriptOpt, llmOpt, options) { /** * Main function to process an RSS feed. * @param {string} rssUrl - The URL of the RSS feed to process. - * @param {LLMOption} [llmOpt] - The selected Language Model option. - * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ -export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { +export async function processRSS(rssUrl, llmServices, transcriptServices, options) { try { if (options.item && options.item.length > 0) { // If specific items are provided, list them @@ -172,7 +172,7 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { console.log(`\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title}`) - await processItem(item, transcriptOpt, llmOpt, options) + await processItem(item, transcriptServices, llmServices, options) } console.log('\n\nRSS feed processing completed successfully.\n') diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index b509bf2..309f2d6 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -6,17 +6,17 @@ import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' -/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to process URLs from a file. * @param {string} filePath - The path to the file containing URLs. - * @param {LLMOption} [llmOpt] - The selected Language Model option. - * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. 
* @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ -export async function processURLs(filePath, llmOpt, transcriptOpt, options) { +export async function processURLs(filePath, llmServices, transcriptServices, options) { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -54,7 +54,7 @@ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { for (const [index, url] of urls.entries()) { console.log(`\nProcessing URL ${index + 1}/${urls.length}: ${url}`) try { - await processVideo(url, llmOpt, transcriptOpt, options) + await processVideo(url, llmServices, transcriptServices, options) } catch (error) { console.error(`Error processing URL ${url}: ${error.message}`) // Continue processing the next URL diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index 88c2227..84bea6a 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -7,17 +7,17 @@ import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -/** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to process a single video. * @param {string} url - The URL of the video to process. - * @param {LLMOption} [llmOpt] - The selected Language Model option. - * @param {TranscriptOption} [transcriptOpt] - The transcription service to use. + * @param {LLMServices} [llmServices] - The selected Language Model option. + * @param {TranscriptServices} [transcriptServices] - The transcription service to use. * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ -export async function processVideo(url, llmOpt, transcriptOpt, options) { +export async function processVideo(url, llmServices, transcriptServices, options) { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -29,10 +29,10 @@ export async function processVideo(url, llmOpt, transcriptOpt, options) { await downloadAudio(url, filename) // Run transcription on the audio - await runTranscription(finalPath, transcriptOpt, options, frontMatter) + await runTranscription(finalPath, frontMatter, transcriptServices, options) // Process the transcript with the selected Language Model - await runLLM(finalPath, frontMatter, llmOpt, options) + await runLLM(finalPath, frontMatter, llmServices, options) // Clean up temporary files if the noCleanUp option is not set if (!options.noCleanUp) { diff --git a/src/inquirer.js b/src/inquirer.js index 5e1e74d..73caccb 100644 --- a/src/inquirer.js +++ b/src/inquirer.js @@ -72,17 +72,18 @@ const INQUIRER_PROMPT = [ }, { type: 'list', - name: 'llmOpt', + name: 'llmServices', message: 'Select the Language Model (LLM) you want to use:', choices: [ + { name: 'Skip LLM Processing', value: null }, + { name: 'node-llama-cpp (local inference)', value: 'llama' }, + { name: 'Ollama (local inference)', value: 'ollama' }, { name: 'OpenAI ChatGPT', value: 'chatgpt' }, { name: 'Anthropic Claude', value: 'claude' }, + { name: 'Google Gemini', value: 'gemini' }, { name: 'Cohere', value: 'cohere' }, { name: 'Mistral', value: 'mistral' }, { name: 'OctoAI', value: 'octo' }, - { name: 'node-llama-cpp (local inference)', value: 'llama' }, - { name: 'Google Gemini', value: 'gemini' }, - { name: 'Skip LLM Processing', value: null }, ], }, { @@ -95,38 +96,32 @@ const INQUIRER_PROMPT = [ { name: 'GEMMA 2 2B Q4 Model', value: 'GEMMA_2_2B_Q4_MODEL' }, { name: 'GEMMA 2 2B Q6 Model', value: 'GEMMA_2_2B_Q6_MODEL' }, ], - when: (answers) => answers.llmOpt === 'llama', + when: (answers) => answers.llmServices === 'llama', }, { type: 'list', - name: 'transcriptOpt', + name: 'transcriptServices', message: 'Select the transcription service you want to use:', choices: [ { name: 'Whisper.cpp', value: 'whisper' }, + { name: 'Whisper.cpp (Docker)', value: 'whisperDocker' }, { name: 'Deepgram', value: 'deepgram' }, { name: 'AssemblyAI', value: 'assembly' }, ], }, - { - type: 'confirm', - name: 'useDocker', - message: 'Do you want to run Whisper.cpp in a Docker container?', - when: (answers) => answers.transcriptOpt === 'whisper', - default: false, - }, { type: 'list', name: 'whisperModel', message: 'Select the Whisper model type:', choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2'], - when: (answers) => answers.transcriptOpt === 'whisper', - default: 'large', + when: (answers) => answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker', + default: 'large-v2', }, { type: 'confirm', name: 'speakerLabels', message: 'Do you want to use speaker labels?', - when: (answers) => answers.transcriptOpt === 'assembly', + when: (answers) => answers.transcriptServices === 'assembly', default: false, }, { @@ -179,19 +174,15 @@ export async function handleInteractivePrompt(options) { } // Handle LLM options - if (answers.llmOpt) { - options[answers.llmOpt] = answers.llmOpt === 'llama' ? answers.llamaModel : true + if (answers.llmServices) { + options[answers.llmServices] = answers.llmServices === 'llama' ? 
answers.llamaModel : true } // Handle transcription options - if (answers.transcriptOpt === 'whisper') { - if (answers.useDocker) { - options.whisperDocker = /** @type {WhisperModelType} */ (answers.whisperModel) - } else { - options.whisper = /** @type {WhisperModelType} */ (answers.whisperModel) - } + if (answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker') { + options[answers.transcriptServices] = /** @type {WhisperModelType} */ (answers.whisperModel) } else { - options[answers.transcriptOpt] = true + options[answers.transcriptServices] = true } // Handle 'item' for RSS feed @@ -202,9 +193,6 @@ export async function handleInteractivePrompt(options) { // Remove properties that are not options delete options.action delete options.specifyItem - delete options.llamaModel - delete options.useDocker - delete options.whisperModel delete options.confirmAction return options diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index ec21221..91e952e 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -3,20 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' +import { GPT_MODELS } from '../types.js' /** @import { LLMFunction, ChatGPTModelType } from '../types.js' */ -/** - * Map of ChatGPT model identifiers to their API names - * @type {Record} - */ -const gptModel = { - GPT_4o_MINI: "gpt-4o-mini", - GPT_4o: "gpt-4o", - GPT_4_TURBO: "gpt-4-turbo", - GPT_4: "gpt-4", -} - /** @type {LLMFunction} */ /** * Main function to call ChatGPT API. @@ -37,7 +27,7 @@ export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o try { // Select the actual model to use, defaulting to GPT_4o_MINI if not specified - const actualModel = gptModel[model] || gptModel.GPT_4o_MINI + const actualModel = GPT_MODELS[model] || GPT_MODELS.GPT_4o_MINI // Call the OpenAI chat completions API const response = await openai.chat.completions.create({ diff --git a/src/llms/claude.js b/src/llms/claude.js index 69518a2..bb5f7bd 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -3,20 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' +import { CLAUDE_MODELS } from '../types.js' /** @import { LLMFunction, ClaudeModelType } from '../types.js' */ -/** - * Map of Claude model identifiers to their API names - * @type {Record} - */ -const claudeModel = { - CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", - CLAUDE_3_OPUS: "claude-3-opus-20240229", - CLAUDE_3_SONNET: "claude-3-sonnet-20240229", - CLAUDE_3_HAIKU: "claude-3-haiku-20240307", -} - /** @type {LLMFunction} */ /** * Main function to call Claude API. 
@@ -37,7 +27,7 @@ export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_ try { // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified - const actualModel = claudeModel[model] || claudeModel.CLAUDE_3_HAIKU + const actualModel = CLAUDE_MODELS[model] || CLAUDE_MODELS.CLAUDE_3_HAIKU // Call the Anthropic messages API to create a chat completion const response = await anthropic.messages.create({ diff --git a/src/llms/cohere.js b/src/llms/cohere.js index 0a84c01..0bfc8e3 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -3,18 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' +import { COHERE_MODELS } from '../types.js' /** @import { LLMFunction, CohereModelType } from '../types.js' */ -/** - * Map of Cohere model identifiers to their API names - * @type {Record} - */ -const cohereModel = { - COMMAND_R: "command-r", // Standard Command model - COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model -} - /** @type {LLMFunction} */ /** * Main function to call Cohere API. @@ -35,7 +27,7 @@ export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND try { // Select the actual model to use, defaulting to COMMAND_R if not specified - const actualModel = cohereModel[model] || cohereModel.COMMAND_R + const actualModel = COHERE_MODELS[model] || COHERE_MODELS.COMMAND_R // Call the Cohere chat API const response = await cohere.chat({ diff --git a/src/llms/gemini.js b/src/llms/gemini.js index 092e83e..15a8c44 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -3,19 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" +import { GEMINI_MODELS } from '../types.js' /** @import { LLMFunction, GeminiModelType } from '../types.js' */ -/** - * Map of Gemini model identifiers to their API names - * @type {Record} - */ -const geminiModel = { - GEMINI_1_5_FLASH: "gemini-1.5-flash", - // GEMINI_1_5_PRO: "gemini-1.5-pro", - GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", -} - /** * Utility function to introduce a delay * @param {number} ms - Milliseconds to delay @@ -41,7 +32,7 @@ export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_ const genAI = new GoogleGenerativeAI(env.GEMINI_API_KEY) // Select the actual model to use, defaulting to GEMINI_1_5_FLASH if not specified - const actualModel = geminiModel[model] || geminiModel.GEMINI_1_5_FLASH + const actualModel = GEMINI_MODELS[model] || GEMINI_MODELS.GEMINI_1_5_FLASH const maxRetries = 3 // Maximum number of retry attempts diff --git a/src/llms/llama.js b/src/llms/llama.js index e469c6e..e110027 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -5,47 +5,21 @@ import { getLlama, LlamaChatSession } from "node-llama-cpp" import { existsSync } from 'node:fs' import { exec } from 'node:child_process' import { promisify } from 'node:util' +import { LLAMA_MODELS } from '../types.js' const execAsync = promisify(exec) -/** @import { LLMFunction, LlamaModelType } from '../types.js' */ - -/** - * Map of local model identifiers to their filenames and URLs - * @type {Record} - */ -const localModels = { - // LLAMA_3_2_1B_Q6_MODEL: { - // filename: "Llama-3.2-1B.i1-Q6_K.gguf", - // url: "https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" - // }, - LLAMA_3_1_8B_Q4_MODEL: { - filename: "Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf", - url: 
"https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf" - }, - LLAMA_3_1_8B_Q6_MODEL: { - filename: "Meta-Llama-3.1-8B-Instruct.Q6_K.gguf", - url: "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.gguf" - }, - GEMMA_2_2B_Q4_MODEL: { - filename: "gemma-2-2b-it-IQ4_XS.gguf", - url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-IQ4_XS.gguf" - }, - GEMMA_2_2B_Q6_MODEL: { - filename: "gemma-2-2b-it-Q6_K.gguf", - url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" - } -} +/** @import { LlamaModelType } from '../types.js' */ /** * Function to download the model if it doesn't exist. - * @param {LlamaModelType} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. + * @param {LlamaModelType} [modelName='GEMMA_2_2B'] - The name of the model to use. * @returns {Promise} - The path to the downloaded model. * @throws {Error} - If the model download fails. */ -async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { - // Get the model object from localModels using the provided modelName or default to GEMMA_2_2B_Q4_MODEL - const model = localModels[modelName] || localModels.GEMMA_2_2B_Q4_MODEL +async function downloadModel(modelName = 'GEMMA_2_2B') { + // Get the model object from LLAMA_MODELS using the provided modelName or default to GEMMA_2_2B + const model = LLAMA_MODELS[modelName] || LLAMA_MODELS.GEMMA_2_2B console.log(` - ${model.filename} model selected.`) // If no valid model is found, throw an error @@ -96,7 +70,7 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { export async function callLlama(promptAndTranscript, tempPath, modelName = true) { try { // If modelName is true or not provided, use the default model - const actualModelName = modelName === true ? 'GEMMA_2_2B_Q4_MODEL' : modelName + const actualModelName = modelName === true ? 'GEMMA_2_2B' : modelName // Ensure the model is downloaded const modelPath = await downloadModel(actualModelName) diff --git a/src/llms/mistral.js b/src/llms/mistral.js index bb6ae18..d2d5897 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -3,20 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' +import { MISTRAL_MODELS } from '../types.js' /** @import { LLMFunction, MistralModelType } from '../types.js' */ -/** - * Map of Mistral model identifiers to their API names - * @type {Record} - */ -const mistralModel = { - MIXTRAL_8x7b: "open-mixtral-8x7b", - MIXTRAL_8x22b: "open-mixtral-8x22b", - MISTRAL_LARGE: "mistral-large-latest", - MISTRAL_NEMO: "open-mistral-nemo" -} - /** @type {LLMFunction} */ /** * Main function to call Mistral AI API. 
@@ -36,7 +26,7 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA try { // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found - const actualModel = mistralModel[model] || mistralModel.MISTRAL_NEMO + const actualModel = MISTRAL_MODELS[model] || MISTRAL_MODELS.MISTRAL_NEMO console.log(`\nUsing Mistral model: ${actualModel}`) // Make API call to Mistral AI for chat completion diff --git a/src/llms/octo.js b/src/llms/octo.js index 29faaa5..e8dca18 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -3,23 +3,10 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OctoAIClient } from '@octoai/sdk' +import { OCTO_MODELS } from '../types.js' /** @import { LLMFunction, OctoModelType } from '../types.js' */ -/** - * Map of OctoAI model identifiers to their API names - * @type {Record} - */ -const octoModel = { - LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", - LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", - LLAMA_3_1_405B: "meta-llama-3.1-405b-instruct", - MISTRAL_7B: "mistral-7b-instruct", - MIXTRAL_8X_7B: "mixtral-8x7b-instruct", - NOUS_HERMES_MIXTRAL_8X_7B: "nous-hermes-2-mixtral-8x7b-dpo", - WIZARD_2_8X_22B: "wizardlm-2-8x22b", -} - /** @type {LLMFunction} */ /** * Main function to call OctoAI API. @@ -39,7 +26,7 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 try { // Select the actual model to use, defaulting to LLAMA_3_1_70B if the specified model is not found - const actualModel = octoModel[model] || octoModel.LLAMA_3_1_70B + const actualModel = OCTO_MODELS[model] || OCTO_MODELS.LLAMA_3_1_70B console.log(`\nUsing OctoAI model: ${actualModel}`) // Make API call to OctoAI for text generation diff --git a/src/llms/ollama.js b/src/llms/ollama.js index 6a777d3..60a31ec 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.js @@ -1,24 +1,10 @@ // src/llms/ollama.js import { writeFile } from 'node:fs/promises' +import { env } from 'node:process' +import { OLLAMA_MODELS } from '../types.js' -/** @import { LLMFunction, LlamaModelType } from '../types.js' */ - -/** - * Map of model identifiers to their corresponding names in Ollama - * @type {Record} - */ -const ollamaModels = { - LLAMA_3_2_1B_MODEL: 'llama3.2:1b', - LLAMA_3_2_3B_MODEL: 'llama3.2:3b', - LLAMA_3_1_8B_MODEL: 'llama3.1:8b', - GEMMA_2_2B_MODEL: 'gemma2:2b', - GEMMA_2_9B_MODEL: 'gemma2:9b', - PHI_3_5_MODEL: 'phi3.5:3.8b', - QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', - QWEN_2_5_3B_MODEL: 'qwen2.5:3b', - QWEN_2_5_7B_MODEL: 'qwen2.5:7b', -} +/** @import { LLMFunction, OllamaModelType } from '../types.js' */ /** * Main function to call the Llama model using the Ollama REST API. @@ -27,25 +13,23 @@ const ollamaModels = { * @type {LLMFunction} * @param {string} promptAndTranscript - The combined prompt and transcript content. * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. + * @param {OllamaModelType} [modelName='LLAMA_3_2_1B'] - The name of the model to use. * @returns {Promise} * @throws {Error} - If an error occurs during processing. 
*/ -export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { +export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B') { try { // Map the model name to the Ollama model identifier - const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' + const ollamaModelName = OLLAMA_MODELS[modelName] || 'llama3.2:1b' // Get host and port from environment variables or use defaults - const ollamaHost = process.env.OLLAMA_HOST || 'ollama' - const ollamaPort = process.env.OLLAMA_PORT || '11434' - const baseUrl = `http://${ollamaHost}:${ollamaPort}` - - console.log(` - Using Ollama model: ${ollamaModelName} at ${baseUrl}`) + const ollamaHost = env.OLLAMA_HOST || 'ollama' + const ollamaPort = env.OLLAMA_PORT || '11434' + console.log(` - Using Ollama model: ${ollamaModelName} at http://${ollamaHost}:${ollamaPort}`) // Call the Ollama chat API console.log(` - Sending chat request to Ollama...`) - const response = await fetch(`${baseUrl}/api/chat`, { + const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -63,11 +47,9 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA const data = await response.json() - // Extract the assistant's reply + // Extract the assistant's reply and write the response to the output file const assistantReply = data.message.content console.log(` - Received response from Ollama.`) - - // Write the response to the output file await writeFile(tempPath, assistantReply) console.log(`\nResponse saved to ${tempPath}`) } catch (error) { diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index 4dad688..03f02e1 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -4,17 +4,17 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { AssemblyAI } from 'assemblyai' -/** @import { TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to handle transcription using AssemblyAI. * @param {string} finalPath - The identifier used for naming output files. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {TranscriptServices} transcriptServices - The transcription service to use. * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callAssembly(finalPath, transcriptOpt, options) { +export async function callAssembly(finalPath, transcriptServices, options) { // Check if the ASSEMBLY_API_KEY environment variable is set if (!env.ASSEMBLY_API_KEY) { throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') @@ -26,7 +26,7 @@ export async function callAssembly(finalPath, transcriptOpt, options) { try { const { speakerLabels } = options console.log(`Parameters passed to callAssembly:`) - console.log(` - finalPath: ${finalPath}\n - transcriptOpt: ${transcriptOpt}\n - speakerLabels: ${speakerLabels}`) + console.log(` - finalPath: ${finalPath}\n - transcriptServices: ${transcriptServices}\n - speakerLabels: ${speakerLabels}`) // Request transcription from AssemblyAI const transcript = await client.transcripts.transcribe({ audio: `${finalPath}.wav`, // The audio file to transcribe diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index 5bc3168..50d3d2a 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -3,146 +3,58 @@ import { readFile, writeFile, access } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' -import { basename, join } from 'node:path' +import { WHISPER_MODELS } from '../types.js' const execPromise = promisify(exec) -/** @import { TranscriptOption, ProcessingOptions, WhisperModelType } from '../types.js' */ - -/** - * Define available Whisper models - * @type {Record} - */ -const WHISPER_MODELS = { - tiny: 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', - base: 'ggml-base.bin', 'base.en': 'ggml-base.en.bin', - small: 'ggml-small.bin', 'small.en': 'ggml-small.en.bin', - medium: 'ggml-medium.bin', 'medium.en': 'ggml-medium.en.bin', - 'large-v1': 'ggml-large-v1.bin', 'large-v2': 'ggml-large-v2.bin', - large: 'ggml-large-v2.bin', -} +/** @import { ProcessingOptions } from '../types.js' */ /** * Main function to handle transcription using Whisper. * @param {string} finalPath - The base path for the files. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callWhisper(finalPath, transcriptOpt, options) { +export async function callWhisper(finalPath, options) { try { - // Get the whisper model from options or use 'base' as default const whisperModel = options.whisperModel || 'base' - // Check if the selected model is valid - if (!(whisperModel in WHISPER_MODELS)) throw new Error(`Unknown model type: ${whisperModel}`) + if (!(whisperModel in WHISPER_MODELS)) { + throw new Error(`Unknown model type: ${whisperModel}`) + } - // Get the model file name const modelName = WHISPER_MODELS[whisperModel] - - // Adjust download model name for 'large' model const downloadModelName = whisperModel === 'large' ? 'large-v2' : whisperModel + const modelPath = `./whisper.cpp/models/${modelName}` - // Call appropriate Whisper function based on transcriptOpt - await (transcriptOpt === 'whisperDocker' ? 
callWhisperDocker : callWhisperMain)(finalPath, modelName, downloadModelName) + // Setup Whisper + await access('./whisper.cpp').catch(async () => { + await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp && cp .github/whisper.Dockerfile whisper.cpp/Dockerfile') + }) - // Read the generated LRC file + // Ensure model is downloaded + await access(modelPath).catch(async () => { + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${downloadModelName}`) + }) + + // Run transcription + await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) + console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) + + // Process transcript const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') - - // Process and format the LRC content const txtContent = lrcContent.split('\n') - .filter(line => !line.startsWith('[by:whisper.cpp]')) // Remove whisper.cpp attribution - .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) // Simplify timestamp format + .filter(line => !line.startsWith('[by:whisper.cpp]')) + .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) .join('\n') - // Write the formatted content to a text file await writeFile(`${finalPath}.txt`, txtContent) - - // Log completion message console.log(` - Transcript transformation completed at ${finalPath}.txt`) - // Return the processed content return txtContent } catch (error) { - // Log any errors and exit the process console.error('Error in callWhisper:', error) process.exit(1) } -} - -/** - * Function to handle Whisper transcription using Docker. - * @param {string} finalPath - The base path for the files. - * @param {string} modelName - The model file name. - * @param {WhisperModelType} whisperModel - The Whisper model type. - * @returns {Promise} - * @throws {Error} - If an error occurs during Docker transcription. - */ -async function callWhisperDocker(finalPath, modelName, downloadModelName) { - // Define constants for Docker setup - const WHISPER_CONTAINER_NAME = 'autoshow-whisper-1' - const CONTENT_DIR = '/app/content' - const MODELS_DIR = '/app/models' - const modelPathContainer = `${MODELS_DIR}/${modelName}` - - try { - // Check if Whisper container is running, start it if not - await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) - .catch(() => execPromise('docker-compose up -d whisper')) - - // Check if the model exists in the container, download if not - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) - .catch(() => execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${downloadModelName}`)) - - // Get the base filename - const fileName = basename(finalPath) - - // Execute Whisper transcription in Docker - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(CONTENT_DIR, `${fileName}.wav`)} -of ${join(CONTENT_DIR, fileName)} --output-lrc`) - - // Log completion message - console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) - } catch (error) { - // Log any errors and re-throw - console.error('Error in callWhisperDocker:', error) - throw error - } -} - -/** - * Function to handle Whisper transcription without Docker. - * @param {string} finalPath - The base path for the files. - * @param {string} modelName - The model file name. 
- * @param {WhisperModelType} whisperModel - The Whisper model type. - * @returns {Promise} - * @throws {Error} - If an error occurs during transcription. - */ -async function callWhisperMain(finalPath, modelName, downloadModelName) { - // Define the path for the Whisper model - const modelPath = `./whisper.cpp/models/${modelName}` - - try { - // Check if whisper.cpp directory exists, clone and build if not - await access('./whisper.cpp').catch(async () => { - // Clone, build, and setup whisper.cpp - await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp && cp .github/whisper.Dockerfile whisper.cpp/Dockerfile') - }) - - // Check if the model exists locally, download if not - await access(modelPath).catch(async () => { - // Download the model - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${downloadModelName}`) - }) - - // Execute Whisper transcription - await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) - - // Log completion message - console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) - } catch (error) { - // Log any errors and re-throw - console.error('Error in callWhisperMain:', error) - throw error - } } \ No newline at end of file diff --git a/src/transcription/whisperDocker.js b/src/transcription/whisperDocker.js new file mode 100644 index 0000000..36b34c6 --- /dev/null +++ b/src/transcription/whisperDocker.js @@ -0,0 +1,64 @@ +// src/transcription/whisperDocker.js + +import { readFile, writeFile } from 'node:fs/promises' +import { exec } from 'node:child_process' +import { promisify } from 'node:util' +import { basename, join } from 'node:path' +import { WHISPER_MODELS } from '../types.js' + +const execPromise = promisify(exec) + +/** @import { ProcessingOptions } from '../types.js' */ + +/** + * Main function to handle transcription using Whisper Docker. + * @param {string} finalPath - The base path for the files. + * @param {ProcessingOptions} options - Additional processing options. + * @returns {Promise} - Returns the formatted transcript content. + * @throws {Error} - If an error occurs during transcription. + */ +export async function callWhisperDocker(finalPath, options) { + try { + const whisperModel = options.whisperModel || 'base' + + if (!(whisperModel in WHISPER_MODELS)) { + throw new Error(`Unknown model type: ${whisperModel}`) + } + + const modelName = WHISPER_MODELS[whisperModel] + const downloadModelName = whisperModel === 'large' ? 
'large-v2' : whisperModel + + const CONTAINER_NAME = 'autoshow-whisper-1' + const modelPathContainer = `/app/models/${modelName}` + + // Ensure container is running + await execPromise(`docker ps | grep ${CONTAINER_NAME}`) + .catch(() => execPromise('docker-compose up -d whisper')) + + // Ensure model is downloaded + await execPromise(`docker exec ${CONTAINER_NAME} test -f ${modelPathContainer}`) + .catch(() => execPromise(`docker exec ${CONTAINER_NAME} /app/models/download-ggml-model.sh ${downloadModelName}`)) + + // Run transcription + const fileName = basename(finalPath) + await execPromise( + `docker exec ${CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(`/app/content`, `${fileName}.wav`)} -of ${join(`/app/content`, fileName)} --output-lrc` + ) + console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) + + // Process transcript + const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + const txtContent = lrcContent.split('\n') + .filter(line => !line.startsWith('[by:whisper.cpp]')) + .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) + .join('\n') + + await writeFile(`${finalPath}.txt`, txtContent) + console.log(` - Transcript transformation completed at ${finalPath}.txt`) + + return txtContent + } catch (error) { + console.error('Error in callWhisperDocker:', error) + process.exit(1) + } +} \ No newline at end of file diff --git a/src/types.js b/src/types.js index 8f1a0e7..29bc7b1 100644 --- a/src/types.js +++ b/src/types.js @@ -24,11 +24,11 @@ * @property {string} [cohere] - Cohere model to use (e.g., 'COMMAND_R_PLUS'). * @property {string} [mistral] - Mistral model to use (e.g., 'MISTRAL_LARGE'). * @property {string} [octo] - OctoAI model to use (e.g., 'LLAMA_3_1_8B'). - * @property {string} [llama] - Llama model to use for local inference (e.g., 'LLAMA_3_1_8B_Q4_MODEL'). + * @property {string} [llama] - Llama model to use for local inference (e.g., 'LLAMA_3_1_8B_Q4'). * @property {string} [gemini] - Gemini model to use (e.g., 'GEMINI_1_5_FLASH'). * @property {string[]} [prompt] - Array of prompt sections to include (e.g., ['titles', 'summary']). - * @property {LLMOption} [llmOpt] - The selected LLM option. - * @property {TranscriptOption} [transcriptOpt] - The selected transcription option. + * @property {LLMServices} [llmServices] - The selected LLM option. + * @property {TranscriptServices} [transcriptServices] - The selected transcription option. * @property {string} [llamaModel] - Specific Llama model to use. * @property {number} [skip] - Number of items to skip in RSS feed processing. * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). @@ -46,9 +46,9 @@ * @property {string} [rss] - RSS feed URL provided by the user. * @property {boolean} [specifyItem] - Whether the user wants to specify specific RSS items. * @property {string} [item] - Comma-separated audio URLs of specific RSS items. - * @property {LLMOption} [llmOpt] - LLM option selected by the user. + * @property {LLMServices} [llmServices] - LLM option selected by the user. * @property {string} [llamaModel] - Specific Llama model selected by the user. - * @property {TranscriptOption} [transcriptOpt] - Transcription option selected by the user. + * @property {TranscriptServices} [transcriptServices] - Transcription option selected by the user. * @property {boolean} [useDocker] - Whether to use Docker for Whisper transcription. 
* @property {WhisperModelType} [whisperModel] - Whisper model type selected by the user. * @property {boolean} [speakerLabels] - Whether to use speaker labels in transcription. @@ -75,8 +75,8 @@ * Represents a handler function for processing different actions (e.g., video, playlist). * @callback HandlerFunction * @param {string} input - The primary input (e.g., URL or file path) for processing. - * @param {LLMOption} [llmOpt] - The selected LLM option. - * @param {TranscriptOption} [transcriptOpt] - The selected transcription option. + * @param {LLMServices} [llmServices] - The selected LLM option. + * @param {TranscriptServices} [transcriptServices] - The selected transcription option. * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - A promise that resolves when processing is complete. */ @@ -116,7 +116,7 @@ /** * Represents the options for RSS feed processing. - * @typedef {Object} RSSProcessingOptions + * @typedef {Object} RSSOptions * @property {string} [order] - The order to process items ('newest' or 'oldest'). * @property {number} [skip] - The number of items to skip. */ @@ -136,38 +136,48 @@ /** * Represents the transcription services that can be used in the application. - * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptOption + * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptServices * - * - `'whisper'`: Use Whisper.cpp for transcription. - * - `'whisperDocker'`: Use Whisper.cpp in a Docker container. - * - `'deepgram'`: Use Deepgram's transcription service. - * - `'assembly'`: Use AssemblyAI's transcription service. - */ - -/** - * Represents the options for transcription. - * @typedef {Object} TranscriptionOptions - * @property {boolean} [speakerLabels] - Whether to use speaker labels. - * @property {string} [language] - The language code for transcription (e.g., 'en'). - * @property {string} [model] - The model type to use for transcription. + * - whisper: Use Whisper.cpp for transcription. + * - whisperDocker: Use Whisper.cpp in a Docker container. + * - deepgram: Use Deepgram's transcription service. + * - assembly: Use AssemblyAI's transcription service. */ /** * Represents the available Whisper model types. * @typedef {'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large' | 'large-v1' | 'large-v2'} WhisperModelType * - * - `'tiny'`: Smallest multilingual model. - * - `'tiny.en'`: Smallest English-only model. - * - `'base'`: Base multilingual model. - * - `'base.en'`: Base English-only model. - * - `'small'`: Small multilingual model. - * - `'small.en'`: Small English-only model. - * - `'medium'`: Medium multilingual model. - * - `'medium.en'`: Medium English-only model. - * - `'large'`: Largest multilingual model (same as 'large-v2'). - * - `'large-v1'`: Large multilingual model version 1. - * - `'large-v2'`: Large multilingual model version 2. + * - tiny: Smallest multilingual model. + * - tiny.en: Smallest English-only model. + * - base: Base multilingual model. + * - base.en: Base English-only model. + * - small: Small multilingual model. + * - small.en: Small English-only model. + * - medium: Medium multilingual model. + * - medium.en: Medium English-only model. + * - large: Largest multilingual model (same as 'large-v2'). + * - large-v1: Large multilingual model version 1. + * - large-v2: Large multilingual model version 2. 
+ */ + +/** + * Define available Whisper models + * @type {Record} */ +export const WHISPER_MODELS = { + tiny: 'ggml-tiny.bin', + 'tiny.en': 'ggml-tiny.en.bin', + base: 'ggml-base.bin', + 'base.en': 'ggml-base.en.bin', + small: 'ggml-small.bin', + 'small.en': 'ggml-small.en.bin', + medium: 'ggml-medium.bin', + 'medium.en': 'ggml-medium.en.bin', + 'large-v1': 'ggml-large-v1.bin', + 'large-v2': 'ggml-large-v2.bin', + large: 'ggml-large-v2.bin', + } /** * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. @@ -178,16 +188,16 @@ /** * Represents the options for Language Models (LLMs) that can be used in the application. - * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini'} LLMOption + * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini'} LLMServices * - * - `'chatgpt'`: Use OpenAI's ChatGPT models. - * - `'claude'`: Use Anthropic's Claude models. - * - `'cohere'`: Use Cohere's language models. - * - `'mistral'`: Use Mistral AI's language models. - * - `'octo'`: Use OctoAI's language models. - * - `'llama'`: Use Llama models for local inference. - * - `'ollama'`: Use Ollama for processing. - * - `'gemini'`: Use Google's Gemini models. + * - chatgpt: Use OpenAI's ChatGPT models. + * - claude: Use Anthropic's Claude models. + * - cohere: Use Cohere's language models. + * - mistral: Use Mistral AI's language models. + * - octo: Use OctoAI's language models. + * - llama: Use Llama models for local inference. + * - ollama: Use Ollama for processing. + * - gemini: Use Google's Gemini models. */ /** @@ -210,9 +220,9 @@ /** * Represents a mapping of LLM option keys to their corresponding functions. - * @typedef {Object.} LLMFunctions + * @typedef {Object.} LLMFunctions * - * This ensures that only valid `LLMOption` values can be used as keys in the `llmFunctions` object. + * This ensures that only valid `LLMServices` values can be used as keys in the `llmFunctions` object. */ /** @@ -223,8 +233,111 @@ * @typedef {'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'} GeminiModelType - Define available Gemini models. * @typedef {'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'} MistralModelType - Define available Mistral AI models. * @typedef {'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'} OctoModelType - Define available OctoAI models. - * @typedef {'LLAMA_3_1_8B_Q4_MODEL' | 'LLAMA_3_1_8B_Q6_MODEL' | 'GEMMA_2_2B_Q4_MODEL' | 'GEMMA_2_2B_Q6_MODEL' | 'TINY_LLAMA_1B_Q4_MODEL' | 'TINY_LLAMA_1B_Q6_MODEL'} LlamaModelType - Define local model configurations. + * @typedef {'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA_2_2B'} LlamaModelType - Define local model configurations. + * @typedef {'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B'} OllamaModelType - Define local model with Ollama. 
+ */ + +/** + * Map of ChatGPT model identifiers to their API names + * @type {Record} + */ +export const GPT_MODELS = { + GPT_4o_MINI: "gpt-4o-mini", + GPT_4o: "gpt-4o", + GPT_4_TURBO: "gpt-4-turbo", + GPT_4: "gpt-4", +} + +/** + * Map of Claude model identifiers to their API names + * @type {Record} + */ +export const CLAUDE_MODELS = { + CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", + CLAUDE_3_OPUS: "claude-3-opus-20240229", + CLAUDE_3_SONNET: "claude-3-sonnet-20240229", + CLAUDE_3_HAIKU: "claude-3-haiku-20240307", +} + +/** + * Map of Cohere model identifiers to their API names + * @type {Record} + */ +export const COHERE_MODELS = { + COMMAND_R: "command-r", // Standard Command model + COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model +} + +/** + * Map of Gemini model identifiers to their API names + * @type {Record} + */ +export const GEMINI_MODELS = { + GEMINI_1_5_FLASH: "gemini-1.5-flash", + // GEMINI_1_5_PRO: "gemini-1.5-pro", + GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", +} + +/** + * Map of Mistral model identifiers to their API names + * @type {Record} + */ +export const MISTRAL_MODELS = { + MIXTRAL_8x7b: "open-mixtral-8x7b", + MIXTRAL_8x22b: "open-mixtral-8x22b", + MISTRAL_LARGE: "mistral-large-latest", + MISTRAL_NEMO: "open-mistral-nemo" +} + +/** + * Map of OctoAI model identifiers to their API names + * @type {Record} + */ +export const OCTO_MODELS = { + LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", + LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", + LLAMA_3_1_405B: "meta-llama-3.1-405b-instruct", + MISTRAL_7B: "mistral-7b-instruct", + MIXTRAL_8X_7B: "mixtral-8x7b-instruct", + NOUS_HERMES_MIXTRAL_8X_7B: "nous-hermes-2-mixtral-8x7b-dpo", + WIZARD_2_8X_22B: "wizardlm-2-8x22b", +} + +/** + * Map of local model identifiers to their filenames and URLs + * @type {Record} + */ +export const LLAMA_MODELS = { + QWEN_2_5_3B: { + filename: "qwen2.5-3b-instruct-q6_k.gguf", + url: "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q6_k.gguf" + }, + PHI_3_5: { + filename: "Phi-3.5-mini-instruct-Q6_K.gguf", + url: "https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q6_K.gguf" + }, + LLAMA_3_2_1B: { + filename: "Llama-3.2-1B.i1-Q6_K.gguf", + url: "https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" + }, + GEMMA_2_2B: { + filename: "gemma-2-2b-it-Q6_K.gguf", + url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" + } +} + +/** + * Map of model identifiers to their corresponding names in Ollama + * @type {Record} */ +export const OLLAMA_MODELS = { + LLAMA_3_2_1B: 'llama3.2:1b', + LLAMA_3_2_3B: 'llama3.2:3b', + GEMMA_2_2B: 'gemma2:2b', + PHI_3_5: 'phi3.5:3.8b', + QWEN_2_5_1B: 'qwen2.5:1.5b', + QWEN_2_5_3B: 'qwen2.5:3b', +} /** * Represents the function signature for cleaning up temporary files. 
diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 0d60a1c..55a8486 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -11,30 +11,30 @@ import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' import { generatePrompt } from '../llms/prompt.js' -/** @import { LLMOption, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' */ - -/** @type {LLMFunctions} */ -const llmFunctions = { - llama: callLlama, - ollama: callOllama, - chatgpt: callChatGPT, - claude: callClaude, - gemini: callGemini, - cohere: callCohere, - mistral: callMistral, - octo: callOcto, -} +/** @import { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' */ /** * Main function to run the selected Language Model. * @param {string} finalPath - The base path for the files. * @param {string} frontMatter - The front matter content for the markdown file. - * @param {LLMOption} llmOpt - The selected Language Model option. + * @param {LLMServices} llmServices - The selected Language Model option. * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} * @throws {Error} - If the LLM processing fails or an error occurs during execution. */ -export async function runLLM(finalPath, frontMatter, llmOpt, options) { +export async function runLLM(finalPath, frontMatter, llmServices, options) { + /** @type {LLMFunctions} */ + const LLM_FUNCTIONS = { + llama: callLlama, + ollama: callOllama, + chatgpt: callChatGPT, + claude: callClaude, + gemini: callGemini, + cohere: callCohere, + mistral: callMistral, + octo: callOcto, + } + try { // Read the transcript file const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8') @@ -44,25 +44,25 @@ export async function runLLM(finalPath, frontMatter, llmOpt, options) { const prompt = generatePrompt(options.prompt) const promptAndTranscript = `${prompt}${transcript}` - if (llmOpt) { - console.log(`\nStep 4 - Processing with ${llmOpt} Language Model...`) + if (llmServices) { + console.log(`\nStep 4 - Processing with ${llmServices} Language Model...`) /** Get the appropriate LLM function based on the option * @type {LLMFunction} */ - const llmFunction = llmFunctions[llmOpt] + const llmFunction = LLM_FUNCTIONS[llmServices] if (!llmFunction) { - throw new Error(`Invalid LLM option: ${llmOpt}`) + throw new Error(`Invalid LLM option: ${llmServices}`) } // Set up a temporary file path and call the LLM function - const tempPath = `${finalPath}-${llmOpt}-temp.md` - await llmFunction(promptAndTranscript, tempPath, options[llmOpt]) + const tempPath = `${finalPath}-${llmServices}-temp.md` + await llmFunction(promptAndTranscript, tempPath, options[llmServices]) console.log(` - Transcript saved to temporary file at ${tempPath}`) // Read generated content and write front matter, show notes, and transcript to final markdown file const showNotes = await readFile(tempPath, 'utf8') - await writeFile(`${finalPath}-${llmOpt}-shownotes.md`, `${frontMatter}\n${showNotes}\n${transcript}`) + await writeFile(`${finalPath}-${llmServices}-shownotes.md`, `${frontMatter}\n${showNotes}\n${transcript}`) // Remove the temporary file await unlink(tempPath) - console.log(` - ${finalPath}-${llmOpt}-shownotes.md\n - Generated show notes saved to markdown file.`) + console.log(` - ${finalPath}-${llmServices}-shownotes.md\n - Generated show notes saved to markdown file.`) } else { console.log('\nStep 4 - No LLM selected, skipping processing...') // If no LLM is selected, just write the prompt and 
transcript diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index 37d95fb..c5d7696 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -2,15 +2,16 @@ import { readFile, writeFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper.js' +import { callWhisperDocker } from '../transcription/whisperDocker.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' -/** @import { TranscriptOption, ProcessingOptions } from '../types.js' */ +/** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to run transcription. * @param {string} finalPath - The base path for the files. - * @param {TranscriptOption} transcriptOpt - The transcription service to use. + * @param {TranscriptServices} transcriptServices - The transcription service to use. * @param {ProcessingOptions} [options={}] - Additional processing options. * @param {string} [frontMatter=''] - Optional front matter content for the markdown file. * @returns {Promise} - Returns the final content including markdown and transcript. @@ -18,7 +19,7 @@ import { callAssembly } from '../transcription/assembly.js' */ export async function runTranscription( finalPath, - transcriptOpt = 'whisper', + transcriptServices, options = {}, frontMatter = '' ) { @@ -26,27 +27,27 @@ export async function runTranscription( let txtContent // Choose the transcription service based on the provided option - switch (transcriptOpt) { + switch (transcriptServices) { case 'deepgram': console.log('\nStep 3 - Using Deepgram for transcription...') - await callDeepgram(`${finalPath}.wav`, finalPath) - txtContent = await readFile(`${finalPath}.txt`, 'utf8') + txtContent = await callDeepgram(finalPath, options) break case 'assembly': console.log('\nStep 3 - Using AssemblyAI for transcription...') - txtContent = await callAssembly(finalPath, transcriptOpt, options) + txtContent = await callAssembly(finalPath, options) break - + case 'whisperDocker': + console.log('\nStep 3 - Using Whisper Docker for transcription...') + txtContent = await callWhisperDocker(finalPath, options) + break + case 'whisper': + default: console.log('\nStep 3 - Using Whisper for transcription...') - txtContent = await callWhisper(finalPath, transcriptOpt, options) + txtContent = await callWhisper(finalPath, options) break - - default: - console.error(`Error: Unsupported transcription option '${transcriptOpt}'.`) - throw new Error('Unsupported transcription option.') } let mdContent = frontMatter diff --git a/test/all.test.js b/test/all.test.js index 0e586d6..fea4a8d 100644 --- a/test/all.test.js +++ b/test/all.test.js @@ -131,6 +131,11 @@ const commands = [ expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '26---2024-09-24-ep0-fsjam-podcast-prompt.md' }, + { + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', + expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', + newName: '26B---2024-09-24-ep0-fsjam-podcast-prompt.md' + }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', diff --git a/test/local.test.js b/test/local.test.js index 67e77f7..90328f4 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -44,7 +44,12 @@ const commands = [ { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: 
'2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_07.md' + newName: 'FILE_07A.md' + }, + { + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', + expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', + newName: 'FILE_07B.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', From 3776f1b8e93b316fa46223bc46e8a7fe30656170 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sat, 5 Oct 2024 03:58:57 -0500 Subject: [PATCH 4/9] add chalk for terminal text styling --- docs/examples.md | 4 +-- package.json | 1 + src/autoshow.js | 5 +++- src/commands/processFile.js | 5 +++- src/commands/processPlaylist.js | 11 +++++--- src/commands/processRSS.js | 23 +++++++++------- src/commands/processURLs.js | 11 +++++--- src/commands/processVideo.js | 5 +++- src/inquirer.js | 5 ++-- src/llms/chatgpt.js | 5 ++-- src/llms/claude.js | 6 ++-- src/llms/cohere.js | 6 ++-- src/llms/gemini.js | 3 +- src/llms/llama.js | 11 ++++---- src/llms/mistral.js | 7 +++-- src/llms/octo.js | 9 +++--- src/llms/ollama.js | 9 +++--- src/transcription/assembly.js | 10 +++---- src/transcription/deepgram.js | 20 ++++++++------ src/transcription/whisper.js | 44 ++++++++++++++++++++---------- src/transcription/whisperDocker.js | 22 +++++++++++---- src/types.js | 23 +++++++++++----- src/utils/cleanUpFiles.js | 8 +++--- src/utils/downloadAudio.js | 10 ++++--- src/utils/generateMarkdown.js | 17 +++++++----- src/utils/runLLM.js | 14 ++++++---- src/utils/runTranscription.js | 22 +++++++-------- 27 files changed, 193 insertions(+), 123 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index f244642..ef930a5 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -81,7 +81,7 @@ npm run as -- --file "content/audio.mp3" Process RSS feed from newest to oldest (default behavior): ```bash -npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" +npm run as -- --rss "https://ajcwebdev.substack.com/feed" ``` Process RSS feed from oldest to newest: @@ -269,7 +269,7 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper sm npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper medium # large-v2 model -npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper large +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper large-v2 ``` Run `whisper.cpp` in a Docker container with `--whisperDocker`: diff --git a/package.json b/package.json index c31d35d..48ff18b 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "@mistralai/mistralai": "^1.0.2", "@octoai/sdk": "^1.5.1", "assemblyai": "^4.6.1", + "chalk": "^5.3.0", "cohere-ai": "^7.12.0", "commander": "^12.1.0", "fast-xml-parser": "^4.4.1", diff --git a/src/autoshow.js b/src/autoshow.js index c222d04..0e10502 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -20,6 +20,7 @@ import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv, exit } from 'node:process' +import { log, opts } from './types.js' /** @import { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' */ @@ -77,7 +78,9 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues * @returns {Promise} */ program.action(async (options) => { - console.log(`Options received: ${JSON.stringify(options, null, 2)}`) + 
log(opts(`Options received:\n`)) + log(options) + log(``) /** * Map actions to their respective handler functions diff --git a/src/commands/processFile.js b/src/commands/processFile.js index 39ea8fe..d645cf5 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -5,6 +5,7 @@ import { downloadFileAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import { log, final } from '../types.js' /** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ @@ -17,6 +18,8 @@ import { cleanUpFiles } from '../utils/cleanUpFiles.js' * @returns {Promise} */ export async function processFile(filePath, llmServices, transcriptServices, options) { + // log(opts(`Options received:\n`)) + // log(options) try { // Generate markdown for the file const { frontMatter, finalPath, filename } = await generateFileMarkdown(filePath) @@ -35,7 +38,7 @@ export async function processFile(filePath, llmServices, transcriptServices, opt await cleanUpFiles(finalPath) } - console.log('\n\nLocal file processing completed successfully.\n') + log(final('\nLocal file processing completed successfully.\n')) } catch (error) { console.error(`Error processing file: ${error.message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index 9554a09..8eaa32d 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -6,6 +6,7 @@ import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' +import { log, final, wait } from '../types.js' /** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ @@ -20,6 +21,8 @@ const execFilePromise = promisify(execFile) * @returns {Promise} */ export async function processPlaylist(playlistUrl, llmServices, transcriptServices, options) { + // log(opts(`Options received:\n`)) + // log(options) try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -43,7 +46,7 @@ export async function processPlaylist(playlistUrl, llmServices, transcriptServic process.exit(1) // Exit with an error code } - console.log(`\nFound ${urls.length} videos in the playlist`) + log(wait(` Found ${urls.length} videos in the playlist...`)) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) @@ -55,13 +58,13 @@ export async function processPlaylist(playlistUrl, llmServices, transcriptServic const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/playlist_info.json' await writeFile(jsonFilePath, jsonContent) - console.log(`Playlist information saved to: ${jsonFilePath}`) + log(wait(`Playlist information saved to: ${jsonFilePath}`)) return } // Process each video in the playlist for (const [index, url] of urls.entries()) { - console.log(`\nProcessing video ${index + 1}/${urls.length}: ${url}`) + log(wait(`\n Processing video ${index + 1}/${urls.length}:\n - ${url}\n`)) try { await processVideo(url, llmServices, transcriptServices, options) } catch (error) { @@ -70,7 +73,7 @@ export async function processPlaylist(playlistUrl, llmServices, transcriptServic } } - console.log('\nPlaylist processing completed successfully.\n') + log(final('\nPlaylist processing 
completed successfully.\n')) } catch (error) { console.error(`Error processing playlist: ${error.message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index f042af5..dee3f84 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -7,6 +7,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import { log, final, wait } from '../types.js' /** @import { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' */ @@ -26,6 +27,8 @@ const parser = new XMLParser({ * @returns {Promise} */ async function processItem(item, transcriptServices, llmServices, options) { + // log(opts(`\nItem parameter passed to processItem:\n`)) + // log(item) try { // Generate markdown for the item const { frontMatter, finalPath, filename } = await generateRSSMarkdown(item) @@ -44,7 +47,7 @@ async function processItem(item, transcriptServices, llmServices, options) { await cleanUpFiles(finalPath) } - console.log(`\nItem processing completed successfully: ${item.title}`) + log(final(`\nItem processing completed successfully: ${item.title}`)) } catch (error) { console.error(`Error processing item ${item.title}: ${error.message}`) // Continue processing the next item @@ -60,13 +63,13 @@ async function processItem(item, transcriptServices, llmServices, options) { * @returns {Promise} */ export async function processRSS(rssUrl, llmServices, transcriptServices, options) { + // log(opts(`Options received:\n`)) + // log(options) try { if (options.item && options.item.length > 0) { // If specific items are provided, list them - console.log('\nProcessing specific items:') - options.item.forEach((url) => console.log(` - ${url}`)) - } else { - console.log(` - Skipping first ${options.skip} items`) + log(wait('\nProcessing specific items:')) + options.item.forEach((url) => log(` - ${url}`)) } // Fetch the RSS feed with a timeout @@ -147,7 +150,7 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option const jsonContent = JSON.stringify(items, null, 2) const jsonFilePath = 'content/rss_info.json' await writeFile(jsonFilePath, jsonContent) - console.log(`RSS feed information saved to: ${jsonFilePath}`) + log(wait(`RSS feed information saved to: ${jsonFilePath}`)) return } @@ -165,17 +168,17 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option const sortedItems = options.order === 'newest' ? 
items : [...items].reverse() itemsToProcess = sortedItems.slice(options.skip) - console.log(` - Found ${sortedItems.length} items in the RSS feed.`) - console.log(` - Processing ${itemsToProcess.length} items after skipping ${options.skip}.`) + log(wait(` Found ${sortedItems.length} items in the RSS feed.`)) + log(wait(` - Processing ${itemsToProcess.length} items after skipping ${options.skip}.\n`)) } // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { - console.log(`\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title}`) + log(wait(` Processing item ${index + 1}/${itemsToProcess.length}:\n - ${item.title}\n`)) await processItem(item, transcriptServices, llmServices, options) } - console.log('\n\nRSS feed processing completed successfully.\n') + log(final('\nRSS feed processing completed successfully.\n')) } catch (error) { console.error(`Error processing RSS feed: ${error.message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index 309f2d6..401ff1a 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -5,6 +5,7 @@ import { resolve } from 'node:path' import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' +import { log, final, wait } from '../types.js' /** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ @@ -17,6 +18,8 @@ import { checkDependencies } from '../utils/checkDependencies.js' * @returns {Promise} */ export async function processURLs(filePath, llmServices, transcriptServices, options) { + // log(opts(`Options received:\n`)) + // log(options) try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -34,7 +37,7 @@ export async function processURLs(filePath, llmServices, transcriptServices, opt process.exit(1) // Exit with an error code } - console.log(`\nFound ${urls.length} URLs in the file`) + log(wait(`\n Found ${urls.length} URLs in the file...`)) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) @@ -46,13 +49,13 @@ export async function processURLs(filePath, llmServices, transcriptServices, opt const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/urls_info.json' await writeFile(jsonFilePath, jsonContent) - console.log(`Video information saved to: ${jsonFilePath}`) + log(wait(`Video information saved to: ${jsonFilePath}`)) return } // Process each URL for (const [index, url] of urls.entries()) { - console.log(`\nProcessing URL ${index + 1}/${urls.length}: ${url}`) + log(wait(`\n Processing URL ${index + 1}/${urls.length}:\n - ${url}\n`)) try { await processVideo(url, llmServices, transcriptServices, options) } catch (error) { @@ -61,7 +64,7 @@ export async function processURLs(filePath, llmServices, transcriptServices, opt } } - console.log('\nURL file processing completed successfully.') + log(final('\nURL file processing completed successfully.')) } catch (error) { console.error(`Error reading or processing file ${filePath}: ${error.message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index 84bea6a..e4bf3a6 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -6,6 +6,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from 
'../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' +import { log, final } from '../types.js' /** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ @@ -18,6 +19,8 @@ import { cleanUpFiles } from '../utils/cleanUpFiles.js' * @returns {Promise} */ export async function processVideo(url, llmServices, transcriptServices, options) { + // log(opts(`\nOptions passed to processVideo:\n`)) + // log(options) try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -39,7 +42,7 @@ export async function processVideo(url, llmServices, transcriptServices, options await cleanUpFiles(finalPath) } - console.log('\nVideo processing completed successfully.\n') + log(final('\nVideo processing completed successfully.')) } catch (error) { // Log any errors that occur during video processing console.error('Error processing video:', error.message) diff --git a/src/inquirer.js b/src/inquirer.js index 73caccb..51aa6de 100644 --- a/src/inquirer.js +++ b/src/inquirer.js @@ -1,6 +1,7 @@ // src/inquirer.js import inquirer from 'inquirer' +import { log } from './types.js' /** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, WhisperModelType } from './types.js' */ @@ -113,7 +114,7 @@ const INQUIRER_PROMPT = [ type: 'list', name: 'whisperModel', message: 'Select the Whisper model type:', - choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large', 'large-v1', 'large-v2'], + choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1', 'large-v2'], when: (answers) => answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker', default: 'large-v2', }, @@ -164,7 +165,7 @@ export async function handleInteractivePrompt(options) { // If user cancels the action if (!answers.confirmAction) { - console.log('Operation cancelled.') + log('Operation cancelled.') process.exit(0) } diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index 91e952e..1e3dbc9 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' import { GPT_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, ChatGPTModelType } from '../types.js' */ @@ -45,8 +46,8 @@ export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o // Write the generated content to the output file await writeFile(tempPath, content) - console.log(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`) - console.log(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`) + log(wait(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`)) + log(wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)) } catch (error) { console.error(`Error in callChatGPT: ${error.message}`) diff --git a/src/llms/claude.js b/src/llms/claude.js index bb5f7bd..36506c6 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' import { CLAUDE_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, 
ClaudeModelType } from '../types.js' */ @@ -46,9 +47,8 @@ export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_ // Write the generated text to the output file await writeFile(tempPath, text) - // console.log(`\nClaude response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging - console.log(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`) - console.log(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`) + log(wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)) + log(wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)) } catch (error) { console.error(`Error in callClaude: ${error.message}`) diff --git a/src/llms/cohere.js b/src/llms/cohere.js index 0bfc8e3..d34db16 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' import { COHERE_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, CohereModelType } from '../types.js' */ @@ -45,9 +46,8 @@ export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND // Write the generated text to the output file await writeFile(tempPath, text) - // console.log(`\nCohere response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging - console.log(`\nFinish Reason: ${finishReason}\nModel: ${actualModel}`) - console.log(`Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`) + log(wait(`\n Finish Reason: ${finishReason}\n Model: ${actualModel}`)) + log(wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)) } catch (error) { console.error(`Error in callCohere: ${error.message}`) diff --git a/src/llms/gemini.js b/src/llms/gemini.js index 15a8c44..2dd7867 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" import { GEMINI_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, GeminiModelType } from '../types.js' */ @@ -55,7 +56,7 @@ export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_ // Write the generated text to the output file await writeFile(tempPath, text) - console.log(`\nModel: ${actualModel}`) + log(wait(`\nModel: ${actualModel}`)) return } catch (error) { diff --git a/src/llms/llama.js b/src/llms/llama.js index e110027..fb819ae 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -6,6 +6,7 @@ import { existsSync } from 'node:fs' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { LLAMA_MODELS } from '../types.js' +import { log, wait } from '../types.js' const execAsync = promisify(exec) @@ -20,7 +21,7 @@ const execAsync = promisify(exec) async function downloadModel(modelName = 'GEMMA_2_2B') { // Get the model object from LLAMA_MODELS using the provided modelName or default to GEMMA_2_2B const model = LLAMA_MODELS[modelName] || LLAMA_MODELS.GEMMA_2_2B - console.log(` - ${model.filename} model selected.`) + log(wait(` - Model selected: ${model.filename}`)) // If no valid model is found, throw an error if (!model) { @@ -32,12 +33,12 @@ async function downloadModel(modelName = 'GEMMA_2_2B') { // Check if the model file already exists if 
(existsSync(modelPath)) { - console.log(` - Model already exists at ${modelPath}`) + log(wait(` - Model path: ${modelPath}`)) // Return the path if the model already exists return modelPath } - console.log(`\nDownloading ${model.filename}...`) + log(wait(`\nDownloading ${model.filename}...`)) try { // Create the directory for storing models if it doesn't exist await mkdir('./src/llms/models', { recursive: true }) @@ -46,8 +47,8 @@ async function downloadModel(modelName = 'GEMMA_2_2B') { const { stderr } = await execAsync(`curl -L ${model.url} -o ${modelPath}`) // If there's any stderr output, log it - if (stderr) console.log(stderr) - console.log('Download completed') + if (stderr) log(stderr) + log('Download completed') // Return the path to the downloaded model return modelPath diff --git a/src/llms/mistral.js b/src/llms/mistral.js index d2d5897..6842ccf 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' import { MISTRAL_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, MistralModelType } from '../types.js' */ @@ -27,7 +28,7 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA try { // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found const actualModel = MISTRAL_MODELS[model] || MISTRAL_MODELS.MISTRAL_NEMO - console.log(`\nUsing Mistral model: ${actualModel}`) + log(wait(`\n Using Mistral model:\n - ${actualModel}`)) // Make API call to Mistral AI for chat completion const response = await mistral.chat.complete({ @@ -46,8 +47,8 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA // Write the generated content to the specified output file await writeFile(tempPath, content) // Log finish reason, used model, and token usage - console.log(`\nFinish Reason: ${finishReason}\nModel Used: ${usedModel}`) - console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`) + log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)) + log(wait(` Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`)) } catch (error) { // Log any errors that occur during the process diff --git a/src/llms/octo.js b/src/llms/octo.js index e8dca18..8f47ed3 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -4,6 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OctoAIClient } from '@octoai/sdk' import { OCTO_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, OctoModelType } from '../types.js' */ @@ -27,7 +28,7 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 try { // Select the actual model to use, defaulting to LLAMA_3_1_70B if the specified model is not found const actualModel = OCTO_MODELS[model] || OCTO_MODELS.LLAMA_3_1_70B - console.log(`\nUsing OctoAI model: ${actualModel}`) + log(wait(`\n Using OctoAI model:\n - ${actualModel}`)) // Make API call to OctoAI for text generation const response = await octoai.textGen.createChatCompletion({ @@ -45,11 +46,11 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 // Write the generated content to the specified output file await writeFile(tempPath, content) - 
console.log(`\nOctoAI response saved to ${tempPath}`) + log(wait(`\n OctoAI response saved to ${tempPath}`)) // Log finish reason, used model, and token usage - console.log(`\nFinish Reason: ${finishReason}\nModel Used: ${usedModel}`) - console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`) + log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)) + log(wait(` Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`)) } catch (error) { // Log any errors that occur during the process diff --git a/src/llms/ollama.js b/src/llms/ollama.js index 60a31ec..9494a2e 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.js @@ -3,6 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OLLAMA_MODELS } from '../types.js' +import { log, wait } from '../types.js' /** @import { LLMFunction, OllamaModelType } from '../types.js' */ @@ -25,10 +26,10 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA // Get host and port from environment variables or use defaults const ollamaHost = env.OLLAMA_HOST || 'ollama' const ollamaPort = env.OLLAMA_PORT || '11434' - console.log(` - Using Ollama model: ${ollamaModelName} at http://${ollamaHost}:${ollamaPort}`) + log(wait(` - Using Ollama model: ${ollamaModelName} at http://${ollamaHost}:${ollamaPort}`)) // Call the Ollama chat API - console.log(` - Sending chat request to Ollama...`) + log(wait(` - Sending chat request to Ollama...`)) const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', headers: { @@ -49,9 +50,9 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA // Extract the assistant's reply and write the response to the output file const assistantReply = data.message.content - console.log(` - Received response from Ollama.`) + log(wait(` - Received response from Ollama.`)) await writeFile(tempPath, assistantReply) - console.log(`\nResponse saved to ${tempPath}`) + log(wait(`\n Transcript saved to temporary file:\n - ${tempPath}`)) } catch (error) { console.error(`Error in callOllama: ${error.message}`) console.error(`Stack Trace: ${error.stack}`) diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index 03f02e1..eb61e27 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -3,18 +3,20 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { AssemblyAI } from 'assemblyai' +import { log, wait } from '../types.js' /** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ /** * Main function to handle transcription using AssemblyAI. * @param {string} finalPath - The identifier used for naming output files. - * @param {TranscriptServices} transcriptServices - The transcription service to use. * @param {ProcessingOptions} options - Additional processing options. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callAssembly(finalPath, transcriptServices, options) { +export async function callAssembly(finalPath, options) { + // log(opts(`Options received:\n`)) + // log(options) // Check if the ASSEMBLY_API_KEY environment variable is set if (!env.ASSEMBLY_API_KEY) { throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') @@ -25,8 +27,6 @@ export async function callAssembly(finalPath, transcriptServices, options) { try { const { speakerLabels } = options - console.log(`Parameters passed to callAssembly:`) - console.log(` - finalPath: ${finalPath}\n - transcriptServices: ${transcriptServices}\n - speakerLabels: ${speakerLabels}`) // Request transcription from AssemblyAI const transcript = await client.transcripts.transcribe({ audio: `${finalPath}.wav`, // The audio file to transcribe @@ -75,7 +75,7 @@ export async function callAssembly(finalPath, transcriptServices, options) { // Write the formatted transcript to a file await writeFile(`${finalPath}.txt`, txtContent) - console.log(`\nTranscript saved:\n - ${finalPath}.txt`) + log(wait(`\n Transcript saved...\n - ${finalPath}.txt\n`)) return txtContent } catch (error) { // Log any errors that occur during the transcription process diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js index 7621085..246434f 100644 --- a/src/transcription/deepgram.js +++ b/src/transcription/deepgram.js @@ -3,15 +3,16 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' import { createClient } from '@deepgram/sdk' +import { log, wait } from '../types.js' /** * Main function to handle transcription using Deepgram. - * @param {string} input - The URL or local file path of the audio to transcribe. - * @param {string} id - The identifier used for naming output files. - * @returns {Promise} + * @param {string} finalPath - The identifier used for naming output files. + * @param {ProcessingOptions} options - Additional processing options. + * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callDeepgram(input, id) { +export async function callDeepgram(finalPath) { // Check if the DEEPGRAM_API_KEY environment variable is set if (!env.DEEPGRAM_API_KEY) { throw new Error('DEEPGRAM_API_KEY environment variable is not set. Please set it to your Deepgram API key.') @@ -21,19 +22,19 @@ export async function callDeepgram(input, id) { const deepgram = createClient(env.DEEPGRAM_API_KEY) // Check if the input is a URL or a local file - const isUrl = input.startsWith('http://') || input.startsWith('https://') + const isUrl = finalPath.startsWith('http://') || finalPath.startsWith('https://') try { // Request transcription from Deepgram const { result } = await deepgram.listen.prerecorded[isUrl ? 'transcribeUrl' : 'transcribeFile']( // Use URL or file content based on input type - isUrl ? { url: input } : await readFile(input), + isUrl ? 
{ url: finalPath } : await readFile(`${finalPath}.wav`), // Use the "nova-2" model with smart formatting { model: 'nova-2', smart_format: true } ) // Process and format the transcription result - const formattedTranscript = result.results.channels[0].alternatives[0].paragraphs.paragraphs + const txtContent = result.results.channels[0].alternatives[0].paragraphs.paragraphs .flatMap((paragraph) => paragraph.sentences) .map((sentence) => { // Format timestamp and text for each sentence @@ -44,8 +45,9 @@ export async function callDeepgram(input, id) { .join('\n') // Write the formatted transcript to a file - await writeFile(`${id}.txt`, formattedTranscript) - console.log(`\nTranscript saved:\n - ${id}.txt`) + await writeFile(`${finalPath}.txt`, txtContent) + log(wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)) + return txtContent } catch (error) { // Log any errors that occur during the transcription process console.error(`Error processing the transcription: ${error.message}`) diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index 50d3d2a..30efc3c 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -1,9 +1,11 @@ // src/transcription/whisper.js -import { readFile, writeFile, access } from 'node:fs/promises' +import { readFile, writeFile } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' +import { existsSync } from 'node:fs' import { WHISPER_MODELS } from '../types.js' +import { log, success, wait } from '../types.js' const execPromise = promisify(exec) @@ -17,41 +19,53 @@ const execPromise = promisify(exec) * @throws {Error} - If an error occurs during transcription. */ export async function callWhisper(finalPath, options) { + // log(opts(`Options passed to callWhisper:\n`)) + // log(options) try { - const whisperModel = options.whisperModel || 'base' + // Get the whisper model from options or use 'base' as default + const whisperModel = options.whisper || 'base' if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) } - const modelName = WHISPER_MODELS[whisperModel] - const downloadModelName = whisperModel === 'large' ? 
'large-v2' : whisperModel - const modelPath = `./whisper.cpp/models/${modelName}` + // Get the model ggml file name + const modelGGMLName = WHISPER_MODELS[whisperModel] + + log(wait(` - whisperModel: ${whisperModel}`)) + log(wait(` - modelGGMLName: ${modelGGMLName}`)) // Setup Whisper - await access('./whisper.cpp').catch(async () => { + if (!existsSync('./whisper.cpp')) { + log(`\nNo whisper.cpp repo found, running git clone and make...\n`) await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp && cp .github/whisper.Dockerfile whisper.cpp/Dockerfile') - }) + log(`\nwhisper.cpp clone and make commands complete.\n`) + } // Ensure model is downloaded - await access(modelPath).catch(async () => { - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${downloadModelName}`) - }) + if (!existsSync(`./whisper.cpp/models/ggml-${whisperModel}.bin`)) { + log(wait(` - Model not found, downloading: ${whisperModel}...\n`)) + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) + log(success(' Model download completed.\n')) + } // Run transcription - await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) - console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) + await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelGGMLName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) + log(wait(`\n Transcript LRC file successfully completed...\n - ${finalPath}.lrc\n`)) - // Process transcript + // Read the generated LRC file const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + // Process and format the LRC content const txtContent = lrcContent.split('\n') .filter(line => !line.startsWith('[by:whisper.cpp]')) .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) .join('\n') + // Write the formatted content to a text file await writeFile(`${finalPath}.txt`, txtContent) - console.log(` - Transcript transformation completed at ${finalPath}.txt`) - + log(wait(` Transcript transformation successfully completed...\n - ${finalPath}.txt\n`)) + + // Return the processed content return txtContent } catch (error) { console.error('Error in callWhisper:', error) diff --git a/src/transcription/whisperDocker.js b/src/transcription/whisperDocker.js index 36b34c6..a626ca4 100644 --- a/src/transcription/whisperDocker.js +++ b/src/transcription/whisperDocker.js @@ -5,6 +5,7 @@ import { exec } from 'node:child_process' import { promisify } from 'node:util' import { basename, join } from 'node:path' import { WHISPER_MODELS } from '../types.js' +import { log, wait } from '../types.js' const execPromise = promisify(exec) @@ -18,18 +19,27 @@ const execPromise = promisify(exec) * @throws {Error} - If an error occurs during transcription. */ export async function callWhisperDocker(finalPath, options) { + // log(opts(`Options passed to callWhisperDocker:\n`)) + // log(options) try { - const whisperModel = options.whisperModel || 'base' + // Get the whisper model from options or use 'base' as default + const whisperModel = options.whisper || 'base' if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) } - const modelName = WHISPER_MODELS[whisperModel] - const downloadModelName = whisperModel === 'large' ? 
'large-v2' : whisperModel + // Get the model ggml file name + const modelGGMLName = WHISPER_MODELS[whisperModel] + + log(wait(` - whisperModel: ${whisperModel}`)) + log(wait(` - modelGGMLName: ${modelGGMLName}`)) const CONTAINER_NAME = 'autoshow-whisper-1' - const modelPathContainer = `/app/models/${modelName}` + const modelPathContainer = `/app/models/${modelGGMLName}` + + log(wait(` - CONTAINER_NAME: ${CONTAINER_NAME}`)) + log(wait(` - modelPathContainer: ${modelPathContainer}`)) // Ensure container is running await execPromise(`docker ps | grep ${CONTAINER_NAME}`) @@ -44,7 +54,7 @@ export async function callWhisperDocker(finalPath, options) { await execPromise( `docker exec ${CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(`/app/content`, `${fileName}.wav`)} -of ${join(`/app/content`, fileName)} --output-lrc` ) - console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) + log(wait(`\n Transcript LRC file successfully completed...\n - ${finalPath}.lrc\n`)) // Process transcript const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') @@ -54,7 +64,7 @@ export async function callWhisperDocker(finalPath, options) { .join('\n') await writeFile(`${finalPath}.txt`, txtContent) - console.log(` - Transcript transformation completed at ${finalPath}.txt`) + log(wait(` Transcript transformation successfully completed...\n - ${finalPath}.txt\n`)) return txtContent } catch (error) { diff --git a/src/types.js b/src/types.js index 29bc7b1..75b0cb4 100644 --- a/src/types.js +++ b/src/types.js @@ -1,5 +1,16 @@ // src/types.js +import chalk from 'chalk' + +export const step = chalk.bold.underline +export const dim = chalk.dim +export const success = chalk.bold.blue +export const opts = chalk.magentaBright.bold +export const wait = chalk.cyan.dim +export const final = chalk.bold.italic + +export const log = console.log + /** * @file This file contains all the custom type definitions used across the Autoshow project. */ @@ -146,7 +157,7 @@ /** * Represents the available Whisper model types. - * @typedef {'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large' | 'large-v1' | 'large-v2'} WhisperModelType + * @typedef {'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large-v1' | 'large-v2'} WhisperModelType * * - tiny: Smallest multilingual model. * - tiny.en: Smallest English-only model. @@ -156,7 +167,6 @@ * - small.en: Small English-only model. * - medium: Medium multilingual model. * - medium.en: Medium English-only model. - * - large: Largest multilingual model (same as 'large-v2'). * - large-v1: Large multilingual model version 1. * - large-v2: Large multilingual model version 2. 
*/ @@ -166,17 +176,16 @@ * @type {Record} */ export const WHISPER_MODELS = { - tiny: 'ggml-tiny.bin', + 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', - base: 'ggml-base.bin', + 'base': 'ggml-base.bin', 'base.en': 'ggml-base.en.bin', - small: 'ggml-small.bin', + 'small': 'ggml-small.bin', 'small.en': 'ggml-small.en.bin', - medium: 'ggml-medium.bin', + 'medium': 'ggml-medium.bin', 'medium.en': 'ggml-medium.en.bin', 'large-v1': 'ggml-large-v1.bin', 'large-v2': 'ggml-large-v2.bin', - large: 'ggml-large-v2.bin', } /** diff --git a/src/utils/cleanUpFiles.js b/src/utils/cleanUpFiles.js index fd9a694..bcdb0c0 100644 --- a/src/utils/cleanUpFiles.js +++ b/src/utils/cleanUpFiles.js @@ -1,6 +1,7 @@ // src/utils/cleanUpFiles.js import { unlink } from 'node:fs/promises' +import { log, step, success } from '../types.js' /** * Asynchronous function to clean up temporary files. @@ -9,16 +10,15 @@ import { unlink } from 'node:fs/promises' * @throws {Error} - If an error occurs while deleting files. */ export async function cleanUpFiles(id) { + log(step('\nStep 5 - Cleaning up temporary files...\n')) // Array of file extensions to delete const extensions = ['.wav', '.txt', '.md', '.lrc'] - // Log the start of the cleanup process - console.log('\nStep 5 - Cleaning up temporary files...') - + log(success(` Deleted:`)) for (const ext of extensions) { try { await unlink(`${id}${ext}`) - console.log(` - Deleted: ${id}${ext}`) + log(success(` - ${id}${ext}`)) } catch (error) { if (error.code !== 'ENOENT') { console.error(`Error deleting file ${id}${ext}: ${error.message}`) diff --git a/src/utils/downloadAudio.js b/src/utils/downloadAudio.js index 39884b8..68b7e56 100644 --- a/src/utils/downloadAudio.js +++ b/src/utils/downloadAudio.js @@ -6,6 +6,7 @@ import { promisify } from 'node:util' import { readFile, access } from 'node:fs/promises' import { fileTypeFromBuffer } from 'file-type' import ffmpeg from 'ffmpeg-static' +import { log, step, success, wait } from '../types.js' /** @import { SupportedFileType } from '../types.js' */ @@ -20,13 +21,13 @@ const execPromise = promisify(exec) * @throws {Error} - If there is an error during the download process. */ export async function downloadAudio(url, filename) { + log(step('\nStep 2 - Downloading URL audio...\n')) try { // Check for required dependencies await checkDependencies(['yt-dlp']) // Set the final path for the downloaded file const finalPath = `content/${filename}` - console.log('\nStep 2 - Downloading audio...') // Execute yt-dlp to download the audio const { stderr } = await execFilePromise('yt-dlp', [ @@ -47,7 +48,7 @@ export async function downloadAudio(url, filename) { // Construct the path of the downloaded file const downloadedFile = `${finalPath}.wav` - console.log(` - ${downloadedFile}\n - Audio downloaded successfully.`) + log(success(` Audio downloaded successfully:\n - ${downloadedFile}`)) return downloadedFile } catch (error) { console.error(`Error downloading audio: ${error.message}`) @@ -63,6 +64,7 @@ export async function downloadAudio(url, filename) { * @throws {Error} - If the file type is unsupported or processing fails. */ export async function downloadFileAudio(filePath, sanitizedFilename) { + log(step('\nStep 2 - Downloading file audio...\n')) // Define supported audio and video formats /** @type {Set} */ const supportedFormats = new Set([ @@ -82,7 +84,7 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { fileType ? 
`Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) } - console.log(`\nStep 2 - File read successfully and type detected as ${fileType.ext}, converting to WAV...`) + log(wait(` File type detected as ${fileType.ext}, converting to WAV...\n`)) const outputPath = `content/${sanitizedFilename}.wav` @@ -90,7 +92,7 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { await execPromise( `${ffmpeg} -i "${filePath}" -ar 16000 -ac 1 -vn "${outputPath}"` ) - console.log(` - ${outputPath}\n - File converted to WAV format successfully.`) + log(success(` File converted to WAV format successfully:\n - ${outputPath}`)) return outputPath } catch (error) { diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.js index 419422b..6bc2a7e 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.js @@ -5,6 +5,7 @@ import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' +import { log, dim, step, success } from '../types.js' /** @import { MarkdownData, RSSItem, VideoMetadata } from '../types.js' */ @@ -17,7 +18,6 @@ const execFilePromise = promisify(execFile) * @returns {Promise} - The video metadata. */ export async function extractVideoMetadata(url) { - console.log('\nStep 0 - Generating metadata...') try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -63,7 +63,6 @@ export async function extractVideoMetadata(url) { */ export async function generateRSSMarkdown(item) { try { - console.log('\nStep 1 - Generating RSS markdown...') // Destructure the item object const { publishDate, title, coverImage, showLink, channel, channelURL } = item @@ -87,7 +86,9 @@ export async function generateRSSMarkdown(item) { // Write the front matter to the markdown file await writeFile(`${finalPath}.md`, frontMatter) - console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) + log(dim(frontMatter)) + log(step('\nStep 1 - Generating RSS markdown...\n')) + log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) return { frontMatter, finalPath, filename } } catch (error) { console.error(`Error generating markdown for RSS item: ${error.message}`) @@ -103,7 +104,6 @@ export async function generateRSSMarkdown(item) { */ export async function generateFileMarkdown(filePath) { try { - console.log('\nStep 1 - Generating file markdown...') // Extract the original filename from the full file path const originalFilename = basename(filePath) @@ -133,7 +133,9 @@ export async function generateFileMarkdown(filePath) { await writeFile(`${finalPath}.md`, frontMatter) // Log the creation of the markdown file - console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) + log(dim(frontMatter)) + log(step('\nStep 1 - Generating file markdown...\n')) + log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) // Return an object with the generated data return { frontMatter, finalPath, filename: sanitizedFilename } @@ -153,7 +155,6 @@ export async function generateFileMarkdown(filePath) { */ export async function generateMarkdown(url) { try { - console.log('\nStep 1 - Generating video markdown...') // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -199,7 +200,9 @@ export async function generateMarkdown(url) { // Write the front matter to the markdown file await writeFile(`${finalPath}.md`, frontMatter) - 
console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) + log(dim(frontMatter)) + log(step('\nStep 1 - Generating video markdown...\n')) + log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) return { frontMatter, finalPath, filename } } catch (error) { console.error(`Error generating markdown for video: ${error.message}`) diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 55a8486..944e952 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -10,6 +10,7 @@ import { callCohere } from '../llms/cohere.js' import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' import { generatePrompt } from '../llms/prompt.js' +import { log, step, success, wait } from '../types.js' /** @import { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' */ @@ -23,6 +24,9 @@ import { generatePrompt } from '../llms/prompt.js' * @throws {Error} - If the LLM processing fails or an error occurs during execution. */ export async function runLLM(finalPath, frontMatter, llmServices, options) { + log(step(`\nStep 4 - Running LLM processing on transcript...\n`)) + // log(opts(`Options passed to runLLM:\n`)) + // log(options) /** @type {LLMFunctions} */ const LLM_FUNCTIONS = { llama: callLlama, @@ -45,7 +49,7 @@ export async function runLLM(finalPath, frontMatter, llmServices, options) { const promptAndTranscript = `${prompt}${transcript}` if (llmServices) { - console.log(`\nStep 4 - Processing with ${llmServices} Language Model...`) + log(wait(` Processing with ${llmServices} Language Model...`)) /** Get the appropriate LLM function based on the option * @type {LLMFunction} */ @@ -56,18 +60,18 @@ export async function runLLM(finalPath, frontMatter, llmServices, options) { // Set up a temporary file path and call the LLM function const tempPath = `${finalPath}-${llmServices}-temp.md` await llmFunction(promptAndTranscript, tempPath, options[llmServices]) - console.log(` - Transcript saved to temporary file at ${tempPath}`) + log(wait(`\n Transcript saved to temporary file:\n - ${tempPath}`)) // Read generated content and write front matter, show notes, and transcript to final markdown file const showNotes = await readFile(tempPath, 'utf8') await writeFile(`${finalPath}-${llmServices}-shownotes.md`, `${frontMatter}\n${showNotes}\n${transcript}`) // Remove the temporary file await unlink(tempPath) - console.log(` - ${finalPath}-${llmServices}-shownotes.md\n - Generated show notes saved to markdown file.`) + log(success(`\n Generated show notes saved to markdown file:\n - ${finalPath}-${llmServices}-shownotes.md`)) } else { - console.log('\nStep 4 - No LLM selected, skipping processing...') + log(wait(' No LLM selected, skipping processing...')) // If no LLM is selected, just write the prompt and transcript await writeFile(`${finalPath}-prompt.md`, `${frontMatter}\n${promptAndTranscript}`) - console.log(` - ${finalPath}-prompt.md\n - Prompt and transcript saved to markdown file.`) + log(success(`\n Prompt and transcript saved to markdown file:\n - ${finalPath}-prompt.md`)) } } catch (error) { console.error(`Error running Language Model: ${error.message}`) diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index c5d7696..8ab52f2 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -5,6 +5,7 @@ import { callWhisper } from '../transcription/whisper.js' import { callWhisperDocker } from '../transcription/whisperDocker.js' import { callDeepgram } 
from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' +import { log, step, success, wait } from '../types.js' /** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ @@ -17,35 +18,34 @@ import { callAssembly } from '../transcription/assembly.js' * @returns {Promise} - Returns the final content including markdown and transcript. * @throws {Error} - If the transcription service fails or an error occurs during processing. */ -export async function runTranscription( - finalPath, - transcriptServices, - options = {}, - frontMatter = '' -) { + +export async function runTranscription(finalPath, frontMatter, transcriptServices, options) { + log(step(`\nStep 3 - Running transcription on audio file...`)) + // log(opts(`Options passed to runTranscription:\n`)) + // log(options) try { let txtContent // Choose the transcription service based on the provided option switch (transcriptServices) { case 'deepgram': - console.log('\nStep 3 - Using Deepgram for transcription...') + log(wait('\n Using Deepgram for transcription...')) txtContent = await callDeepgram(finalPath, options) break case 'assembly': - console.log('\nStep 3 - Using AssemblyAI for transcription...') + log(wait('\n Using AssemblyAI for transcription...')) txtContent = await callAssembly(finalPath, options) break case 'whisperDocker': - console.log('\nStep 3 - Using Whisper Docker for transcription...') + log(wait('\n Using Whisper Docker for transcription...')) txtContent = await callWhisperDocker(finalPath, options) break case 'whisper': default: - console.log('\nStep 3 - Using Whisper for transcription...') + log(wait('\n Using Whisper for transcription...')) txtContent = await callWhisper(finalPath, options) break } @@ -68,7 +68,7 @@ export async function runTranscription( // Write final markdown file, including existing content and the new transcript await writeFile(`${finalPath}.md`, finalContent) - console.log(` - Markdown file updated with transcript at ${finalPath}.md`) + log(success(` Markdown file successfully updated with transcript:\n - ${finalPath}.md`)) return finalContent } catch (error) { From 9719f8afb92494b05188012c6eba8e17483d3603 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sat, 5 Oct 2024 12:11:23 -0500 Subject: [PATCH 5/9] create models file --- .gitignore | 3 +- docs/examples.md | 19 +++++ package.json | 2 + src/autoshow.js | 1 + src/commands/processFile.js | 2 +- src/commands/processRSS.js | 52 ++++++++++-- src/commands/processURLs.js | 2 +- src/commands/processVideo.js | 2 +- src/llms/chatgpt.js | 2 +- src/llms/claude.js | 2 +- src/llms/cohere.js | 2 +- src/llms/gemini.js | 2 +- src/llms/llama.js | 2 +- src/llms/mistral.js | 2 +- src/llms/octo.js | 2 +- src/llms/ollama.js | 2 +- src/models.js | 122 +++++++++++++++++++++++++++++ src/transcription/whisper.js | 4 +- src/transcription/whisperDocker.js | 20 ++--- src/types.js | 119 ---------------------------- 20 files changed, 213 insertions(+), 151 deletions(-) create mode 100644 src/models.js diff --git a/.gitignore b/.gitignore index e3f42ae..7f5f216 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ src/llms/models .idea build deno.lock -out \ No newline at end of file +out +types \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index ef930a5..924c674 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -84,6 +84,14 @@ Process RSS feed from newest to oldest (default behavior): npm run as -- --rss 
"https://ajcwebdev.substack.com/feed" ``` +Process a certain number of the most recent items, for example the last three episodes released on the feed: + +```bash +npm run as -- \ + --rss "https://feeds.transistor.fm/fsjam-podcast/" \ + --last 3 +``` + Process RSS feed from oldest to newest: ```bash @@ -249,6 +257,17 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama ``` +Select Ollama model: + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama LLAMA_3_2_1B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama LLAMA_3_2_3B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama GEMMA_2_2B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama PHI_3_5 +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama QWEN_2_5_1B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama QWEN_2_5_3B +``` + ## Transcription Options ### Whisper.cpp diff --git a/package.json b/package.json index 48ff18b..8276586 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,8 @@ "u": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --urls", "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --playlist", "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --file", + "r": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --rss", + "last3": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --last 3 --rss", "serve": "node --env-file=.env --no-warnings --watch packages/server/index.js", "fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", "fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", diff --git a/src/autoshow.js b/src/autoshow.js index 0e10502..a48a73c 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -42,6 +42,7 @@ program .option('--item ', 'Process specific items in the RSS feed by providing their audio URLs') .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)') .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt) + .option('--last ', 'Number of most recent items to process (overrides --order and --skip)', parseInt) .option('--info', 'Generate JSON file with RSS feed information instead of processing items') .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification') .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification') diff --git a/src/commands/processFile.js b/src/commands/processFile.js index d645cf5..5043f7e 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -28,7 +28,7 @@ export async function processFile(filePath, llmServices, transcriptServices, opt await downloadFileAudio(filePath, filename) // Run transcription on the file - await runTranscription(finalPath, transcriptServices, options, frontMatter) + await runTranscription(finalPath, frontMatter, transcriptServices, options) // Process the transcript with the selected Language Model await runLLM(finalPath, frontMatter, llmServices, options) diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index dee3f84..58a46cc 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -37,7 +37,7 @@ async 
function processItem(item, transcriptServices, llmServices, options) { await downloadAudio(item.showLink, filename) // Run transcription - await runTranscription(finalPath, transcriptServices, options, frontMatter) + await runTranscription(finalPath, frontMatter, transcriptServices, options) // Process with Language Model await runLLM(finalPath, frontMatter, llmServices, options) @@ -66,10 +66,44 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option // log(opts(`Options received:\n`)) // log(options) try { + // Validate that --last is a positive integer if provided + if (options.last !== undefined) { + if (!Number.isInteger(options.last) || options.last < 1) { + console.error('Error: The --last option must be a positive integer.') + process.exit(1) + } + // Ensure --last is not used with --skip or --order + if (options.skip !== undefined || options.order !== undefined) { + console.error('Error: The --last option cannot be used with --skip or --order.') + process.exit(1) + } + } + + // Validate that --skip is a non-negative integer if provided + if (options.skip !== undefined) { + if (!Number.isInteger(options.skip) || options.skip < 0) { + console.error('Error: The --skip option must be a non-negative integer.') + process.exit(1) + } + } + + // Validate that --order is either 'newest' or 'oldest' if provided + if (options.order !== undefined) { + if (!['newest', 'oldest'].includes(options.order)) { + console.error("Error: The --order option must be either 'newest' or 'oldest'.") + process.exit(1) + } + } + + // Log the processing action if (options.item && options.item.length > 0) { // If specific items are provided, list them log(wait('\nProcessing specific items:')) options.item.forEach((url) => log(` - ${url}`)) + } else if (options.last) { + console.log(`\nProcessing the last ${options.last} items`) + } else { + console.log(` - Skipping first ${options.skip || 0} items`) } // Fetch the RSS feed with a timeout @@ -163,13 +197,19 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option process.exit(1) // Exit with an error code } itemsToProcess = matchedItems + log(wait(` Found ${items.length} items in the RSS feed.`)) + log(wait(` - Processing ${itemsToProcess.length} specified items.`)) + } else if (options.last) { + // Process the most recent N items + itemsToProcess = items.slice(0, options.last) + log(wait(` Found ${items.length} items in the RSS feed.`)) + log(wait(` - Processing the last ${options.last} items.`)) } else { // Sort items based on the specified order and apply skip - const sortedItems = options.order === 'newest' ? items : [...items].reverse() - itemsToProcess = sortedItems.slice(options.skip) - - log(wait(` Found ${sortedItems.length} items in the RSS feed.`)) - log(wait(` - Processing ${itemsToProcess.length} items after skipping ${options.skip}.\n`)) + const sortedItems = options.order === 'oldest' ? 
items.slice().reverse() : items + itemsToProcess = sortedItems.slice(options.skip || 0) + log(wait(` Found ${items.length} items in the RSS feed.`)) + log(wait(` - Processing ${itemsToProcess.length} items after skipping ${options.skip || 0}.\n`)) } // Process each item in the feed diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index 401ff1a..d222854 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -64,7 +64,7 @@ export async function processURLs(filePath, llmServices, transcriptServices, opt } } - log(final('\nURL file processing completed successfully.')) + log(final('\nURL file processing completed successfully.\n')) } catch (error) { console.error(`Error reading or processing file ${filePath}: ${error.message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index e4bf3a6..63f5682 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -42,7 +42,7 @@ export async function processVideo(url, llmServices, transcriptServices, options await cleanUpFiles(finalPath) } - log(final('\nVideo processing completed successfully.')) + log(final('\nVideo processing completed successfully.\n')) } catch (error) { // Log any errors that occur during video processing console.error('Error processing video:', error.message) diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index 1e3dbc9..475f9dc 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' -import { GPT_MODELS } from '../types.js' +import { GPT_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, ChatGPTModelType } from '../types.js' */ diff --git a/src/llms/claude.js b/src/llms/claude.js index 36506c6..b3ec479 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' -import { CLAUDE_MODELS } from '../types.js' +import { CLAUDE_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, ClaudeModelType } from '../types.js' */ diff --git a/src/llms/cohere.js b/src/llms/cohere.js index d34db16..fa0c225 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' -import { COHERE_MODELS } from '../types.js' +import { COHERE_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, CohereModelType } from '../types.js' */ diff --git a/src/llms/gemini.js b/src/llms/gemini.js index 2dd7867..084eb34 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" -import { GEMINI_MODELS } from '../types.js' +import { GEMINI_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, GeminiModelType } from '../types.js' */ diff --git a/src/llms/llama.js b/src/llms/llama.js index fb819ae..f548989 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -5,7 +5,7 @@ import { getLlama, LlamaChatSession } from "node-llama-cpp" import { existsSync } from 'node:fs' import { exec } from 'node:child_process' import { 
promisify } from 'node:util' -import { LLAMA_MODELS } from '../types.js' +import { LLAMA_MODELS } from '../models.js' import { log, wait } from '../types.js' const execAsync = promisify(exec) diff --git a/src/llms/mistral.js b/src/llms/mistral.js index 6842ccf..ea5f003 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' -import { MISTRAL_MODELS } from '../types.js' +import { MISTRAL_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, MistralModelType } from '../types.js' */ diff --git a/src/llms/octo.js b/src/llms/octo.js index 8f47ed3..93ee91e 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OctoAIClient } from '@octoai/sdk' -import { OCTO_MODELS } from '../types.js' +import { OCTO_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, OctoModelType } from '../types.js' */ diff --git a/src/llms/ollama.js b/src/llms/ollama.js index 9494a2e..fd0e1ac 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.js @@ -2,7 +2,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { OLLAMA_MODELS } from '../types.js' +import { OLLAMA_MODELS } from '../models.js' import { log, wait } from '../types.js' /** @import { LLMFunction, OllamaModelType } from '../types.js' */ diff --git a/src/models.js b/src/models.js new file mode 100644 index 0000000..65b0a6c --- /dev/null +++ b/src/models.js @@ -0,0 +1,122 @@ +// src/models.js + +/** @import { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType } from './types.js' */ + +/** + * Define available Whisper models + * @type {Record} + */ +export const WHISPER_MODELS = { + 'tiny': 'ggml-tiny.bin', + 'tiny.en': 'ggml-tiny.en.bin', + 'base': 'ggml-base.bin', + 'base.en': 'ggml-base.en.bin', + 'small': 'ggml-small.bin', + 'small.en': 'ggml-small.en.bin', + 'medium': 'ggml-medium.bin', + 'medium.en': 'ggml-medium.en.bin', + 'large-v1': 'ggml-large-v1.bin', + 'large-v2': 'ggml-large-v2.bin', +} + +/** + * Map of ChatGPT model identifiers to their API names + * @type {Record} + */ +export const GPT_MODELS = { + GPT_4o_MINI: "gpt-4o-mini", + GPT_4o: "gpt-4o", + GPT_4_TURBO: "gpt-4-turbo", + GPT_4: "gpt-4", +} + +/** + * Map of Claude model identifiers to their API names + * @type {Record} + */ +export const CLAUDE_MODELS = { + CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", + CLAUDE_3_OPUS: "claude-3-opus-20240229", + CLAUDE_3_SONNET: "claude-3-sonnet-20240229", + CLAUDE_3_HAIKU: "claude-3-haiku-20240307", +} + +/** + * Map of Cohere model identifiers to their API names + * @type {Record} + */ +export const COHERE_MODELS = { + COMMAND_R: "command-r", // Standard Command model + COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model +} + +/** + * Map of Gemini model identifiers to their API names + * @type {Record} + */ +export const GEMINI_MODELS = { + GEMINI_1_5_FLASH: "gemini-1.5-flash", + // GEMINI_1_5_PRO: "gemini-1.5-pro", + GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", +} + +/** + * Map of Mistral model identifiers to their API names + * @type {Record} + */ +export const MISTRAL_MODELS = { + MIXTRAL_8x7b: "open-mixtral-8x7b", + MIXTRAL_8x22b: "open-mixtral-8x22b", + MISTRAL_LARGE: 
"mistral-large-latest", + MISTRAL_NEMO: "open-mistral-nemo" +} + +/** + * Map of OctoAI model identifiers to their API names + * @type {Record} + */ +export const OCTO_MODELS = { + LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", + LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", + LLAMA_3_1_405B: "meta-llama-3.1-405b-instruct", + MISTRAL_7B: "mistral-7b-instruct", + MIXTRAL_8X_7B: "mixtral-8x7b-instruct", + NOUS_HERMES_MIXTRAL_8X_7B: "nous-hermes-2-mixtral-8x7b-dpo", + WIZARD_2_8X_22B: "wizardlm-2-8x22b", +} + +/** + * Map of local model identifiers to their filenames and URLs + * @type {Record} + */ +export const LLAMA_MODELS = { + QWEN_2_5_3B: { + filename: "qwen2.5-3b-instruct-q6_k.gguf", + url: "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q6_k.gguf" + }, + PHI_3_5: { + filename: "Phi-3.5-mini-instruct-Q6_K.gguf", + url: "https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q6_K.gguf" + }, + LLAMA_3_2_1B: { + filename: "Llama-3.2-1B.i1-Q6_K.gguf", + url: "https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" + }, + GEMMA_2_2B: { + filename: "gemma-2-2b-it-Q6_K.gguf", + url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" + } +} + +/** + * Map of model identifiers to their corresponding names in Ollama + * @type {Record} + */ +export const OLLAMA_MODELS = { + LLAMA_3_2_1B: 'llama3.2:1b', + LLAMA_3_2_3B: 'llama3.2:3b', + GEMMA_2_2B: 'gemma2:2b', + PHI_3_5: 'phi3.5:3.8b', + QWEN_2_5_1B: 'qwen2.5:1.5b', + QWEN_2_5_3B: 'qwen2.5:3b', +} \ No newline at end of file diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index 30efc3c..2fdd095 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -4,8 +4,8 @@ import { readFile, writeFile } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { existsSync } from 'node:fs' -import { WHISPER_MODELS } from '../types.js' -import { log, success, wait } from '../types.js' +import { WHISPER_MODELS } from '../models.js' +import { log, success, wait, opts } from '../types.js' const execPromise = promisify(exec) diff --git a/src/transcription/whisperDocker.js b/src/transcription/whisperDocker.js index a626ca4..e98b869 100644 --- a/src/transcription/whisperDocker.js +++ b/src/transcription/whisperDocker.js @@ -3,9 +3,9 @@ import { readFile, writeFile } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' -import { basename, join } from 'node:path' -import { WHISPER_MODELS } from '../types.js' -import { log, wait } from '../types.js' +import { join } from 'node:path' +import { WHISPER_MODELS } from '../models.js' +import { log, wait, opts } from '../types.js' const execPromise = promisify(exec) @@ -23,7 +23,7 @@ export async function callWhisperDocker(finalPath, options) { // log(options) try { // Get the whisper model from options or use 'base' as default - const whisperModel = options.whisper || 'base' + const whisperModel = options.whisperDocker || 'base' if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) @@ -31,13 +31,10 @@ export async function callWhisperDocker(finalPath, options) { // Get the model ggml file name const modelGGMLName = WHISPER_MODELS[whisperModel] - - log(wait(` - whisperModel: ${whisperModel}`)) - log(wait(` - modelGGMLName: ${modelGGMLName}`)) - const CONTAINER_NAME = 
'autoshow-whisper-1' const modelPathContainer = `/app/models/${modelGGMLName}` - + log(wait(` - whisperModel: ${whisperModel}`)) + log(wait(` - modelGGMLName: ${modelGGMLName}`)) log(wait(` - CONTAINER_NAME: ${CONTAINER_NAME}`)) log(wait(` - modelPathContainer: ${modelPathContainer}`)) @@ -47,12 +44,11 @@ export async function callWhisperDocker(finalPath, options) { // Ensure model is downloaded await execPromise(`docker exec ${CONTAINER_NAME} test -f ${modelPathContainer}`) - .catch(() => execPromise(`docker exec ${CONTAINER_NAME} /app/models/download-ggml-model.sh ${downloadModelName}`)) + .catch(() => execPromise(`docker exec ${CONTAINER_NAME} /app/models/download-ggml-model.sh ${whisperModel}`)) // Run transcription - const fileName = basename(finalPath) await execPromise( - `docker exec ${CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(`/app/content`, `${fileName}.wav`)} -of ${join(`/app/content`, fileName)} --output-lrc` + `docker exec ${CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(`/app`, `${finalPath}.wav`)} -of ${join(`/app`, finalPath)} --output-lrc` ) log(wait(`\n Transcript LRC file successfully completed...\n - ${finalPath}.lrc\n`)) diff --git a/src/types.js b/src/types.js index 75b0cb4..7de1b75 100644 --- a/src/types.js +++ b/src/types.js @@ -171,23 +171,6 @@ export const log = console.log * - large-v2: Large multilingual model version 2. */ -/** - * Define available Whisper models - * @type {Record} - */ -export const WHISPER_MODELS = { - 'tiny': 'ggml-tiny.bin', - 'tiny.en': 'ggml-tiny.en.bin', - 'base': 'ggml-base.bin', - 'base.en': 'ggml-base.en.bin', - 'small': 'ggml-small.bin', - 'small.en': 'ggml-small.en.bin', - 'medium': 'ggml-medium.bin', - 'medium.en': 'ggml-medium.en.bin', - 'large-v1': 'ggml-large-v1.bin', - 'large-v2': 'ggml-large-v2.bin', - } - /** * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. * @typedef {Object} PromptSection @@ -246,108 +229,6 @@ export const WHISPER_MODELS = { * @typedef {'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B'} OllamaModelType - Define local model with Ollama. 
*/ -/** - * Map of ChatGPT model identifiers to their API names - * @type {Record} - */ -export const GPT_MODELS = { - GPT_4o_MINI: "gpt-4o-mini", - GPT_4o: "gpt-4o", - GPT_4_TURBO: "gpt-4-turbo", - GPT_4: "gpt-4", -} - -/** - * Map of Claude model identifiers to their API names - * @type {Record} - */ -export const CLAUDE_MODELS = { - CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", - CLAUDE_3_OPUS: "claude-3-opus-20240229", - CLAUDE_3_SONNET: "claude-3-sonnet-20240229", - CLAUDE_3_HAIKU: "claude-3-haiku-20240307", -} - -/** - * Map of Cohere model identifiers to their API names - * @type {Record} - */ -export const COHERE_MODELS = { - COMMAND_R: "command-r", // Standard Command model - COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model -} - -/** - * Map of Gemini model identifiers to their API names - * @type {Record} - */ -export const GEMINI_MODELS = { - GEMINI_1_5_FLASH: "gemini-1.5-flash", - // GEMINI_1_5_PRO: "gemini-1.5-pro", - GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", -} - -/** - * Map of Mistral model identifiers to their API names - * @type {Record} - */ -export const MISTRAL_MODELS = { - MIXTRAL_8x7b: "open-mixtral-8x7b", - MIXTRAL_8x22b: "open-mixtral-8x22b", - MISTRAL_LARGE: "mistral-large-latest", - MISTRAL_NEMO: "open-mistral-nemo" -} - -/** - * Map of OctoAI model identifiers to their API names - * @type {Record} - */ -export const OCTO_MODELS = { - LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", - LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", - LLAMA_3_1_405B: "meta-llama-3.1-405b-instruct", - MISTRAL_7B: "mistral-7b-instruct", - MIXTRAL_8X_7B: "mixtral-8x7b-instruct", - NOUS_HERMES_MIXTRAL_8X_7B: "nous-hermes-2-mixtral-8x7b-dpo", - WIZARD_2_8X_22B: "wizardlm-2-8x22b", -} - -/** - * Map of local model identifiers to their filenames and URLs - * @type {Record} - */ -export const LLAMA_MODELS = { - QWEN_2_5_3B: { - filename: "qwen2.5-3b-instruct-q6_k.gguf", - url: "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q6_k.gguf" - }, - PHI_3_5: { - filename: "Phi-3.5-mini-instruct-Q6_K.gguf", - url: "https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/resolve/main/Phi-3.5-mini-instruct-Q6_K.gguf" - }, - LLAMA_3_2_1B: { - filename: "Llama-3.2-1B.i1-Q6_K.gguf", - url: "https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" - }, - GEMMA_2_2B: { - filename: "gemma-2-2b-it-Q6_K.gguf", - url: "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q6_K.gguf" - } -} - -/** - * Map of model identifiers to their corresponding names in Ollama - * @type {Record} - */ -export const OLLAMA_MODELS = { - LLAMA_3_2_1B: 'llama3.2:1b', - LLAMA_3_2_3B: 'llama3.2:3b', - GEMMA_2_2B: 'gemma2:2b', - PHI_3_5: 'phi3.5:3.8b', - QWEN_2_5_1B: 'qwen2.5:1.5b', - QWEN_2_5_3B: 'qwen2.5:3b', -} - /** * Represents the function signature for cleaning up temporary files. 
* @callback CleanUpFunction From b5e166e8e6e19849397e25f3fb92d355d5c84d8a Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sat, 5 Oct 2024 20:07:22 -0500 Subject: [PATCH 6/9] TYPESCRIPT BABY AWWWWWW YEAH --- .gitignore | 3 +- package.json | 42 ++- src/{autoshow.js => autoshow.ts} | 47 ++- .../{processFile.js => processFile.ts} | 18 +- ...{processPlaylist.js => processPlaylist.ts} | 29 +- src/commands/{processRSS.js => processRSS.ts} | 40 +- .../{processURLs.js => processURLs.ts} | 27 +- .../{processVideo.js => processVideo.ts} | 38 +- src/inquirer.js | 200 ---------- src/interactive.ts | 188 ++++++++++ src/llms/{chatgpt.js => chatgpt.ts} | 43 ++- src/llms/claude.js | 57 --- src/llms/claude.ts | 91 +++++ src/llms/{cohere.js => cohere.ts} | 43 ++- src/llms/{gemini.js => gemini.ts} | 37 +- src/llms/llama.js | 94 ----- src/llms/llama.ts | 75 ++++ src/llms/{mistral.js => mistral.ts} | 50 ++- src/llms/{octo.js => octo.ts} | 31 +- src/llms/{ollama.js => ollama.ts} | 33 +- src/llms/{prompt.js => prompt.ts} | 10 +- src/{models.js => models.ts} | 22 +- .../{assembly.js => assembly.ts} | 13 +- src/transcription/deepgram.js | 56 --- src/transcription/deepgram.ts | 73 ++++ src/transcription/{whisper.js => whisper.ts} | 11 +- .../{whisperDocker.js => whisperDocker.ts} | 11 +- src/types.js | 237 ------------ src/types.ts | 345 ++++++++++++++++++ ...ckDependencies.js => checkDependencies.ts} | 8 +- .../{cleanUpFiles.js => cleanUpFiles.ts} | 9 +- .../{downloadAudio.js => downloadAudio.ts} | 20 +- ...enerateMarkdown.js => generateMarkdown.ts} | 51 ++- src/utils/{runLLM.js => runLLM.ts} | 24 +- ...unTranscription.js => runTranscription.ts} | 35 +- tsconfig.json | 19 + 36 files changed, 1174 insertions(+), 956 deletions(-) rename src/{autoshow.js => autoshow.ts} (82%) rename src/commands/{processFile.js => processFile.ts} (73%) rename src/commands/{processPlaylist.js => processPlaylist.ts} (73%) rename src/commands/{processRSS.js => processRSS.ts} (86%) rename src/commands/{processURLs.js => processURLs.ts} (73%) rename src/commands/{processVideo.js => processVideo.ts} (50%) delete mode 100644 src/inquirer.js create mode 100644 src/interactive.ts rename src/llms/{chatgpt.js => chatgpt.ts} (53%) delete mode 100644 src/llms/claude.js create mode 100644 src/llms/claude.ts rename src/llms/{cohere.js => cohere.ts} (53%) rename src/llms/{gemini.js => gemini.ts} (60%) delete mode 100644 src/llms/llama.js create mode 100644 src/llms/llama.ts rename src/llms/{mistral.js => mistral.ts} (53%) rename src/llms/{octo.js => octo.ts} (66%) rename src/llms/{ollama.js => ollama.ts} (60%) rename src/llms/{prompt.js => prompt.ts} (97%) rename src/{models.js => models.ts} (81%) rename src/transcription/{assembly.js => assembly.ts} (90%) delete mode 100644 src/transcription/deepgram.js create mode 100644 src/transcription/deepgram.ts rename src/transcription/{whisper.js => whisper.ts} (90%) rename src/transcription/{whisperDocker.js => whisperDocker.ts} (90%) delete mode 100644 src/types.js create mode 100644 src/types.ts rename src/utils/{checkDependencies.js => checkDependencies.ts} (64%) rename src/utils/{cleanUpFiles.js => cleanUpFiles.ts} (70%) rename src/utils/{downloadAudio.js => downloadAudio.ts} (82%) rename src/utils/{generateMarkdown.js => generateMarkdown.ts} (80%) rename src/utils/{runLLM.js => runLLM.ts} (84%) rename src/utils/{runTranscription.js => runTranscription.ts} (69%) create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore index 7f5f216..8e434e8 
100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ src/llms/models build deno.lock out -types \ No newline at end of file +types +dist \ No newline at end of file diff --git a/package.json b/package.json index 8276586..9d02fd0 100644 --- a/package.json +++ b/package.json @@ -12,28 +12,29 @@ "Deepgram", "AssemblyAI" ], - "main": "autoshow.js", + "main": "dist/autoshow.js", "type": "module", "bin": { - "autoshow": "./autoshow.js" + "autoshow": "./dist/autoshow.js" }, "scripts": { "setup": "bash ./setup.sh", - "autoshow": "node --env-file=.env --no-warnings src/autoshow.js", - "as": "node --env-file=.env --no-warnings src/autoshow.js", - "bun-as": "bun --env-file=.env --no-warnings src/autoshow.js", - "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.js", - "v": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --video", - "u": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --urls", - "p": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --playlist", - "f": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --file", - "r": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --rss", - "last3": "node --env-file=.env --no-warnings src/autoshow.js --whisper large-v2 --last 3 --rss", - "serve": "node --env-file=.env --no-warnings --watch packages/server/index.js", - "fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", - "fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", - "test-local": "node --test test/local.test.js", - "test-all": "node --test test/all.test.js" + "build": "npx tsc", + "autoshow": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js", + "as": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js", + "bun-as": "npm run build && bun --env-file=.env --no-warnings dist/autoshow.js", + "deno-as": "npm run build && deno run --allow-sys --allow-read --allow-run --allow-write --allow-env dist/autoshow.js", + "v": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --video", + "u": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --urls", + "p": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --playlist", + "f": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --file", + "r": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --rss", + "last3": "npm run build && node --env-file=.env --no-warnings dist/autoshow.js --whisper large-v2 --last 3 --rss", + "serve": "npm run build && node --env-file=.env --no-warnings --watch packages/server/index.js", + "fetch-local": "npm run build && node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", + "fetch-all": "npm run build && node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", + "test-local": "npm run build && node --test test/local.test.js", + "test-all": "npm run build && node --test test/all.test.js" }, "dependencies": { "@anthropic-ai/sdk": "^0.26.0", @@ -42,6 +43,7 @@ "@google/generative-ai": "^0.17.1", "@mistralai/mistralai": "^1.0.2", "@octoai/sdk": "^1.5.1", + "@types/inquirer": "^9.0.7", "assemblyai": "^4.6.1", "chalk": "^5.3.0", "cohere-ai": "^7.12.0", @@ -53,6 +55,10 @@ "inquirer": "^10.2.2", "node-llama-cpp": "^3.0.0-beta.44", "ollama": "^0.5.9", 
- "openai": "^4.55.7" + "openai": "^4.55.7", + "typescript": "^5.6.2" + }, + "devDependencies": { + "typescript": "^5.6.2" } } diff --git a/src/autoshow.js b/src/autoshow.ts similarity index 82% rename from src/autoshow.js rename to src/autoshow.ts index a48a73c..8651fae 100644 --- a/src/autoshow.js +++ b/src/autoshow.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node -// src/autoshow.js +// src/autoshow.ts /** * Autoshow CLI Application @@ -13,7 +13,7 @@ */ import { Command } from 'commander' -import { handleInteractivePrompt } from './inquirer.js' +import { handleInteractivePrompt } from './interactive.js' import { processVideo } from './commands/processVideo.js' import { processPlaylist } from './commands/processPlaylist.js' import { processURLs } from './commands/processURLs.js' @@ -21,8 +21,7 @@ import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv, exit } from 'node:process' import { log, opts } from './types.js' - -/** @import { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' */ +import type { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' // Initialize the command-line interface const program = new Command() @@ -78,16 +77,12 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues * @param {ProcessingOptions} options - The command-line options provided by the user. * @returns {Promise} */ -program.action(async (options) => { +program.action(async (options: ProcessingOptions) => { log(opts(`Options received:\n`)) log(options) log(``) - /** - * Map actions to their respective handler functions - * @type {Object.} - */ - const PROCESS_HANDLERS = { + const PROCESS_HANDLERS: Record = { video: processVideo, playlist: processPlaylist, urls: processURLs, @@ -99,14 +94,10 @@ program.action(async (options) => { const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] - // Determine if no action options were provided const { video, playlist, urls, file, rss, interactive } = options const noActionProvided = [video, playlist, urls, file, rss].every((opt) => !opt) - // If interactive mode is selected - if (interactive) { - options = await handleInteractivePrompt(options) - } else if (noActionProvided) { + if (interactive || noActionProvided) { options = await handleInteractivePrompt(options) } @@ -115,34 +106,42 @@ program.action(async (options) => { options.item = [options.item] } - const actionsProvided = ACTION_OPTIONS.filter((opt) => options[opt]) + const actionsProvided = ACTION_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) if (actionsProvided.length > 1) { console.error(`Error: Multiple input options provided (${actionsProvided.join(', ')}). Please specify only one input option.`) exit(1) } - const selectedLLMs = LLM_OPTIONS.filter((opt) => options[opt]) + const selectedLLMs = LLM_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) if (selectedLLMs.length > 1) { console.error(`Error: Multiple LLM options provided (${selectedLLMs.join(', ')}). 
Please specify only one LLM option.`) exit(1) } - const llmServices = /** @type {LLMServices | undefined} */ (selectedLLMs[0]) + const llmServices = selectedLLMs[0] as LLMServices | undefined - const selectedTranscripts = TRANSCRIPT_OPTIONS.filter((opt) => options[opt]) + const selectedTranscripts = TRANSCRIPT_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) if (selectedTranscripts.length > 1) { console.error(`Error: Multiple transcription options provided (${selectedTranscripts.join(', ')}). Please specify only one transcription option.`) exit(1) } - const transcriptServices = /** @type {TranscriptServices | undefined} */ (selectedTranscripts[0]) + const transcriptServices = selectedTranscripts[0] as TranscriptServices | undefined + + // Set default transcription service if not provided + const finalTranscriptServices: TranscriptServices = transcriptServices || 'whisper' + + // Set default Whisper model if not provided + if (finalTranscriptServices === 'whisper' && !options.whisper) { + options.whisper = 'base' + } // Execute the appropriate handler based on the action for (const [key, handler] of Object.entries(PROCESS_HANDLERS)) { - if (options[key]) { + if (options[key as keyof ProcessingOptions]) { try { - await handler(options[key], llmServices, transcriptServices, options) - exit(0) // Successful execution + await handler(options, options[key as keyof ProcessingOptions] as string, llmServices, finalTranscriptServices) + exit(0) } catch (error) { - console.error(`Error processing ${key}:`, error.message) + console.error(`Error processing ${key}:`, (error as Error).message) exit(1) } } diff --git a/src/commands/processFile.js b/src/commands/processFile.ts similarity index 73% rename from src/commands/processFile.js rename to src/commands/processFile.ts index 5043f7e..16b9eb1 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.ts @@ -1,4 +1,4 @@ -// src/commands/processFile.js +// src/commands/processFile.ts import { generateFileMarkdown } from '../utils/generateMarkdown.js' import { downloadFileAudio } from '../utils/downloadAudio.js' @@ -6,8 +6,7 @@ import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' import { log, final } from '../types.js' - -/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** * Main function to process a local audio or video file. @@ -17,7 +16,12 @@ import { log, final } from '../types.js' * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ -export async function processFile(filePath, llmServices, transcriptServices, options) { +export async function processFile( + options: ProcessingOptions, + filePath: string, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { // log(opts(`Options received:\n`)) // log(options) try { @@ -28,10 +32,10 @@ export async function processFile(filePath, llmServices, transcriptServices, opt await downloadFileAudio(filePath, filename) // Run transcription on the file - await runTranscription(finalPath, frontMatter, transcriptServices, options) + await runTranscription(options, finalPath, frontMatter, transcriptServices) // Process the transcript with the selected Language Model - await runLLM(finalPath, frontMatter, llmServices, options) + await runLLM(options, finalPath, frontMatter, llmServices) // Clean up temporary files if the noCleanUp option is not set if (!options.noCleanUp) { @@ -40,7 +44,7 @@ export async function processFile(filePath, llmServices, transcriptServices, opt log(final('\nLocal file processing completed successfully.\n')) } catch (error) { - console.error(`Error processing file: ${error.message}`) + console.error(`Error processing file: ${(error as Error).message}`) process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.ts similarity index 73% rename from src/commands/processPlaylist.js rename to src/commands/processPlaylist.ts index 8eaa32d..853e58c 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.ts @@ -1,26 +1,29 @@ -// src/commands/processPlaylist.js +// src/commands/processPlaylist.ts import { writeFile } from 'node:fs/promises' -import { processVideo } from './processVideo.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' +import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' import { log, final, wait } from '../types.js' - -/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' const execFilePromise = promisify(execFile) /** * Main function to process a YouTube playlist. - * @param {string} playlistUrl - The URL of the YouTube playlist to process. - * @param {LLMServices} [llmServices] - The selected Language Model option. - * @param {TranscriptServices} [transcriptServices] - The transcription service to use. - * @param {ProcessingOptions} options - Additional options for processing. - * @returns {Promise} + * @param playlistUrl - The URL of the YouTube playlist to process. + * @param llmServices - The selected Language Model option. + * @param transcriptServices - The transcription service to use. + * @param options - Additional options for processing. 
*/ -export async function processPlaylist(playlistUrl, llmServices, transcriptServices, options) { +export async function processPlaylist( + options: ProcessingOptions, + playlistUrl: string, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { // log(opts(`Options received:\n`)) // log(options) try { @@ -66,16 +69,16 @@ export async function processPlaylist(playlistUrl, llmServices, transcriptServic for (const [index, url] of urls.entries()) { log(wait(`\n Processing video ${index + 1}/${urls.length}:\n - ${url}\n`)) try { - await processVideo(url, llmServices, transcriptServices, options) + await processVideo(options, url, llmServices, transcriptServices) } catch (error) { - console.error(`Error processing video ${url}: ${error.message}`) + console.error(`Error processing video ${url}: ${(error as Error).message}`) // Continue processing the next video } } log(final('\nPlaylist processing completed successfully.\n')) } catch (error) { - console.error(`Error processing playlist: ${error.message}`) + console.error(`Error processing playlist: ${(error as Error).message}`) process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processRSS.js b/src/commands/processRSS.ts similarity index 86% rename from src/commands/processRSS.js rename to src/commands/processRSS.ts index 58a46cc..ca55d25 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.ts @@ -1,4 +1,4 @@ -// src/commands/processRSS.js +// src/commands/processRSS.ts import { writeFile } from 'node:fs/promises' import { XMLParser } from 'fast-xml-parser' @@ -9,7 +9,7 @@ import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' import { log, final, wait } from '../types.js' -/** @import { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' */ +import type { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' // Initialize XML parser with specific options const parser = new XMLParser({ @@ -26,7 +26,12 @@ const parser = new XMLParser({ * @param {ProcessingOptions} options - Additional options for processing. 
* @returns {Promise} */ -async function processItem(item, transcriptServices, llmServices, options) { +async function processItem( + options: ProcessingOptions, + item: RSSItem, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { // log(opts(`\nItem parameter passed to processItem:\n`)) // log(item) try { @@ -37,10 +42,10 @@ async function processItem(item, transcriptServices, llmServices, options) { await downloadAudio(item.showLink, filename) // Run transcription - await runTranscription(finalPath, frontMatter, transcriptServices, options) + await runTranscription(options, finalPath, frontMatter, transcriptServices) // Process with Language Model - await runLLM(finalPath, frontMatter, llmServices, options) + await runLLM(options, finalPath, frontMatter, llmServices) // Clean up temporary files if necessary if (!options.noCleanUp) { @@ -49,7 +54,7 @@ async function processItem(item, transcriptServices, llmServices, options) { log(final(`\nItem processing completed successfully: ${item.title}`)) } catch (error) { - console.error(`Error processing item ${item.title}: ${error.message}`) + console.error(`Error processing item ${item.title}: ${(error as Error).message}`) // Continue processing the next item } } @@ -62,7 +67,12 @@ async function processItem(item, transcriptServices, llmServices, options) { * @param {ProcessingOptions} options - Additional options for processing. * @returns {Promise} */ -export async function processRSS(rssUrl, llmServices, transcriptServices, options) { +export async function processRSS( + options: ProcessingOptions, + rssUrl: string, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { // log(opts(`Options received:\n`)) // log(options) try { @@ -112,7 +122,7 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option controller.abort() }, 10000) // 10 seconds timeout - let response + let response: Response try { response = await fetch(rssUrl, { method: 'GET', @@ -123,10 +133,10 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option }) clearTimeout(timeout) } catch (error) { - if (error.name === 'AbortError') { + if ((error as Error).name === 'AbortError') { console.error('Error: Fetch request timed out.') } else { - console.error(`Error fetching RSS feed: ${error.message}`) + console.error(`Error fetching RSS feed: ${(error as Error).message}`) } process.exit(1) // Exit with an error code } @@ -156,7 +166,7 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option const feedItemsArray = Array.isArray(feedItems) ? 
feedItems : [feedItems] // Filter and map feed items - const items = feedItemsArray + const items: RSSItem[] = feedItemsArray .filter((item) => { // Ensure the item has an enclosure with a valid type if (!item.enclosure || !item.enclosure.type) return false @@ -188,10 +198,10 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option return } - let itemsToProcess = [] + let itemsToProcess: RSSItem[] = [] if (options.item && options.item.length > 0) { // Find the items matching the provided audio URLs - const matchedItems = items.filter((item) => options.item.includes(item.showLink)) + const matchedItems = items.filter((item) => options.item!.includes(item.showLink)) if (matchedItems.length === 0) { console.error('Error: No matching items found for the provided URLs.') process.exit(1) // Exit with an error code @@ -215,12 +225,12 @@ export async function processRSS(rssUrl, llmServices, transcriptServices, option // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { log(wait(` Processing item ${index + 1}/${itemsToProcess.length}:\n - ${item.title}\n`)) - await processItem(item, transcriptServices, llmServices, options) + await processItem(options, item, llmServices, transcriptServices) } log(final('\nRSS feed processing completed successfully.\n')) } catch (error) { - console.error(`Error processing RSS feed: ${error.message}`) + console.error(`Error processing RSS feed: ${(error as Error).message}`) process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processURLs.js b/src/commands/processURLs.ts similarity index 73% rename from src/commands/processURLs.js rename to src/commands/processURLs.ts index d222854..f740e86 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.ts @@ -1,4 +1,4 @@ -// src/commands/processURLs.js +// src/commands/processURLs.ts import { readFile, writeFile } from 'node:fs/promises' import { resolve } from 'node:path' @@ -6,18 +6,21 @@ import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/generateMarkdown.js' import { checkDependencies } from '../utils/checkDependencies.js' import { log, final, wait } from '../types.js' - -/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** * Main function to process URLs from a file. - * @param {string} filePath - The path to the file containing URLs. - * @param {LLMServices} [llmServices] - The selected Language Model option. - * @param {TranscriptServices} [transcriptServices] - The transcription service to use. - * @param {ProcessingOptions} options - Additional options for processing. - * @returns {Promise} + * @param filePath - The path to the file containing URLs. + * @param llmServices - The selected Language Model option. + * @param transcriptServices - The transcription service to use. + * @param options - Additional options for processing. 
*/ -export async function processURLs(filePath, llmServices, transcriptServices, options) { +export async function processURLs( + options: ProcessingOptions, + filePath: string, + llmServices?: LLMServices, + transcriptServices?: TranscriptServices +): Promise { // log(opts(`Options received:\n`)) // log(options) try { @@ -57,16 +60,16 @@ export async function processURLs(filePath, llmServices, transcriptServices, opt for (const [index, url] of urls.entries()) { log(wait(`\n Processing URL ${index + 1}/${urls.length}:\n - ${url}\n`)) try { - await processVideo(url, llmServices, transcriptServices, options) + await processVideo(options, url, llmServices, transcriptServices) } catch (error) { - console.error(`Error processing URL ${url}: ${error.message}`) + console.error(`Error processing URL ${url}: ${(error as Error).message}`) // Continue processing the next URL } } log(final('\nURL file processing completed successfully.\n')) } catch (error) { - console.error(`Error reading or processing file ${filePath}: ${error.message}`) + console.error(`Error reading or processing file ${filePath}: ${(error as Error).message}`) process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processVideo.js b/src/commands/processVideo.ts similarity index 50% rename from src/commands/processVideo.js rename to src/commands/processVideo.ts index 63f5682..f106f2d 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.ts @@ -1,4 +1,4 @@ -// src/commands/processVideo.js +// src/commands/processVideo.ts import { checkDependencies } from '../utils/checkDependencies.js' import { generateMarkdown } from '../utils/generateMarkdown.js' @@ -7,20 +7,22 @@ import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' import { log, final } from '../types.js' - -/** @import { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** * Main function to process a single video. - * @param {string} url - The URL of the video to process. - * @param {LLMServices} [llmServices] - The selected Language Model option. - * @param {TranscriptServices} [transcriptServices] - The transcription service to use. - * @param {ProcessingOptions} options - Additional options for processing. - * @returns {Promise} + * @param url - The URL of the video to process. + * @param llmServices - The selected Language Model option. + * @param transcriptServices - The transcription service to use. + * @param options - Additional options for processing. + * @returns A promise that resolves when processing is complete. 
*/ -export async function processVideo(url, llmServices, transcriptServices, options) { - // log(opts(`\nOptions passed to processVideo:\n`)) - // log(options) +export async function processVideo( + options: ProcessingOptions, + url: string, + llmServices?: LLMServices, // Make this optional + transcriptServices?: TranscriptServices // Make this optional +): Promise { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -31,11 +33,15 @@ export async function processVideo(url, llmServices, transcriptServices, options // Download audio from the video await downloadAudio(url, filename) - // Run transcription on the audio - await runTranscription(finalPath, frontMatter, transcriptServices, options) + // Run transcription on the audio if transcriptServices is defined + if (transcriptServices) { + await runTranscription(options, finalPath, frontMatter, transcriptServices) + } - // Process the transcript with the selected Language Model - await runLLM(finalPath, frontMatter, llmServices, options) + // Process the transcript with the selected Language Model if llmServices is defined + if (llmServices) { + await runLLM(options, finalPath, frontMatter, llmServices) + } // Clean up temporary files if the noCleanUp option is not set if (!options.noCleanUp) { @@ -45,7 +51,7 @@ export async function processVideo(url, llmServices, transcriptServices, options log(final('\nVideo processing completed successfully.\n')) } catch (error) { // Log any errors that occur during video processing - console.error('Error processing video:', error.message) + console.error('Error processing video:', (error as Error).message) throw error // Re-throw to be handled by caller } } \ No newline at end of file diff --git a/src/inquirer.js b/src/inquirer.js deleted file mode 100644 index 51aa6de..0000000 --- a/src/inquirer.js +++ /dev/null @@ -1,200 +0,0 @@ -// src/inquirer.js - -import inquirer from 'inquirer' -import { log } from './types.js' - -/** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, WhisperModelType } from './types.js' */ - -/** - * Interactive prompts using inquirer - * @type {InquirerQuestions} - */ -const INQUIRER_PROMPT = [ - { - type: 'list', - name: 'action', - message: 'What would you like to process?', - choices: [ - { name: 'Single YouTube Video', value: 'video' }, - { name: 'YouTube Playlist', value: 'playlist' }, - { name: 'List of URLs from File', value: 'urls' }, - { name: 'Local Audio/Video File', value: 'file' }, - { name: 'Podcast RSS Feed', value: 'rss' }, - ], - }, - { - type: 'input', - name: 'video', - message: 'Enter the YouTube video URL:', - when: (answers) => answers.action === 'video', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'input', - name: 'playlist', - message: 'Enter the YouTube playlist URL:', - when: (answers) => answers.action === 'playlist', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'input', - name: 'urls', - message: 'Enter the file path containing URLs:', - when: (answers) => answers.action === 'urls', - validate: (input) => (input ? true : 'Please enter a valid file path.'), - }, - { - type: 'input', - name: 'file', - message: 'Enter the local audio/video file path:', - when: (answers) => answers.action === 'file', - validate: (input) => (input ? 
true : 'Please enter a valid file path.'), - }, - { - type: 'input', - name: 'rss', - message: 'Enter the podcast RSS feed URL:', - when: (answers) => answers.action === 'rss', - validate: (input) => (input ? true : 'Please enter a valid URL.'), - }, - { - type: 'confirm', - name: 'specifyItem', - message: 'Do you want to process specific episodes by providing their audio URLs?', - when: (answers) => answers.action === 'rss', - default: false, - }, - { - type: 'input', - name: 'item', - message: 'Enter the audio URLs of the episodes (separated by commas):', - when: (answers) => answers.action === 'rss' && answers.specifyItem, - validate: (input) => (input ? true : 'Please enter at least one valid audio URL.'), - }, - { - type: 'list', - name: 'llmServices', - message: 'Select the Language Model (LLM) you want to use:', - choices: [ - { name: 'Skip LLM Processing', value: null }, - { name: 'node-llama-cpp (local inference)', value: 'llama' }, - { name: 'Ollama (local inference)', value: 'ollama' }, - { name: 'OpenAI ChatGPT', value: 'chatgpt' }, - { name: 'Anthropic Claude', value: 'claude' }, - { name: 'Google Gemini', value: 'gemini' }, - { name: 'Cohere', value: 'cohere' }, - { name: 'Mistral', value: 'mistral' }, - { name: 'OctoAI', value: 'octo' }, - ], - }, - { - type: 'list', - name: 'llamaModel', - message: 'Select the LLAMA model you want to use:', - choices: [ - { name: 'LLAMA 3 8B Q4 Model', value: 'LLAMA_3_1_8B_Q4_MODEL' }, - { name: 'LLAMA 3 8B Q6 Model', value: 'LLAMA_3_1_8B_Q6_MODEL' }, - { name: 'GEMMA 2 2B Q4 Model', value: 'GEMMA_2_2B_Q4_MODEL' }, - { name: 'GEMMA 2 2B Q6 Model', value: 'GEMMA_2_2B_Q6_MODEL' }, - ], - when: (answers) => answers.llmServices === 'llama', - }, - { - type: 'list', - name: 'transcriptServices', - message: 'Select the transcription service you want to use:', - choices: [ - { name: 'Whisper.cpp', value: 'whisper' }, - { name: 'Whisper.cpp (Docker)', value: 'whisperDocker' }, - { name: 'Deepgram', value: 'deepgram' }, - { name: 'AssemblyAI', value: 'assembly' }, - ], - }, - { - type: 'list', - name: 'whisperModel', - message: 'Select the Whisper model type:', - choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1', 'large-v2'], - when: (answers) => answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker', - default: 'large-v2', - }, - { - type: 'confirm', - name: 'speakerLabels', - message: 'Do you want to use speaker labels?', - when: (answers) => answers.transcriptServices === 'assembly', - default: false, - }, - { - type: 'checkbox', - name: 'prompt', - message: 'Select the prompt sections to include:', - choices: [ - { name: 'Titles', value: 'titles' }, - { name: 'Summary', value: 'summary' }, - { name: 'Short Chapters', value: 'shortChapters' }, - { name: 'Medium Chapters', value: 'mediumChapters' }, - { name: 'Long Chapters', value: 'longChapters' }, - { name: 'Key Takeaways', value: 'takeaways' }, - { name: 'Questions', value: 'questions' }, - ], - default: ['summary', 'longChapters'], - }, - { - type: 'confirm', - name: 'noCleanUp', - message: 'Do you want to keep intermediary files after processing?', - default: false, - }, - { - type: 'confirm', - name: 'confirmAction', - message: 'Proceed with the above configuration?', - default: true, - }, -] - -/** - * Prompts the user for input if interactive mode is selected. - * @param {ProcessingOptions} options - The initial command-line options. - * @returns {Promise} - The updated options after user input. 
- */ -export async function handleInteractivePrompt(options) { - /** @type {InquirerAnswers} */ - const answers = await inquirer.prompt(INQUIRER_PROMPT) - - // If user cancels the action - if (!answers.confirmAction) { - log('Operation cancelled.') - process.exit(0) - } - - options = { - ...options, - ...answers, - } - - // Handle LLM options - if (answers.llmServices) { - options[answers.llmServices] = answers.llmServices === 'llama' ? answers.llamaModel : true - } - - // Handle transcription options - if (answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker') { - options[answers.transcriptServices] = /** @type {WhisperModelType} */ (answers.whisperModel) - } else { - options[answers.transcriptServices] = true - } - - // Handle 'item' for RSS feed - if (answers.item && typeof answers.item === 'string') { - options.item = answers.item.split(',').map((url) => url.trim()) - } - - // Remove properties that are not options - delete options.action - delete options.specifyItem - delete options.confirmAction - - return options -} \ No newline at end of file diff --git a/src/interactive.ts b/src/interactive.ts new file mode 100644 index 0000000..5097093 --- /dev/null +++ b/src/interactive.ts @@ -0,0 +1,188 @@ +// src/inquirer.ts + +import inquirer from 'inquirer' +import type { ProcessingOptions, InquirerAnswers, WhisperModelType } from './types.js' +import { log } from './types.js' + +/** + * Prompts the user for input if interactive mode is selected. + * @param {ProcessingOptions} options - The initial command-line options. + * @returns {Promise} - The updated options after user input. + */ +export async function handleInteractivePrompt(options: ProcessingOptions): Promise { + const answers: InquirerAnswers = await inquirer.prompt([ + { + type: 'list', + name: 'action', + message: 'What would you like to process?', + choices: [ + { name: 'Single YouTube Video', value: 'video' }, + { name: 'YouTube Playlist', value: 'playlist' }, + { name: 'List of URLs from File', value: 'urls' }, + { name: 'Local Audio/Video File', value: 'file' }, + { name: 'Podcast RSS Feed', value: 'rss' }, + ], + }, + { + type: 'input', + name: 'video', + message: 'Enter the YouTube video URL:', + when: (answers: InquirerAnswers) => answers.action === 'video', + validate: (input: string) => (input ? true : 'Please enter a valid URL.'), + }, + { + type: 'input', + name: 'playlist', + message: 'Enter the YouTube playlist URL:', + when: (answers: InquirerAnswers) => answers.action === 'playlist', + validate: (input: string) => (input ? true : 'Please enter a valid URL.'), + }, + { + type: 'input', + name: 'urls', + message: 'Enter the file path containing URLs:', + when: (answers: InquirerAnswers) => answers.action === 'urls', + validate: (input: string) => (input ? true : 'Please enter a valid file path.'), + }, + { + type: 'input', + name: 'file', + message: 'Enter the local audio/video file path:', + when: (answers: InquirerAnswers) => answers.action === 'file', + validate: (input: string) => (input ? true : 'Please enter a valid file path.'), + }, + { + type: 'input', + name: 'rss', + message: 'Enter the podcast RSS feed URL:', + when: (answers: InquirerAnswers) => answers.action === 'rss', + validate: (input: string) => (input ? 
true : 'Please enter a valid URL.'), + }, + { + type: 'confirm', + name: 'specifyItem', + message: 'Do you want to process specific episodes by providing their audio URLs?', + when: (answers: InquirerAnswers) => answers.action === 'rss', + default: false, + }, + { + type: 'input', + name: 'item', + message: 'Enter the audio URLs of the episodes (separated by commas):', + when: (answers: InquirerAnswers) => answers.action === 'rss' && answers.specifyItem, + validate: (input: string) => (input ? true : 'Please enter at least one valid audio URL.'), + }, + { + type: 'list', + name: 'llmServices', + message: 'Select the Language Model (LLM) you want to use:', + choices: [ + { name: 'Skip LLM Processing', value: null }, + { name: 'node-llama-cpp (local inference)', value: 'llama' }, + { name: 'Ollama (local inference)', value: 'ollama' }, + { name: 'OpenAI ChatGPT', value: 'chatgpt' }, + { name: 'Anthropic Claude', value: 'claude' }, + { name: 'Google Gemini', value: 'gemini' }, + { name: 'Cohere', value: 'cohere' }, + { name: 'Mistral', value: 'mistral' }, + { name: 'OctoAI', value: 'octo' }, + ], + }, + { + type: 'list', + name: 'llama', + message: 'Select the LLAMA model you want to use:', + choices: [ + { name: 'LLAMA 3 8B Q4 Model', value: 'LLAMA_3_1_8B_Q4_MODEL' }, + { name: 'LLAMA 3 8B Q6 Model', value: 'LLAMA_3_1_8B_Q6_MODEL' }, + { name: 'GEMMA 2 2B Q4 Model', value: 'GEMMA_2_2B_Q4_MODEL' }, + { name: 'GEMMA 2 2B Q6 Model', value: 'GEMMA_2_2B_Q6_MODEL' }, + ], + when: (answers: InquirerAnswers) => answers.llmServices === 'llama', + }, + { + type: 'list', + name: 'transcriptServices', + message: 'Select the transcription service you want to use:', + choices: [ + { name: 'Whisper.cpp', value: 'whisper' }, + { name: 'Whisper.cpp (Docker)', value: 'whisperDocker' }, + { name: 'Deepgram', value: 'deepgram' }, + { name: 'AssemblyAI', value: 'assembly' }, + ], + }, + { + type: 'list', + name: 'whisperModel', + message: 'Select the Whisper model type:', + choices: ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1', 'large-v2'], + when: (answers: InquirerAnswers) => answers.transcriptServices === 'whisper' || answers.transcriptServices === 'whisperDocker', + default: 'large-v2', + }, + { + type: 'confirm', + name: 'speakerLabels', + message: 'Do you want to use speaker labels?', + when: (answers: InquirerAnswers) => answers.transcriptServices === 'assembly', + default: false, + }, + { + type: 'checkbox', + name: 'prompt', + message: 'Select the prompt sections to include:', + choices: [ + { name: 'Titles', value: 'titles' }, + { name: 'Summary', value: 'summary' }, + { name: 'Short Chapters', value: 'shortChapters' }, + { name: 'Medium Chapters', value: 'mediumChapters' }, + { name: 'Long Chapters', value: 'longChapters' }, + { name: 'Key Takeaways', value: 'takeaways' }, + { name: 'Questions', value: 'questions' }, + ], + default: ['summary', 'longChapters'], + }, + { + type: 'confirm', + name: 'noCleanUp', + message: 'Do you want to keep intermediary files after processing?', + default: false, + }, + { + type: 'confirm', + name: 'confirmAction', + message: 'Proceed with the above configuration?', + default: true, + }, + ]) + + // If user cancels the action + if (!answers.confirmAction) { + log('Operation cancelled.') + process.exit(0) + } + + options = { + ...options, + ...answers, + } as ProcessingOptions + + // Handle transcription options + if (answers.transcriptServices) { + if (answers.transcriptServices === 'whisper' || 
answers.transcriptServices === 'whisperDocker') { + options[answers.transcriptServices] = answers.whisperModel as WhisperModelType + } else { + options[answers.transcriptServices] = true + } + } + + // Handle 'item' for RSS feed + if (typeof answers.item === 'string') { + options.item = answers.item.split(',').map(item => item.trim()) + } + + // Remove unnecessary properties + const keysToRemove = ['action', 'specifyItem', 'confirmAction'] + keysToRemove.forEach(key => delete options[key as keyof typeof options]) + + return options +} \ No newline at end of file diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.ts similarity index 53% rename from src/llms/chatgpt.js rename to src/llms/chatgpt.ts index 475f9dc..f9d4973 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.ts @@ -1,4 +1,4 @@ -// src/llms/chatgpt.js +// src/llms/chatgpt.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' @@ -6,18 +6,21 @@ import { OpenAI } from 'openai' import { GPT_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, ChatGPTModelType } from '../types.js' */ +import type { LLMFunction, ChatGPTModelType } from '../types.js' -/** @type {LLMFunction} */ /** * Main function to call ChatGPT API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {ChatGPTModelType} [model='GPT_4o_MINI'] - The GPT model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during API call. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The GPT model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during API call. */ -export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o_MINI') { +export const callChatGPT: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'GPT_4o_MINI' +): Promise => { // Check for API key if (!env.OPENAI_API_KEY) { throw new Error('OPENAI_API_KEY environment variable is not set. 
Please set it to your OpenAI API key.') @@ -28,7 +31,7 @@ export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o try { // Select the actual model to use, defaulting to GPT_4o_MINI if not specified - const actualModel = GPT_MODELS[model] || GPT_MODELS.GPT_4o_MINI + const actualModel = GPT_MODELS[model as ChatGPTModelType] || GPT_MODELS.GPT_4o_MINI // Call the OpenAI chat completions API const response = await openai.chat.completions.create({ @@ -40,17 +43,29 @@ export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o // Destructure the response to get relevant information const { choices: [{ message: { content }, finish_reason }], // The generated content and finish reason - usage: { prompt_tokens, completion_tokens, total_tokens }, // Token usage information + usage, // Token usage information model: usedModel // The actual model used } = response // Write the generated content to the output file - await writeFile(tempPath, content) + if (content !== null) { + await writeFile(tempPath, content) + } else { + throw new Error('No content generated from the API') + } + log(wait(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`)) - log(wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)) + + // Check if usage information is available + if (usage) { + const { prompt_tokens, completion_tokens, total_tokens } = usage + log(wait(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`)) + } else { + log(wait(" - Token usage information not available")) + } } catch (error) { - console.error(`Error in callChatGPT: ${error.message}`) + console.error(`Error in callChatGPT: ${(error as Error).message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/claude.js b/src/llms/claude.js deleted file mode 100644 index b3ec479..0000000 --- a/src/llms/claude.js +++ /dev/null @@ -1,57 +0,0 @@ -// src/llms/claude.js - -import { writeFile } from 'node:fs/promises' -import { env } from 'node:process' -import { Anthropic } from '@anthropic-ai/sdk' -import { CLAUDE_MODELS } from '../models.js' -import { log, wait } from '../types.js' - -/** @import { LLMFunction, ClaudeModelType } from '../types.js' */ - -/** @type {LLMFunction} */ -/** - * Main function to call Claude API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {ClaudeModelType} [model='CLAUDE_3_HAIKU'] - The Claude model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during the API call. - */ -export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_3_HAIKU') { - // Check if the ANTHROPIC_API_KEY environment variable is set - if (!env.ANTHROPIC_API_KEY) { - throw new Error('ANTHROPIC_API_KEY environment variable is not set. 
Please set it to your Anthropic API key.') - } - - // Initialize the Anthropic client with the API key from environment variables - const anthropic = new Anthropic({ apiKey: env.ANTHROPIC_API_KEY }) - - try { - // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified - const actualModel = CLAUDE_MODELS[model] || CLAUDE_MODELS.CLAUDE_3_HAIKU - - // Call the Anthropic messages API to create a chat completion - const response = await anthropic.messages.create({ - model: actualModel, - max_tokens: 4000, // Maximum number of tokens in the response - messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content) - }) - - // Destructure the response to get relevant information - const { - content: [{ text }], // The generated text - model: usedModel, // The actual model used - usage: { input_tokens, output_tokens }, // Token usage information - stop_reason // Reason why the generation stopped - } = response - - // Write the generated text to the output file - await writeFile(tempPath, text) - log(wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)) - log(wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)) - - } catch (error) { - console.error(`Error in callClaude: ${error.message}`) - throw error // Re-throw the error for handling in the calling function - } -} \ No newline at end of file diff --git a/src/llms/claude.ts b/src/llms/claude.ts new file mode 100644 index 0000000..791fdb4 --- /dev/null +++ b/src/llms/claude.ts @@ -0,0 +1,91 @@ +// src/llms/claude.ts + +import { writeFile } from 'node:fs/promises' +import { env } from 'node:process' +import { Anthropic } from '@anthropic-ai/sdk' +import { CLAUDE_MODELS } from '../models.js' +import { log, wait } from '../types.js' + +import type { LLMFunction, ClaudeModelType } from '../types.js' + +/** + * Main function to call Claude API. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Claude model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during the API call. + */ +export const callClaude: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'CLAUDE_3_HAIKU' +): Promise => { + // Check if the ANTHROPIC_API_KEY environment variable is set + if (!env.ANTHROPIC_API_KEY) { + throw new Error('ANTHROPIC_API_KEY environment variable is not set. 
Please set it to your Anthropic API key.') + } + + // Initialize the Anthropic client with the API key from environment variables + const anthropic = new Anthropic({ apiKey: env.ANTHROPIC_API_KEY }) + + try { + // Select the actual model to use, defaulting to CLAUDE_3_HAIKU if not specified + const actualModel = CLAUDE_MODELS[model as ClaudeModelType] || CLAUDE_MODELS.CLAUDE_3_HAIKU + + // Call the Anthropic messages API to create a chat completion + const response = await anthropic.messages.create({ + model: actualModel, + max_tokens: 4000, // Maximum number of tokens in the response + messages: [{ role: 'user', content: promptAndTranscript }] // The input message (transcript content) + }) + + // Destructure the response to get relevant information + const { + content, + model: usedModel, // The actual model used + usage, // Token usage information + stop_reason // Reason why the generation stopped + } = response + + // Extract text content from the response + const textContent = extractTextContent(content) + + // Write the generated text to the output file + if (textContent) { + await writeFile(tempPath, textContent) + } else { + throw new Error('No text content generated from the API') + } + + log(wait(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`)) + + // Check if usage information is available + if (usage) { + const { input_tokens, output_tokens } = usage + log(wait(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`)) + } else { + log(wait(" - Token usage information not available")) + } + + } catch (error) { + console.error(`Error in callClaude: ${(error as Error).message}`) + throw error // Re-throw the error for handling in the calling function + } +} + +/** + * Extracts text content from the API response + * @param content - The content returned by the API + * @returns The extracted text content, or null if no text content is found + */ +function extractTextContent(content: any[]): string | null { + for (const block of content) { + if (typeof block === 'object' && block !== null && 'type' in block) { + if (block.type === 'text' && 'text' in block) { + return block.text + } + } + } + return null +} \ No newline at end of file diff --git a/src/llms/cohere.js b/src/llms/cohere.ts similarity index 53% rename from src/llms/cohere.js rename to src/llms/cohere.ts index fa0c225..db806b0 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.ts @@ -1,4 +1,4 @@ -// src/llms/cohere.js +// src/llms/cohere.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' @@ -6,18 +6,21 @@ import { CohereClient } from 'cohere-ai' import { COHERE_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, CohereModelType } from '../types.js' */ +import type { LLMFunction, CohereModelType } from '../types.js' -/** @type {LLMFunction} */ /** * Main function to call Cohere API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {CohereModelType} [model='COMMAND_R'] - The Cohere model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during the API call. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Cohere model to use. + * @returns A Promise that resolves when the API call is complete. 
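// --- Illustrative aside (not part of the patch) ---------------------------------
// Every converted caller resolves its model the same way: the untyped string coming
// from CLI options is cast to the model-key union and looked up in a typed Record,
// falling back to a default when the key is unknown. A minimal sketch, with
// hypothetical names standing in for GPT_MODELS / CLAUDE_MODELS / COHERE_MODELS:
type ExampleModelType = 'FAST' | 'QUALITY'

const EXAMPLE_MODELS: Record<ExampleModelType, string> = {
  FAST: 'example-fast-001',
  QUALITY: 'example-quality-001',
}

function resolveModel(model: string = 'FAST'): string {
  // An unknown key indexes past the Record at runtime and yields undefined,
  // so the || falls back to the default entry.
  return EXAMPLE_MODELS[model as ExampleModelType] || EXAMPLE_MODELS.FAST
}
// ---------------------------------------------------------------------------------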
+ * @throws {Error} If an error occurs during the API call. */ -export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND_R') { +export const callCohere: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'COMMAND_R' +): Promise => { // Check if the COHERE_API_KEY environment variable is set if (!env.COHERE_API_KEY) { throw new Error('COHERE_API_KEY environment variable is not set. Please set it to your Cohere API key.') @@ -28,7 +31,7 @@ export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND try { // Select the actual model to use, defaulting to COMMAND_R if not specified - const actualModel = COHERE_MODELS[model] || COHERE_MODELS.COMMAND_R + const actualModel = COHERE_MODELS[model as CohereModelType] || COHERE_MODELS.COMMAND_R // Call the Cohere chat API const response = await cohere.chat({ @@ -40,17 +43,29 @@ export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND // Destructure the response to get relevant information const { text, // The generated text - meta: { tokens: { inputTokens, outputTokens } }, // Token usage information + meta, // Metadata including token usage finishReason // Reason why the generation stopped } = response // Write the generated text to the output file - await writeFile(tempPath, text) + if (text) { + await writeFile(tempPath, text) + } else { + throw new Error('No text content generated from the API') + } + log(wait(`\n Finish Reason: ${finishReason}\n Model: ${actualModel}`)) - log(wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)) + + // Check if token usage information is available + if (meta && meta.tokens) { + const { inputTokens, outputTokens } = meta.tokens + log(wait(` Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`)) + } else { + log(wait(" - Token usage information not available")) + } } catch (error) { - console.error(`Error in callCohere: ${error.message}`) + console.error(`Error in callCohere: ${(error as Error).message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/gemini.js b/src/llms/gemini.ts similarity index 60% rename from src/llms/gemini.js rename to src/llms/gemini.ts index 084eb34..e1144c3 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.ts @@ -1,4 +1,4 @@ -// src/llms/gemini.js +// src/llms/gemini.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' @@ -6,42 +6,49 @@ import { GoogleGenerativeAI } from "@google/generative-ai" import { GEMINI_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, GeminiModelType } from '../types.js' */ +import type { LLMFunction, GeminiModelType } from '../types.js' /** * Utility function to introduce a delay - * @param {number} ms - Milliseconds to delay - * @returns {Promise} + * @param ms - Milliseconds to delay + * @returns A Promise that resolves after the specified delay */ -const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)) +const delay = (ms: number): Promise => new Promise(resolve => setTimeout(resolve, ms)) -/** @type {LLMFunction} */ /** * Main function to call Gemini API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {GeminiModelType} [model='GEMINI_1_5_FLASH'] - The Gemini model to use. 
- * @returns {Promise} - * @throws {Error} - If an error occurs during the API call. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Gemini model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during the API call. */ -export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_1_5_FLASH') { +export const callGemini: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'GEMINI_1_5_FLASH' +): Promise => { // Check if the GEMINI_API_KEY environment variable is set if (!env.GEMINI_API_KEY) { throw new Error('GEMINI_API_KEY environment variable is not set. Please set it to your Gemini API key.') } + // Initialize the Google Generative AI client const genAI = new GoogleGenerativeAI(env.GEMINI_API_KEY) // Select the actual model to use, defaulting to GEMINI_1_5_FLASH if not specified - const actualModel = GEMINI_MODELS[model] || GEMINI_MODELS.GEMINI_1_5_FLASH + const actualModel = GEMINI_MODELS[model as GeminiModelType] || GEMINI_MODELS.GEMINI_1_5_FLASH + // Create a GenerativeModel instance + const geminiModel = genAI.getGenerativeModel({ model: actualModel }) + const maxRetries = 3 // Maximum number of retry attempts // Retry loop for (let attempt = 1; attempt <= maxRetries; attempt++) { try { // Generate content using the selected model - const result = await gemAI.generateContent(promptAndTranscript, { model: actualModel }) + const result = await geminiModel.generateContent(promptAndTranscript) // Get the response from the generated content const response = await result.response @@ -60,7 +67,7 @@ export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_ return } catch (error) { - console.error(`Error in callGemini (attempt ${attempt}/${maxRetries}): ${error.message}`) + console.error(`Error in callGemini (attempt ${attempt}/${maxRetries}): ${error instanceof Error ? (error as Error).message : String(error)}`) // If this is the last attempt, throw the error if (attempt === maxRetries) { diff --git a/src/llms/llama.js b/src/llms/llama.js deleted file mode 100644 index f548989..0000000 --- a/src/llms/llama.js +++ /dev/null @@ -1,94 +0,0 @@ -// src/llms/llama.js - -import { writeFile, mkdir } from 'node:fs/promises' -import { getLlama, LlamaChatSession } from "node-llama-cpp" -import { existsSync } from 'node:fs' -import { exec } from 'node:child_process' -import { promisify } from 'node:util' -import { LLAMA_MODELS } from '../models.js' -import { log, wait } from '../types.js' - -const execAsync = promisify(exec) - -/** @import { LlamaModelType } from '../types.js' */ - -/** - * Function to download the model if it doesn't exist. - * @param {LlamaModelType} [modelName='GEMMA_2_2B'] - The name of the model to use. - * @returns {Promise} - The path to the downloaded model. - * @throws {Error} - If the model download fails. 
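// --- Illustrative aside (not part of the patch) ---------------------------------
// callGemini wraps its API call in a bounded retry loop built on the delay() helper
// defined above it. The same shape, generalized; the per-attempt wait used here is
// an assumption, since the exact backoff is not visible in this hunk:
const sleep = (ms: number): Promise<void> => new Promise(resolve => setTimeout(resolve, ms))

async function withRetries<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await fn()
    } catch (error) {
      if (attempt === maxRetries) throw error // last attempt: surface the error
      await sleep(1000 * attempt)             // wait a little longer before each retry
    }
  }
  throw new Error('unreachable')              // keeps the compiler happy; the loop always exits above
}
// ---------------------------------------------------------------------------------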
- */ -async function downloadModel(modelName = 'GEMMA_2_2B') { - // Get the model object from LLAMA_MODELS using the provided modelName or default to GEMMA_2_2B - const model = LLAMA_MODELS[modelName] || LLAMA_MODELS.GEMMA_2_2B - log(wait(` - Model selected: ${model.filename}`)) - - // If no valid model is found, throw an error - if (!model) { - throw new Error(`Invalid model name: ${modelName}`) - } - - // Construct the path where the model file should be stored - const modelPath = `./src/llms/models/${model.filename}` - - // Check if the model file already exists - if (existsSync(modelPath)) { - log(wait(` - Model path: ${modelPath}`)) - // Return the path if the model already exists - return modelPath - } - - log(wait(`\nDownloading ${model.filename}...`)) - try { - // Create the directory for storing models if it doesn't exist - await mkdir('./src/llms/models', { recursive: true }) - - // Download the model using curl - const { stderr } = await execAsync(`curl -L ${model.url} -o ${modelPath}`) - - // If there's any stderr output, log it - if (stderr) log(stderr) - log('Download completed') - - // Return the path to the downloaded model - return modelPath - } catch (err) { - // If an error occurs during download, log it and throw a new error - console.error(`Download failed: ${err.message}`) - throw new Error('Failed to download the model') - } -} - -/** @type {LLMFunction} */ -/** - * Main function to call the local Llama model. - * @param {string} promptAndTranscript - The combined prompt and transcript content. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {LlamaModelType | boolean} [modelName=true] - The name of the model to use or true to use the default. - * @returns {Promise} - * @throws {Error} - If an error occurs during processing. - */ -export async function callLlama(promptAndTranscript, tempPath, modelName = true) { - try { - // If modelName is true or not provided, use the default model - const actualModelName = modelName === true ? 'GEMMA_2_2B' : modelName - - // Ensure the model is downloaded - const modelPath = await downloadModel(actualModelName) - - // Initialize Llama and load the local model - const llama = await getLlama() - const localModel = await llama.loadModel({ modelPath }) - - // Create a context for the model and create a chat session - const context = await localModel.createContext() - const session = new LlamaChatSession({ contextSequence: context.getSequence() }) - - // Generate a response and write the response to a file - const response = await session.prompt(promptAndTranscript) - await writeFile(tempPath, response) - } catch (error) { - console.error(`Error in callLlama: ${error.message}`) - throw error - } -} \ No newline at end of file diff --git a/src/llms/llama.ts b/src/llms/llama.ts new file mode 100644 index 0000000..271816a --- /dev/null +++ b/src/llms/llama.ts @@ -0,0 +1,75 @@ +// src/llms/llama.ts + +import { writeFile, mkdir } from 'node:fs/promises' +import { getLlama, LlamaChatSession } from "node-llama-cpp" +import { existsSync } from 'node:fs' +import { exec } from 'node:child_process' +import { promisify } from 'node:util' +import { LLAMA_MODELS } from '../models.js' +import { log, wait } from '../types.js' + +import type { LlamaModelType, LLMFunction } from '../types.js' + +const execAsync = promisify(exec) + +/** + * Main function to call the local Llama model. + * @param promptAndTranscript - The combined prompt and transcript content. 
+ * @param tempPath - The temporary file path to write the LLM output. + * @param model - The model name or undefined to use the default model. + * @returns A Promise that resolves when the processing is complete. + * @throws {Error} - If an error occurs during processing. + */ +export const callLlama: LLMFunction = async (promptAndTranscript: string, tempPath: string, model?: string) => { + try { + // Get the model object from LLAMA_MODELS using the provided model name or default to GEMMA_2_2B + const selectedModel = LLAMA_MODELS[model as LlamaModelType] || LLAMA_MODELS.GEMMA_2_2B + log(wait(` - Model selected: ${selectedModel.filename}`)) + + // If no valid model is found, throw an error + if (!selectedModel) { + throw new Error(`Invalid model name: ${model}`) + } + + // Construct the path where the model file should be stored + const modelPath = `./src/llms/models/${selectedModel.filename}` + + // Check if the model file already exists, if not, download it + if (!existsSync(modelPath)) { + log(wait(`\nDownloading ${selectedModel.filename}...`)) + + try { + // Create the directory for storing models if it doesn't exist + await mkdir('./src/llms/models', { recursive: true }) + + // Download the model using curl + const { stderr } = await execAsync(`curl -L ${selectedModel.url} -o ${modelPath}`) + + // If there's any stderr output, log it + if (stderr) log(stderr) + log('Download completed') + } catch (err) { + // If an error occurs during download, log it and throw a new error + console.error(`Download failed: ${err instanceof Error ? err.message : String(err)}`) + throw new Error('Failed to download the model') + } + } else { + log(wait(` - Model path: ${modelPath}`)) + } + + // Initialize Llama and load the local model + const llama = await getLlama() + const localModel = await llama.loadModel({ modelPath }) + + // Create a context for the model and create a chat session + const context = await localModel.createContext() + const session = new LlamaChatSession({ contextSequence: context.getSequence() }) + + // Generate a response and write the response to a file + const response = await session.prompt(promptAndTranscript) + await writeFile(tempPath, response) + } catch (error) { + console.error(`Error in callLlama: ${error instanceof Error ? (error as Error).message : String(error)}`) + throw error + } +} \ No newline at end of file diff --git a/src/llms/mistral.js b/src/llms/mistral.ts similarity index 53% rename from src/llms/mistral.js rename to src/llms/mistral.ts index ea5f003..5b574f4 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.ts @@ -1,4 +1,4 @@ -// src/llms/mistral.js +// src/llms/mistral.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' @@ -6,28 +6,31 @@ import { Mistral } from '@mistralai/mistralai' import { MISTRAL_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, MistralModelType } from '../types.js' */ +import type { LLMFunction, MistralModelType } from '../types.js' -/** @type {LLMFunction} */ /** * Main function to call Mistral AI API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {MistralModelType} [model='MISTRAL_NEMO'] - The Mistral model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during the API call. + * @param promptAndTranscript - The combined prompt and transcript text to process. 
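// --- Illustrative aside (not part of the patch) ---------------------------------
// callLlama folds the old downloadModel() helper into an inline "download the model
// file only if it is missing" step. The same step isolated into a small function;
// the URL and target path are hypothetical:
import { mkdir } from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { exec } from 'node:child_process'
import { promisify } from 'node:util'
import { dirname } from 'node:path'

const execAsync = promisify(exec)

async function ensureModelFile(url: string, targetPath: string): Promise<string> {
  if (existsSync(targetPath)) return targetPath           // reuse a previous download
  await mkdir(dirname(targetPath), { recursive: true })   // create the models directory if needed
  const { stderr } = await execAsync(`curl -L ${url} -o ${targetPath}`)
  if (stderr) console.log(stderr)                         // curl reports progress on stderr
  return targetPath
}
// ---------------------------------------------------------------------------------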
+ * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Mistral model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} If an error occurs during the API call. */ -export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRAL_NEMO') { +export const callMistral: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'MISTRAL_NEMO' +): Promise => { // Check if the MISTRAL_API_KEY environment variable is set if (!env.MISTRAL_API_KEY) { throw new Error('MISTRAL_API_KEY environment variable is not set. Please set it to your Mistral API key.') } // Initialize Mistral client with API key from environment variables - const mistral = new Mistral(env.MISTRAL_API_KEY) + const mistral = new Mistral({ apiKey: env.MISTRAL_API_KEY }) try { // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found - const actualModel = MISTRAL_MODELS[model] || MISTRAL_MODELS.MISTRAL_NEMO + const actualModel = MISTRAL_MODELS[model as MistralModelType] || MISTRAL_MODELS.MISTRAL_NEMO log(wait(`\n Using Mistral model:\n - ${actualModel}`)) // Make API call to Mistral AI for chat completion @@ -36,23 +39,30 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA // max_tokens: ?, // Uncomment and set if you want to limit the response length messages: [{ role: 'user', content: promptAndTranscript }], }) - - // Destructure the response to extract relevant information - const { - choices: [{ message: { content }, finishReason }], - model: usedModel, - usage: { promptTokens, completionTokens, totalTokens } - } = response + + // Safely access the response properties + if (!response.choices || response.choices.length === 0) { + throw new Error("No choices returned from Mistral API") + } + + const content = response.choices[0].message.content + const finishReason = response.choices[0].finishReason + const { promptTokens, completionTokens, totalTokens } = response.usage ?? {} + + // Check if content was generated + if (!content) { + throw new Error("No content generated from Mistral") + } // Write the generated content to the specified output file await writeFile(tempPath, content) // Log finish reason, used model, and token usage - log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)) + log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${actualModel}`)) log(wait(` Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`)) } catch (error) { // Log any errors that occur during the process - console.error(`Error in callMistral: ${error.message}`) + console.error(`Error in callMistral: ${error instanceof Error ? 
(error as Error).message : String(error)}`) throw error // Re-throw the error for handling by the caller } } \ No newline at end of file diff --git a/src/llms/octo.js b/src/llms/octo.ts similarity index 66% rename from src/llms/octo.js rename to src/llms/octo.ts index 93ee91e..29a1202 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.ts @@ -1,4 +1,4 @@ -// src/llms/octo.js +// src/llms/octo.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' @@ -6,18 +6,17 @@ import { OctoAIClient } from '@octoai/sdk' import { OCTO_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, OctoModelType } from '../types.js' */ +import type { LLMFunction, OctoModelType } from '../types.js' -/** @type {LLMFunction} */ /** * Main function to call OctoAI API. - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {OctoModelType} [model='LLAMA_3_1_70B'] - The OctoAI model to use. - * @returns {Promise} + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The OctoAI model to use. + * @returns A Promise that resolves when the API call is complete. * @throws {Error} - If an error occurs during the API call. */ -export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1_70B') { +export const callOcto: LLMFunction = async (promptAndTranscript: string, tempPath: string, model: string = 'LLAMA_3_1_70B') => { // Check if the OCTOAI_API_KEY environment variable is set if (!env.OCTOAI_API_KEY) { throw new Error('OCTOAI_API_KEY environment variable is not set. Please set it to your OctoAI API key.') @@ -27,7 +26,7 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 try { // Select the actual model to use, defaulting to LLAMA_3_1_70B if the specified model is not found - const actualModel = OCTO_MODELS[model] || OCTO_MODELS.LLAMA_3_1_70B + const actualModel = OCTO_MODELS[model as OctoModelType] || OCTO_MODELS.LLAMA_3_1_70B log(wait(`\n Using OctoAI model:\n - ${actualModel}`)) // Make API call to OctoAI for text generation @@ -36,13 +35,11 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 // max_tokens: ?, // Uncomment and set if you want to limit the response length messages: [{ role: "user", content: promptAndTranscript }] }) - - // Destructure the response to extract relevant information - const { - choices: [{ message: { content }, finishReason }], - model: usedModel, - usage: { promptTokens, completionTokens, totalTokens } - } = response + + const content = response.choices[0].message.content as string + const finishReason = response.choices[0].finishReason + const usedModel = response.model + const { promptTokens, completionTokens, totalTokens } = response.usage ?? 
{} // Write the generated content to the specified output file await writeFile(tempPath, content) @@ -54,7 +51,7 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 } catch (error) { // Log any errors that occur during the process - console.error(`Error in callOcto: ${error.message}`) + console.error(`Error in callOcto: ${(error as Error).message}`) throw error // Re-throw the error for handling by the caller } } \ No newline at end of file diff --git a/src/llms/ollama.js b/src/llms/ollama.ts similarity index 60% rename from src/llms/ollama.js rename to src/llms/ollama.ts index fd0e1ac..cf7d901 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.ts @@ -1,27 +1,33 @@ -// src/llms/ollama.js +// src/llms/ollama.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OLLAMA_MODELS } from '../models.js' import { log, wait } from '../types.js' -/** @import { LLMFunction, OllamaModelType } from '../types.js' */ +import type { LLMFunction, OllamaModelType } from '../types.js' + +// Define the expected structure of the response from Ollama API +interface OllamaResponse { + message: { + content: string + } +} /** * Main function to call the Llama model using the Ollama REST API. * This function checks if the model is available, pulls it if necessary, * and then proceeds with the chat. - * @type {LLMFunction} - * @param {string} promptAndTranscript - The combined prompt and transcript content. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {OllamaModelType} [modelName='LLAMA_3_2_1B'] - The name of the model to use. - * @returns {Promise} + * @param promptAndTranscript - The combined prompt and transcript content. + * @param tempPath - The temporary file path to write the LLM output. + * @param modelName - The name of the model to use. + * @returns A Promise that resolves when the processing is complete. * @throws {Error} - If an error occurs during processing. */ -export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B') { +export const callOllama: LLMFunction = async (promptAndTranscript: string, tempPath: string, modelName: string = 'LLAMA_3_2_1B') => { try { // Map the model name to the Ollama model identifier - const ollamaModelName = OLLAMA_MODELS[modelName] || 'llama3.2:1b' + const ollamaModelName = OLLAMA_MODELS[modelName as OllamaModelType] || 'llama3.2:1b' // Get host and port from environment variables or use defaults const ollamaHost = env.OLLAMA_HOST || 'ollama' @@ -46,16 +52,17 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA throw new Error(`HTTP error! status: ${response.status}`) } - const data = await response.json() - + // Type assertion to enforce the structure of the response + const data = (await response.json()) as OllamaResponse + // Extract the assistant's reply and write the response to the output file const assistantReply = data.message.content log(wait(` - Received response from Ollama.`)) await writeFile(tempPath, assistantReply) log(wait(`\n Transcript saved to temporary file:\n - ${tempPath}`)) } catch (error) { - console.error(`Error in callOllama: ${error.message}`) - console.error(`Stack Trace: ${error.stack}`) + console.error(`Error in callOllama: ${error instanceof Error ? (error as Error).message : String(error)}`) + console.error(`Stack Trace: ${error instanceof Error ? 
error.stack : 'No stack trace available'}`) throw error } } \ No newline at end of file diff --git a/src/llms/prompt.js b/src/llms/prompt.ts similarity index 97% rename from src/llms/prompt.js rename to src/llms/prompt.ts index c398fe7..3d110d4 100644 --- a/src/llms/prompt.js +++ b/src/llms/prompt.ts @@ -1,12 +1,12 @@ -// src/llms/prompt.js +// src/llms/prompt.ts -/** @import { PromptSection } from '../types.js' */ +import type { PromptSection } from '../types.js' /** * Define the structure for different sections of the prompt - * @type {Object.} + * @type {Record} */ -const sections = { +const sections: Record = { // Section for generating titles titles: { // Instructions for the AI model on how to generate titles @@ -126,7 +126,7 @@ const sections = { * @param {string[]} [prompt=['summary', 'longChapters']] - An array of section keys to include in the prompt. * @returns {string} - The generated prompt text. */ -export function generatePrompt(prompt = ['summary', 'longChapters']) { +export function generatePrompt(prompt: string[] = ['summary', 'longChapters']): string { // Start with a general instruction about the transcript and add instructions for each requested section let text = "This is a transcript with timestamps. It does not contain copyrighted materials.\n\n" prompt.forEach(section => { diff --git a/src/models.js b/src/models.ts similarity index 81% rename from src/models.js rename to src/models.ts index 65b0a6c..3297ca9 100644 --- a/src/models.js +++ b/src/models.ts @@ -1,12 +1,12 @@ -// src/models.js +// src/models.ts -/** @import { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType } from './types.js' */ +import type { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType } from './types.js' /** * Define available Whisper models * @type {Record} */ -export const WHISPER_MODELS = { +export const WHISPER_MODELS: Record = { 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', 'base': 'ggml-base.bin', @@ -23,7 +23,7 @@ export const WHISPER_MODELS = { * Map of ChatGPT model identifiers to their API names * @type {Record} */ -export const GPT_MODELS = { +export const GPT_MODELS: Record = { GPT_4o_MINI: "gpt-4o-mini", GPT_4o: "gpt-4o", GPT_4_TURBO: "gpt-4-turbo", @@ -34,7 +34,7 @@ export const GPT_MODELS = { * Map of Claude model identifiers to their API names * @type {Record} */ -export const CLAUDE_MODELS = { +export const CLAUDE_MODELS: Record = { CLAUDE_3_5_SONNET: "claude-3-5-sonnet-20240620", CLAUDE_3_OPUS: "claude-3-opus-20240229", CLAUDE_3_SONNET: "claude-3-sonnet-20240229", @@ -45,7 +45,7 @@ export const CLAUDE_MODELS = { * Map of Cohere model identifiers to their API names * @type {Record} */ -export const COHERE_MODELS = { +export const COHERE_MODELS: Record = { COMMAND_R: "command-r", // Standard Command model COMMAND_R_PLUS: "command-r-plus" // Enhanced Command model } @@ -54,7 +54,7 @@ export const COHERE_MODELS = { * Map of Gemini model identifiers to their API names * @type {Record} */ -export const GEMINI_MODELS = { +export const GEMINI_MODELS: Record = { GEMINI_1_5_FLASH: "gemini-1.5-flash", // GEMINI_1_5_PRO: "gemini-1.5-pro", GEMINI_1_5_PRO: "gemini-1.5-pro-exp-0827", @@ -64,7 +64,7 @@ export const GEMINI_MODELS = { * Map of Mistral model identifiers to their API names * @type {Record} */ -export const MISTRAL_MODELS = { +export const MISTRAL_MODELS: Record = { 
MIXTRAL_8x7b: "open-mixtral-8x7b", MIXTRAL_8x22b: "open-mixtral-8x22b", MISTRAL_LARGE: "mistral-large-latest", @@ -75,7 +75,7 @@ export const MISTRAL_MODELS = { * Map of OctoAI model identifiers to their API names * @type {Record} */ -export const OCTO_MODELS = { +export const OCTO_MODELS: Record = { LLAMA_3_1_8B: "meta-llama-3.1-8b-instruct", LLAMA_3_1_70B: "meta-llama-3.1-70b-instruct", LLAMA_3_1_405B: "meta-llama-3.1-405b-instruct", @@ -89,7 +89,7 @@ export const OCTO_MODELS = { * Map of local model identifiers to their filenames and URLs * @type {Record} */ -export const LLAMA_MODELS = { +export const LLAMA_MODELS: Record = { QWEN_2_5_3B: { filename: "qwen2.5-3b-instruct-q6_k.gguf", url: "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q6_k.gguf" @@ -112,7 +112,7 @@ export const LLAMA_MODELS = { * Map of model identifiers to their corresponding names in Ollama * @type {Record} */ -export const OLLAMA_MODELS = { +export const OLLAMA_MODELS: Record = { LLAMA_3_2_1B: 'llama3.2:1b', LLAMA_3_2_3B: 'llama3.2:3b', GEMMA_2_2B: 'gemma2:2b', diff --git a/src/transcription/assembly.js b/src/transcription/assembly.ts similarity index 90% rename from src/transcription/assembly.js rename to src/transcription/assembly.ts index eb61e27..12aa874 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.ts @@ -1,11 +1,10 @@ -// src/transcription/assembly.js +// src/transcription/assembly.ts import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { AssemblyAI } from 'assemblyai' import { log, wait } from '../types.js' - -/** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { ProcessingOptions } from '../types.js' /** * Main function to handle transcription using AssemblyAI. @@ -14,9 +13,7 @@ import { log, wait } from '../types.js' * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callAssembly(finalPath, options) { - // log(opts(`Options received:\n`)) - // log(options) +export async function callAssembly(options: ProcessingOptions, finalPath: string): Promise { // Check if the ASSEMBLY_API_KEY environment variable is set if (!env.ASSEMBLY_API_KEY) { throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') @@ -40,7 +37,7 @@ export async function callAssembly(finalPath, options) { let txtContent = '' // Helper function to format timestamps - const formatTime = (timestamp) => { + const formatTime = (timestamp: number): string => { const totalSeconds = Math.floor(timestamp / 1000) return `${Math.floor(totalSeconds / 60).toString().padStart(2, '0')}:${(totalSeconds % 60).toString().padStart(2, '0')}` } @@ -79,7 +76,7 @@ export async function callAssembly(finalPath, options) { return txtContent } catch (error) { // Log any errors that occur during the transcription process - console.error(`Error processing the transcription: ${error.message}`) + console.error(`Error processing the transcription: ${(error as Error).message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js deleted file mode 100644 index 246434f..0000000 --- a/src/transcription/deepgram.js +++ /dev/null @@ -1,56 +0,0 @@ -// src/transcription/deepgram.js - -import { writeFile, readFile } from 'node:fs/promises' -import { env } from 'node:process' -import { createClient } from '@deepgram/sdk' -import { log, wait } from '../types.js' - -/** - * Main function to handle transcription using Deepgram. - * @param {string} finalPath - The identifier used for naming output files. - * @param {ProcessingOptions} options - Additional processing options. - * @returns {Promise} - Returns the formatted transcript content. - * @throws {Error} - If an error occurs during transcription. - */ -export async function callDeepgram(finalPath) { - // Check if the DEEPGRAM_API_KEY environment variable is set - if (!env.DEEPGRAM_API_KEY) { - throw new Error('DEEPGRAM_API_KEY environment variable is not set. Please set it to your Deepgram API key.') - } - - // Initialize the Deepgram client with the API key from environment variables - const deepgram = createClient(env.DEEPGRAM_API_KEY) - - // Check if the input is a URL or a local file - const isUrl = finalPath.startsWith('http://') || finalPath.startsWith('https://') - - try { - // Request transcription from Deepgram - const { result } = await deepgram.listen.prerecorded[isUrl ? 'transcribeUrl' : 'transcribeFile']( - // Use URL or file content based on input type - isUrl ? 
{ url: finalPath } : await readFile(`${finalPath}.wav`), - // Use the "nova-2" model with smart formatting - { model: 'nova-2', smart_format: true } - ) - - // Process and format the transcription result - const txtContent = result.results.channels[0].alternatives[0].paragraphs.paragraphs - .flatMap((paragraph) => paragraph.sentences) - .map((sentence) => { - // Format timestamp and text for each sentence - const minutes = Math.floor(sentence.start / 60).toString().padStart(2, '0') - const seconds = Math.floor(sentence.start % 60).toString().padStart(2, '0') - return `[${minutes}:${seconds}] ${sentence.text}` - }) - .join('\n') - - // Write the formatted transcript to a file - await writeFile(`${finalPath}.txt`, txtContent) - log(wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)) - return txtContent - } catch (error) { - // Log any errors that occur during the transcription process - console.error(`Error processing the transcription: ${error.message}`) - throw error // Re-throw the error for handling in the calling function - } -} \ No newline at end of file diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts new file mode 100644 index 0000000..01644b4 --- /dev/null +++ b/src/transcription/deepgram.ts @@ -0,0 +1,73 @@ +// src/transcription/deepgram.ts + +import { writeFile, readFile } from 'node:fs/promises' +import { env } from 'node:process' +import { createClient, SyncPrerecordedResponse, DeepgramResponse } from '@deepgram/sdk' +import { log, wait } from '../types.js' +import type { ProcessingOptions } from '../types.js' + +/** + * Main function to handle transcription using Deepgram. + * @param {string} finalPath - The identifier used for naming output files. + * @param {ProcessingOptions} options - Additional processing options. + * @returns {Promise} - Returns the formatted transcript content. + * @throws {Error} - If an error occurs during transcription. + */ +export async function callDeepgram(options: ProcessingOptions, finalPath: string): Promise { + // Check if the DEEPGRAM_API_KEY environment variable is set + if (!env.DEEPGRAM_API_KEY) { + throw new Error('DEEPGRAM_API_KEY environment variable is not set. 
Please set it to your Deepgram API key.') + } + + // Initialize the Deepgram client with the API key from environment variables + const deepgram = createClient(env.DEEPGRAM_API_KEY) + + // Check if the input is a URL or a local file + const isUrl = finalPath.startsWith('http://') || finalPath.startsWith('https://') + + try { + let result: DeepgramResponse + if (isUrl) { + // Use transcribeUrl for URL inputs + result = await deepgram.listen.prerecorded.transcribeUrl( + { url: finalPath }, + { model: 'nova-2', smart_format: true } + ) + } else { + // Use transcribeFile for local file inputs + const audioBuffer = await readFile(`${finalPath}.wav`) + result = await deepgram.listen.prerecorded.transcribeFile( + audioBuffer, + { model: 'nova-2', smart_format: true } + ) + } + + // Type guard: Check if the result has 'results' and 'metadata' (success case) + if ('results' in result && 'metadata' in result) { + // Safely cast the result to SyncPrerecordedResponse after the check + const successResult = result as unknown as SyncPrerecordedResponse + + // Safely access properties with optional chaining + const txtContent = successResult.results?.channels[0]?.alternatives[0]?.paragraphs?.paragraphs + ?.flatMap((paragraph) => paragraph.sentences) + ?.map((sentence) => { + // Handle case where sentence or start might be undefined + const minutes = Math.floor((sentence.start ?? 0) / 60).toString().padStart(2, '0') + const seconds = Math.floor((sentence.start ?? 0) % 60).toString().padStart(2, '0') + return `[${minutes}:${seconds}] ${sentence.text ?? ''}` + }) + ?.join('\n') || '' // Default to empty string if undefined + + // Write the formatted transcript to a file + await writeFile(`${finalPath}.txt`, txtContent) + log(wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)) + return txtContent + } else { + throw new Error('Deepgram returned an error response or incomplete data') + } + } catch (error) { + // Log any errors that occur during the transcription process + console.error(`Error processing the transcription: ${(error as Error).message}`) + throw error // Re-throw the error for handling in the calling function + } +} \ No newline at end of file diff --git a/src/transcription/whisper.js b/src/transcription/whisper.ts similarity index 90% rename from src/transcription/whisper.js rename to src/transcription/whisper.ts index 2fdd095..244b034 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.ts @@ -1,16 +1,15 @@ -// src/transcription/whisper.js +// src/transcription/whisper.ts import { readFile, writeFile } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { existsSync } from 'node:fs' import { WHISPER_MODELS } from '../models.js' -import { log, success, wait, opts } from '../types.js' +import { log, success, wait } from '../types.js' +import type { ProcessingOptions } from '../types.js' const execPromise = promisify(exec) -/** @import { ProcessingOptions } from '../types.js' */ - /** * Main function to handle transcription using Whisper. * @param {string} finalPath - The base path for the files. @@ -18,9 +17,7 @@ const execPromise = promisify(exec) * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. 
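// --- Illustrative aside (not part of the patch) ---------------------------------
// The Deepgram branch distinguishes a success payload from an error payload by
// checking for the 'results' and 'metadata' properties before casting. The same
// check expressed as a reusable type guard, with hypothetical payload shapes:
interface DeepgramErrorShape { error: unknown }
interface DeepgramSuccessShape { results: unknown; metadata: unknown }

function isDeepgramSuccess(
  value: DeepgramErrorShape | DeepgramSuccessShape
): value is DeepgramSuccessShape {
  return 'results' in value && 'metadata' in value
}
// ---------------------------------------------------------------------------------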
*/ -export async function callWhisper(finalPath, options) { - // log(opts(`Options passed to callWhisper:\n`)) - // log(options) +export async function callWhisper(options: ProcessingOptions, finalPath: string): Promise { try { // Get the whisper model from options or use 'base' as default const whisperModel = options.whisper || 'base' diff --git a/src/transcription/whisperDocker.js b/src/transcription/whisperDocker.ts similarity index 90% rename from src/transcription/whisperDocker.js rename to src/transcription/whisperDocker.ts index e98b869..8e56bcd 100644 --- a/src/transcription/whisperDocker.js +++ b/src/transcription/whisperDocker.ts @@ -1,16 +1,15 @@ -// src/transcription/whisperDocker.js +// src/transcription/whisperDocker.ts import { readFile, writeFile } from 'node:fs/promises' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { join } from 'node:path' import { WHISPER_MODELS } from '../models.js' -import { log, wait, opts } from '../types.js' +import { log, wait } from '../types.js' +import type { ProcessingOptions } from '../types.js' const execPromise = promisify(exec) -/** @import { ProcessingOptions } from '../types.js' */ - /** * Main function to handle transcription using Whisper Docker. * @param {string} finalPath - The base path for the files. @@ -18,9 +17,7 @@ const execPromise = promisify(exec) * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ -export async function callWhisperDocker(finalPath, options) { - // log(opts(`Options passed to callWhisperDocker:\n`)) - // log(options) +export async function callWhisperDocker(options: ProcessingOptions, finalPath: string): Promise { try { // Get the whisper model from options or use 'base' as default const whisperModel = options.whisperDocker || 'base' diff --git a/src/types.js b/src/types.js deleted file mode 100644 index 7de1b75..0000000 --- a/src/types.js +++ /dev/null @@ -1,237 +0,0 @@ -// src/types.js - -import chalk from 'chalk' - -export const step = chalk.bold.underline -export const dim = chalk.dim -export const success = chalk.bold.blue -export const opts = chalk.magentaBright.bold -export const wait = chalk.cyan.dim -export const final = chalk.bold.italic - -export const log = console.log - -/** - * @file This file contains all the custom type definitions used across the Autoshow project. - */ - -/** - * Represents the processing options passed through command-line arguments or interactive prompts. - * @typedef {Object} ProcessingOptions - * @property {string} [video] - URL of the YouTube video to process. - * @property {string} [playlist] - URL of the YouTube playlist to process. - * @property {string} [urls] - File path containing a list of URLs to process. - * @property {string} [file] - Local audio or video file path to process. - * @property {string} [rss] - URL of the podcast RSS feed to process. - * @property {string[]} [item] - Specific items (audio URLs) from the RSS feed to process. - * @property {boolean} [info] - Flag to generate JSON file with RSS feed information instead of processing items. - * @property {boolean} [noCleanUp] - Flag to indicate whether to keep temporary files after processing. - * @property {WhisperModelType} [whisperModel] - The Whisper model to use (e.g., 'tiny', 'base'). - * @property {boolean} [deepgram] - Flag to use Deepgram for transcription. - * @property {boolean} [assembly] - Flag to use AssemblyAI for transcription. 
- * @property {boolean} [speakerLabels] - Flag to use speaker labels in AssemblyAI transcription. - * @property {string} [chatgpt] - ChatGPT model to use (e.g., 'GPT_4o_MINI'). - * @property {string} [claude] - Claude model to use (e.g., 'CLAUDE_3_SONNET'). - * @property {string} [cohere] - Cohere model to use (e.g., 'COMMAND_R_PLUS'). - * @property {string} [mistral] - Mistral model to use (e.g., 'MISTRAL_LARGE'). - * @property {string} [octo] - OctoAI model to use (e.g., 'LLAMA_3_1_8B'). - * @property {string} [llama] - Llama model to use for local inference (e.g., 'LLAMA_3_1_8B_Q4'). - * @property {string} [gemini] - Gemini model to use (e.g., 'GEMINI_1_5_FLASH'). - * @property {string[]} [prompt] - Array of prompt sections to include (e.g., ['titles', 'summary']). - * @property {LLMServices} [llmServices] - The selected LLM option. - * @property {TranscriptServices} [transcriptServices] - The selected transcription option. - * @property {string} [llamaModel] - Specific Llama model to use. - * @property {number} [skip] - Number of items to skip in RSS feed processing. - * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). - * @property {boolean} [interactive] - Whether to run in interactive mode. - */ - -/** - * Represents the answers received from inquirer prompts in interactive mode. - * @typedef {Object} InquirerAnswers - * @property {string} action - The action selected by the user (e.g., 'video', 'playlist'). - * @property {string} [video] - YouTube video URL provided by the user. - * @property {string} [playlist] - YouTube playlist URL provided by the user. - * @property {string} [urls] - File path containing URLs provided by the user. - * @property {string} [file] - Local audio/video file path provided by the user. - * @property {string} [rss] - RSS feed URL provided by the user. - * @property {boolean} [specifyItem] - Whether the user wants to specify specific RSS items. - * @property {string} [item] - Comma-separated audio URLs of specific RSS items. - * @property {LLMServices} [llmServices] - LLM option selected by the user. - * @property {string} [llamaModel] - Specific Llama model selected by the user. - * @property {TranscriptServices} [transcriptServices] - Transcription option selected by the user. - * @property {boolean} [useDocker] - Whether to use Docker for Whisper transcription. - * @property {WhisperModelType} [whisperModel] - Whisper model type selected by the user. - * @property {boolean} [speakerLabels] - Whether to use speaker labels in transcription. - * @property {string[]} [prompt] - Prompt sections selected by the user. - * @property {boolean} [noCleanUp] - Whether to keep temporary files after processing. - * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). - * @property {number} [skip] - Number of items to skip in RSS feed processing. - * @property {boolean} [confirmAction] - Whether to proceed with the action. - */ - -/** - * Represents the structure of the inquirer prompt questions. - * @typedef {Object[]} InquirerQuestions - * @property {string} type - The type of the prompt (e.g., 'input', 'list', 'confirm', 'checkbox'). - * @property {string} name - The name of the answer property. - * @property {string} message - The message to display to the user. - * @property {Array|Function} [choices] - The choices available for selection (for 'list' and 'checkbox' types). - * @property {Function} [when] - A function to determine when to display the prompt. 
- * @property {Function} [validate] - A function to validate the user's input. - * @property {*} [default] - The default value for the prompt. - */ - -/** - * Represents a handler function for processing different actions (e.g., video, playlist). - * @callback HandlerFunction - * @param {string} input - The primary input (e.g., URL or file path) for processing. - * @param {LLMServices} [llmServices] - The selected LLM option. - * @param {TranscriptServices} [transcriptServices] - The selected transcription option. - * @param {ProcessingOptions} options - Additional processing options. - * @returns {Promise} - A promise that resolves when processing is complete. - */ - -/** - * Represents the data structure for markdown generation. - * @typedef {Object} MarkdownData - * @property {string} frontMatter - The front matter content for the markdown file. - * @property {string} finalPath - The base file path (without extension) for the markdown file. - * @property {string} filename - The sanitized filename used for the markdown file. - */ - -/** - * Represents the metadata extracted from a YouTube video. - * @typedef {Object} VideoMetadata - * @property {string} showLink - The URL to the video's webpage. - * @property {string} channel - The name of the channel that uploaded the video. - * @property {string} channelURL - The URL to the uploader's channel page. - * @property {string} title - The title of the video. - * @property {string} description - The description of the video (empty string in this case). - * @property {string} publishDate - The upload date in 'YYYY-MM-DD' format. - * @property {string} coverImage - The URL to the video's thumbnail image. - */ - -/** - * Represents an item in an RSS feed. - * @typedef {Object} RSSItem - * @property {string} publishDate - The publication date of the RSS item (e.g., '2024-09-24'). - * @property {string} title - The title of the RSS item. - * @property {string} coverImage - The URL to the cover image of the RSS item. - * @property {string} showLink - The URL to the show or episode. - * @property {string} channel - The name of the channel or podcast. - * @property {string} channelURL - The URL to the channel or podcast. - * @property {string} [description] - A brief description of the RSS item. - * @property {string} [audioURL] - The URL to the audio file of the RSS item. - */ - -/** - * Represents the options for RSS feed processing. - * @typedef {Object} RSSOptions - * @property {string} [order] - The order to process items ('newest' or 'oldest'). - * @property {number} [skip] - The number of items to skip. - */ - -/** - * Represents the options for downloading audio files. - * @typedef {Object} DownloadAudioOptions - * @property {string} [outputFormat] - The desired output audio format (e.g., 'wav'). - * @property {number} [sampleRate] - The sample rate for the audio file (e.g., 16000). - * @property {number} [channels] - The number of audio channels (e.g., 1 for mono). - */ - -/** - * Represents the supported file types for audio and video processing. - * @typedef {'wav' | 'mp3' | 'm4a' | 'aac' | 'ogg' | 'flac' | 'mp4' | 'mkv' | 'avi' | 'mov' | 'webm'} SupportedFileType - */ - -/** - * Represents the transcription services that can be used in the application. - * @typedef {'whisper' | 'whisperDocker' | 'deepgram' | 'assembly'} TranscriptServices - * - * - whisper: Use Whisper.cpp for transcription. - * - whisperDocker: Use Whisper.cpp in a Docker container. - * - deepgram: Use Deepgram's transcription service. 
- * - assembly: Use AssemblyAI's transcription service. - */ - -/** - * Represents the available Whisper model types. - * @typedef {'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large-v1' | 'large-v2'} WhisperModelType - * - * - tiny: Smallest multilingual model. - * - tiny.en: Smallest English-only model. - * - base: Base multilingual model. - * - base.en: Base English-only model. - * - small: Small multilingual model. - * - small.en: Small English-only model. - * - medium: Medium multilingual model. - * - medium.en: Medium English-only model. - * - large-v1: Large multilingual model version 1. - * - large-v2: Large multilingual model version 2. - */ - -/** - * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. - * @typedef {Object} PromptSection - * @property {string} instruction - The instructions for the section. - * @property {string} example - An example output for the section. - */ - -/** - * Represents the options for Language Models (LLMs) that can be used in the application. - * @typedef {'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini'} LLMServices - * - * - chatgpt: Use OpenAI's ChatGPT models. - * - claude: Use Anthropic's Claude models. - * - cohere: Use Cohere's language models. - * - mistral: Use Mistral AI's language models. - * - octo: Use OctoAI's language models. - * - llama: Use Llama models for local inference. - * - ollama: Use Ollama for processing. - * - gemini: Use Google's Gemini models. - */ - -/** - * Represents the options for LLM processing. - * @typedef {Object} LLMOptions - * @property {string[]} [promptSections] - The sections to include in the prompt (e.g., ['titles', 'summary']). - * @property {string} [model] - The specific LLM model to use. - * @property {number} [temperature] - The temperature parameter for text generation. - * @property {number} [maxTokens] - The maximum number of tokens to generate. - */ - -/** - * Represents a function that calls an LLM for processing. - * @callback LLMFunction - * @param {string} promptAndTranscript - The combined prompt and transcript text to process. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {string} [model] - The specific model to use for the LLM (optional). - * @returns {Promise} - A promise that resolves when the LLM processing is complete. - */ - -/** - * Represents a mapping of LLM option keys to their corresponding functions. - * @typedef {Object.} LLMFunctions - * - * This ensures that only valid `LLMServices` values can be used as keys in the `llmFunctions` object. - */ - -/** - * Define all available LLM models. - * @typedef {'GPT_4o_MINI' | 'GPT_4o' | 'GPT_4_TURBO' | 'GPT_4'} ChatGPTModelType - Define available GPT models. - * @typedef {'CLAUDE_3_5_SONNET' | 'CLAUDE_3_OPUS' | 'CLAUDE_3_SONNET' | 'CLAUDE_3_HAIKU'} ClaudeModelType - Define available Claude models. - * @typedef {'COMMAND_R' | 'COMMAND_R_PLUS'} CohereModelType - Define available Cohere models. - * @typedef {'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'} GeminiModelType - Define available Gemini models. - * @typedef {'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'} MistralModelType - Define available Mistral AI models. - * @typedef {'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'} OctoModelType - Define available OctoAI models. 
- * @typedef {'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA_2_2B'} LlamaModelType - Define local model configurations. - * @typedef {'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B'} OllamaModelType - Define local model with Ollama. - */ - -/** - * Represents the function signature for cleaning up temporary files. - * @callback CleanUpFunction - * @param {string} id - The base filename (without extension) for the files to be cleaned up. - * @returns {Promise} - A promise that resolves when cleanup is complete. - */ \ No newline at end of file diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000..2260ed3 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,345 @@ +// src/types.ts + +import type { ChalkInstance } from 'chalk' +// import type { BuiltInQuestion } from 'inquirer' +// import BuiltInQuestion from 'inquirer' +import chalk from 'chalk' + +export const step: ChalkInstance = chalk.bold.underline +export const dim: ChalkInstance = chalk.dim +export const success: ChalkInstance = chalk.bold.blue +export const opts: ChalkInstance = chalk.magentaBright.bold +export const wait: ChalkInstance = chalk.cyan.dim +export const final: ChalkInstance = chalk.bold.italic + +export const log: typeof console.log = console.log + +/** + * @file This file contains all the custom type definitions used across the Autoshow project. + */ + +/** + * Represents the processing options passed through command-line arguments or interactive prompts. + */ +export type ProcessingOptions = { + /** URL of the YouTube video to process. */ + video?: string + /** URL of the YouTube playlist to process. */ + playlist?: string + /** File path containing a list of URLs to process. */ + urls?: string + /** Local audio or video file path to process. */ + file?: string + /** URL of the podcast RSS feed to process. */ + rss?: string + /** Specific items (audio URLs) from the RSS feed to process. */ + item?: string[] + /** Flag to generate JSON file with RSS feed information instead of processing items. */ + info?: boolean + /** Flag to indicate whether to keep temporary files after processing. */ + noCleanUp?: boolean + /** The Whisper model to use (e.g., 'tiny', 'base'). */ + whisper?: WhisperModelType + /** The Whisper model to use with Docker (e.g., 'tiny', 'base'). */ + whisperDocker?: WhisperModelType + /** Flag to use Deepgram for transcription. */ + deepgram?: boolean + /** Flag to use AssemblyAI for transcription. */ + assembly?: boolean + /** Flag to use speaker labels in AssemblyAI transcription. */ + speakerLabels?: boolean + /** ChatGPT model to use (e.g., 'GPT_4o_MINI'). */ + chatgpt?: string + /** Claude model to use (e.g., 'CLAUDE_3_SONNET'). */ + claude?: string + /** Cohere model to use (e.g., 'COMMAND_R_PLUS'). */ + cohere?: string + /** Mistral model to use (e.g., 'MISTRAL_LARGE'). */ + mistral?: string + /** OctoAI model to use (e.g., 'LLAMA_3_1_8B'). */ + octo?: string + /** Ollama model to use for local inference (e.g., 'LLAMA_3_2_1B'). */ + ollama?: string + /** Llama model to use for local inference (e.g., 'LLAMA_3_1_8B'). */ + llama?: string + /** Gemini model to use (e.g., 'GEMINI_1_5_FLASH'). */ + gemini?: string + /** Array of prompt sections to include (e.g., ['titles', 'summary']). */ + prompt?: string[] + /** The selected LLM option. */ + llmServices?: LLMServices | undefined + /** The selected transcription option. */ + transcriptServices?: TranscriptServices | undefined + /** Number of items to skip in RSS feed processing. 
*/ + skip?: number + /** Order in which to process RSS feed items ('newest' or 'oldest'). */ + last?: number + /** Number of most recent items to process (overrides --order and --skip). */ + order?: string + /** Whether to run in interactive mode. */ + interactive?: boolean +} + +/** + * Represents the answers received from inquirer prompts in interactive mode. + */ +export type InquirerAnswers = { + /** The action selected by the user (e.g., 'video', 'playlist'). */ + action?: string // Make this optional + /** YouTube video URL provided by the user. */ + video?: string + /** YouTube playlist URL provided by the user. */ + playlist?: string + /** File path containing URLs provided by the user. */ + urls?: string + /** Local audio/video file path provided by the user. */ + file?: string + /** RSS feed URL provided by the user. */ + rss?: string + /** Whether the user wants to specify specific RSS items. */ + specifyItem?: boolean + /** Comma-separated audio URLs of specific RSS items. */ + item?: string | string[] + /** LLM option selected by the user. */ + llmServices?: LLMServices | undefined + /** Specific Llama model selected by the user. */ + llamaModel?: string + /** Transcription option selected by the user. */ + transcriptServices?: TranscriptServices | undefined + /** Whisper model type selected by the user. */ + whisperModel?: WhisperModelType // Add whisperModel to the InquirerAnswers + /** Whether to use speaker labels in transcription. */ + speakerLabels?: boolean + /** Prompt sections selected by the user. */ + prompt?: string[] + /** Whether to keep temporary files after processing. */ + noCleanUp?: boolean + /** Order in which to process RSS feed items ('newest' or 'oldest'). */ + order?: string + /** Number of items to skip in RSS feed processing. */ + skip?: number + /** Whether to proceed with the action. */ + confirmAction?: boolean +} + +/** + * Represents the structure of the inquirer prompt questions. + */ +export type InquirerQuestions = Array<{ + /** The type of the prompt (e.g., 'input', 'list', 'confirm', 'checkbox'). */ + type: string + /** The name of the answer property. */ + name: string + /** The message to display to the user. */ + message: string + /** The choices available for selection (for 'list' and 'checkbox' types). */ + choices?: Array | Function + /** A function to determine when to display the prompt. */ + when?: Function + /** A function to validate the user's input. */ + validate?: Function + /** The default value for the prompt. */ + default?: any +}> + +/** + * Represents a handler function for processing different actions (e.g., video, playlist). + */ +export type HandlerFunction = ( + // The options containing various inputs + options: ProcessingOptions, + // The specific input (URL or file path) + input: string, + // Allow llmServices to be optional or undefined + llmServices?: LLMServices | undefined, + // Allow transcriptServices to be optional or undefined + transcriptServices?: TranscriptServices | undefined +) => Promise + +/** + * Represents the data structure for markdown generation. + */ +export type MarkdownData = { + /** The front matter content for the markdown file. */ + frontMatter: string + /** The base file path (without extension) for the markdown file. */ + finalPath: string + /** The sanitized filename used for the markdown file. */ + filename: string +} + +/** + * Represents the metadata extracted from a YouTube video. + */ +export type VideoMetadata = { + /** The URL to the video's webpage. 
*/ + showLink: string + /** The name of the channel that uploaded the video. */ + channel: string + /** The URL to the uploader's channel page. */ + channelURL: string + /** The title of the video. */ + title: string + /** The description of the video (empty string in this case). */ + description: string + /** The upload date in 'YYYY-MM-DD' format. */ + publishDate: string + /** The URL to the video's thumbnail image. */ + coverImage: string +} + +/** + * Represents an item in an RSS feed. + */ +export type RSSItem = { + /** The publication date of the RSS item (e.g., '2024-09-24'). */ + publishDate: string + /** The title of the RSS item. */ + title: string + /** The URL to the cover image of the RSS item. */ + coverImage: string + /** The URL to the show or episode. */ + showLink: string + /** The name of the channel or podcast. */ + channel: string + /** The URL to the channel or podcast. */ + channelURL: string + /** A brief description of the RSS item. */ + description?: string + /** The URL to the audio file of the RSS item. */ + audioURL?: string +} + +/** + * Represents the options for RSS feed processing. + */ +export type RSSOptions = { + /** The order to process items ('newest' or 'oldest'). */ + order?: string + /** The number of items to skip. */ + skip?: number +} + +/** + * Represents the options for downloading audio files. + */ +export type DownloadAudioOptions = { + /** The desired output audio format (e.g., 'wav'). */ + outputFormat?: string + /** The sample rate for the audio file (e.g., 16000). */ + sampleRate?: number + /** The number of audio channels (e.g., 1 for mono). */ + channels?: number +} + +/** + * Represents the supported file types for audio and video processing. + */ +export type SupportedFileType = 'wav' | 'mp3' | 'm4a' | 'aac' | 'ogg' | 'flac' | 'mp4' | 'mkv' | 'avi' | 'mov' | 'webm' + +/** + * Represents the transcription services that can be used in the application. + * + * - whisper: Use Whisper.cpp for transcription. + * - whisperDocker: Use Whisper.cpp in a Docker container. + * - deepgram: Use Deepgram's transcription service. + * - assembly: Use AssemblyAI's transcription service. + */ +export type TranscriptServices = 'whisper' | 'whisperDocker' | 'deepgram' | 'assembly' + +/** + * Represents the available Whisper model types. + * + * - tiny: Smallest multilingual model. + * - tiny.en: Smallest English-only model. + * - base: Base multilingual model. + * - base.en: Base English-only model. + * - small: Small multilingual model. + * - small.en: Small English-only model. + * - medium: Medium multilingual model. + * - medium.en: Medium English-only model. + * - large-v1: Large multilingual model version 1. + * - large-v2: Large multilingual model version 2. + */ +export type WhisperModelType = 'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large-v1' | 'large-v2' + +/** + * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. + */ +export type PromptSection = { + /** The instructions for the section. */ + instruction: string + /** An example output for the section. */ + example: string +} + +/** + * Represents the options for Language Models (LLMs) that can be used in the application. + * + * - chatgpt: Use OpenAI's ChatGPT models. + * - claude: Use Anthropic's Claude models. + * - cohere: Use Cohere's language models. + * - mistral: Use Mistral AI's language models. + * - octo: Use OctoAI's language models. 
+ * - llama: Use Llama models for local inference. + * - ollama: Use Ollama for processing. + * - gemini: Use Google's Gemini models. + */ +export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini' + +/** + * Represents the options for LLM processing. + */ +export type LLMOptions = { + /** The sections to include in the prompt (e.g., ['titles', 'summary']). */ + promptSections?: string[] + /** The specific LLM model to use. */ + model?: string + /** The temperature parameter for text generation. */ + temperature?: number + /** The maximum number of tokens to generate. */ + maxTokens?: number +} + +/** + * Represents a function that calls an LLM for processing. + */ +export type LLMFunction = ( + promptAndTranscript: string, + tempPath: string, + llmModel?: string +) => Promise + +/** + * Represents a mapping of LLM option keys to their corresponding functions. + * + * This ensures that only valid `LLMServices` values can be used as keys in the `llmFunctions` object. + */ +export type LLMFunctions = { + [K in LLMServices]: LLMFunction +} + +/** + * Define all available LLM models. + */ +/** Define available GPT models. */ +export type ChatGPTModelType = 'GPT_4o_MINI' | 'GPT_4o' | 'GPT_4_TURBO' | 'GPT_4' +/** Define available Claude models. */ +export type ClaudeModelType = 'CLAUDE_3_5_SONNET' | 'CLAUDE_3_OPUS' | 'CLAUDE_3_SONNET' | 'CLAUDE_3_HAIKU' +/** Define available Cohere models. */ +export type CohereModelType = 'COMMAND_R' | 'COMMAND_R_PLUS' +/** Define available Gemini models. */ +export type GeminiModelType = 'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO' +/** Define available Mistral AI models. */ +export type MistralModelType = 'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO' +/** Define available OctoAI models. */ +export type OctoModelType = 'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B' +/** Define local model configurations. */ +export type LlamaModelType = 'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA_2_2B' +/** Define local model with Ollama. */ +export type OllamaModelType = 'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B' + +/** + * Represents the function signature for cleaning up temporary files. + */ +export type CleanUpFunction = (id: string) => Promise \ No newline at end of file diff --git a/src/utils/checkDependencies.js b/src/utils/checkDependencies.ts similarity index 64% rename from src/utils/checkDependencies.js rename to src/utils/checkDependencies.ts index 0ac97df..0b23862 100644 --- a/src/utils/checkDependencies.js +++ b/src/utils/checkDependencies.ts @@ -1,4 +1,4 @@ -// src/utils/checkDependencies.js +// src/utils/checkDependencies.ts import { execFile } from 'node:child_process' import { promisify } from 'node:util' @@ -7,10 +7,10 @@ const execFilePromise = promisify(execFile) /** * Check if required dependencies are installed. - * @param {string[]} dependencies - List of command-line tools to check. - * @returns {Promise} + * @param dependencies - List of command-line tools to check. + * @returns A promise that resolves when all dependencies are checked. 
*/ -export async function checkDependencies(dependencies) { +export async function checkDependencies(dependencies: string[]): Promise { for (const command of dependencies) { try { await execFilePromise(command, ['--version']) diff --git a/src/utils/cleanUpFiles.js b/src/utils/cleanUpFiles.ts similarity index 70% rename from src/utils/cleanUpFiles.js rename to src/utils/cleanUpFiles.ts index bcdb0c0..359017f 100644 --- a/src/utils/cleanUpFiles.js +++ b/src/utils/cleanUpFiles.ts @@ -1,7 +1,8 @@ -// src/utils/cleanUpFiles.js +// src/utils/cleanUpFiles.ts import { unlink } from 'node:fs/promises' import { log, step, success } from '../types.js' +import type { CleanUpFunction } from '../types.js' /** * Asynchronous function to clean up temporary files. @@ -9,7 +10,7 @@ import { log, step, success } from '../types.js' * @returns {Promise} * @throws {Error} - If an error occurs while deleting files. */ -export async function cleanUpFiles(id) { +export async function cleanUpFiles(id: string): Promise { log(step('\nStep 5 - Cleaning up temporary files...\n')) // Array of file extensions to delete const extensions = ['.wav', '.txt', '.md', '.lrc'] @@ -20,8 +21,8 @@ export async function cleanUpFiles(id) { await unlink(`${id}${ext}`) log(success(` - ${id}${ext}`)) } catch (error) { - if (error.code !== 'ENOENT') { - console.error(`Error deleting file ${id}${ext}: ${error.message}`) + if (error instanceof Error && (error as Error).message !== 'ENOENT') { + console.error(`Error deleting file ${id}${ext}: ${(error as Error).message}`) } // If the file does not exist, silently continue } diff --git a/src/utils/downloadAudio.js b/src/utils/downloadAudio.ts similarity index 82% rename from src/utils/downloadAudio.js rename to src/utils/downloadAudio.ts index 68b7e56..cae4f2c 100644 --- a/src/utils/downloadAudio.js +++ b/src/utils/downloadAudio.ts @@ -1,14 +1,13 @@ -// src/utils/downloadAudio.js +// src/utils/downloadAudio.ts -import { checkDependencies } from './checkDependencies.js' import { exec, execFile } from 'node:child_process' import { promisify } from 'node:util' import { readFile, access } from 'node:fs/promises' import { fileTypeFromBuffer } from 'file-type' import ffmpeg from 'ffmpeg-static' +import { checkDependencies } from './checkDependencies.js' import { log, step, success, wait } from '../types.js' - -/** @import { SupportedFileType } from '../types.js' */ +import type { SupportedFileType } from '../types.js' const execFilePromise = promisify(execFile) const execPromise = promisify(exec) @@ -20,7 +19,7 @@ const execPromise = promisify(exec) * @returns {Promise} - Returns the path to the downloaded WAV file. * @throws {Error} - If there is an error during the download process. */ -export async function downloadAudio(url, filename) { +export async function downloadAudio(url: string, filename: string): Promise { log(step('\nStep 2 - Downloading URL audio...\n')) try { // Check for required dependencies @@ -51,7 +50,7 @@ export async function downloadAudio(url, filename) { log(success(` Audio downloaded successfully:\n - ${downloadedFile}`)) return downloadedFile } catch (error) { - console.error(`Error downloading audio: ${error.message}`) + console.error(`Error downloading audio: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } @@ -63,11 +62,10 @@ export async function downloadAudio(url, filename) { * @returns {Promise} - Returns the final path to the processed WAV file. * @throws {Error} - If the file type is unsupported or processing fails. 
*/ -export async function downloadFileAudio(filePath, sanitizedFilename) { +export async function downloadFileAudio(filePath: string, sanitizedFilename: string): Promise { log(step('\nStep 2 - Downloading file audio...\n')) // Define supported audio and video formats - /** @type {Set} */ - const supportedFormats = new Set([ + const supportedFormats: Set = new Set([ 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm', ]) try { @@ -79,7 +77,7 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { // Determine the file type const fileType = await fileTypeFromBuffer(buffer) - if (!fileType || !supportedFormats.has(/** @type {SupportedFileType} */ (fileType.ext))) { + if (!fileType || !supportedFormats.has(fileType.ext as SupportedFileType)) { throw new Error( fileType ? `Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) @@ -96,7 +94,7 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { return outputPath } catch (error) { - console.error(`Error processing local file: ${error.message}`) + console.error(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } \ No newline at end of file diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.ts similarity index 80% rename from src/utils/generateMarkdown.js rename to src/utils/generateMarkdown.ts index 6bc2a7e..df081f4 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.ts @@ -1,23 +1,22 @@ -// src/utils/generateMarkdown.js +// src/utils/generateMarkdown.ts -import { checkDependencies } from './checkDependencies.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' +import { checkDependencies } from './checkDependencies.js' import { log, dim, step, success } from '../types.js' - -/** @import { MarkdownData, RSSItem, VideoMetadata } from '../types.js' */ +import type { MarkdownData, RSSItem, VideoMetadata } from '../types.js' // Promisify the execFile function for use with async/await const execFilePromise = promisify(execFile) /** * Extract metadata for a single video URL. - * @param {string} url - The URL of the video. - * @returns {Promise} - The video metadata. + * @param url - The URL of the video. + * @returns The video metadata. */ -export async function extractVideoMetadata(url) { +export async function extractVideoMetadata(url: string): Promise { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -50,18 +49,18 @@ export async function extractVideoMetadata(url) { coverImage, } } catch (error) { - console.error(`Error extracting metadata for ${url}: ${error.message}`) + console.error(`Error extracting metadata for ${url}: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } /** * Function to generate markdown for RSS feed items. - * @param {RSSItem} item - The RSS feed item object. - * @returns {Promise} - Returns an object with frontMatter, finalPath, and filename. - * @throws {Error} - If markdown generation fails. + * @param item - The RSS feed item object. + * @returns An object with frontMatter, finalPath, and filename. + * @throws {Error} If markdown generation fails. 
*/ -export async function generateRSSMarkdown(item) { +export async function generateRSSMarkdown(item: RSSItem): Promise { try { // Destructure the item object const { publishDate, title, coverImage, showLink, channel, channelURL } = item @@ -91,18 +90,18 @@ export async function generateRSSMarkdown(item) { log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) return { frontMatter, finalPath, filename } } catch (error) { - console.error(`Error generating markdown for RSS item: ${error.message}`) + console.error(`Error generating markdown for RSS item: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } /** * Function to generate markdown for local audio or video files. - * @param {string} filePath - The path to the local file. - * @returns {Promise} - Returns an object with frontMatter, finalPath, and filename. - * @throws {Error} - If markdown generation fails. + * @param filePath - The path to the local file. + * @returns An object with frontMatter, finalPath, and filename. + * @throws {Error} If markdown generation fails. */ -export async function generateFileMarkdown(filePath) { +export async function generateFileMarkdown(filePath: string): Promise { try { // Extract the original filename from the full file path const originalFilename = basename(filePath) @@ -141,7 +140,7 @@ export async function generateFileMarkdown(filePath) { return { frontMatter, finalPath, filename: sanitizedFilename } } catch (error) { // Log any errors that occur during the process - console.error(`Error generating markdown for file: ${error.message}`) + console.error(`Error generating markdown for file: ${error instanceof Error ? (error as Error).message : String(error)}`) // Re-throw the error to be handled by the calling function throw error } @@ -149,11 +148,11 @@ export async function generateFileMarkdown(filePath) { /** * Function to generate markdown for YouTube videos. - * @param {string} url - The URL of the YouTube video. - * @returns {Promise} - An object containing front matter, final path, and filename. - * @throws {Error} - If markdown generation fails. + * @param url - The URL of the YouTube video. + * @returns An object containing front matter, final path, and filename. + * @throws {Error} If markdown generation fails. */ -export async function generateMarkdown(url) { +export async function generateMarkdown(url: string): Promise { try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -205,17 +204,17 @@ export async function generateMarkdown(url) { log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) return { frontMatter, finalPath, filename } } catch (error) { - console.error(`Error generating markdown for video: ${error.message}`) + console.error(`Error generating markdown for video: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error } } /** * Sanitize the title to create a safe filename. - * @param {string} title - The title to sanitize. - * @returns {string} - The sanitized title. + * @param title - The title to sanitize. + * @returns The sanitized title. 
*/ -function sanitizeTitle(title) { +function sanitizeTitle(title: string): string { return title .replace(/[^\w\s-]/g, '') .trim() diff --git a/src/utils/runLLM.js b/src/utils/runLLM.ts similarity index 84% rename from src/utils/runLLM.js rename to src/utils/runLLM.ts index 944e952..815fb1f 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.ts @@ -1,4 +1,4 @@ -// src/utils/runLLM.js +// src/utils/runLLM.ts import { readFile, writeFile, unlink } from 'node:fs/promises' import { callLlama } from '../llms/llama.js' @@ -11,8 +11,7 @@ import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' import { generatePrompt } from '../llms/prompt.js' import { log, step, success, wait } from '../types.js' - -/** @import { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' */ +import type { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' /** * Main function to run the selected Language Model. @@ -23,12 +22,14 @@ import { log, step, success, wait } from '../types.js' * @returns {Promise} * @throws {Error} - If the LLM processing fails or an error occurs during execution. */ -export async function runLLM(finalPath, frontMatter, llmServices, options) { +export async function runLLM( + options: ProcessingOptions, + finalPath: string, + frontMatter: string, + llmServices?: LLMServices +): Promise { log(step(`\nStep 4 - Running LLM processing on transcript...\n`)) - // log(opts(`Options passed to runLLM:\n`)) - // log(options) - /** @type {LLMFunctions} */ - const LLM_FUNCTIONS = { + const LLM_FUNCTIONS: LLMFunctions = { llama: callLlama, ollama: callOllama, chatgpt: callChatGPT, @@ -50,10 +51,7 @@ export async function runLLM(finalPath, frontMatter, llmServices, options) { if (llmServices) { log(wait(` Processing with ${llmServices} Language Model...`)) - /** Get the appropriate LLM function based on the option - * @type {LLMFunction} - */ - const llmFunction = LLM_FUNCTIONS[llmServices] + const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices] if (!llmFunction) { throw new Error(`Invalid LLM option: ${llmServices}`) } @@ -74,7 +72,7 @@ export async function runLLM(finalPath, frontMatter, llmServices, options) { log(success(`\n Prompt and transcript saved to markdown file:\n - ${finalPath}-prompt.md`)) } } catch (error) { - console.error(`Error running Language Model: ${error.message}`) + console.error(`Error running Language Model: ${(error as Error).message}`) throw error } } \ No newline at end of file diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.ts similarity index 69% rename from src/utils/runTranscription.js rename to src/utils/runTranscription.ts index 8ab52f2..078ac08 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.ts @@ -1,4 +1,4 @@ -// src/utils/runTranscription.js +// src/utils/runTranscription.ts import { readFile, writeFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper.js' @@ -6,47 +6,48 @@ import { callWhisperDocker } from '../transcription/whisperDocker.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' import { log, step, success, wait } from '../types.js' - -/** @import { TranscriptServices, ProcessingOptions } from '../types.js' */ +import type { TranscriptServices, ProcessingOptions } from '../types.js' /** * Main function to run transcription. * @param {string} finalPath - The base path for the files. 
+ * @param {string} frontMatter - Optional front matter content for the markdown file. * @param {TranscriptServices} transcriptServices - The transcription service to use. - * @param {ProcessingOptions} [options={}] - Additional processing options. - * @param {string} [frontMatter=''] - Optional front matter content for the markdown file. + * @param {ProcessingOptions} [options] - Additional processing options. * @returns {Promise} - Returns the final content including markdown and transcript. * @throws {Error} - If the transcription service fails or an error occurs during processing. */ - -export async function runTranscription(finalPath, frontMatter, transcriptServices, options) { +export async function runTranscription( + options: ProcessingOptions, + finalPath: string, + frontMatter: string, + transcriptServices?: TranscriptServices +): Promise { log(step(`\nStep 3 - Running transcription on audio file...`)) - // log(opts(`Options passed to runTranscription:\n`)) - // log(options) try { - let txtContent + let txtContent: string // Choose the transcription service based on the provided option switch (transcriptServices) { case 'deepgram': log(wait('\n Using Deepgram for transcription...')) - txtContent = await callDeepgram(finalPath, options) + txtContent = await callDeepgram(options, finalPath) break case 'assembly': log(wait('\n Using AssemblyAI for transcription...')) - txtContent = await callAssembly(finalPath, options) + txtContent = await callAssembly(options, finalPath) break case 'whisperDocker': log(wait('\n Using Whisper Docker for transcription...')) - txtContent = await callWhisperDocker(finalPath, options) + txtContent = await callWhisperDocker(options, finalPath) break case 'whisper': default: log(wait('\n Using Whisper for transcription...')) - txtContent = await callWhisper(finalPath, options) + txtContent = await callWhisper(options, finalPath) break } @@ -56,8 +57,8 @@ export async function runTranscription(finalPath, frontMatter, transcriptService const existingContent = await readFile(`${finalPath}.md`, 'utf8') mdContent += existingContent } catch (error) { - if (error.code !== 'ENOENT') { - console.error(`Error reading markdown file: ${error.message}`) + if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { + console.error(`Error reading markdown file: ${(error as Error).message}`) throw error } // If the file does not exist, proceed without appending @@ -72,7 +73,7 @@ export async function runTranscription(finalPath, frontMatter, transcriptService return finalContent } catch (error) { - console.error(`Error in transcription process: ${error.message}`) + console.error(`Error in transcription process: ${(error as Error).message}`) throw error } } \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..deb01f8 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ESNext", // Target modern JS features + "module": "ESNext", // Use ES modules + "lib": ["ESNext"], // Include modern JS features + "moduleResolution": "bundler", + "esModuleInterop": true, // Allow default imports from CJS modules + "skipLibCheck": true, // Skip type checking for node_modules + "forceConsistentCasingInFileNames": true, // Enforce file name casing consistency + "outDir": "./dist", // Output directory for compiled files + "rootDir": "./src", // Root directory for source files + "resolveJsonModule": true, // Allow importing JSON files + "strict": true, // Enable all strict type-checking options + 
"declaration": true, // Generate .d.ts files + "noEmitOnError": true // Prevent emitting files if there are errors + }, + "include": ["src/**/*"], // Include all TypeScript files in the `src` folder + "exclude": ["node_modules", "dist"] // Exclude output and dependencies +} \ No newline at end of file From b5a1e68c5e639b7f585f6058b8048219a1bd0a56 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sun, 6 Oct 2024 04:16:09 -0500 Subject: [PATCH 7/9] assembly and deepgram updates --- src/autoshow.ts | 4 +- src/commands/processFile.ts | 15 +- src/commands/processPlaylist.ts | 9 +- src/commands/processRSS.ts | 22 +- src/commands/processURLs.ts | 9 +- src/commands/processVideo.ts | 27 +-- src/interactive.ts | 2 +- src/llms/chatgpt.ts | 2 +- src/llms/claude.ts | 2 +- src/llms/cohere.ts | 2 +- src/llms/gemini.ts | 2 +- src/llms/llama.ts | 2 +- src/llms/mistral.ts | 2 +- src/llms/octo.ts | 2 +- src/llms/ollama.ts | 2 +- src/models.ts | 11 + src/transcription/assembly.ts | 92 ++++++-- src/transcription/deepgram.ts | 141 ++++++++---- src/transcription/whisper.ts | 3 +- src/transcription/whisperDocker.ts | 3 +- src/types.ts | 14 -- src/utils/cleanUpFiles.ts | 3 +- src/utils/downloadAudio.ts | 139 ++++++------ src/utils/extractVideoMetadata.ts | 52 +++++ src/utils/generateMarkdown.ts | 350 ++++++++++++----------------- src/utils/runLLM.ts | 2 +- src/utils/runTranscription.ts | 6 +- 27 files changed, 514 insertions(+), 406 deletions(-) create mode 100644 src/utils/extractVideoMetadata.ts diff --git a/src/autoshow.ts b/src/autoshow.ts index 8651fae..8750b00 100644 --- a/src/autoshow.ts +++ b/src/autoshow.ts @@ -20,7 +20,7 @@ import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv, exit } from 'node:process' -import { log, opts } from './types.js' +import { log, opts } from './models.js' import type { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' // Initialize the command-line interface @@ -78,7 +78,7 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues * @returns {Promise} */ program.action(async (options: ProcessingOptions) => { - log(opts(`Options received:\n`)) + log(opts(`Options received at beginning of command:\n`)) log(options) log(``) diff --git a/src/commands/processFile.ts b/src/commands/processFile.ts index 16b9eb1..3827ad0 100644 --- a/src/commands/processFile.ts +++ b/src/commands/processFile.ts @@ -1,11 +1,11 @@ // src/commands/processFile.ts -import { generateFileMarkdown } from '../utils/generateMarkdown.js' -import { downloadFileAudio } from '../utils/downloadAudio.js' +import { generateMarkdown } from '../utils/generateMarkdown.js' +import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final } from '../types.js' +import { log, final } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -22,14 +22,17 @@ export async function processFile( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(opts(`Options received:\n`)) + // log(`Options received in processFile:\n`) // log(options) + // log(`filePath:`, filePath) + // log(`llmServices:`, llmServices) + // log(`transcriptServices:`, 
transcriptServices) try { // Generate markdown for the file - const { frontMatter, finalPath, filename } = await generateFileMarkdown(filePath) + const { frontMatter, finalPath, filename } = await generateMarkdown(options, filePath) // Convert the audio or video file to the required format - await downloadFileAudio(filePath, filename) + await downloadAudio(options, filePath, filename) // Run transcription on the file await runTranscription(options, finalPath, frontMatter, transcriptServices) diff --git a/src/commands/processPlaylist.ts b/src/commands/processPlaylist.ts index 853e58c..7448e99 100644 --- a/src/commands/processPlaylist.ts +++ b/src/commands/processPlaylist.ts @@ -4,9 +4,9 @@ import { writeFile } from 'node:fs/promises' import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { processVideo } from './processVideo.js' -import { extractVideoMetadata } from '../utils/generateMarkdown.js' +import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' import { checkDependencies } from '../utils/checkDependencies.js' -import { log, final, wait } from '../types.js' +import { log, final, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' const execFilePromise = promisify(execFile) @@ -24,8 +24,11 @@ export async function processPlaylist( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(opts(`Options received:\n`)) + // log(`Options received in processPlaylist:\n`) // log(options) + // log(`playlistUrl:`, playlistUrl) + // log(`llmServices:`, llmServices) + // log(`transcriptServices:`, transcriptServices) try { // Check for required dependencies await checkDependencies(['yt-dlp']) diff --git a/src/commands/processRSS.ts b/src/commands/processRSS.ts index ca55d25..82c307b 100644 --- a/src/commands/processRSS.ts +++ b/src/commands/processRSS.ts @@ -2,12 +2,12 @@ import { writeFile } from 'node:fs/promises' import { XMLParser } from 'fast-xml-parser' -import { generateRSSMarkdown } from '../utils/generateMarkdown.js' +import { generateMarkdown } from '../utils/generateMarkdown.js' import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final, wait } from '../types.js' +import { log, final, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' @@ -32,14 +32,17 @@ async function processItem( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(opts(`\nItem parameter passed to processItem:\n`)) - // log(item) + // log(`Options received in processItem:\n`) + // log(options) + // log(`item\n\n`, item) + // log(`llmServices:`, llmServices) + // log(`transcriptServices:`, transcriptServices) try { // Generate markdown for the item - const { frontMatter, finalPath, filename } = await generateRSSMarkdown(item) + const { frontMatter, finalPath, filename } = await generateMarkdown(options, item) // Download audio - await downloadAudio(item.showLink, filename) + await downloadAudio(options, item.showLink, filename) // Run transcription await runTranscription(options, finalPath, frontMatter, transcriptServices) @@ -73,8 +76,11 @@ export async function processRSS( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(opts(`Options 
received:\n`)) - // log(options) + log(`Options received in processRSS:\n`) + log(options) + log(`rssUrl:`, rssUrl) + log(`llmServices:`, llmServices) + log(`transcriptServices:`, transcriptServices) try { // Validate that --last is a positive integer if provided if (options.last !== undefined) { diff --git a/src/commands/processURLs.ts b/src/commands/processURLs.ts index f740e86..68c86a7 100644 --- a/src/commands/processURLs.ts +++ b/src/commands/processURLs.ts @@ -3,9 +3,9 @@ import { readFile, writeFile } from 'node:fs/promises' import { resolve } from 'node:path' import { processVideo } from './processVideo.js' -import { extractVideoMetadata } from '../utils/generateMarkdown.js' +import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' import { checkDependencies } from '../utils/checkDependencies.js' -import { log, final, wait } from '../types.js' +import { log, final, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -21,8 +21,11 @@ export async function processURLs( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(opts(`Options received:\n`)) + // log(`Options received in processURLs:\n`) // log(options) + // log(`filePath:`, filePath) + // log(`llmServices:`, llmServices) + // log(`transcriptServices:`, transcriptServices) try { // Check for required dependencies await checkDependencies(['yt-dlp']) diff --git a/src/commands/processVideo.ts b/src/commands/processVideo.ts index f106f2d..8c873b6 100644 --- a/src/commands/processVideo.ts +++ b/src/commands/processVideo.ts @@ -6,7 +6,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final } from '../types.js' +import { log, final } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -20,28 +20,29 @@ import type { LLMServices, TranscriptServices, ProcessingOptions } from '../type export async function processVideo( options: ProcessingOptions, url: string, - llmServices?: LLMServices, // Make this optional - transcriptServices?: TranscriptServices // Make this optional + llmServices?: LLMServices, + transcriptServices?: TranscriptServices ): Promise { + // log(`Options received in processVideo:\n`) + // log(options) + // log(`url:`, url) + // log(`llmServices:`, llmServices) + // log(`transcriptServices:`, transcriptServices) try { // Check for required dependencies await checkDependencies(['yt-dlp']) // Generate markdown with video metadata - const { frontMatter, finalPath, filename } = await generateMarkdown(url) + const { frontMatter, finalPath, filename } = await generateMarkdown(options, url) // Download audio from the video - await downloadAudio(url, filename) + await downloadAudio(options, url, filename) - // Run transcription on the audio if transcriptServices is defined - if (transcriptServices) { - await runTranscription(options, finalPath, frontMatter, transcriptServices) - } + // Run transcription on the audio + await runTranscription(options, finalPath, frontMatter, transcriptServices) - // Process the transcript with the selected Language Model if llmServices is defined - if (llmServices) { - await runLLM(options, finalPath, frontMatter, llmServices) - } + // Process transcript with an LLM if llmServices is defined, concatenate prompt and transcript if 
undefined + await runLLM(options, finalPath, frontMatter, llmServices) // Clean up temporary files if the noCleanUp option is not set if (!options.noCleanUp) { diff --git a/src/interactive.ts b/src/interactive.ts index 5097093..012d86a 100644 --- a/src/interactive.ts +++ b/src/interactive.ts @@ -2,7 +2,7 @@ import inquirer from 'inquirer' import type { ProcessingOptions, InquirerAnswers, WhisperModelType } from './types.js' -import { log } from './types.js' +import { log } from './models.js' /** * Prompts the user for input if interactive mode is selected. diff --git a/src/llms/chatgpt.ts b/src/llms/chatgpt.ts index f9d4973..fc74201 100644 --- a/src/llms/chatgpt.ts +++ b/src/llms/chatgpt.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OpenAI } from 'openai' import { GPT_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, ChatGPTModelType } from '../types.js' diff --git a/src/llms/claude.ts b/src/llms/claude.ts index 791fdb4..2481ab1 100644 --- a/src/llms/claude.ts +++ b/src/llms/claude.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Anthropic } from '@anthropic-ai/sdk' import { CLAUDE_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, ClaudeModelType } from '../types.js' diff --git a/src/llms/cohere.ts b/src/llms/cohere.ts index db806b0..edf4fdd 100644 --- a/src/llms/cohere.ts +++ b/src/llms/cohere.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { CohereClient } from 'cohere-ai' import { COHERE_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, CohereModelType } from '../types.js' diff --git a/src/llms/gemini.ts b/src/llms/gemini.ts index e1144c3..d142e4d 100644 --- a/src/llms/gemini.ts +++ b/src/llms/gemini.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { GoogleGenerativeAI } from "@google/generative-ai" import { GEMINI_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, GeminiModelType } from '../types.js' diff --git a/src/llms/llama.ts b/src/llms/llama.ts index 271816a..9a48d45 100644 --- a/src/llms/llama.ts +++ b/src/llms/llama.ts @@ -6,7 +6,7 @@ import { existsSync } from 'node:fs' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { LLAMA_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LlamaModelType, LLMFunction } from '../types.js' diff --git a/src/llms/mistral.ts b/src/llms/mistral.ts index 5b574f4..6ac7ea4 100644 --- a/src/llms/mistral.ts +++ b/src/llms/mistral.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { Mistral } from '@mistralai/mistralai' import { MISTRAL_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, MistralModelType } from '../types.js' diff --git a/src/llms/octo.ts b/src/llms/octo.ts index 29a1202..9895f89 100644 --- a/src/llms/octo.ts +++ b/src/llms/octo.ts @@ -4,7 +4,7 @@ import { writeFile } from 'node:fs/promises' import { env } 
from 'node:process' import { OctoAIClient } from '@octoai/sdk' import { OCTO_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, OctoModelType } from '../types.js' diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index cf7d901..3a02c59 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -3,7 +3,7 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OLLAMA_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { LLMFunction, OllamaModelType } from '../types.js' diff --git a/src/models.ts b/src/models.ts index 3297ca9..1bd7b7c 100644 --- a/src/models.ts +++ b/src/models.ts @@ -1,7 +1,18 @@ // src/models.ts +import chalk from 'chalk' +import type { ChalkInstance } from 'chalk' import type { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType } from './types.js' +export const step: ChalkInstance = chalk.bold.underline +export const dim: ChalkInstance = chalk.dim +export const success: ChalkInstance = chalk.bold.blue +export const opts: ChalkInstance = chalk.magentaBright.bold +export const wait: ChalkInstance = chalk.cyan.dim +export const final: ChalkInstance = chalk.bold.italic + +export const log: typeof console.log = console.log + /** * Define available Whisper models * @type {Record} diff --git a/src/transcription/assembly.ts b/src/transcription/assembly.ts index 12aa874..d8d0914 100644 --- a/src/transcription/assembly.ts +++ b/src/transcription/assembly.ts @@ -1,38 +1,95 @@ -// src/transcription/assembly.ts - +import { createReadStream } from 'node:fs' import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import { AssemblyAI } from 'assemblyai' -import { log, wait } from '../types.js' +import fetch from 'node-fetch' +import { log, wait, success } from '../models.js' import type { ProcessingOptions } from '../types.js' +const BASE_URL = 'https://api.assemblyai.com/v2' + /** * Main function to handle transcription using AssemblyAI. - * @param {string} finalPath - The identifier used for naming output files. * @param {ProcessingOptions} options - Additional processing options. + * @param {string} finalPath - The identifier used for naming output files. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ export async function callAssembly(options: ProcessingOptions, finalPath: string): Promise { + log(wait('\n Using AssemblyAI for transcription...')) // Check if the ASSEMBLY_API_KEY environment variable is set if (!env.ASSEMBLY_API_KEY) { throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') } - // Initialize the AssemblyAI client with API key from environment variables - const client = new AssemblyAI({ apiKey: env.ASSEMBLY_API_KEY }) + const headers = { + 'Authorization': env.ASSEMBLY_API_KEY, + 'Content-Type': 'application/json' + } try { const { speakerLabels } = options - // Request transcription from AssemblyAI - const transcript = await client.transcripts.transcribe({ - audio: `${finalPath}.wav`, // The audio file to transcribe - speech_model: 'nano', // Use the 'nano' speech model for transcription (`best` also an option) - ...(speakerLabels && { // Conditionally add speaker labeling options - speaker_labels: true, + const audioFilePath = `${finalPath}.wav` + + // Step 1: Upload the audio file + log(wait('\n Uploading audio file to AssemblyAI...')) + const uploadUrl = `${BASE_URL}/upload` + const fileStream = createReadStream(audioFilePath) + + const uploadResponse = await fetch(uploadUrl, { + method: 'POST', + headers: { + 'Authorization': env.ASSEMBLY_API_KEY, + 'Content-Type': 'application/octet-stream', + }, + body: fileStream, + }) + + if (!uploadResponse.ok) { + const errorData = await uploadResponse.json() + throw new Error(`File upload failed: ${errorData.error || uploadResponse.statusText}`) + } + + const uploadData = await uploadResponse.json() + const upload_url: string = uploadData.upload_url + if (!upload_url) { + throw new Error('Upload URL not returned by AssemblyAI.') + } + log(success(' Audio file uploaded successfully.')) + + // Step 2: Request transcription + const response = await fetch(`${BASE_URL}/transcript`, { + method: 'POST', + headers, + body: JSON.stringify({ + audio_url: upload_url, + speech_model: 'nano', + speaker_labels: speakerLabels || false }) }) + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`) + } + + const transcriptData = await response.json() + + // Step 3: Poll for completion + let transcript + while (true) { + const pollingResponse = await fetch(`${BASE_URL}/transcript/${transcriptData.id}`, { headers }) + transcript = await pollingResponse.json() + + if (transcript.status === 'completed' || transcript.status === 'error') { + break + } + + await new Promise(resolve => setTimeout(resolve, 3000)) + } + + if (transcript.status === 'error') { + throw new Error(`Transcription failed: ${transcript.error}`) + } + // Initialize output string let txtContent = '' @@ -45,14 +102,14 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string // Process the transcript based on whether utterances are available if (transcript.utterances) { // If utterances are available, format each with speaker labels if used - txtContent = transcript.utterances.map((utt) => + txtContent = transcript.utterances.map((utt: any) => `${speakerLabels ? 
`Speaker ${utt.speaker} ` : ''}(${formatTime(utt.start)}): ${utt.text}` ).join('\n') } else if (transcript.words) { // If only words are available, group them into lines with timestamps let currentLine = '' let currentTimestamp = formatTime(transcript.words[0].start) - transcript.words.forEach((word) => { + transcript.words.forEach((word: any) => { if (currentLine.length + word.text.length > 80) { // Start a new line if the current line exceeds 80 characters txtContent += `[${currentTimestamp}] ${currentLine.trim()}\n` @@ -73,6 +130,11 @@ export async function callAssembly(options: ProcessingOptions, finalPath: string // Write the formatted transcript to a file await writeFile(`${finalPath}.txt`, txtContent) log(wait(`\n Transcript saved...\n - ${finalPath}.txt\n`)) + + // Create an empty LRC file to prevent cleanup errors + await writeFile(`${finalPath}.lrc`, '') + log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`)) + return txtContent } catch (error) { // Log any errors that occur during the transcription process diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts index 01644b4..0b7950f 100644 --- a/src/transcription/deepgram.ts +++ b/src/transcription/deepgram.ts @@ -2,69 +2,122 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' -import { createClient, SyncPrerecordedResponse, DeepgramResponse } from '@deepgram/sdk' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { ProcessingOptions } from '../types.js' +// Define types for Deepgram API response +type DeepgramResponse = { + metadata: { + transaction_key: string + request_id: string + sha256: string + created: string + duration: number + channels: number + models: string[] + model_info: { + [key: string]: { + name: string + version: string + arch: string + } + } + } + results: { + channels: Array<{ + alternatives: Array<{ + transcript: string + confidence: number + words: Array<{ + word: string + start: number + end: number + confidence: number + }> + }> + }> + } +} + /** - * Main function to handle transcription using Deepgram. - * @param {string} finalPath - The identifier used for naming output files. + * Main function to handle transcription using Deepgram API. * @param {ProcessingOptions} options - Additional processing options. + * @param {string} finalPath - The identifier used for naming output files. * @returns {Promise} - Returns the formatted transcript content. * @throws {Error} - If an error occurs during transcription. */ export async function callDeepgram(options: ProcessingOptions, finalPath: string): Promise { + log(wait('\n Using Deepgram for transcription...\n')) + // log(`Options received in callDeepgram:\n`) + // log(options) + // log(`finalPath:`, finalPath) + // Check if the DEEPGRAM_API_KEY environment variable is set if (!env.DEEPGRAM_API_KEY) { throw new Error('DEEPGRAM_API_KEY environment variable is not set. 
Please set it to your Deepgram API key.') } - // Initialize the Deepgram client with the API key from environment variables - const deepgram = createClient(env.DEEPGRAM_API_KEY) + try { + const apiUrl = new URL('https://api.deepgram.com/v1/listen') - // Check if the input is a URL or a local file - const isUrl = finalPath.startsWith('http://') || finalPath.startsWith('https://') + // Set query parameters + apiUrl.searchParams.append('model', 'nova-2') + apiUrl.searchParams.append('smart_format', 'true') + apiUrl.searchParams.append('punctuate', 'true') + apiUrl.searchParams.append('diarize', 'false') + apiUrl.searchParams.append('paragraphs', 'true') - try { - let result: DeepgramResponse - if (isUrl) { - // Use transcribeUrl for URL inputs - result = await deepgram.listen.prerecorded.transcribeUrl( - { url: finalPath }, - { model: 'nova-2', smart_format: true } - ) - } else { - // Use transcribeFile for local file inputs - const audioBuffer = await readFile(`${finalPath}.wav`) - result = await deepgram.listen.prerecorded.transcribeFile( - audioBuffer, - { model: 'nova-2', smart_format: true } - ) + // Read the local WAV file + const audioBuffer = await readFile(`${finalPath}.wav`) + + // Send the request to Deepgram + const response = await fetch(apiUrl, { + method: 'POST', + headers: { + 'Authorization': `Token ${env.DEEPGRAM_API_KEY}`, + 'Content-Type': 'audio/wav' + }, + body: audioBuffer + }) + + if (!response.ok) { + throw new Error(`Deepgram API request failed with status ${response.status}`) } - // Type guard: Check if the result has 'results' and 'metadata' (success case) - if ('results' in result && 'metadata' in result) { - // Safely cast the result to SyncPrerecordedResponse after the check - const successResult = result as unknown as SyncPrerecordedResponse + const result = await response.json() as DeepgramResponse - // Safely access properties with optional chaining - const txtContent = successResult.results?.channels[0]?.alternatives[0]?.paragraphs?.paragraphs - ?.flatMap((paragraph) => paragraph.sentences) - ?.map((sentence) => { - // Handle case where sentence or start might be undefined - const minutes = Math.floor((sentence.start ?? 0) / 60).toString().padStart(2, '0') - const seconds = Math.floor((sentence.start ?? 0) % 60).toString().padStart(2, '0') - return `[${minutes}:${seconds}] ${sentence.text ?? ''}` - }) - ?.join('\n') || '' // Default to empty string if undefined + // Extract the words array from the Deepgram API response + const txtContent = result.results.channels[0].alternatives[0].words + // Use reduce to iterate over the words array and build the formatted transcript + .reduce((acc, { word, start }, i, arr) => { + // Determine if a timestamp should be added + // Add timestamp if it's the first word, every 30th word, or the start of a sentence + const timestamp = (i % 30 === 0 || word.match(/^[A-Z]/)) + // If true, create a timestamp string that calculates minutes/seconds and converts to string with a pad for leading zeros + ? `[${Math.floor(start / 60).toString().padStart(2, '0') + }:${Math.floor(start % 60).toString().padStart(2, '0')}] ` + // If false, use an empty string (no timestamp) + : '' + + // Add newline if the word ends a sentence, every 30th word, or it's the last word + const newline = (word.match(/[.!?]$/) || i % 30 === 29 || i === arr.length - 1) + // Add a newline character if true and use an empty string if false + ? 
'\n' + : '' + + // Combine the accumulated text, timestamp (if any), current word, and newline (if any) + return `${acc}${timestamp}${word} ${newline}` + }, '') - // Write the formatted transcript to a file - await writeFile(`${finalPath}.txt`, txtContent) - log(wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)) - return txtContent - } else { - throw new Error('Deepgram returned an error response or incomplete data') - } + // Write the formatted transcript to a file + await writeFile(`${finalPath}.txt`, txtContent) + log(wait(`\n Transcript saved:\n - ${finalPath}.txt\n`)) + + // Create an empty LRC file to prevent cleanup errors + await writeFile(`${finalPath}.lrc`, '') + log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`)) + + return txtContent } catch (error) { // Log any errors that occur during the transcription process console.error(`Error processing the transcription: ${(error as Error).message}`) diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts index 244b034..5aa5663 100644 --- a/src/transcription/whisper.ts +++ b/src/transcription/whisper.ts @@ -5,7 +5,7 @@ import { exec } from 'node:child_process' import { promisify } from 'node:util' import { existsSync } from 'node:fs' import { WHISPER_MODELS } from '../models.js' -import { log, success, wait } from '../types.js' +import { log, success, wait } from '../models.js' import type { ProcessingOptions } from '../types.js' const execPromise = promisify(exec) @@ -18,6 +18,7 @@ const execPromise = promisify(exec) * @throws {Error} - If an error occurs during transcription. */ export async function callWhisper(options: ProcessingOptions, finalPath: string): Promise { + log(wait('\n Using Whisper for transcription...')) try { // Get the whisper model from options or use 'base' as default const whisperModel = options.whisper || 'base' diff --git a/src/transcription/whisperDocker.ts b/src/transcription/whisperDocker.ts index 8e56bcd..295ea05 100644 --- a/src/transcription/whisperDocker.ts +++ b/src/transcription/whisperDocker.ts @@ -5,7 +5,7 @@ import { exec } from 'node:child_process' import { promisify } from 'node:util' import { join } from 'node:path' import { WHISPER_MODELS } from '../models.js' -import { log, wait } from '../types.js' +import { log, wait } from '../models.js' import type { ProcessingOptions } from '../types.js' const execPromise = promisify(exec) @@ -18,6 +18,7 @@ const execPromise = promisify(exec) * @throws {Error} - If an error occurs during transcription. 
*/ export async function callWhisperDocker(options: ProcessingOptions, finalPath: string): Promise { + log(wait('\n Using Whisper Docker for transcription...')) try { // Get the whisper model from options or use 'base' as default const whisperModel = options.whisperDocker || 'base' diff --git a/src/types.ts b/src/types.ts index 2260ed3..c6ca1fa 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,19 +1,5 @@ // src/types.ts -import type { ChalkInstance } from 'chalk' -// import type { BuiltInQuestion } from 'inquirer' -// import BuiltInQuestion from 'inquirer' -import chalk from 'chalk' - -export const step: ChalkInstance = chalk.bold.underline -export const dim: ChalkInstance = chalk.dim -export const success: ChalkInstance = chalk.bold.blue -export const opts: ChalkInstance = chalk.magentaBright.bold -export const wait: ChalkInstance = chalk.cyan.dim -export const final: ChalkInstance = chalk.bold.italic - -export const log: typeof console.log = console.log - /** * @file This file contains all the custom type definitions used across the Autoshow project. */ diff --git a/src/utils/cleanUpFiles.ts b/src/utils/cleanUpFiles.ts index 359017f..07afea4 100644 --- a/src/utils/cleanUpFiles.ts +++ b/src/utils/cleanUpFiles.ts @@ -1,8 +1,7 @@ // src/utils/cleanUpFiles.ts import { unlink } from 'node:fs/promises' -import { log, step, success } from '../types.js' -import type { CleanUpFunction } from '../types.js' +import { log, step, success } from '../models.js' /** * Asynchronous function to clean up temporary files. diff --git a/src/utils/downloadAudio.ts b/src/utils/downloadAudio.ts index cae4f2c..80fca63 100644 --- a/src/utils/downloadAudio.ts +++ b/src/utils/downloadAudio.ts @@ -6,95 +6,86 @@ import { readFile, access } from 'node:fs/promises' import { fileTypeFromBuffer } from 'file-type' import ffmpeg from 'ffmpeg-static' import { checkDependencies } from './checkDependencies.js' -import { log, step, success, wait } from '../types.js' -import type { SupportedFileType } from '../types.js' +import { log, step, success, wait } from '../models.js' +import type { SupportedFileType, ProcessingOptions } from '../types.js' const execFilePromise = promisify(execFile) const execPromise = promisify(exec) /** - * Function to download audio from a URL using yt-dlp. - * @param {string} url - The URL of the video to download audio from. + * Function to download or process audio based on the input type. + * @param {ProcessingOptions} options - The processing options specifying the type of content to generate. + * @param {string} input - The URL of the video or path to the local file. * @param {string} filename - The base filename to save the audio as. - * @returns {Promise} - Returns the path to the downloaded WAV file. - * @throws {Error} - If there is an error during the download process. + * @returns {Promise} - Returns the path to the downloaded or processed WAV file. + * @throws {Error} - If there is an error during the download or processing. 
*/ -export async function downloadAudio(url: string, filename: string): Promise { - log(step('\nStep 2 - Downloading URL audio...\n')) - try { - // Check for required dependencies - await checkDependencies(['yt-dlp']) +export async function downloadAudio(options: ProcessingOptions, input: string, filename: string): Promise { + const finalPath = `content/${filename}` + const outputPath = `${finalPath}.wav` - // Set the final path for the downloaded file - const finalPath = `content/${filename}` + if (options.video || options.playlist || options.urls || options.rss) { + log(step('\nStep 2 - Downloading URL audio...\n')) + try { + // Check for required dependencies + await checkDependencies(['yt-dlp']) - // Execute yt-dlp to download the audio - const { stderr } = await execFilePromise('yt-dlp', [ - '--no-warnings', - '--restrict-filenames', - '--extract-audio', - '--audio-format', 'wav', - '--postprocessor-args', 'ffmpeg:-ar 16000 -ac 1', - '--no-playlist', - '-o', `${finalPath}.%(ext)s`, - url, - ]) + // Execute yt-dlp to download the audio + const { stderr } = await execFilePromise('yt-dlp', [ + '--no-warnings', + '--restrict-filenames', + '--extract-audio', + '--audio-format', 'wav', + '--postprocessor-args', 'ffmpeg:-ar 16000 -ac 1', + '--no-playlist', + '-o', outputPath, + input, + ]) + + // Log any errors from yt-dlp + if (stderr) { + console.error(`yt-dlp warnings: ${stderr}`) + } - // Log any errors from yt-dlp - if (stderr) { - console.error(`yt-dlp warnings: ${stderr}`) + log(success(` Audio downloaded successfully:\n - ${outputPath}`)) + } catch (error) { + console.error(`Error downloading audio: ${error instanceof Error ? (error as Error).message : String(error)}`) + throw error } + } else if (options.file) { + log(step('\nStep 2 - Processing file audio...\n')) + // Define supported audio and video formats + const supportedFormats: Set = new Set([ + 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm', + ]) + try { + // Check if the file exists + await access(input) - // Construct the path of the downloaded file - const downloadedFile = `${finalPath}.wav` - log(success(` Audio downloaded successfully:\n - ${downloadedFile}`)) - return downloadedFile - } catch (error) { - console.error(`Error downloading audio: ${error instanceof Error ? (error as Error).message : String(error)}`) - throw error - } -} + // Read the file into a buffer + const buffer = await readFile(input) -/** - * Function to process a local audio or video file. - * @param {string} filePath - The path to the local file. - * @param {string} sanitizedFilename - The sanitized filename. - * @returns {Promise} - Returns the final path to the processed WAV file. - * @throws {Error} - If the file type is unsupported or processing fails. - */ -export async function downloadFileAudio(filePath: string, sanitizedFilename: string): Promise { - log(step('\nStep 2 - Downloading file audio...\n')) - // Define supported audio and video formats - const supportedFormats: Set = new Set([ - 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm', - ]) - try { - // Check if the file exists - await access(filePath) + // Determine the file type + const fileType = await fileTypeFromBuffer(buffer) + if (!fileType || !supportedFormats.has(fileType.ext as SupportedFileType)) { + throw new Error( + fileType ? 
`Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' + ) + } + log(wait(` File type detected as ${fileType.ext}, converting to WAV...\n`)) - // Read the file into a buffer - const buffer = await readFile(filePath) - - // Determine the file type - const fileType = await fileTypeFromBuffer(buffer) - if (!fileType || !supportedFormats.has(fileType.ext as SupportedFileType)) { - throw new Error( - fileType ? `Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' + // Convert the file to WAV format + await execPromise( + `${ffmpeg} -i "${input}" -ar 16000 -ac 1 -vn "${outputPath}"` ) + log(success(` File converted to WAV format successfully:\n - ${outputPath}`)) + } catch (error) { + console.error(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) + throw error } - log(wait(` File type detected as ${fileType.ext}, converting to WAV...\n`)) - - const outputPath = `content/${sanitizedFilename}.wav` - - // Convert the file to WAV format - await execPromise( - `${ffmpeg} -i "${filePath}" -ar 16000 -ac 1 -vn "${outputPath}"` - ) - log(success(` File converted to WAV format successfully:\n - ${outputPath}`)) - - return outputPath - } catch (error) { - console.error(`Error processing local file: ${error instanceof Error ? (error as Error).message : String(error)}`) - throw error + } else { + throw new Error('Invalid option provided for audio download/processing.') } + + return outputPath } \ No newline at end of file diff --git a/src/utils/extractVideoMetadata.ts b/src/utils/extractVideoMetadata.ts new file mode 100644 index 0000000..eeea3e6 --- /dev/null +++ b/src/utils/extractVideoMetadata.ts @@ -0,0 +1,52 @@ +// src/utils/extractVideoMetadata.ts + +import { execFile } from 'node:child_process' +import { promisify } from 'node:util' +import { checkDependencies } from './checkDependencies.js' + +import type { VideoMetadata } from '../types.js' + +const execFilePromise = promisify(execFile) + +/** + * Extract metadata for a single video URL. + * @param url - The URL of the video. + * @returns The video metadata. + */ +export async function extractVideoMetadata(url: string): Promise { + try { + // Check for required dependencies + await checkDependencies(['yt-dlp']) + + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + '--print', '%(title)s', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(thumbnail)s', + url, + ]) + + const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout.trim().split('\n') + + // Ensure all metadata is present + if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + + return { + showLink, + channel, + channelURL, + title, + description: '', + publishDate, + coverImage, + } + } catch (error) { + console.error(`Error extracting metadata for ${url}: ${error instanceof Error ? 
(error as Error).message : String(error)}`) + throw error + } +} \ No newline at end of file diff --git a/src/utils/generateMarkdown.ts b/src/utils/generateMarkdown.ts index df081f4..af0267c 100644 --- a/src/utils/generateMarkdown.ts +++ b/src/utils/generateMarkdown.ts @@ -5,221 +5,161 @@ import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' import { basename, extname } from 'node:path' import { checkDependencies } from './checkDependencies.js' -import { log, dim, step, success } from '../types.js' -import type { MarkdownData, RSSItem, VideoMetadata } from '../types.js' +import { log, dim, step, success } from '../models.js' +import type { MarkdownData, ProcessingOptions, RSSItem } from '../types.js' // Promisify the execFile function for use with async/await const execFilePromise = promisify(execFile) /** - * Extract metadata for a single video URL. - * @param url - The URL of the video. - * @returns The video metadata. + * Generates markdown content based on the provided options and input. + * + * @param {ProcessingOptions} options - The processing options specifying the type of content to generate. + * @param {string | RSSItem} input - The input data, either a string (for video URL or file path) or an RSSItem object. + * @returns {Promise} A promise that resolves to an object containing the generated markdown data. + * @throws {Error} If invalid options are provided or if metadata extraction fails. */ -export async function extractVideoMetadata(url: string): Promise { - try { - // Check for required dependencies - await checkDependencies(['yt-dlp']) - - const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', - '--print', '%(webpage_url)s', - '--print', '%(channel)s', - '--print', '%(uploader_url)s', - '--print', '%(title)s', - '--print', '%(upload_date>%Y-%m-%d)s', - '--print', '%(thumbnail)s', - url, - ]) - - const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout.trim().split('\n') - - // Ensure all metadata is present - if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { - throw new Error('Incomplete metadata received from yt-dlp.') - } - - return { - showLink, - channel, - channelURL, - title, - description: '', - publishDate, - coverImage, - } - } catch (error) { - console.error(`Error extracting metadata for ${url}: ${error instanceof Error ? (error as Error).message : String(error)}`) - throw error +export async function generateMarkdown( + options: ProcessingOptions, + input: string | RSSItem +): Promise { + // log(`Options received in generateMarkdown:\n`) + // log(options) + // log(`input:`, input) + /** + * Sanitizes a title string for use in filenames. + * + * @param {string} title - The title to sanitize. + * @returns {string} The sanitized title. + */ + function sanitizeTitle(title: string): string { + return title + .replace(/[^\w\s-]/g, '') // Remove all non-word chars except spaces and hyphens + .trim() // Remove leading and trailing whitespace + .replace(/[\s_]+/g, '-') // Replace spaces and underscores with a single hyphen + .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen + .toLowerCase() // Convert to lowercase + .slice(0, 200) // Limit to 200 characters } -} -/** - * Function to generate markdown for RSS feed items. - * @param item - The RSS feed item object. - * @returns An object with frontMatter, finalPath, and filename. - * @throws {Error} If markdown generation fails. 
- */ -export async function generateRSSMarkdown(item: RSSItem): Promise { - try { - // Destructure the item object - const { publishDate, title, coverImage, showLink, channel, channelURL } = item - - // Sanitize the title for use in the filename - const sanitizedTitle = sanitizeTitle(title) - - // Construct the filename, path, and front matter for the markdown file - const filename = `${publishDate}-${sanitizedTitle}` - const finalPath = `content/${filename}` - const frontMatter = [ - '---', - `showLink: "${showLink}"`, - `channel: "${channel}"`, - `channelURL: "${channelURL}"`, - `title: "${title}"`, - `description: ""`, - `publishDate: "${publishDate}"`, - `coverImage: "${coverImage}"`, - '---\n', - ].join('\n') - - // Write the front matter to the markdown file - await writeFile(`${finalPath}.md`, frontMatter) - log(dim(frontMatter)) - log(step('\nStep 1 - Generating RSS markdown...\n')) - log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) - return { frontMatter, finalPath, filename } - } catch (error) { - console.error(`Error generating markdown for RSS item: ${error instanceof Error ? (error as Error).message : String(error)}`) - throw error + // Declare variables to store generated content + let frontMatter: string[] + let finalPath: string + let filename: string + + // Use a switch statement to handle different content types + switch (true) { + case !!options.video: + case !!options.playlist: + case !!options.urls: + // Check if yt-dlp is installed + await checkDependencies(['yt-dlp']) + + // Execute yt-dlp to extract video metadata + const { stdout } = await execFilePromise('yt-dlp', [ + '--restrict-filenames', + '--print', '%(upload_date>%Y-%m-%d)s', + '--print', '%(title)s', + '--print', '%(thumbnail)s', + '--print', '%(webpage_url)s', + '--print', '%(channel)s', + '--print', '%(uploader_url)s', + input as string, // Assert input as string for video URL + ]) + + // Parse the output from yt-dlp + const [ + formattedDate, videoTitle, thumbnail, webpage_url, videoChannel, uploader_url + ] = stdout.trim().split('\n') + + // Generate filename and path + filename = `${formattedDate}-${sanitizeTitle(videoTitle)}` + finalPath = `content/${filename}` + + // Create front matter for video content + frontMatter = [ + '---', + `showLink: "${webpage_url}"`, + `channel: "${videoChannel}"`, + `channelURL: "${uploader_url}"`, + `title: "${videoTitle}"`, + `description: ""`, + `publishDate: "${formattedDate}"`, + `coverImage: "${thumbnail}"`, + '---\n', + ] + + // Log progress + log(step('\nStep 1 - Generating video markdown...\n')) + break + + case !!options.file: + // Extract filename from the input path + const originalFilename = basename(input as string) + const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') + + // Generate sanitized filename and path + filename = sanitizeTitle(filenameWithoutExt) + finalPath = `content/${filename}` + + // Create front matter for file content + frontMatter = [ + '---', + `showLink: "${originalFilename}"`, + `channel: ""`, + `channelURL: ""`, + `title: "${originalFilename}"`, + `description: ""`, + `publishDate: ""`, + `coverImage: ""`, + '---\n', + ] + + // Log progress + log(step('\nStep 1 - Generating file markdown...\n')) + break + + case !!options.rss: + // Assert input as RSSItem and destructure its properties + const item = input as RSSItem + const { publishDate, title: rssTitle, coverImage, showLink, channel: rssChannel, channelURL } = item + + // Generate filename and path + filename = 
`${publishDate}-${sanitizeTitle(rssTitle)}` + finalPath = `content/${filename}` + + // Create front matter for RSS content + frontMatter = [ + '---', + `showLink: "${showLink}"`, + `channel: "${rssChannel}"`, + `channelURL: "${channelURL}"`, + `title: "${rssTitle}"`, + `description: ""`, + `publishDate: "${publishDate}"`, + `coverImage: "${coverImage}"`, + '---\n', + ] + + // Log progress + log(step('\nStep 1 - Generating RSS markdown...\n')) + break + + default: + // Throw an error if an invalid option is provided + throw new Error('Invalid option provided for markdown generation.') } -} -/** - * Function to generate markdown for local audio or video files. - * @param filePath - The path to the local file. - * @returns An object with frontMatter, finalPath, and filename. - * @throws {Error} If markdown generation fails. - */ -export async function generateFileMarkdown(filePath: string): Promise { - try { - // Extract the original filename from the full file path - const originalFilename = basename(filePath) - - // Remove the file extension from the original filename - const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') - - // Sanitize the filename - const sanitizedFilename = sanitizeTitle(filenameWithoutExt) - - // Construct the final path for the markdown file - const finalPath = `content/${sanitizedFilename}` - - // Create the front matter content for the markdown file - const frontMatter = [ - '---', - `showLink: "${originalFilename}"`, - `channel: ""`, - `channelURL: ""`, - `title: "${originalFilename}"`, - `description: ""`, - `publishDate: ""`, - `coverImage: ""`, - '---\n', - ].join('\n') - - // Write the front matter to the markdown file - await writeFile(`${finalPath}.md`, frontMatter) - - // Log the creation of the markdown file - log(dim(frontMatter)) - log(step('\nStep 1 - Generating file markdown...\n')) - log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) - - // Return an object with the generated data - return { frontMatter, finalPath, filename: sanitizedFilename } - } catch (error) { - // Log any errors that occur during the process - console.error(`Error generating markdown for file: ${error instanceof Error ? (error as Error).message : String(error)}`) - // Re-throw the error to be handled by the calling function - throw error - } -} + // Join the front matter array into a single string + const frontMatterContent = frontMatter.join('\n') -/** - * Function to generate markdown for YouTube videos. - * @param url - The URL of the YouTube video. - * @returns An object containing front matter, final path, and filename. - * @throws {Error} If markdown generation fails. 
- */ -export async function generateMarkdown(url: string): Promise { - try { - // Check for required dependencies - await checkDependencies(['yt-dlp']) - - // Execute yt-dlp to get video information - const { stdout } = await execFilePromise('yt-dlp', [ - '--restrict-filenames', - '--print', '%(upload_date>%Y-%m-%d)s', - '--print', '%(title)s', - '--print', '%(thumbnail)s', - '--print', '%(webpage_url)s', - '--print', '%(channel)s', - '--print', '%(uploader_url)s', - url, - ]) - - // Parse the output from yt-dlp - const [ - formattedDate, title, thumbnail, webpage_url, channel, uploader_url - ] = stdout.trim().split('\n') - - // Ensure all metadata is present - if (!formattedDate || !title || !thumbnail || !webpage_url || !channel || !uploader_url) { - throw new Error('Incomplete metadata received from yt-dlp.') - } - - // Sanitize the title for use in the filename - const sanitizedTitle = sanitizeTitle(title) - - // Construct the filename, path, and front matter for the markdown file - const filename = `${formattedDate}-${sanitizedTitle}` - const finalPath = `content/${filename}` - const frontMatter = [ - '---', - `showLink: "${webpage_url}"`, - `channel: "${channel}"`, - `channelURL: "${uploader_url}"`, - `title: "${title}"`, - `description: ""`, - `publishDate: "${formattedDate}"`, - `coverImage: "${thumbnail}"`, - '---\n', - ].join('\n') - - // Write the front matter to the markdown file - await writeFile(`${finalPath}.md`, frontMatter) - log(dim(frontMatter)) - log(step('\nStep 1 - Generating video markdown...\n')) - log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) - return { frontMatter, finalPath, filename } - } catch (error) { - console.error(`Error generating markdown for video: ${error instanceof Error ? (error as Error).message : String(error)}`) - throw error - } -} + // Write the front matter content to a file + await writeFile(`${finalPath}.md`, frontMatterContent) -/** - * Sanitize the title to create a safe filename. - * @param title - The title to sanitize. - * @returns The sanitized title. 
- */ -function sanitizeTitle(title: string): string { - return title - .replace(/[^\w\s-]/g, '') - .trim() - .replace(/[\s_]+/g, '-') - .replace(/-+/g, '-') - .toLowerCase() - .slice(0, 200) + // Log the generated front matter and success message + log(dim(frontMatterContent)) + log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) + + // Return the generated markdown data + return { frontMatter: frontMatterContent, finalPath, filename } } \ No newline at end of file diff --git a/src/utils/runLLM.ts b/src/utils/runLLM.ts index 815fb1f..be5fe72 100644 --- a/src/utils/runLLM.ts +++ b/src/utils/runLLM.ts @@ -10,7 +10,7 @@ import { callCohere } from '../llms/cohere.js' import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' import { generatePrompt } from '../llms/prompt.js' -import { log, step, success, wait } from '../types.js' +import { log, step, success, wait } from '../models.js' import type { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' /** diff --git a/src/utils/runTranscription.ts b/src/utils/runTranscription.ts index 078ac08..dfde1cc 100644 --- a/src/utils/runTranscription.ts +++ b/src/utils/runTranscription.ts @@ -5,7 +5,7 @@ import { callWhisper } from '../transcription/whisper.js' import { callWhisperDocker } from '../transcription/whisperDocker.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' -import { log, step, success, wait } from '../types.js' +import { log, step, success, wait } from '../models.js' import type { TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -30,23 +30,19 @@ export async function runTranscription( // Choose the transcription service based on the provided option switch (transcriptServices) { case 'deepgram': - log(wait('\n Using Deepgram for transcription...')) txtContent = await callDeepgram(options, finalPath) break case 'assembly': - log(wait('\n Using AssemblyAI for transcription...')) txtContent = await callAssembly(options, finalPath) break case 'whisperDocker': - log(wait('\n Using Whisper Docker for transcription...')) txtContent = await callWhisperDocker(options, finalPath) break case 'whisper': default: - log(wait('\n Using Whisper for transcription...')) txtContent = await callWhisper(options, finalPath) break } From da9d493ed141d65abb573458b24c8fa662f87b8b Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sun, 6 Oct 2024 21:23:31 -0500 Subject: [PATCH 8/9] refactor tests, logging, and global exports --- .gitignore | 4 +- package.json | 3 +- src/autoshow.ts | 78 ++++++++------- src/commands/processFile.ts | 30 ++---- src/commands/processPlaylist.ts | 19 ++-- src/commands/processRSS.ts | 62 +++++------- src/commands/processURLs.ts | 25 ++--- src/commands/processVideo.ts | 39 +++----- src/llms/llama.ts | 19 ++-- src/llms/ollama.ts | 9 +- src/models.ts | 6 +- src/transcription/whisper.ts | 11 +-- src/transcription/whisperDocker.ts | 4 + src/types.ts | 18 ++++ src/utils/cleanUpFiles.ts | 2 +- src/utils/generateMarkdown.ts | 24 ++--- src/utils/runLLM.ts | 2 +- test/all.test.js | 148 +++++++++++++---------------- test/local.test.js | 48 +++++----- 19 files changed, 249 insertions(+), 302 deletions(-) diff --git a/.gitignore b/.gitignore index 8e434e8..ccb1788 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ build deno.lock out types -dist \ No newline at end of file +dist +NEW.md +TODO.md \ No newline at end 
of file diff --git a/package.json b/package.json index 9d02fd0..b3a05cf 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "serve": "npm run build && node --env-file=.env --no-warnings --watch packages/server/index.js", "fetch-local": "npm run build && node --env-file=.env --no-warnings packages/server/tests/fetch-local.js", "fetch-all": "npm run build && node --env-file=.env --no-warnings packages/server/tests/fetch-all.js", + "t": "npm run build && node --test test/local.test.js", "test-local": "npm run build && node --test test/local.test.js", "test-all": "npm run build && node --test test/all.test.js" }, @@ -53,7 +54,7 @@ "ffmpeg-static": "^5.2.0", "file-type": "^19.4.1", "inquirer": "^10.2.2", - "node-llama-cpp": "^3.0.0-beta.44", + "node-llama-cpp": "^3.1.0", "ollama": "^0.5.9", "openai": "^4.55.7", "typescript": "^5.6.2" diff --git a/src/autoshow.ts b/src/autoshow.ts index 8750b00..c4ae847 100644 --- a/src/autoshow.ts +++ b/src/autoshow.ts @@ -20,7 +20,7 @@ import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' import { argv, exit } from 'node:process' -import { log, opts } from './models.js' +import { log, opts, final, ACTION_OPTIONS, LLM_OPTIONS, TRANSCRIPT_OPTIONS } from './models.js' import type { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js' // Initialize the command-line interface @@ -72,6 +72,22 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues ` ) +/** + * Helper function to validate that only one option from a list is provided. + * @param {string[]} optionKeys - The list of option keys to check. + * @param {ProcessingOptions} options - The options object. + * @param {string} errorMessage - The prefix of the error message. + * @returns {string | undefined} - The selected option or undefined. + */ +function getSingleOption(optionKeys: string[], options: ProcessingOptions, errorMessage: string): string | undefined { + const selectedOptions = optionKeys.filter((opt) => options[opt as keyof ProcessingOptions]) + if (selectedOptions.length > 1) { + console.error(`Error: Multiple ${errorMessage} provided (${selectedOptions.join(', ')}). Please specify only one.`) + exit(1) + } + return selectedOptions[0] as string | undefined +} + /** * Main action for the program. * @param {ProcessingOptions} options - The command-line options provided by the user. @@ -90,12 +106,8 @@ program.action(async (options: ProcessingOptions) => { rss: processRSS, } - const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] - const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] - const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] - - const { video, playlist, urls, file, rss, interactive } = options - const noActionProvided = [video, playlist, urls, file, rss].every((opt) => !opt) + const { interactive } = options + const noActionProvided = ACTION_OPTIONS.every((opt) => !options[opt as keyof ProcessingOptions]) if (interactive || noActionProvided) { options = await handleInteractivePrompt(options) @@ -106,25 +118,16 @@ program.action(async (options: ProcessingOptions) => { options.item = [options.item] } - const actionsProvided = ACTION_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) - if (actionsProvided.length > 1) { - console.error(`Error: Multiple input options provided (${actionsProvided.join(', ')}). 
Please specify only one input option.`) - exit(1) - } - - const selectedLLMs = LLM_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) - if (selectedLLMs.length > 1) { - console.error(`Error: Multiple LLM options provided (${selectedLLMs.join(', ')}). Please specify only one LLM option.`) - exit(1) - } - const llmServices = selectedLLMs[0] as LLMServices | undefined + // Validate and retrieve single action option + const action = getSingleOption(ACTION_OPTIONS, options, 'input option') + + // Validate and retrieve single LLM option + const llmKey = getSingleOption(LLM_OPTIONS, options, 'LLM option') + const llmServices = llmKey as LLMServices | undefined - const selectedTranscripts = TRANSCRIPT_OPTIONS.filter((opt) => options[opt as keyof ProcessingOptions]) - if (selectedTranscripts.length > 1) { - console.error(`Error: Multiple transcription options provided (${selectedTranscripts.join(', ')}). Please specify only one transcription option.`) - exit(1) - } - const transcriptServices = selectedTranscripts[0] as TranscriptServices | undefined + // Validate and retrieve single transcription option + const transcriptKey = getSingleOption(TRANSCRIPT_OPTIONS, options, 'transcription option') + const transcriptServices: TranscriptServices | undefined = transcriptKey as TranscriptServices | undefined // Set default transcription service if not provided const finalTranscriptServices: TranscriptServices = transcriptServices || 'whisper' @@ -134,16 +137,21 @@ program.action(async (options: ProcessingOptions) => { options.whisper = 'base' } - // Execute the appropriate handler based on the action - for (const [key, handler] of Object.entries(PROCESS_HANDLERS)) { - if (options[key as keyof ProcessingOptions]) { - try { - await handler(options, options[key as keyof ProcessingOptions] as string, llmServices, finalTranscriptServices) - exit(0) - } catch (error) { - console.error(`Error processing ${key}:`, (error as Error).message) - exit(1) - } + if (action) { + try { + await PROCESS_HANDLERS[action]( + options, + options[action as keyof ProcessingOptions] as string, + llmServices, + finalTranscriptServices + ) + log(final(`\n==================================================`)) + log(final(` ${action} Processing Completed Successfully.`)) + log(final(`==================================================\n`)) + exit(0) + } catch (error) { + console.error(`Error processing ${action}:`, (error as Error).message) + exit(1) } } }) diff --git a/src/commands/processFile.ts b/src/commands/processFile.ts index 3827ad0..6dc4c77 100644 --- a/src/commands/processFile.ts +++ b/src/commands/processFile.ts @@ -5,7 +5,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final } from '../models.js' +import { log, opts, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -22,30 +22,16 @@ export async function processFile( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(`Options received in processFile:\n`) - // log(options) - // log(`filePath:`, filePath) - // log(`llmServices:`, llmServices) - // log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processFile:\n')) + log(wait(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`)) try { - // Generate markdown 
for the file - const { frontMatter, finalPath, filename } = await generateMarkdown(options, filePath) - - // Convert the audio or video file to the required format - await downloadAudio(options, filePath, filename) - - // Run transcription on the file - await runTranscription(options, finalPath, frontMatter, transcriptServices) - - // Process the transcript with the selected Language Model - await runLLM(options, finalPath, frontMatter, llmServices) - - // Clean up temporary files if the noCleanUp option is not set - if (!options.noCleanUp) { + const { frontMatter, finalPath, filename } = await generateMarkdown(options, filePath) // Generate markdown for the file + await downloadAudio(options, filePath, filename) // Convert the audio or video file to the required format + await runTranscription(options, finalPath, frontMatter, transcriptServices) // Run transcription on the file + await runLLM(options, finalPath, frontMatter, llmServices) // Process the transcript with the selected Language Model + if (!options.noCleanUp) { // Clean up temporary files if the noCleanUp option is not set await cleanUpFiles(finalPath) } - - log(final('\nLocal file processing completed successfully.\n')) } catch (error) { console.error(`Error processing file: ${(error as Error).message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processPlaylist.ts b/src/commands/processPlaylist.ts index 7448e99..807c6f6 100644 --- a/src/commands/processPlaylist.ts +++ b/src/commands/processPlaylist.ts @@ -6,7 +6,7 @@ import { promisify } from 'node:util' import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' import { checkDependencies } from '../utils/checkDependencies.js' -import { log, final, wait } from '../models.js' +import { log, opts, success, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' const execFilePromise = promisify(execFile) @@ -24,11 +24,8 @@ export async function processPlaylist( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(`Options received in processPlaylist:\n`) - // log(options) - // log(`playlistUrl:`, playlistUrl) - // log(`llmServices:`, llmServices) - // log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processPlaylist:\n')) + log(wait(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}`)) try { // Check for required dependencies await checkDependencies(['yt-dlp']) @@ -52,7 +49,7 @@ export async function processPlaylist( process.exit(1) // Exit with an error code } - log(wait(` Found ${urls.length} videos in the playlist...`)) + log(opts(`\nFound ${urls.length} videos in the playlist...`)) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) @@ -64,13 +61,15 @@ export async function processPlaylist( const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/playlist_info.json' await writeFile(jsonFilePath, jsonContent) - log(wait(`Playlist information saved to: ${jsonFilePath}`)) + log(success(`Playlist information saved to: ${jsonFilePath}`)) return } // Process each video in the playlist for (const [index, url] of urls.entries()) { - log(wait(`\n Processing video ${index + 1}/${urls.length}:\n - ${url}\n`)) + log(opts(`\n==============================================================`)) + log(opts(` Processing video ${index + 1}/${urls.length}: ${url}`)) + 
log(opts(`==============================================================\n`)) try { await processVideo(options, url, llmServices, transcriptServices) } catch (error) { @@ -78,8 +77,6 @@ export async function processPlaylist( // Continue processing the next video } } - - log(final('\nPlaylist processing completed successfully.\n')) } catch (error) { console.error(`Error processing playlist: ${(error as Error).message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processRSS.ts b/src/commands/processRSS.ts index 82c307b..d1a49dc 100644 --- a/src/commands/processRSS.ts +++ b/src/commands/processRSS.ts @@ -7,7 +7,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final, wait } from '../models.js' +import { log, final, wait, opts } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions, RSSItem } from '../types.js' @@ -32,30 +32,16 @@ async function processItem( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(`Options received in processItem:\n`) - // log(options) - // log(`item\n\n`, item) - // log(`llmServices:`, llmServices) - // log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processItem:\n')) + log(wait(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`)) try { - // Generate markdown for the item - const { frontMatter, finalPath, filename } = await generateMarkdown(options, item) - - // Download audio - await downloadAudio(options, item.showLink, filename) - - // Run transcription - await runTranscription(options, finalPath, frontMatter, transcriptServices) - - // Process with Language Model - await runLLM(options, finalPath, frontMatter, llmServices) - - // Clean up temporary files if necessary - if (!options.noCleanUp) { + const { frontMatter, finalPath, filename } = await generateMarkdown(options, item) // Generate markdown for the item + await downloadAudio(options, item.showLink, filename) // Download audio + await runTranscription(options, finalPath, frontMatter, transcriptServices) // Run transcription + await runLLM(options, finalPath, frontMatter, llmServices) // Process with Language Model + if (!options.noCleanUp) { // Clean up temporary files if necessary await cleanUpFiles(finalPath) } - - log(final(`\nItem processing completed successfully: ${item.title}`)) } catch (error) { console.error(`Error processing item ${item.title}: ${(error as Error).message}`) // Continue processing the next item @@ -76,11 +62,8 @@ export async function processRSS( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - log(`Options received in processRSS:\n`) - log(options) - log(`rssUrl:`, rssUrl) - log(`llmServices:`, llmServices) - log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processRSS:\n')) + log(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}`) try { // Validate that --last is a positive integer if provided if (options.last !== undefined) { @@ -115,11 +98,11 @@ export async function processRSS( if (options.item && options.item.length > 0) { // If specific items are provided, list them log(wait('\nProcessing specific items:')) - options.item.forEach((url) => log(` - ${url}`)) + options.item.forEach((url) => log(wait(` - ${url}`))) } else if (options.last) { - 
console.log(`\nProcessing the last ${options.last} items`) - } else { - console.log(` - Skipping first ${options.skip || 0} items`) + log(wait(`\nProcessing the last ${options.last} items`)) + } else if (options.skip) { + log(wait(` - Skipping first ${options.skip || 0} items`)) } // Fetch the RSS feed with a timeout @@ -213,28 +196,31 @@ export async function processRSS( process.exit(1) // Exit with an error code } itemsToProcess = matchedItems - log(wait(` Found ${items.length} items in the RSS feed.`)) + log(wait(`\n - Found ${items.length} items in the RSS feed.`)) log(wait(` - Processing ${itemsToProcess.length} specified items.`)) } else if (options.last) { // Process the most recent N items itemsToProcess = items.slice(0, options.last) - log(wait(` Found ${items.length} items in the RSS feed.`)) + log(wait(`\n - Found ${items.length} items in the RSS feed.`)) log(wait(` - Processing the last ${options.last} items.`)) } else { // Sort items based on the specified order and apply skip const sortedItems = options.order === 'oldest' ? items.slice().reverse() : items itemsToProcess = sortedItems.slice(options.skip || 0) - log(wait(` Found ${items.length} items in the RSS feed.`)) - log(wait(` - Processing ${itemsToProcess.length} items after skipping ${options.skip || 0}.\n`)) + log(wait(`\n - Found ${items.length} item(s) in the RSS feed.`)) + log(wait(` - Processing ${itemsToProcess.length} item(s) after skipping ${options.skip || 0}.\n`)) } // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { - log(wait(` Processing item ${index + 1}/${itemsToProcess.length}:\n - ${item.title}\n`)) + log(opts(`\n==============================================================`)) + log(opts(` Item ${index + 1}/${itemsToProcess.length} processing: ${item.title}`)) + log(opts(`==============================================================\n`)) await processItem(options, item, llmServices, transcriptServices) + log(final(`\n==============================================================`)) + log(final(` ${index + 1}/${itemsToProcess.length} item processing completed successfully`)) + log(final(`==============================================================\n`)) } - - log(final('\nRSS feed processing completed successfully.\n')) } catch (error) { console.error(`Error processing RSS feed: ${(error as Error).message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processURLs.ts b/src/commands/processURLs.ts index 68c86a7..33a80e3 100644 --- a/src/commands/processURLs.ts +++ b/src/commands/processURLs.ts @@ -1,11 +1,10 @@ // src/commands/processURLs.ts import { readFile, writeFile } from 'node:fs/promises' -import { resolve } from 'node:path' import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/extractVideoMetadata.js' import { checkDependencies } from '../utils/checkDependencies.js' -import { log, final, wait } from '../models.js' +import { log, wait, opts } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -21,29 +20,23 @@ export async function processURLs( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(`Options received in processURLs:\n`) - // log(options) - // log(`filePath:`, filePath) - // log(`llmServices:`, llmServices) - // log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processURLs:\n')) + log(wait(` - llmServices: ${llmServices}\n - transcriptServices: 
${transcriptServices}\n`)) try { // Check for required dependencies await checkDependencies(['yt-dlp']) - const absolutePath = resolve(filePath) - // Read and parse the content of the file into an array of URLs - const content = await readFile(absolutePath, 'utf8') + const content = await readFile(filePath, 'utf8') const urls = content.split('\n') .map(line => line.trim()) .filter(line => line && !line.startsWith('#')) if (urls.length === 0) { console.error('Error: No URLs found in the file.') - process.exit(1) // Exit with an error code + process.exit(1) } - - log(wait(`\n Found ${urls.length} URLs in the file...`)) + log(opts(`\n=== Found ${urls.length} URLs in the file... ===`)) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) @@ -61,7 +54,9 @@ export async function processURLs( // Process each URL for (const [index, url] of urls.entries()) { - log(wait(`\n Processing URL ${index + 1}/${urls.length}:\n - ${url}\n`)) + log(opts(`\n============================================================`)) + log(opts(` Processing URL ${index + 1}/${urls.length}: ${url}`)) + log(opts(`============================================================\n`)) try { await processVideo(options, url, llmServices, transcriptServices) } catch (error) { @@ -69,8 +64,6 @@ export async function processURLs( // Continue processing the next URL } } - - log(final('\nURL file processing completed successfully.\n')) } catch (error) { console.error(`Error reading or processing file ${filePath}: ${(error as Error).message}`) process.exit(1) // Exit with an error code diff --git a/src/commands/processVideo.ts b/src/commands/processVideo.ts index 8c873b6..f087e31 100644 --- a/src/commands/processVideo.ts +++ b/src/commands/processVideo.ts @@ -6,7 +6,7 @@ import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' import { runLLM } from '../utils/runLLM.js' import { cleanUpFiles } from '../utils/cleanUpFiles.js' -import { log, final } from '../models.js' +import { log, opts, wait } from '../models.js' import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js' /** @@ -23,36 +23,19 @@ export async function processVideo( llmServices?: LLMServices, transcriptServices?: TranscriptServices ): Promise { - // log(`Options received in processVideo:\n`) - // log(options) - // log(`url:`, url) - // log(`llmServices:`, llmServices) - // log(`transcriptServices:`, transcriptServices) + log(opts('Parameters passed to processVideo:\n')) + log(wait(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`)) try { - // Check for required dependencies - await checkDependencies(['yt-dlp']) - - // Generate markdown with video metadata - const { frontMatter, finalPath, filename } = await generateMarkdown(options, url) - - // Download audio from the video - await downloadAudio(options, url, filename) - - // Run transcription on the audio - await runTranscription(options, finalPath, frontMatter, transcriptServices) - - // Process transcript with an LLM if llmServices is defined, concatenate prompt and transcript if undefined - await runLLM(options, finalPath, frontMatter, llmServices) - - // Clean up temporary files if the noCleanUp option is not set - if (!options.noCleanUp) { + await checkDependencies(['yt-dlp']) // Check for required dependencies. + const { frontMatter, finalPath, filename } = await generateMarkdown(options, url) // Generate markdown with video metadata. 
+ await downloadAudio(options, url, filename) // Download audio from the video. + await runTranscription(options, finalPath, frontMatter, transcriptServices) // Run transcription on the audio. + await runLLM(options, finalPath, frontMatter, llmServices) // If llmServices is set, process with LLM. If llmServices is undefined, bypass LLM processing. + if (!options.noCleanUp) { // Clean up temporary files if the noCleanUp option is not set. await cleanUpFiles(finalPath) } - - log(final('\nVideo processing completed successfully.\n')) } catch (error) { - // Log any errors that occur during video processing - console.error('Error processing video:', (error as Error).message) - throw error // Re-throw to be handled by caller + console.error('Error processing video:', (error as Error).message) // Log any errors that occur during video processing + throw error // Re-throw to be handled by caller } } \ No newline at end of file diff --git a/src/llms/llama.ts b/src/llms/llama.ts index 9a48d45..56cee8c 100644 --- a/src/llms/llama.ts +++ b/src/llms/llama.ts @@ -6,7 +6,7 @@ import { existsSync } from 'node:fs' import { exec } from 'node:child_process' import { promisify } from 'node:util' import { LLAMA_MODELS } from '../models.js' -import { log, wait } from '../models.js' +import { log, success, wait } from '../models.js' import type { LlamaModelType, LLMFunction } from '../types.js' @@ -20,11 +20,15 @@ const execAsync = promisify(exec) * @returns A Promise that resolves when the processing is complete. * @throws {Error} - If an error occurs during processing. */ -export const callLlama: LLMFunction = async (promptAndTranscript: string, tempPath: string, model?: string) => { +export const callLlama: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model?: string +) => { try { // Get the model object from LLAMA_MODELS using the provided model name or default to GEMMA_2_2B const selectedModel = LLAMA_MODELS[model as LlamaModelType] || LLAMA_MODELS.GEMMA_2_2B - log(wait(` - Model selected: ${selectedModel.filename}`)) + log(wait(` - filename: ${selectedModel.filename}\n - url: ${selectedModel.url}\n`)) // If no valid model is found, throw an error if (!selectedModel) { @@ -36,7 +40,7 @@ export const callLlama: LLMFunction = async (promptAndTranscript: string, tempPa // Check if the model file already exists, if not, download it if (!existsSync(modelPath)) { - log(wait(`\nDownloading ${selectedModel.filename}...`)) + log(success(`\nDownloading ${selectedModel.filename}...`)) try { // Create the directory for storing models if it doesn't exist @@ -45,16 +49,15 @@ export const callLlama: LLMFunction = async (promptAndTranscript: string, tempPa // Download the model using curl const { stderr } = await execAsync(`curl -L ${selectedModel.url} -o ${modelPath}`) - // If there's any stderr output, log it - if (stderr) log(stderr) - log('Download completed') + // If there's any stderr output, log completed + if (stderr) log(success('Download completed')) } catch (err) { // If an error occurs during download, log it and throw a new error console.error(`Download failed: ${err instanceof Error ? 
err.message : String(err)}`) throw new Error('Failed to download the model') } } else { - log(wait(` - Model path: ${modelPath}`)) + log(wait(` modelPath found:\n - ${modelPath}`)) } // Initialize Llama and load the local model diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 3a02c59..74955e6 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -5,14 +5,7 @@ import { env } from 'node:process' import { OLLAMA_MODELS } from '../models.js' import { log, wait } from '../models.js' -import type { LLMFunction, OllamaModelType } from '../types.js' - -// Define the expected structure of the response from Ollama API -interface OllamaResponse { - message: { - content: string - } -} +import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types.js' /** * Main function to call the Llama model using the Ollama REST API. diff --git a/src/models.ts b/src/models.ts index 1bd7b7c..5f637fb 100644 --- a/src/models.ts +++ b/src/models.ts @@ -8,11 +8,15 @@ export const step: ChalkInstance = chalk.bold.underline export const dim: ChalkInstance = chalk.dim export const success: ChalkInstance = chalk.bold.blue export const opts: ChalkInstance = chalk.magentaBright.bold -export const wait: ChalkInstance = chalk.cyan.dim +export const wait: ChalkInstance = chalk.bold.cyan export const final: ChalkInstance = chalk.bold.italic export const log: typeof console.log = console.log +export const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] +export const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] +export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] + /** * Define available Whisper models * @type {Record} diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts index 5aa5663..2e25dcf 100644 --- a/src/transcription/whisper.ts +++ b/src/transcription/whisper.ts @@ -5,7 +5,7 @@ import { exec } from 'node:child_process' import { promisify } from 'node:util' import { existsSync } from 'node:fs' import { WHISPER_MODELS } from '../models.js' -import { log, success, wait } from '../models.js' +import { log, wait } from '../models.js' import type { ProcessingOptions } from '../types.js' const execPromise = promisify(exec) @@ -30,8 +30,7 @@ export async function callWhisper(options: ProcessingOptions, finalPath: string) // Get the model ggml file name const modelGGMLName = WHISPER_MODELS[whisperModel] - log(wait(` - whisperModel: ${whisperModel}`)) - log(wait(` - modelGGMLName: ${modelGGMLName}`)) + log(wait(`\n - whisperModel: ${whisperModel}\n - modelGGMLName: ${modelGGMLName}`)) // Setup Whisper if (!existsSync('./whisper.cpp')) { @@ -42,14 +41,14 @@ export async function callWhisper(options: ProcessingOptions, finalPath: string) // Ensure model is downloaded if (!existsSync(`./whisper.cpp/models/ggml-${whisperModel}.bin`)) { - log(wait(` - Model not found, downloading: ${whisperModel}...\n`)) + log(wait(` Model not found, downloading...\n - ${whisperModel}\n`)) await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - log(success(' Model download completed.\n')) + log(wait(' Model download completed, running transcription...\n')) } // Run transcription await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelGGMLName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) - log(wait(`\n Transcript LRC file successfully completed...\n - ${finalPath}.lrc\n`)) + log(wait(`\n Transcript LRC file successfully completed...\n - ${finalPath}.lrc`)) 
// Read the generated LRC file const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') diff --git a/src/transcription/whisperDocker.ts b/src/transcription/whisperDocker.ts index 295ea05..22b2566 100644 --- a/src/transcription/whisperDocker.ts +++ b/src/transcription/whisperDocker.ts @@ -31,6 +31,7 @@ export async function callWhisperDocker(options: ProcessingOptions, finalPath: s const modelGGMLName = WHISPER_MODELS[whisperModel] const CONTAINER_NAME = 'autoshow-whisper-1' const modelPathContainer = `/app/models/${modelGGMLName}` + log(wait(` - whisperModel: ${whisperModel}`)) log(wait(` - modelGGMLName: ${modelGGMLName}`)) log(wait(` - CONTAINER_NAME: ${CONTAINER_NAME}`)) @@ -52,14 +53,17 @@ export async function callWhisperDocker(options: ProcessingOptions, finalPath: s // Process transcript const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + // Process and format the LRC content const txtContent = lrcContent.split('\n') .filter(line => !line.startsWith('[by:whisper.cpp]')) .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) .join('\n') + // Write the formatted content to a text file await writeFile(`${finalPath}.txt`, txtContent) log(wait(` Transcript transformation successfully completed...\n - ${finalPath}.txt\n`)) + // Return the processed content return txtContent } catch (error) { console.error('Error in callWhisperDocker:', error) diff --git a/src/types.ts b/src/types.ts index c6ca1fa..d101983 100644 --- a/src/types.ts +++ b/src/types.ts @@ -325,6 +325,24 @@ export type LlamaModelType = 'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA /** Define local model with Ollama. */ export type OllamaModelType = 'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B' +// Define the expected structure of the response from Ollama API +export type OllamaResponse = { + model: string + created_at: string + message: { + role: string + content: string + } + done_reason: string + done: boolean + total_duration: number + load_duration: number + prompt_eval_count: number + prompt_eval_duration: number + eval_count: number + eval_duration: number +} + /** * Represents the function signature for cleaning up temporary files. */ diff --git a/src/utils/cleanUpFiles.ts b/src/utils/cleanUpFiles.ts index 07afea4..35effe1 100644 --- a/src/utils/cleanUpFiles.ts +++ b/src/utils/cleanUpFiles.ts @@ -14,7 +14,7 @@ export async function cleanUpFiles(id: string): Promise { // Array of file extensions to delete const extensions = ['.wav', '.txt', '.md', '.lrc'] - log(success(` Deleted:`)) + log(success(` Temporary files deleted:`)) for (const ext of extensions) { try { await unlink(`${id}${ext}`) diff --git a/src/utils/generateMarkdown.ts b/src/utils/generateMarkdown.ts index af0267c..ed53d67 100644 --- a/src/utils/generateMarkdown.ts +++ b/src/utils/generateMarkdown.ts @@ -23,9 +23,7 @@ export async function generateMarkdown( options: ProcessingOptions, input: string | RSSItem ): Promise { - // log(`Options received in generateMarkdown:\n`) - // log(options) - // log(`input:`, input) + // log(` - input: ${input}\n`) /** * Sanitizes a title string for use in filenames. 
* @@ -35,11 +33,11 @@ export async function generateMarkdown( function sanitizeTitle(title: string): string { return title .replace(/[^\w\s-]/g, '') // Remove all non-word chars except spaces and hyphens - .trim() // Remove leading and trailing whitespace - .replace(/[\s_]+/g, '-') // Replace spaces and underscores with a single hyphen - .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen - .toLowerCase() // Convert to lowercase - .slice(0, 200) // Limit to 200 characters + .trim() // Remove leading and trailing whitespace + .replace(/[\s_]+/g, '-') // Replace spaces and underscores with a single hyphen + .replace(/-+/g, '-') // Replace multiple hyphens with a single hyphen + .toLowerCase() // Convert to lowercase + .slice(0, 200) // Limit to 200 characters } // Declare variables to store generated content @@ -88,9 +86,6 @@ export async function generateMarkdown( `coverImage: "${thumbnail}"`, '---\n', ] - - // Log progress - log(step('\nStep 1 - Generating video markdown...\n')) break case !!options.file: @@ -114,9 +109,6 @@ export async function generateMarkdown( `coverImage: ""`, '---\n', ] - - // Log progress - log(step('\nStep 1 - Generating file markdown...\n')) break case !!options.rss: @@ -140,9 +132,6 @@ export async function generateMarkdown( `coverImage: "${coverImage}"`, '---\n', ] - - // Log progress - log(step('\nStep 1 - Generating RSS markdown...\n')) break default: @@ -158,6 +147,7 @@ export async function generateMarkdown( // Log the generated front matter and success message log(dim(frontMatterContent)) + log(step('\nStep 1 - Generating markdown...\n')) log(success(` Front matter successfully created and saved:\n - ${finalPath}.md`)) // Return the generated markdown data diff --git a/src/utils/runLLM.ts b/src/utils/runLLM.ts index be5fe72..419b5a0 100644 --- a/src/utils/runLLM.ts +++ b/src/utils/runLLM.ts @@ -50,7 +50,7 @@ export async function runLLM( const promptAndTranscript = `${prompt}${transcript}` if (llmServices) { - log(wait(` Processing with ${llmServices} Language Model...`)) + log(wait(` Processing with ${llmServices} Language Model...\n`)) const llmFunction: LLMFunction = LLM_FUNCTIONS[llmServices] if (!llmFunction) { throw new Error(`Invalid LLM option: ${llmServices}`) diff --git a/test/all.test.js b/test/all.test.js index fea4a8d..a86828f 100644 --- a/test/all.test.js +++ b/test/all.test.js @@ -10,20 +10,34 @@ const commands = [ { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '01---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '01---ep0-fsjam-podcast-prompt.md' }, { cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02---2024-09-24-ep1-fsjam-podcast-prompt.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '03---2024-09-24-ep0-fsjam-podcast-prompt.md' } + { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02A---ep1-fsjam-podcast-prompt.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '02B---ep0-fsjam-podcast-prompt.md' } + ] + }, + { + cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', + expectedFiles: [ + { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '03A---ep1-fsjam-podcast-llama-shownotes.md' }, + { file: 
'2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '03B---ep0-fsjam-podcast-llama-shownotes.md' } ] }, { cmd: 'npm run as -- --urls "content/example-urls.md"', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04---2024-09-24-ep1-fsjam-podcast-prompt.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '05---2024-09-24-ep0-fsjam-podcast-prompt.md' } + { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04A---ep1-fsjam-podcast-prompt.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '04B---ep0-fsjam-podcast-prompt.md' } + ] + }, + { + cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama', + expectedFiles: [ + { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '05A---ep1-fsjam-podcast-llama-shownotes.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '05B---ep0-fsjam-podcast-llama-shownotes.md' } ] }, { @@ -31,159 +45,125 @@ const commands = [ expectedFile: 'audio-prompt.md', newName: '06---audio-prompt.md' }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', + expectedFile: 'audio-llama-shownotes.md', + newName: '07---audio-llama-shownotes.md' + }, + { + cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', + expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', + newName: '08---thoughts-on-lambda-school-layoffs-prompt.md' + }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt', expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md', - newName: '07---2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md' + newName: '09---ep0-fsjam-podcast-chatgpt-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt GPT_4o_MINI', expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md', - newName: '08---2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md' + newName: '10---ep0-fsjam-podcast-chatgpt-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude', expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md', - newName: '09---2024-09-24-ep0-fsjam-podcast-claude-shownotes.md' + newName: '11---ep0-fsjam-podcast-claude-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude CLAUDE_3_SONNET', expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md', - newName: '10---2024-09-24-ep0-fsjam-podcast-claude-shownotes.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini', - expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', - newName: '11---2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini GEMINI_1_5_FLASH', - expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', - newName: '12---2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md' - }, + newName: '12---ep0-fsjam-podcast-claude-shownotes.md' + }, + // { + // cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini', + // expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', + // newName: '13---ep0-fsjam-podcast-gemini-shownotes.md' + // }, + // { + // cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini GEMINI_1_5_FLASH', + // expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', + // newName: 
'14---ep0-fsjam-podcast-gemini-shownotes.md' + // }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere', expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md', - newName: '13---2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md' + newName: '15---ep0-fsjam-podcast-cohere-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere COMMAND_R_PLUS', expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md', - newName: '14---2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md' + newName: '16---ep0-fsjam-podcast-cohere-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral', expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md', - newName: '15---2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md' + newName: '17---ep0-fsjam-podcast-mistral-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral MIXTRAL_8x7b', expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md', - newName: '16---2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md' + newName: '18---ep0-fsjam-podcast-mistral-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo', expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md', - newName: '17---2024-09-24-ep0-fsjam-podcast-octo-shownotes.md' + newName: '19---ep0-fsjam-podcast-octo-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo LLAMA_3_1_8B', expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md', - newName: '18---2024-09-24-ep0-fsjam-podcast-octo-shownotes.md' + newName: '20---ep0-fsjam-podcast-octo-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '19---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' + newName: '21---ep0-fsjam-podcast-llama-shownotes.md' + }, + { + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama', + expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', + newName: '22---ep0-fsjam-podcast-ollama-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '20---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '23---ep0-fsjam-podcast-prompt.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '21---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' + newName: '24---ep0-fsjam-podcast-llama-shownotes.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '22---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '25---ep0-fsjam-podcast-prompt.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '23---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' + newName: '26---ep0-fsjam-podcast-llama-shownotes.md' }, { cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels', expectedFile: '2024-05-08-fsjam-short-prompt.md', - newName: '24---2024-05-08-fsjam-short-prompt.md' - }, - { - cmd: 'npm run 
as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels --llama', - expectedFile: '2024-05-08-fsjam-short-llama-shownotes.md', - newName: '25---2024-05-08-fsjam-short-llama-shownotes.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', - expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '26---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '27---fsjam-short-prompt.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '26B---2024-09-24-ep0-fsjam-podcast-prompt.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', - expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '27---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '28---ep0-fsjam-podcast-prompt.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters mediumChapters longChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: '28---2024-09-24-ep0-fsjam-podcast-prompt.md' + newName: '29---ep0-fsjam-podcast-prompt.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: '29---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' - }, - { - cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '30---2024-09-24-ep1-fsjam-podcast-llama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '31---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' } - ] - }, - { - cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama', - expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '32---2024-09-24-ep1-fsjam-podcast-llama-shownotes.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '33---2024-09-24-ep0-fsjam-podcast-llama-shownotes.md' } - ] - }, - { - cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', - expectedFile: 'audio-llama-shownotes.md', - newName: '34---audio-llama-shownotes.md' - }, - { - cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', - expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: '35---2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md' - }, - { - cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order newest --skip 94 --whisper tiny', - expectedFile: '2020-10-27-episode-0-the-fullstack-jamstack-podcast-with-anthony-campolo-and-christopher-burns-prompt.md', - newName: '36---2020-10-27-episode-0-the-fullstack-jamstack-podcast-with-anthony-campolo-and-christopher-burns-prompt.md' - }, - { - cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order oldest --skip 94 --whisper tiny', - expectedFile: '2023-06-28-episode-94-clerk-with-james-perkins-prompt.md', - newName: '37---2023-06-28-episode-94-clerk-with-james-perkins-prompt.md' + newName: '30---ep0-fsjam-podcast-llama-shownotes.md' } ] diff --git a/test/local.test.js 
b/test/local.test.js index 90328f4..2d7a520 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -31,74 +31,74 @@ const commands = [ expectedFile: 'audio-prompt.md', newName: 'FILE_04.md' }, + { + cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', + expectedFile: 'audio-llama-shownotes.md', + newName: 'FILE_05.md' + }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: 'FILE_05.md' + newName: 'FILE_06.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama', expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', - newName: 'FILE_06.md' + newName: 'FILE_07.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_07A.md' + newName: 'FILE_08.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_07B.md' + newName: 'FILE_09.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_08.md' + newName: 'FILE_10.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary mediumChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_09.md' + newName: 'FILE_11.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: 'FILE_10.md' + newName: 'FILE_12.md' }, { cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_11A.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_11B.md' } + { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_13A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_13B.md' } ] }, { - cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama', + cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --ollama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_12A.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_12B.md' } + { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_14A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_14B.md' } ] }, - { - cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', - expectedFile: 'audio-llama-shownotes.md', - newName: 'FILE_13.md' - }, { cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: 'FILE_14.md' + newName: 'FILE_15.md' }, { - cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order newest --skip 94 --whisper tiny', - expectedFile: 
'2020-10-27-episode-0-the-fullstack-jamstack-podcast-with-anthony-campolo-and-christopher-burns-prompt.md', - newName: 'FILE_15.md' + cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --item "https://api.substack.com/feed/podcast/36236609/fd1f1532d9842fe1178de1c920442541.mp3" --whisper tiny --llama --prompt titles summary longChapters takeaways questions', + expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-llama-shownotes.md', + newName: 'FILE_16.md', }, { - cmd: 'npm run as -- --rss "https://feeds.transistor.fm/fsjam-podcast/" --order oldest --skip 94 --whisper tiny', - expectedFile: '2023-06-28-episode-94-clerk-with-james-perkins-prompt.md', - newName: 'FILE_16.md' + cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info', + expectedFile: 'rss_info.json', + newName: 'FILE_17_rss_info.json', } ] From fcf0b8781551dbd9abed9a35285cdeb61cd78db2 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Mon, 7 Oct 2024 01:53:19 -0500 Subject: [PATCH 9/9] node-llama-cpp v3 cli --- docs/examples.md | 9 +++++ src/commands/processRSS.ts | 15 ++++----- src/llms/llama.ts | 68 ++++++++++++++++++++++---------------- test/all.test.js | 30 +++++++++++++++++ 4 files changed, 84 insertions(+), 38 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index 924c674..1ed1195 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -251,6 +251,15 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZAR npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama ``` +Select Llama model: + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama GEMMA_2_2B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama LLAMA_3_2_1B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama PHI_3_5 +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama QWEN_2_5_3B +``` + ### Ollama ```bash diff --git a/src/commands/processRSS.ts b/src/commands/processRSS.ts index d1a49dc..1259aa7 100644 --- a/src/commands/processRSS.ts +++ b/src/commands/processRSS.ts @@ -142,10 +142,7 @@ export async function processRSS( // Extract channel and item information const { - title: channelTitle, - link: channelLink, - image: channelImageObject, - item: feedItems, + title: channelTitle, link: channelLink, image: channelImageObject, item: feedItems, } = feed.rss.channel // Extract channel image URL safely @@ -213,13 +210,13 @@ export async function processRSS( // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { - log(opts(`\n==============================================================`)) - log(opts(` Item ${index + 1}/${itemsToProcess.length} processing: ${item.title}`)) - log(opts(`==============================================================\n`)) + log(opts(`\n========================================================================================`)) + log(opts(` Item ${index + 1}/${itemsToProcess.length} processing:\n\n${item.title}`)) + log(opts(`========================================================================================\n`)) await processItem(options, item, llmServices, transcriptServices) - log(final(`\n==============================================================`)) + log(final(`\n========================================================================================`)) log(final(` ${index + 1}/${itemsToProcess.length} item processing completed successfully`)) - 
log(final(`==============================================================\n`)) + log(final(`========================================================================================\n`)) } } catch (error) { console.error(`Error processing RSS feed: ${(error as Error).message}`) diff --git a/src/llms/llama.ts b/src/llms/llama.ts index 56cee8c..6dd89b7 100644 --- a/src/llms/llama.ts +++ b/src/llms/llama.ts @@ -1,58 +1,56 @@ // src/llms/llama.ts -import { writeFile, mkdir } from 'node:fs/promises' -import { getLlama, LlamaChatSession } from "node-llama-cpp" +import { writeFile } from 'node:fs/promises' import { existsSync } from 'node:fs' -import { exec } from 'node:child_process' -import { promisify } from 'node:util' +import { resolve } from 'node:path' import { LLAMA_MODELS } from '../models.js' import { log, success, wait } from '../models.js' +import { getLlama, LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp" +import { createModelDownloader } from 'node-llama-cpp' import type { LlamaModelType, LLMFunction } from '../types.js' -const execAsync = promisify(exec) +let model: LlamaModel | null = null +let context: LlamaContext | null = null /** - * Main function to call the local Llama model. + * Main function to call the local Llama model using node-llama-cpp API. * @param promptAndTranscript - The combined prompt and transcript content. * @param tempPath - The temporary file path to write the LLM output. - * @param model - The model name or undefined to use the default model. + * @param modelName - The model name or undefined to use the default model. * @returns A Promise that resolves when the processing is complete. * @throws {Error} - If an error occurs during processing. */ export const callLlama: LLMFunction = async ( promptAndTranscript: string, tempPath: string, - model?: string + modelName?: string ) => { try { - // Get the model object from LLAMA_MODELS using the provided model name or default to GEMMA_2_2B - const selectedModel = LLAMA_MODELS[model as LlamaModelType] || LLAMA_MODELS.GEMMA_2_2B + // Get the model object from LLAMA_MODELS using the provided model name or default to QWEN_2_5_3B + const selectedModel = LLAMA_MODELS[modelName as LlamaModelType] || LLAMA_MODELS.QWEN_2_5_3B log(wait(` - filename: ${selectedModel.filename}\n - url: ${selectedModel.url}\n`)) // If no valid model is found, throw an error if (!selectedModel) { - throw new Error(`Invalid model name: ${model}`) + throw new Error(`Invalid model name: ${modelName}`) } // Construct the path where the model file should be stored - const modelPath = `./src/llms/models/${selectedModel.filename}` + const modelDir = resolve('./src/llms/models') + const modelPath = resolve(modelDir, selectedModel.filename) // Check if the model file already exists, if not, download it if (!existsSync(modelPath)) { log(success(`\nDownloading ${selectedModel.filename}...`)) - try { - // Create the directory for storing models if it doesn't exist - await mkdir('./src/llms/models', { recursive: true }) - - // Download the model using curl - const { stderr } = await execAsync(`curl -L ${selectedModel.url} -o ${modelPath}`) - - // If there's any stderr output, log completed - if (stderr) log(success('Download completed')) + const downloader = await createModelDownloader({ + modelUri: selectedModel.url, + dirPath: modelDir + }) + await downloader.download() + log(success('Download completed')) } catch (err) { - // If an error occurs during download, log it and throw a new error console.error(`Download failed: ${err 
instanceof Error ? err.message : String(err)}`) throw new Error('Failed to download the model') } @@ -60,17 +58,29 @@ export const callLlama: LLMFunction = async ( log(wait(` modelPath found:\n - ${modelPath}`)) } - // Initialize Llama and load the local model - const llama = await getLlama() - const localModel = await llama.loadModel({ modelPath }) + // Initialize Llama and load the local model if not already loaded + if (!model || !context) { + const llama = await getLlama() + model = await llama.loadModel({ modelPath }) + context = await model.createContext({ }) + } - // Create a context for the model and create a chat session - const context = await localModel.createContext() + // Create a chat session const session = new LlamaChatSession({ contextSequence: context.getSequence() }) - // Generate a response and write the response to a file - const response = await session.prompt(promptAndTranscript) + // Generate a response + const response = await session.prompt(promptAndTranscript, { + maxTokens: -1, + temperature: 0.7, + topK: 40, + topP: 0.95, + // repeatPenalty: 1.1 + }) + + // Write the response to the temporary file await writeFile(tempPath, response) + + log(success('LLM processing completed')) } catch (error) { console.error(`Error in callLlama: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error diff --git a/test/all.test.js b/test/all.test.js index a86828f..d3b7247 100644 --- a/test/all.test.js +++ b/test/all.test.js @@ -8,11 +8,13 @@ import { join } from 'node:path' const commands = [ { + // Process a single YouTube video using Autoshow's default settings. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '01---ep0-fsjam-podcast-prompt.md' }, { + // Process all videos in a specified YouTube playlist. cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02A---ep1-fsjam-podcast-prompt.md' }, @@ -20,6 +22,7 @@ const commands = [ ] }, { + // Process playlist videos with custom title prompt, tiny Whisper model, and Llama for LLM processing. cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '03A---ep1-fsjam-podcast-llama-shownotes.md' }, @@ -27,6 +30,7 @@ const commands = [ ] }, { + // Process multiple YouTube videos from URLs listed in a file. cmd: 'npm run as -- --urls "content/example-urls.md"', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04A---ep1-fsjam-podcast-prompt.md' }, @@ -34,6 +38,7 @@ const commands = [ ] }, { + // Process multiple YouTube videos from URLs with title prompts, Whisper 'tiny' model, and Llama. cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama', expectedFiles: [ { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '05A---ep1-fsjam-podcast-llama-shownotes.md' }, @@ -41,126 +46,151 @@ const commands = [ ] }, { + // Process a single local audio file. cmd: 'npm run as -- --file "content/audio.mp3"', expectedFile: 'audio-prompt.md', newName: '06---audio-prompt.md' }, { + // Process local audio file with title prompts, Whisper 'tiny' model, and Llama. 
cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama', expectedFile: 'audio-llama-shownotes.md', newName: '07---audio-llama-shownotes.md' }, { + // Process podcast RSS feed from default order. cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', newName: '08---thoughts-on-lambda-school-layoffs-prompt.md' }, { + // Process a video using ChatGPT for LLM operations. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt', expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md', newName: '09---ep0-fsjam-podcast-chatgpt-shownotes.md' }, { + // Process video with ChatGPT using GPT_4o_MINI model. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt GPT_4o_MINI', expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md', newName: '10---ep0-fsjam-podcast-chatgpt-shownotes.md' }, { + // Process a video using Claude for LLM operations. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude', expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md', newName: '11---ep0-fsjam-podcast-claude-shownotes.md' }, { + // Process video with Claude using CLAUDE_3_SONNET model. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude CLAUDE_3_SONNET', expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md', newName: '12---ep0-fsjam-podcast-claude-shownotes.md' }, // { + // // Process a video using Gemini for LLM operations. // cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini', // expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', // newName: '13---ep0-fsjam-podcast-gemini-shownotes.md' // }, // { + // // Process video with Gemini using GEMINI_1_5_FLASH model. // cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini GEMINI_1_5_FLASH', // expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md', // newName: '14---ep0-fsjam-podcast-gemini-shownotes.md' // }, { + // Process a video using Cohere for LLM operations cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere', expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md', newName: '15---ep0-fsjam-podcast-cohere-shownotes.md' }, { + // Process video with Cohere using COMMAND_R_PLUS model. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere COMMAND_R_PLUS', expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md', newName: '16---ep0-fsjam-podcast-cohere-shownotes.md' }, { + // Process a video using Mistral for LLM operations cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral', expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md', newName: '17---ep0-fsjam-podcast-mistral-shownotes.md' }, { + // Process video with Mistral using MIXTRAL_8x7b model. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral MIXTRAL_8x7b', expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md', newName: '18---ep0-fsjam-podcast-mistral-shownotes.md' }, { + // Process a video using OctoAI for LLM operations cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo', expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md', newName: '19---ep0-fsjam-podcast-octo-shownotes.md' }, { + // Process video with Octo using LLAMA_3_1_8B model. 
cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo LLAMA_3_1_8B', expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md', newName: '20---ep0-fsjam-podcast-octo-shownotes.md' }, { + // Process a video using Llama for local LLM operations. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '21---ep0-fsjam-podcast-llama-shownotes.md' }, { + // Process a video using Ollama for LLM operations. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama', expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: '22---ep0-fsjam-podcast-ollama-shownotes.md' }, { + // Process a video using Deepgram for transcription. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '23---ep0-fsjam-podcast-prompt.md' }, { + // Process video using Deepgram and Llama. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '24---ep0-fsjam-podcast-llama-shownotes.md' }, { + // Process a video using AssemblyAI for transcription. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '25---ep0-fsjam-podcast-prompt.md' }, { + // Process video using AssemblyAI and Llama. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '26---ep0-fsjam-podcast-llama-shownotes.md' }, { + // Process an audio file using AssemblyAI with speaker labels. cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels', expectedFile: '2024-05-08-fsjam-short-prompt.md', newName: '27---fsjam-short-prompt.md' }, { + // Process video using Whisper.cpp in Docker with 'tiny' model. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '28---ep0-fsjam-podcast-prompt.md' }, { + // Process a video with the full set of prompt options (titles, summary, all chapter lengths, takeaways, and questions). cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters mediumChapters longChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: '29---ep0-fsjam-podcast-prompt.md' }, { + // Process video with multiple prompt sections, Whisper 'tiny' model, and Llama. cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama', expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: '30---ep0-fsjam-podcast-llama-shownotes.md'
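
For reviewers, the node-llama-cpp v3 flow adopted in the `src/llms/llama.ts` changes above can be condensed into a short standalone sketch: download the GGUF once with `createModelDownloader`, load it with `getLlama()`/`loadModel()`, open a context, and prompt a `LlamaChatSession`. This is a minimal sketch under stated assumptions, not the project's implementation: the model directory, download URI, cached filename, and output path below are placeholders, and the sampling options simply mirror the ones used in the patch.

```ts
// Minimal sketch of the node-llama-cpp v3 flow used in callLlama (paths and URI are hypothetical).
import { writeFile } from 'node:fs/promises'
import { existsSync } from 'node:fs'
import { resolve } from 'node:path'
import { getLlama, LlamaChatSession, createModelDownloader } from 'node-llama-cpp'

const modelDir = resolve('./models')              // hypothetical local cache directory
const modelUri = 'https://example.com/model.gguf' // hypothetical GGUF download URL
const modelPath = resolve(modelDir, 'model.gguf') // hypothetical cached filename

export async function runLocalLlm(promptAndTranscript: string, outPath: string): Promise<string> {
  // Download the model with the library's own downloader (replaces the old curl shell-out)
  if (!existsSync(modelPath)) {
    const downloader = await createModelDownloader({ modelUri, dirPath: modelDir })
    await downloader.download()
  }

  // Load the model, create a context, and attach a chat session to a fresh sequence
  const llama = await getLlama()
  const model = await llama.loadModel({ modelPath })
  const context = await model.createContext()
  const session = new LlamaChatSession({ contextSequence: context.getSequence() })

  // Generate a completion; sampling options mirror those in the patch
  const response = await session.prompt(promptAndTranscript, {
    maxTokens: -1,
    temperature: 0.7,
    topK: 40,
    topP: 0.95,
  })

  // Persist the response and return it
  await writeFile(outPath, response)
  return response
}
```

Unlike the patched `callLlama`, this sketch does not keep `model` and `context` in module scope, so repeated calls would reload the model each time; the patch's module-level caching avoids that cost when processing many items (for example, an RSS feed) in one run.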