diff --git a/README.md b/README.md index 938a8dc..9547b95 100644 --- a/README.md +++ b/README.md @@ -72,14 +72,6 @@ git clone https://github.com/ggerganov/whisper.cpp.git && \ > Replace `base` with `large-v2` for the largest model, `medium` for a middle sized model, or `tiny` for the smallest model. -### Clone Llama Repo - -```bash -git clone https://github.com/ggerganov/llama.cpp && \ - make -C llama.cpp && \ - cp .github/llama.Dockerfile llama.cpp/Dockerfile -``` - ## Run Autoshow Node Scripts Run on a single YouTube video. @@ -123,7 +115,7 @@ Use 3rd party LLM providers. ```bash npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt GPT_4o_MINI npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude CLAUDE_3_5_SONNET -npm run as -- --video "https://www.youtube.com/watch?v=h41DF9GUqx4" --gemini GEMINI_1_5_PRO +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini GEMINI_1_5_PRO npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere COMMAND_R_PLUS npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral MISTRAL_LARGE npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo LLAMA_3_1_405B diff --git a/docker-compose.yml b/docker-compose.yml index 69aa23b..12c9705 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: - whisper - ollama environment: - - OLLAMA_HOST=localhost + - OLLAMA_HOST=ollama - OLLAMA_PORT=11434 whisper: build: @@ -29,5 +29,10 @@ services: image: ollama/ollama ports: - "11434:11434" -volumes: - whisper: \ No newline at end of file + volumes: + - ./ollama:/root/.ollama + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11434/healthz"] + interval: 10s + timeout: 5s + retries: 5 \ No newline at end of file diff --git a/docs/examples.md b/docs/examples.md index b02c500..f3989f1 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -48,6 +48,12 @@ Run on multiple YouTube videos in a playlist. npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" ``` +Run on playlist URL and generate JSON info file with markdown metadata of each video in the playlist: + +```bash +npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --info +``` + ### Process Multiple Videos Specified in a URLs File Run on an arbitrary list of URLs in `example-urls.md`. @@ -56,6 +62,12 @@ Run on an arbitrary list of URLs in `example-urls.md`. npm run as -- --urls "content/example-urls.md" ``` +Run on URLs file and generate JSON info file with markdown metadata of each video: + +```bash +npm run as -- --urls "content/example-urls.md" --info +``` + ### Process Single Audio or Video File Run on `audio.mp3` on the `content` directory: diff --git a/docs/server.md b/docs/server.md index 01aa36a..abc9a04 100644 --- a/docs/server.md +++ b/docs/server.md @@ -37,8 +37,6 @@ curl --json '{ }' http://localhost:3000/video ``` - - Use LLM. 
```bash @@ -54,7 +52,7 @@ curl --json '{ ```bash curl --json '{ "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" -}' http://localhost:3000/playlist +}' http://localhost:3000/playlist ``` ```bash @@ -62,7 +60,7 @@ curl --json '{ "playlistUrl": "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr", "whisperModel": "tiny", "llm": "llama" -}' http://localhost:3000/playlist +}' http://localhost:3000/playlist ``` ### URLs Endpoint @@ -271,14 +269,14 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptionService": "deepgram" + "transcriptService": "deepgram" }' http://localhost:3000/video ``` ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptionService": "deepgram", + "transcriptService": "deepgram", "llm": "llama" }' http://localhost:3000/video ``` @@ -288,14 +286,14 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptionService": "assembly" + "transcriptService": "assembly" }' http://localhost:3000/video ``` ```bash curl --json '{ "youtubeUrl": "https://www.youtube.com/watch?v=MORMZXEaONk", - "transcriptionService": "assembly", + "transcriptService": "assembly", "llm": "llama" }' http://localhost:3000/video ``` @@ -303,7 +301,7 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - "transcriptionService": "assembly", + "transcriptService": "assembly", "speakerLabels": true }' http://localhost:3000/video ``` @@ -311,7 +309,7 @@ curl --json '{ ```bash curl --json '{ "youtubeUrl": "https://ajc.pics/audio/fsjam-short.mp3", - "transcriptionService": "assembly", + "transcriptService": "assembly", "speakerLabels": true, "llm": "llama" }' http://localhost:3000/video diff --git a/package.json b/package.json index 03d4ce7..a7eeed8 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "autoshow": "./autoshow.js" }, "scripts": { + "setup": "bash ./setup.sh", "autoshow": "node --env-file=.env --no-warnings src/autoshow.js", "as": "node --env-file=.env --no-warnings src/autoshow.js", "bun-as": "bun --env-file=.env --no-warnings src/autoshow.js", @@ -34,6 +35,7 @@ "dependencies": { "@anthropic-ai/sdk": "^0.26.0", "@deepgram/sdk": "^3.5.1", + "@fastify/cors": "^10.0.1", "@google/generative-ai": "^0.17.1", "@mistralai/mistralai": "^1.0.2", "@octoai/sdk": "^1.5.1", @@ -41,6 +43,7 @@ "cohere-ai": "^7.12.0", "commander": "^12.1.0", "fast-xml-parser": "^4.4.1", + "fastify": "^5.0.0", "ffmpeg-static": "^5.2.0", "file-type": "^19.4.1", "inquirer": "^10.2.2", diff --git a/server/index.js b/server/index.js index c211db1..f95d62a 100644 --- a/server/index.js +++ b/server/index.js @@ -1,6 +1,7 @@ // server/index.js -import http from 'node:http' +import Fastify from 'fastify' +import cors from '@fastify/cors' import { handleVideoRequest } from './routes/video.js' import { handlePlaylistRequest } from './routes/playlist.js' import { handleURLsRequest } from './routes/urls.js' @@ -8,58 +9,41 @@ import { handleFileRequest } from './routes/file.js' import { handleRSSRequest } from './routes/rss.js' import { env } from 'node:process' +// Set the port from environment variable or default to 3000 const port = env.PORT || 3000 -const server = http.createServer(async (req, res) => { - console.log(`[${new Date().toISOString()}] Received ${req.method} request for ${req.url}`) - res.setHeader('Access-Control-Allow-Origin', '*') - 
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') - res.setHeader('Access-Control-Allow-Headers', 'Content-Type') - console.log('CORS headers set') +async function start() { + // Create a Fastify instance with logging enabled + const fastify = Fastify({ logger: true }) - if (req.method === 'OPTIONS') { - console.log('Handling OPTIONS preflight request') - res.writeHead(204) - res.end() - return - } + // Register CORS plugin to handle CORS headers and preflight requests + await fastify.register(cors, { + origin: '*', + methods: ['GET', 'POST', 'OPTIONS'], + allowedHeaders: ['Content-Type'], + }) + + // Log each incoming request + fastify.addHook('onRequest', async (request, reply) => { + console.log( + `[${new Date().toISOString()}] Received ${request.method} request for ${request.url}` + ) + }) + + // Define route handlers + fastify.post('/video', handleVideoRequest) + fastify.post('/playlist', handlePlaylistRequest) + fastify.post('/urls', handleURLsRequest) + fastify.post('/file', handleFileRequest) + fastify.post('/rss', handleRSSRequest) - if (req.method === 'POST') { - switch (req.url) { - case '/video': - console.log('Routing to handleVideoRequest') - await handleVideoRequest(req, res) - break - case '/playlist': - console.log('Routing to handlePlaylistRequest') - await handlePlaylistRequest(req, res) - break - case '/urls': - console.log('Routing to handleURLsRequest') - await handleURLsRequest(req, res) - break - case '/file': - console.log('Routing to handleFileRequest') - await handleFileRequest(req, res) - break - case '/rss': - console.log('Routing to handleRSSRequest') - await handleRSSRequest(req, res) - break - default: - console.log('Unknown route, sending 404') - res.statusCode = 404 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'Not Found' })) - } - } else { - console.log(`Method ${req.method} not allowed, sending 405`) - res.statusCode = 405 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'Method Not Allowed' })) + try { + await fastify.listen({ port }) + console.log(`Server running at http://localhost:${port}`) + } catch (err) { + fastify.log.error(err) + process.exit(1) } -}) +} -server.listen(port, () => { - console.log(`Server running at http://localhost:${port}`) -}) \ No newline at end of file +start() \ No newline at end of file diff --git a/server/routes/file.js b/server/routes/file.js index d3fd2f2..5ac7c11 100644 --- a/server/routes/file.js +++ b/server/routes/file.js @@ -1,41 +1,38 @@ // server/routes/file.js import { processFile } from '../../src/commands/processFile.js' +import { reqToOpts } from '../utils/reqToOpts.js' -const handleFileRequest = async (req, res) => { +// Handler for /file route +const handleFileRequest = async (request, reply) => { console.log('Entered handleFileRequest') - let body = '' - req.on('data', chunk => { - body += chunk.toString() - console.log('Received chunk:', chunk.toString()) - }) - req.on('end', async () => { - console.log('Request body complete:', body) - try { - const { filePath, model = 'base', llm, options = {} } = JSON.parse(body) - console.log('Parsed request body:', { filePath, model, llm, options }) - if (!filePath) { - console.log('File path not provided, sending 400') - res.statusCode = 400 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'File path is required' })) - return - } - const llmOpt = llm || null - await processFile(filePath, llmOpt, model, options) - 
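Because the routes are now plain Fastify handler functions registered on a Fastify instance, they can be exercised in-process without opening a socket. A minimal sketch using Fastify's `inject` utility, assuming the same registration as in `server/index.js` above (the payload values are illustrative):

```js
// Sketch: smoke-test the /video route in-process with fastify.inject (no network socket).
import Fastify from 'fastify'
import cors from '@fastify/cors'
import { handleVideoRequest } from './routes/video.js'

const fastify = Fastify({ logger: false })
await fastify.register(cors, { origin: '*' })
fastify.post('/video', handleVideoRequest)

const response = await fastify.inject({
  method: 'POST',
  url: '/video',
  payload: { youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk', whisperModel: 'tiny' },
})
console.log(response.statusCode, response.json())
```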
console.log('processFile completed successfully') - res.statusCode = 200 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ - message: 'File processed successfully.' - })) - } catch (error) { - console.error('Error processing file:', error) - res.statusCode = 500 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'An error occurred while processing the file' })) + + try { + // Access parsed request body + const requestData = request.body + console.log('Parsed request body:', requestData) + + // Extract file path + const { filePath } = requestData + + if (!filePath) { + console.log('File path not provided, sending 400') + reply.status(400).send({ error: 'File path is required' }) + return } - }) + + // Map request data to processing options + const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) + console.log('Calling processFile with params:', { filePath, llmOpt, transcriptOpt, options }) + + await processFile(filePath, llmOpt, transcriptOpt, options) + + console.log('processFile completed successfully') + reply.send({ message: 'File processed successfully.' }) + } catch (error) { + console.error('Error processing file:', error) + reply.status(500).send({ error: 'An error occurred while processing the file' }) + } } export { handleFileRequest } \ No newline at end of file diff --git a/server/routes/playlist.js b/server/routes/playlist.js index 774b865..a276fbd 100644 --- a/server/routes/playlist.js +++ b/server/routes/playlist.js @@ -1,43 +1,38 @@ // server/routes/playlist.js import { processPlaylist } from '../../src/commands/processPlaylist.js' +import { reqToOpts } from '../utils/reqToOpts.js' -const handlePlaylistRequest = async (req, res) => { +// Handler for /playlist route +const handlePlaylistRequest = async (request, reply) => { console.log('Entered handlePlaylistRequest') - let body = '' - - req.on('data', chunk => { - body += chunk.toString() - console.log('Received chunk:', chunk.toString()) - }) - - req.on('end', async () => { - console.log('Request body complete:', body) - try { - const { playlistUrl, model = 'base', llm, options = {} } = JSON.parse(body) - console.log('Parsed request body:', { playlistUrl, model, llm, options }) - if (!playlistUrl) { - console.log('Playlist URL not provided, sending 400') - res.statusCode = 400 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'Playlist URL is required' })) - return - } - const llmOpt = llm || null - await processPlaylist(playlistUrl, llmOpt, model, options) - console.log('processPlaylist completed successfully') - res.statusCode = 200 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ - message: 'Playlist processed successfully.' 
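For example, the refactored file route is called the same way as the other endpoints; `filePath`, `whisperModel`, and `llm` are all picked up by `reqToOpts` (shown further down), so a request might look roughly like this (path and model are illustrative):

```bash
curl --json '{
  "filePath": "content/audio.mp3",
  "whisperModel": "tiny",
  "llm": "llama"
}' http://localhost:3000/file
```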
- })) - } catch (error) { - console.error('Error processing playlist:', error) - res.statusCode = 500 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'An error occurred while processing the playlist' })) + + try { + // Access parsed request body + const requestData = request.body + console.log('Parsed request body:', requestData) + + // Extract playlist URL + const { playlistUrl } = requestData + + if (!playlistUrl) { + console.log('Playlist URL not provided, sending 400') + reply.status(400).send({ error: 'Playlist URL is required' }) + return } - }) + + // Map request data to processing options + const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) + console.log('Calling processPlaylist with params:', { playlistUrl, llmOpt, transcriptOpt, options }) + + await processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) + + console.log('processPlaylist completed successfully') + reply.send({ message: 'Playlist processed successfully.' }) + } catch (error) { + console.error('Error processing playlist:', error) + reply.status(500).send({ error: 'An error occurred while processing the playlist' }) + } } export { handlePlaylistRequest } \ No newline at end of file diff --git a/server/routes/rss.js b/server/routes/rss.js index c946821..2ff5212 100644 --- a/server/routes/rss.js +++ b/server/routes/rss.js @@ -1,50 +1,38 @@ // server/routes/rss.js import { processRSS } from '../../src/commands/processRSS.js' +import { reqToOpts } from '../utils/reqToOpts.js' -const handleRSSRequest = async (req, res) => { +// Handler for /rss route +const handleRSSRequest = async (request, reply) => { console.log('Entered handleRSSRequest') - let body = '' - req.on('data', chunk => { - body += chunk.toString() - console.log('Received chunk:', chunk.toString()) - }) - req.on('end', async () => { - console.log('Request body complete:', body) - try { - const { rssUrl, model = 'base', llm, order = 'newest', skip = 0, options = {} } = JSON.parse(body) - console.log('Parsed request body:', { rssUrl, model, llm, order, skip, options }) - - if (!rssUrl) { - console.log('RSS URL not provided, sending 400') - res.statusCode = 400 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'RSS URL is required' })) - return - } - - const llmOpt = llm || null - const whisperModel = model || 'base' - - console.log('Starting processRSS in background') - // Start processing in the background - processRSS(rssUrl, llmOpt, whisperModel, order, skip, options) - .then(() => console.log('RSS processing completed successfully')) - .catch(error => console.error('Error during RSS processing:', error)) - - // Respond immediately - res.statusCode = 200 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ - message: 'RSS processing started successfully. This may take some time to complete.' 
- })) - } catch (error) { - console.error('Error processing RSS request:', error) - res.statusCode = 500 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'An error occurred while processing the RSS feed' })) + + try { + // Access parsed request body + const requestData = request.body + console.log('Parsed request body:', requestData) + + // Extract RSS URL + const { rssUrl } = requestData + + if (!rssUrl) { + console.log('RSS URL not provided, sending 400') + reply.status(400).send({ error: 'RSS URL is required' }) + return } - }) + + // Map request data to processing options + const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) + console.log('Calling processRSS with params:', { rssUrl, llmOpt, transcriptOpt, options }) + + await processRSS(rssUrl, llmOpt, transcriptOpt, options) + + console.log('processRSS completed successfully') + reply.send({ message: 'RSS feed processed successfully.' }) + } catch (error) { + console.error('Error processing RSS request:', error) + reply.status(500).send({ error: 'An error occurred while processing the RSS feed' }) + } } export { handleRSSRequest } \ No newline at end of file diff --git a/server/routes/urls.js b/server/routes/urls.js index 17eef20..9fd1bc3 100644 --- a/server/routes/urls.js +++ b/server/routes/urls.js @@ -1,41 +1,38 @@ // server/routes/urls.js import { processURLs } from '../../src/commands/processURLs.js' +import { reqToOpts } from '../utils/reqToOpts.js' -const handleURLsRequest = async (req, res) => { +// Handler for /urls route +const handleURLsRequest = async (request, reply) => { console.log('Entered handleURLsRequest') - let body = '' - req.on('data', chunk => { - body += chunk.toString() - console.log('Received chunk:', chunk.toString()) - }) - req.on('end', async () => { - console.log('Request body complete:', body) - try { - const { filePath, model = 'base', llm, options = {} } = JSON.parse(body) - console.log('Parsed request body:', { filePath, model, llm, options }) - if (!filePath) { - console.log('File path not provided, sending 400') - res.statusCode = 400 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'File path is required' })) - return - } - const llmOpt = llm || null - await processURLs(filePath, llmOpt, model, options) - console.log('processURLs completed successfully') - res.statusCode = 200 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ - message: 'URLs processed successfully.' - })) - } catch (error) { - console.error('Error processing URLs:', error) - res.statusCode = 500 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'An error occurred while processing the URLs' })) + + try { + // Access parsed request body + const requestData = request.body + console.log('Parsed request body:', requestData) + + // Extract file path + const { filePath } = requestData + + if (!filePath) { + console.log('File path not provided, sending 400') + reply.status(400).send({ error: 'File path is required' }) + return } - }) + + // Map request data to processing options + const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) + console.log('Calling processURLs with params:', { filePath, llmOpt, transcriptOpt, options }) + + await processURLs(filePath, llmOpt, transcriptOpt, options) + + console.log('processURLs completed successfully') + reply.send({ message: 'URLs processed successfully.' 
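Note that the RSS handler now awaits `processRSS` before replying instead of responding immediately and processing in the background, so a client such as curl holds the connection open until the feed has been processed. An illustrative request (the feed URL is the one used in the CLI help examples):

```bash
curl --json '{
  "rssUrl": "https://feeds.transistor.fm/fsjam-podcast/",
  "whisperModel": "tiny",
  "llm": "llama",
  "order": "newest"
}' http://localhost:3000/rss
```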
}) + } catch (error) { + console.error('Error processing URLs:', error) + reply.status(500).send({ error: 'An error occurred while processing the URLs' }) + } } export { handleURLsRequest } \ No newline at end of file diff --git a/server/routes/video.js b/server/routes/video.js index 899d030..08dd2e7 100644 --- a/server/routes/video.js +++ b/server/routes/video.js @@ -1,46 +1,38 @@ // server/routes/video.js import { processVideo } from '../../src/commands/processVideo.js' +import { reqToOpts } from '../utils/reqToOpts.js' -const handleVideoRequest = async (req, res) => { +// Handler for /video route +const handleVideoRequest = async (request, reply) => { console.log('Entered handleVideoRequest') - let body = '' - - req.on('data', chunk => { - body += chunk.toString() - console.log('Received chunk:', chunk.toString()) - }) - - req.on('end', async () => { - console.log('Request body complete:', body) - try { - const { youtubeUrl, model, llm, options = {} } = JSON.parse(body) - console.log('Parsed request body:', { youtubeUrl, model, llm, options }) - if (!youtubeUrl) { - console.log('YouTube URL not provided, sending 400') - res.statusCode = 400 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'YouTube URL is required' })) - return - } - const llmOpt = llm || null - const whisperModel = model || 'base' - console.log('Calling processVideo with params:', { youtubeUrl, llmOpt, whisperModel, options }) - const finalContent = await processVideo(youtubeUrl, llmOpt, whisperModel, options) - console.log('processVideo completed successfully') - res.statusCode = 200 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ - message: 'Video processed successfully.', - content: finalContent - })) - } catch (error) { - console.error('Error processing video:', error) - res.statusCode = 500 - res.setHeader('Content-Type', 'application/json') - res.end(JSON.stringify({ error: 'An error occurred while processing the video' })) + + try { + // Access parsed request body + const requestData = request.body + console.log('Parsed request body:', requestData) + + // Extract YouTube URL + const { youtubeUrl } = requestData + + if (!youtubeUrl) { + console.log('YouTube URL not provided, sending 400') + reply.status(400).send({ error: 'YouTube URL is required' }) + return } - }) + + // Map request data to processing options + const { options, llmOpt, transcriptOpt } = reqToOpts(requestData) + console.log('Calling processVideo with params:', { youtubeUrl, llmOpt, transcriptOpt, options }) + + await processVideo(youtubeUrl, llmOpt, transcriptOpt, options) + + console.log('processVideo completed successfully') + reply.send({ message: 'Video processed successfully.' 
}) + } catch (error) { + console.error('Error processing video:', error) + reply.status(500).send({ error: 'An error occurred while processing the video' }) + } } export { handleVideoRequest } \ No newline at end of file diff --git a/server/utils/reqToOpts.js b/server/utils/reqToOpts.js new file mode 100644 index 0000000..be9f654 --- /dev/null +++ b/server/utils/reqToOpts.js @@ -0,0 +1,58 @@ +// server/utils/reqToOpts.js + +// Function to map request data to processing options +function reqToOpts(requestData) { + // Define possible options + const [llmOptions, transcriptOptions, otherOptions] = [ + // List of supported LLM options + ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'], + // List of supported transcript services + ['whisper', 'whisperDocker', 'deepgram', 'assembly'], + // List of other supported options + ['speakerLabels', 'prompt', 'noCleanUp', 'order', 'skip', 'info', 'item'] + ] + + // Initialize options object + const options = {} + // Initialize llm option + let llmOpt = null + // Initialize transcript option + let transcriptOpt = null + + // Check if LLM is provided and valid + if (requestData.llm && llmOptions.includes(requestData.llm)) { + // Set llmOpt + llmOpt = requestData.llm + // Set LLM model or true + options[llmOpt] = requestData.llmModel || true + } + + // Determine transcript service and default to 'whisper' if not specified + transcriptOpt = transcriptOptions.includes(requestData.transcriptService) + ? requestData.transcriptService + : 'whisper' + + // Set transcript options + if (transcriptOpt === 'whisper') { + // Set whisper model + options.whisperModel = requestData.whisperModel || 'base' + // Enable whisper option + options.whisper = options.whisperModel + } else { + // Enable selected transcript service + options[transcriptOpt] = true + } + + // Map other options from request data + for (const opt of otherOptions) { + if (requestData[opt] !== undefined) { + // Set option if provided + options[opt] = requestData[opt] + } + } + + // Return mapped options + return { options, llmOpt, transcriptOpt } +} + +export { reqToOpts } \ No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..aaaeeb4 --- /dev/null +++ b/setup.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Copy environment file +cp .env.example .env + +# Check if yt-dlp is installed, if not, install it +if ! command -v yt-dlp &> /dev/null +then + echo "yt-dlp could not be found, installing now..." + brew install yt-dlp +else + echo "yt-dlp is already installed." +fi + +# Install npm dependencies +npm i + +# Clone whisper.cpp repository +git clone https://github.com/ggerganov/whisper.cpp.git + +# Download whisper models +bash ./whisper.cpp/models/download-ggml-model.sh base +bash ./whisper.cpp/models/download-ggml-model.sh large-v2 + +# Compile whisper.cpp +make -C whisper.cpp + +# Copy Dockerfile +cp .github/whisper.Dockerfile whisper.cpp/Dockerfile + +# Download Llama model +curl -L "https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" -o "./src/llms/models/Llama-3.2-1B.i1-Q6_K.gguf" + +echo "Setup completed successfully!" \ No newline at end of file diff --git a/src/autoshow.js b/src/autoshow.js index 80e8533..41697e9 100644 --- a/src/autoshow.js +++ b/src/autoshow.js @@ -3,9 +3,13 @@ // src/autoshow.js /** - * This script serves as the entry point for the 'autoshow' CLI application. 
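To make the mapping concrete, this is what `reqToOpts` produces for a typical request body, with the values traced through the function above (the import path assumes a script at the repository root):

```js
import { reqToOpts } from './server/utils/reqToOpts.js'

const body = {
  youtubeUrl: 'https://www.youtube.com/watch?v=MORMZXEaONk',
  whisperModel: 'tiny',
  llm: 'llama',
}

const { options, llmOpt, transcriptOpt } = reqToOpts(body)
// llmOpt        -> 'llama'
// transcriptOpt -> 'whisper' (default when no transcriptService is given)
// options       -> { llama: true, whisperModel: 'tiny', whisper: 'tiny' }
```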
- * It processes command-line arguments and options, and initiates the appropriate - * processing functions based on user input or interactive prompts. + * Autoshow CLI Application + * + * Automate processing of audio and video content from various sources. + * Supports processing YouTube videos, playlists, local files, and podcast RSS feeds. + * + * Documentation: https://github.com/ajcwebdev/autoshow#readme + * Report Issues: https://github.com/ajcwebdev/autoshow/issues */ import { Command } from 'commander' @@ -15,7 +19,7 @@ import { processPlaylist } from './commands/processPlaylist.js' import { processURLs } from './commands/processURLs.js' import { processFile } from './commands/processFile.js' import { processRSS } from './commands/processRSS.js' -import { argv } from 'node:process' +import { argv, exit } from 'node:process' /** @import { ProcessingOptions, HandlerFunction, LLMOption, TranscriptOption } from './types.js' */ @@ -25,7 +29,9 @@ const program = new Command() // Define command-line options and their descriptions program .name('autoshow') - .description('Automated processing of YouTube videos, playlists, podcast RSS feeds, and local audio/video files') + .version('0.0.1') + .description('Automate processing of audio and video content from various sources.') + .usage('[options]') .option('--prompt ', 'Specify prompt sections to include') .option('-v, --video ', 'Process a single YouTube video') .option('-p, --playlist ', 'Process all videos in a YouTube playlist') @@ -36,8 +42,8 @@ program .option('--order ', 'Specify the order for RSS feed processing (newest or oldest)', 'newest') .option('--skip ', 'Number of items to skip when processing RSS feed', parseInt, 0) .option('--info', 'Generate JSON file with RSS feed information instead of processing items') - .option('--whisper [modelType]', 'Use Whisper.cpp for transcription (non-Docker version)') - .option('--whisperDocker [modelType]', 'Use Whisper.cpp for transcription (Docker version)') + .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification') + .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification') .option('--deepgram', 'Use Deepgram for transcription') .option('--assembly', 'Use AssemblyAI for transcription') .option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription') @@ -50,6 +56,20 @@ program .option('--ollama [model]', 'Use Ollama for processing with optional model specification') .option('--gemini [model]', 'Use Gemini for processing with optional model specification') .option('--noCleanUp', 'Do not delete intermediary files after processing') + .option('-i, --interactive', 'Run in interactive mode') + .addHelpText( + 'after', + ` +Examples: + $ autoshow --video "https://www.youtube.com/watch?v=..." + $ autoshow --playlist "https://www.youtube.com/playlist?list=..." + $ autoshow --file "content/audio.mp3" + $ autoshow --rss "https://feeds.transistor.fm/fsjam-podcast/" + +Documentation: https://github.com/ajcwebdev/autoshow#readme +Report Issues: https://github.com/ajcwebdev/autoshow/issues +` + ) /** * Main action for the program. 
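With these option changes the transcription flags take the model directly and interactive mode has to be requested explicitly, so typical invocations look like this (using the `as` npm alias from package.json):

```bash
# Pass the Whisper model straight to the transcription flag
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny

# Interactive prompts now only run when asked for
npm run as -- --interactive
```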
@@ -57,14 +77,19 @@ program * @returns {Promise} */ program.action(async (options) => { - console.log(`Options received:\n`) - console.log(options) - const { video, playlist, urls, file, rss } = options + console.log(`Options received: ${JSON.stringify(options, null, 2)}`) + + // Determine if no action options were provided + const { video, playlist, urls, file, rss, interactive } = options + const noActionProvided = [video, playlist, urls, file, rss].every((opt) => !opt) - // Check if no input options are provided and if so, prompt the user interactively - options = [video, playlist, urls, file, rss].every(opt => !opt) - ? await handleInteractivePrompt(options) - : options + // If interactive mode is selected + if (interactive) { + options = await handleInteractivePrompt(options) + } else if (noActionProvided) { + console.error('Error: No input provided. Please specify an option. Use --help to see available options.') + program.help({ error: true }) + } // Ensure options.item is an array if provided via command line if (options.item && !Array.isArray(options.item)) { @@ -83,29 +108,81 @@ program.action(async (options) => { rss: processRSS, } + // Count the number of action options provided + const actionOptions = ['video', 'playlist', 'urls', 'file', 'rss'] + const actionsProvided = actionOptions.filter((opt) => options[opt]) + + // If more than one action option is provided, show an error + if (actionsProvided.length > 1) { + console.error(`Error: Multiple input options provided (${actionsProvided.join( + ', ' + )}). Please specify only one input option.` + ) + exit(1) + } + /** * Determine the selected LLM option * @type {LLMOption | undefined} */ - const llmOpt = /** @type {LLMOption | undefined} */ ([ - 'chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini' - ].find((option) => options[option])) + const llmOptions = [ + 'chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini', + ] + const selectedLLMs = llmOptions.filter((opt) => options[opt]) + if (selectedLLMs.length > 1) { + console.error(`Error: Multiple LLM options provided (${selectedLLMs.join( + ', ' + )}). Please specify only one LLM option.` + ) + exit(1) + } + const llmOpt = /** @type {LLMOption | undefined} */ (selectedLLMs[0]) /** * Determine the transcription service to use * @type {TranscriptOption | undefined} */ - const transcriptOpt = /** @type {TranscriptOption | undefined} */ ([ - 'whisper', 'whisperDocker', 'deepgram', 'assembly' - ].find((option) => options[option])) + const transcriptOptions = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] + const selectedTranscripts = transcriptOptions.filter((opt) => options[opt]) + if (selectedTranscripts.length > 1) { + console.error(`Error: Multiple transcription options provided (${selectedTranscripts.join(', ')}). 
Please specify only one transcription option.`) + exit(1) + } + let transcriptOpt = /** @type {TranscriptOption | undefined} */ (selectedTranscripts[0]) + + // Standardize the transcription option names + if (transcriptOpt === 'whisper-docker') { + transcriptOpt = 'whisperDocker' + } else if (transcriptOpt === 'whisper') { + transcriptOpt = 'whisper' + } + + // Extract the Whisper model if using Whisper transcription + let whisperModel + if (transcriptOpt === 'whisper' || transcriptOpt === 'whisperDocker') { + whisperModel = options[transcriptOpt] || 'base' // Default to 'base' if no model specified + options.whisperModel = whisperModel // Add this line + } // Execute the appropriate handler based on the action for (const [key, handler] of Object.entries(handlers)) { if (options[key]) { - await handler(options[key], llmOpt, transcriptOpt, options) + try { + await handler(options[key], llmOpt, transcriptOpt, options) + exit(0) // Successful execution + } catch (error) { + console.error(`Error processing ${key}:`, error.message) + exit(1) + } } } }) +// Handle unknown commands +program.on('command:*', function () { + console.error(`Error: Invalid command '${program.args.join(' ')}'. Use --help to see available commands.`) + exit(1) +}) + // Parse the command-line arguments program.parse(argv) \ No newline at end of file diff --git a/src/commands/processFile.js b/src/commands/processFile.js index 8d02e28..3616328 100644 --- a/src/commands/processFile.js +++ b/src/commands/processFile.js @@ -21,7 +21,7 @@ export async function processFile(filePath, llmOpt, transcriptOpt, options) { // Generate markdown for the file const { frontMatter, finalPath, filename } = await generateFileMarkdown(filePath) - // Download or convert the audio file + // Convert the audio or video file to the required format await downloadFileAudio(filePath, filename) // Run transcription on the file @@ -35,11 +35,9 @@ export async function processFile(filePath, llmOpt, transcriptOpt, options) { await cleanUpFiles(finalPath) } - // Log completion message - console.log('File processing completed') + console.log('\n\nLocal file processing completed successfully.\n') } catch (error) { - // Log any errors that occur during processing - console.error('Error processing file:', error) - throw error + console.error(`Error processing file: ${error.message}`) + process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processPlaylist.js b/src/commands/processPlaylist.js index b014092..1b427f9 100644 --- a/src/commands/processPlaylist.js +++ b/src/commands/processPlaylist.js @@ -5,6 +5,7 @@ import { processVideo } from './processVideo.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { extractVideoMetadata } from '../utils/generateMarkdown.js' +import { checkDependencies } from '../utils/checkDependencies.js' /** @import { LLMOption, TranscriptOption, ProcessingOptions } from '../types.js' */ @@ -20,10 +21,10 @@ const execFilePromise = promisify(execFile) */ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, options) { try { - // Log the start of playlist processing - console.log(`Processing playlist: ${playlistUrl}`) + // Check for required dependencies + await checkDependencies(['yt-dlp']) - // Use yt-dlp to fetch video URLs from the playlist + // Fetch video URLs from the playlist const { stdout, stderr } = await execFilePromise('yt-dlp', [ '--flat-playlist', '--print', 'url', @@ -31,21 +32,25 @@ export async function 
processPlaylist(playlistUrl, llmOpt, transcriptOpt, option playlistUrl ]) - // Check for errors in stderr if (stderr) { - console.error(`yt-dlp error: ${stderr}`) + console.error(`yt-dlp warnings: ${stderr}`) } // Split the stdout into an array of video URLs const urls = stdout.trim().split('\n').filter(Boolean) - console.log(`Found ${urls.length} videos in the playlist`) + if (urls.length === 0) { + console.error('Error: No videos found in the playlist.') + process.exit(1) // Exit with an error code + } + + console.log(`\nFound ${urls.length} videos in the playlist`) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) const metadataList = await Promise.all(metadataPromises) const validMetadata = metadataList.filter(Boolean) - // Generate JSON file with playlist information + // Generate JSON file with playlist information if --info option is used if (options.info) { const jsonContent = JSON.stringify(validMetadata, null, 2) const jsonFilePath = 'content/playlist_info.json' @@ -54,30 +59,20 @@ export async function processPlaylist(playlistUrl, llmOpt, transcriptOpt, option return } - // Write the URLs to a file for reference - try { - await writeFile('content/urls.md', urls.join('\n')) - } catch (writeError) { - console.error('Error writing URLs to file:', writeError) - } - // Process each video in the playlist for (const [index, url] of urls.entries()) { - console.log(`Processing video ${index + 1}/${urls.length}: ${url}`) + console.log(`\nProcessing video ${index + 1}/${urls.length}: ${url}`) try { - // Process individual video await processVideo(url, llmOpt, transcriptOpt, options) } catch (error) { - // Log any errors that occur during video processing - console.error(`Error processing video ${url}:`, error) + console.error(`Error processing video ${url}: ${error.message}`) + // Continue processing the next video } } - // Log completion of playlist processing - console.log('Playlist processing completed') + console.log('\nPlaylist processing completed successfully.\n') } catch (error) { - // Log any errors that occur during playlist processing - console.error('Error processing playlist:', error) - throw error + console.error(`Error processing playlist: ${error.message}`) + process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processRSS.js b/src/commands/processRSS.js index 04c4cf8..36c204c 100644 --- a/src/commands/processRSS.js +++ b/src/commands/processRSS.js @@ -1,10 +1,5 @@ // src/commands/processRSS.js -/** - * This module defines the function to process a podcast RSS feed. It handles fetching the RSS feed, parsing it, and - * processing specific episodes based on user input. It supports processing multiple specific items or the entire feed. 
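`checkDependencies` is imported here but its implementation is not part of this diff; purely as an illustration of the kind of guard it provides, a hypothetical version could probe each required binary and fail fast when one is missing:

```js
// Hypothetical sketch only — the real src/utils/checkDependencies.js is not shown in this diff.
import { execFile } from 'node:child_process'
import { promisify } from 'node:util'

const execFilePromise = promisify(execFile)

/**
 * Verifies that each external command (e.g. 'yt-dlp') is available on the PATH.
 * @param {string[]} dependencies - Command names to check.
 */
export async function checkDependencies(dependencies) {
  for (const command of dependencies) {
    try {
      await execFilePromise(command, ['--version'])
    } catch (error) {
      throw new Error(`Dependency '${command}' is not installed or not found in PATH.`)
    }
  }
}
```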
- */ - import { writeFile } from 'node:fs/promises' import { XMLParser } from 'fast-xml-parser' import { generateRSSMarkdown } from '../utils/generateMarkdown.js' @@ -48,9 +43,11 @@ async function processItem(item, transcriptOpt, llmOpt, options) { if (!options.noCleanUp) { await cleanUpFiles(finalPath) } - console.log(`\nProcess completed successfully for item: ${item.title}`) + + console.log(`\nItem processing completed successfully: ${item.title}`) } catch (error) { - console.error(`Error processing item: ${item.title}`, error) + console.error(`Error processing item ${item.title}: ${error.message}`) + // Continue processing the next item } } @@ -64,23 +61,19 @@ async function processItem(item, transcriptOpt, llmOpt, options) { */ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { try { - // Log the start of RSS feed processing - console.log(`\nProcessing RSS feed: ${rssUrl}`) - if (options.item && options.item.length > 0) { // If specific items are provided, list them - console.log(`Processing specific items:`) + console.log('\nProcessing specific items:') options.item.forEach((url) => console.log(` - ${url}`)) } else { - // If no specific items, log the number of items to skip console.log(` - Skipping first ${options.skip} items`) } - // Set a timeout of 5 seconds using AbortController + // Fetch the RSS feed with a timeout const controller = new AbortController() const timeout = setTimeout(() => { controller.abort() - }, 5000) + }, 10000) // 10 seconds timeout let response try { @@ -94,23 +87,24 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { clearTimeout(timeout) } catch (error) { if (error.name === 'AbortError') { - console.error('Fetch request timed out') + console.error('Error: Fetch request timed out.') } else { - console.error('Error fetching RSS feed:', error) + console.error(`Error fetching RSS feed: ${error.message}`) } - throw error + process.exit(1) // Exit with an error code } // Check if the response is successful if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`) + console.error(`HTTP error! status: ${response.status}`) + process.exit(1) // Exit with an error code } // Parse the RSS feed content const text = await response.text() const feed = parser.parse(text) - // Extract channel information + // Extract channel and item information const { title: channelTitle, link: channelLink, @@ -121,17 +115,10 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { // Extract channel image URL safely const channelImage = channelImageObject?.url || '' - // Initialize date formatter - const dateFormatter = new Intl.DateTimeFormat('en-CA', { - year: 'numeric', - month: '2-digit', - day: '2-digit', - }) - // Ensure feedItems is an array const feedItemsArray = Array.isArray(feedItems) ? 
feedItems : [feedItems] - // Filter and map feed items to extract necessary information + // Filter and map feed items const items = feedItemsArray .filter((item) => { // Ensure the item has an enclosure with a valid type @@ -145,12 +132,17 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { channel: channelTitle, channelURL: channelLink, title: item.title, - description: "", // Initialize description as empty string - publishDate: dateFormatter.format(new Date(item.pubDate)), + description: '', + publishDate: new Date(item.pubDate).toISOString().split('T')[0], coverImage: item['itunes:image']?.href || channelImage || '', })) - // Generate JSON file with RSS feed information + if (items.length === 0) { + console.error('Error: No audio/video items found in the RSS feed.') + process.exit(1) // Exit with an error code + } + + // Generate JSON file with RSS feed information if --info option is used if (options.info) { const jsonContent = JSON.stringify(items, null, 2) const jsonFilePath = 'content/rss_info.json' @@ -164,38 +156,28 @@ export async function processRSS(rssUrl, llmOpt, transcriptOpt, options) { // Find the items matching the provided audio URLs const matchedItems = items.filter((item) => options.item.includes(item.showLink)) if (matchedItems.length === 0) { - console.error(`No matching items found for the provided URLs.`) - return + console.error('Error: No matching items found for the provided URLs.') + process.exit(1) // Exit with an error code } itemsToProcess = matchedItems } else { - // Sort items based on the specified order + // Sort items based on the specified order and apply skip const sortedItems = options.order === 'newest' ? items : [...items].reverse() - const skippedItems = sortedItems.slice(options.skip) - itemsToProcess = skippedItems - - // Log information about found items - console.log( - ` - Found ${sortedItems.length} audio/video items in the RSS feed` - ) - console.log( - ` - Processing ${skippedItems.length} items after skipping ${options.skip}` - ) + itemsToProcess = sortedItems.slice(options.skip) + + console.log(` - Found ${sortedItems.length} items in the RSS feed.`) + console.log(` - Processing ${itemsToProcess.length} items after skipping ${options.skip}.`) } // Process each item in the feed for (const [index, item] of itemsToProcess.entries()) { - console.log( - `\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title}` - ) + console.log(`\nProcessing item ${index + 1}/${itemsToProcess.length}: ${item.title}`) await processItem(item, transcriptOpt, llmOpt, options) } - // Log completion of RSS feed processing - console.log('RSS feed processing completed') + console.log('\n\nRSS feed processing completed successfully.\n') } catch (error) { - // Log any errors that occur during RSS feed processing - console.error('Error processing RSS feed:', error) - throw error + console.error(`Error processing RSS feed: ${error.message}`) + process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processURLs.js b/src/commands/processURLs.js index 5ccb48a..b509bf2 100644 --- a/src/commands/processURLs.js +++ b/src/commands/processURLs.js @@ -4,6 +4,7 @@ import { readFile, writeFile } from 'node:fs/promises' import { resolve } from 'node:path' import { processVideo } from './processVideo.js' import { extractVideoMetadata } from '../utils/generateMarkdown.js' +import { checkDependencies } from '../utils/checkDependencies.js' /** @import { LLMOption, TranscriptOption, ProcessingOptions } 
from '../types.js' */ @@ -17,8 +18,9 @@ import { extractVideoMetadata } from '../utils/generateMarkdown.js' */ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { try { - // Log the start of URL processing and resolve the absolute path of the file - console.log(`Processing URLs from file: ${filePath}`) + // Check for required dependencies + await checkDependencies(['yt-dlp']) + const absolutePath = resolve(filePath) // Read and parse the content of the file into an array of URLs @@ -27,8 +29,12 @@ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { .map(line => line.trim()) .filter(line => line && !line.startsWith('#')) - // Log the number of URLs found - console.log(`Found ${urls.length} URLs in the file`) + if (urls.length === 0) { + console.error('Error: No URLs found in the file.') + process.exit(1) // Exit with an error code + } + + console.log(`\nFound ${urls.length} URLs in the file`) // Extract metadata for all videos const metadataPromises = urls.map(extractVideoMetadata) @@ -46,21 +52,18 @@ export async function processURLs(filePath, llmOpt, transcriptOpt, options) { // Process each URL for (const [index, url] of urls.entries()) { - console.log(`Processing URL ${index + 1}/${urls.length}: ${url}`) + console.log(`\nProcessing URL ${index + 1}/${urls.length}: ${url}`) try { - // Process individual video await processVideo(url, llmOpt, transcriptOpt, options) } catch (error) { - // Log any errors that occur during video processing - console.error(`Error processing URL ${url}:`, error) + console.error(`Error processing URL ${url}: ${error.message}`) + // Continue processing the next URL } } - // Log completion of file processing - console.log('File processing completed') + console.log('\nURL file processing completed successfully.') } catch (error) { - // Log any errors that occur during file reading or processing - console.error(`Error reading or processing file ${filePath}:`, error) - throw error + console.error(`Error reading or processing file ${filePath}: ${error.message}`) + process.exit(1) // Exit with an error code } } \ No newline at end of file diff --git a/src/commands/processVideo.js b/src/commands/processVideo.js index 1256134..88c2227 100644 --- a/src/commands/processVideo.js +++ b/src/commands/processVideo.js @@ -1,5 +1,6 @@ // src/commands/processVideo.js +import { checkDependencies } from '../utils/checkDependencies.js' import { generateMarkdown } from '../utils/generateMarkdown.js' import { downloadAudio } from '../utils/downloadAudio.js' import { runTranscription } from '../utils/runTranscription.js' @@ -18,6 +19,9 @@ import { cleanUpFiles } from '../utils/cleanUpFiles.js' */ export async function processVideo(url, llmOpt, transcriptOpt, options) { try { + // Check for required dependencies + await checkDependencies(['yt-dlp']) + // Generate markdown with video metadata const { frontMatter, finalPath, filename } = await generateMarkdown(url) @@ -25,7 +29,7 @@ export async function processVideo(url, llmOpt, transcriptOpt, options) { await downloadAudio(url, filename) // Run transcription on the audio - await runTranscription(finalPath, transcriptOpt, options) + await runTranscription(finalPath, transcriptOpt, options, frontMatter) // Process the transcript with the selected Language Model await runLLM(finalPath, frontMatter, llmOpt, options) @@ -34,8 +38,11 @@ export async function processVideo(url, llmOpt, transcriptOpt, options) { if (!options.noCleanUp) { await cleanUpFiles(finalPath) } + + console.log('\nVideo 
processing completed successfully.\n') } catch (error) { // Log any errors that occur during video processing - console.error('Error processing video:', error) + console.error('Error processing video:', error.message) + throw error // Re-throw to be handled by caller } } \ No newline at end of file diff --git a/src/inquirer.js b/src/inquirer.js index c5c0f4c..5e1e74d 100644 --- a/src/inquirer.js +++ b/src/inquirer.js @@ -4,8 +4,10 @@ import inquirer from 'inquirer' /** @import { ProcessingOptions, InquirerAnswers, InquirerQuestions, WhisperModelType } from './types.js' */ -// Interactive prompts using inquirer -/** @type {InquirerQuestions} */ +/** + * Interactive prompts using inquirer + * @type {InquirerQuestions} + */ const INQUIRER_PROMPT = [ { type: 'list', @@ -148,16 +150,29 @@ const INQUIRER_PROMPT = [ message: 'Do you want to keep intermediary files after processing?', default: false, }, + { + type: 'confirm', + name: 'confirmAction', + message: 'Proceed with the above configuration?', + default: true, + }, ] /** - * Prompts the user for input if no command-line options are provided. + * Prompts the user for input if interactive mode is selected. * @param {ProcessingOptions} options - The initial command-line options. * @returns {Promise} - The updated options after user input. */ export async function handleInteractivePrompt(options) { /** @type {InquirerAnswers} */ const answers = await inquirer.prompt(INQUIRER_PROMPT) + + // If user cancels the action + if (!answers.confirmAction) { + console.log('Operation cancelled.') + process.exit(0) + } + options = { ...options, ...answers, @@ -184,5 +199,13 @@ export async function handleInteractivePrompt(options) { options.item = answers.item.split(',').map((url) => url.trim()) } + // Remove properties that are not options + delete options.action + delete options.specifyItem + delete options.llamaModel + delete options.useDocker + delete options.whisperModel + delete options.confirmAction + return options } \ No newline at end of file diff --git a/src/llms/chatgpt.js b/src/llms/chatgpt.js index 2773ed1..ec21221 100644 --- a/src/llms/chatgpt.js +++ b/src/llms/chatgpt.js @@ -29,7 +29,7 @@ const gptModel = { export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o_MINI') { // Check for API key if (!env.OPENAI_API_KEY) { - throw new Error('OPENAI_API_KEY environment variable is not set.') + throw new Error('OPENAI_API_KEY environment variable is not set. 
Please set it to your OpenAI API key.') } // Initialize the OpenAI client with the API key from environment variables @@ -55,13 +55,11 @@ export async function callChatGPT(promptAndTranscript, tempPath, model = 'GPT_4o // Write the generated content to the output file await writeFile(tempPath, content) - - console.log(`\nTranscript saved to:\n - ${tempPath}`) - console.log(`\nFinish Reason: ${finish_reason}\nModel: ${usedModel}`) - console.log(`Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens\n`) + console.log(` - Finish Reason: ${finish_reason}\n - ChatGPT Model: ${usedModel}`) + console.log(` - Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens`) } catch (error) { - console.error('Error in callChatGPT:', error) + console.error(`Error in callChatGPT: ${error.message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/claude.js b/src/llms/claude.js index 1773b37..69518a2 100644 --- a/src/llms/claude.js +++ b/src/llms/claude.js @@ -29,7 +29,7 @@ const claudeModel = { export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_3_HAIKU') { // Check if the ANTHROPIC_API_KEY environment variable is set if (!env.ANTHROPIC_API_KEY) { - throw new Error('ANTHROPIC_API_KEY environment variable is not set.') + throw new Error('ANTHROPIC_API_KEY environment variable is not set. Please set it to your Anthropic API key.') } // Initialize the Anthropic client with the API key from environment variables @@ -56,14 +56,12 @@ export async function callClaude(promptAndTranscript, tempPath, model = 'CLAUDE_ // Write the generated text to the output file await writeFile(tempPath, text) - - console.log(`\nTranscript saved to:\n - ${tempPath}`) // console.log(`\nClaude response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging - console.log(`\nStop Reason: ${stop_reason}\nModel: ${usedModel}`) - console.log(`Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens\n`) + console.log(` - Stop Reason: ${stop_reason}\n - Model: ${usedModel}`) + console.log(` - Token Usage:\n - ${input_tokens} input tokens\n - ${output_tokens} output tokens`) } catch (error) { - console.error('Error:', error) + console.error(`Error in callClaude: ${error.message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/cohere.js b/src/llms/cohere.js index 67c5f4a..0a84c01 100644 --- a/src/llms/cohere.js +++ b/src/llms/cohere.js @@ -27,7 +27,7 @@ const cohereModel = { export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND_R') { // Check if the COHERE_API_KEY environment variable is set if (!env.COHERE_API_KEY) { - throw new Error('COHERE_API_KEY environment variable is not set.') + throw new Error('COHERE_API_KEY environment variable is not set. 
Please set it to your Cohere API key.') } // Initialize the Cohere client with the API key from environment variables @@ -53,14 +53,12 @@ export async function callCohere(promptAndTranscript, tempPath, model = 'COMMAND // Write the generated text to the output file await writeFile(tempPath, text) - - console.log(`\nTranscript saved to:\n - ${tempPath}`) // console.log(`\nCohere response:\n\n${JSON.stringify(response, null, 2)}`) // Commented out detailed response logging console.log(`\nFinish Reason: ${finishReason}\nModel: ${actualModel}`) - console.log(`Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens\n`) + console.log(`Token Usage:\n - ${inputTokens} input tokens\n - ${outputTokens} output tokens`) } catch (error) { - console.error('Error:', error) + console.error(`Error in callCohere: ${error.message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/llms/gemini.js b/src/llms/gemini.js index 4aa80d2..092e83e 100644 --- a/src/llms/gemini.js +++ b/src/llms/gemini.js @@ -21,7 +21,7 @@ const geminiModel = { * @param {number} ms - Milliseconds to delay * @returns {Promise} */ -const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); +const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)) /** @type {LLMFunction} */ /** @@ -35,7 +35,7 @@ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_1_5_FLASH') { // Check if the GEMINI_API_KEY environment variable is set if (!env.GEMINI_API_KEY) { - throw new Error('GEMINI_API_KEY environment variable is not set.') + throw new Error('GEMINI_API_KEY environment variable is not set. Please set it to your Gemini API key.') } // Initialize the Google Generative AI client const genAI = new GoogleGenerativeAI(env.GEMINI_API_KEY) @@ -43,16 +43,13 @@ export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_ // Select the actual model to use, defaulting to GEMINI_1_5_FLASH if not specified const actualModel = geminiModel[model] || geminiModel.GEMINI_1_5_FLASH - // Get the generative model - const gem = genAI.getGenerativeModel({ model: actualModel }) - const maxRetries = 3 // Maximum number of retry attempts // Retry loop for (let attempt = 1; attempt <= maxRetries; attempt++) { try { // Generate content using the selected model - const result = await gem.generateContent(promptAndTranscript) + const result = await gemAI.generateContent(promptAndTranscript, { model: actualModel }) // Get the response from the generated content const response = await result.response @@ -67,13 +64,11 @@ export async function callGemini(promptAndTranscript, tempPath, model = 'GEMINI_ // Write the generated text to the output file await writeFile(tempPath, text) - - console.log(`\nTranscript saved to:\n - ${tempPath}`) console.log(`\nModel: ${actualModel}`) return } catch (error) { - console.error(`Error in callGemini (attempt ${attempt}/${maxRetries}):`, error) + console.error(`Error in callGemini (attempt ${attempt}/${maxRetries}): ${error.message}`) // If this is the last attempt, throw the error if (attempt === maxRetries) { diff --git a/src/llms/llama.js b/src/llms/llama.js index 578cfc2..e469c6e 100644 --- a/src/llms/llama.js +++ b/src/llms/llama.js @@ -15,6 +15,10 @@ const execAsync = promisify(exec) * @type {Record} */ const localModels = { + // LLAMA_3_2_1B_Q6_MODEL: { + // filename: "Llama-3.2-1B.i1-Q6_K.gguf", + // url: 
"https://huggingface.co/mradermacher/Llama-3.2-1B-i1-GGUF/resolve/main/Llama-3.2-1B.i1-Q6_K.gguf" + // }, LLAMA_3_1_8B_Q4_MODEL: { filename: "Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf", url: "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.IQ4_XS.gguf" @@ -42,6 +46,7 @@ const localModels = { async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { // Get the model object from localModels using the provided modelName or default to GEMMA_2_2B_Q4_MODEL const model = localModels[modelName] || localModels.GEMMA_2_2B_Q4_MODEL + console.log(` - ${model.filename} model selected.`) // If no valid model is found, throw an error if (!model) { @@ -53,7 +58,7 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { // Check if the model file already exists if (existsSync(modelPath)) { - console.log(`\nModel already exists: ${modelPath}`) + console.log(` - Model already exists at ${modelPath}`) // Return the path if the model already exists return modelPath } @@ -74,7 +79,7 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { return modelPath } catch (err) { // If an error occurs during download, log it and throw a new error - console.error('Download failed:', err.message) + console.error(`Download failed: ${err.message}`) throw new Error('Failed to download the model') } } @@ -84,14 +89,17 @@ async function downloadModel(modelName = 'GEMMA_2_2B_Q4_MODEL') { * Main function to call the local Llama model. * @param {string} promptAndTranscript - The combined prompt and transcript content. * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {LlamaModelType} [modelName='GEMMA_2_2B_Q4_MODEL'] - The name of the model to use. + * @param {LlamaModelType | boolean} [modelName=true] - The name of the model to use or true to use the default. * @returns {Promise} * @throws {Error} - If an error occurs during processing. */ -export async function callLlama(promptAndTranscript, tempPath, modelName = 'GEMMA_2_2B_Q4_MODEL') { +export async function callLlama(promptAndTranscript, tempPath, modelName = true) { try { + // If modelName is true or not provided, use the default model + const actualModelName = modelName === true ? 'GEMMA_2_2B_Q4_MODEL' : modelName + // Ensure the model is downloaded - const modelPath = await downloadModel(modelName) + const modelPath = await downloadModel(actualModelName) // Initialize Llama and load the local model const llama = await getLlama() @@ -104,10 +112,8 @@ export async function callLlama(promptAndTranscript, tempPath, modelName = 'GEMM // Generate a response and write the response to a file const response = await session.prompt(promptAndTranscript) await writeFile(tempPath, response) - console.log(`\nTranscript saved to:\n - ${tempPath}`) - console.log(`\nModel used:\n - ${modelName}\n`) } catch (error) { - console.error('Error in callLlama:', error) + console.error(`Error in callLlama: ${error.message}`) throw error } } \ No newline at end of file diff --git a/src/llms/mistral.js b/src/llms/mistral.js index 212a2a5..bb6ae18 100644 --- a/src/llms/mistral.js +++ b/src/llms/mistral.js @@ -29,7 +29,7 @@ const mistralModel = { export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRAL_NEMO') { // Check if the MISTRAL_API_KEY environment variable is set if (!env.MISTRAL_API_KEY) { - throw new Error('MISTRAL_API_KEY environment variable is not set.') + throw new Error('MISTRAL_API_KEY environment variable is not set. 
Please set it to your Mistral API key.') } // Initialize Mistral client with API key from environment variables const mistral = new Mistral(env.MISTRAL_API_KEY) @@ -37,6 +37,7 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA try { // Select the actual model to use, defaulting to MISTRAL_NEMO if the specified model is not found const actualModel = mistralModel[model] || mistralModel.MISTRAL_NEMO + console.log(`\nUsing Mistral model: ${actualModel}`) // Make API call to Mistral AI for chat completion const response = await mistral.chat.complete({ @@ -54,15 +55,13 @@ export async function callMistral(promptAndTranscript, tempPath, model = 'MISTRA // Write the generated content to the specified output file await writeFile(tempPath, content) - console.log(`\nTranscript saved to:\n - ${tempPath}`) - // Log finish reason, used model, and token usage - console.log(`\nFinish Reason: ${finishReason}\nModel: ${usedModel}`) - console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens\n`) + console.log(`\nFinish Reason: ${finishReason}\nModel Used: ${usedModel}`) + console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`) } catch (error) { // Log any errors that occur during the process - console.error('Error:', error) + console.error(`Error in callMistral: ${error.message}`) throw error // Re-throw the error for handling by the caller } } \ No newline at end of file diff --git a/src/llms/octo.js b/src/llms/octo.js index 31d03e2..29faaa5 100644 --- a/src/llms/octo.js +++ b/src/llms/octo.js @@ -32,7 +32,7 @@ const octoModel = { export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1_70B') { // Check if the OCTOAI_API_KEY environment variable is set if (!env.OCTOAI_API_KEY) { - throw new Error('OCTOAI_API_KEY environment variable is not set.') + throw new Error('OCTOAI_API_KEY environment variable is not set. 
Please set it to your OctoAI API key.') } // Initialize OctoAI client with API key from environment variables const octoai = new OctoAIClient({ apiKey: env.OCTOAI_API_KEY }) @@ -40,6 +40,7 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 try { // Select the actual model to use, defaulting to LLAMA_3_1_70B if the specified model is not found const actualModel = octoModel[model] || octoModel.LLAMA_3_1_70B + console.log(`\nUsing OctoAI model: ${actualModel}`) // Make API call to OctoAI for text generation const response = await octoai.textGen.createChatCompletion({ @@ -57,15 +58,15 @@ export async function callOcto(promptAndTranscript, tempPath, model = 'LLAMA_3_1 // Write the generated content to the specified output file await writeFile(tempPath, content) - console.log(`Octo show notes saved to ${tempPath}`) + console.log(`\nOctoAI response saved to ${tempPath}`) // Log finish reason, used model, and token usage - console.log(`\nFinish Reason: ${finishReason}\nModel: ${usedModel}`) - console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens\n`) + console.log(`\nFinish Reason: ${finishReason}\nModel Used: ${usedModel}`) + console.log(`Token Usage:\n - ${promptTokens} prompt tokens\n - ${completionTokens} completion tokens\n - ${totalTokens} total tokens`) } catch (error) { // Log any errors that occur during the process - console.error('Error:', error) + console.error(`Error in callOcto: ${error.message}`) throw error // Re-throw the error for handling by the caller } } \ No newline at end of file diff --git a/src/llms/ollama.js b/src/llms/ollama.js index 1bed42d..290f58b 100644 --- a/src/llms/ollama.js +++ b/src/llms/ollama.js @@ -1,7 +1,7 @@ // src/llms/ollama.js import { writeFile } from 'node:fs/promises' -import ollama from 'ollama' +import { Ollama } from 'ollama' /** @import { LLMFunction, LlamaModelType } from '../types.js' */ @@ -36,23 +36,37 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA try { // Map the model name to the Ollama model identifier const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' + + // Get host and port from environment variables or use defaults + const ollamaHost = process.env.OLLAMA_HOST || 'localhost' + const ollamaPort = process.env.OLLAMA_PORT || '11434' + const baseUrl = `http://${ollamaHost}:${ollamaPort}` + + // Create a new Ollama client pointed at the configured host + const client = new Ollama({ host: baseUrl }) + console.log(` - Using Ollama model: ${ollamaModelName} at ${baseUrl}`) // Check if the model is available - const models = await ollama.list() + const models = await client.list() const isAvailable = models.models.some(model => model.name === ollamaModelName) // If the model is not available, pull it if (!isAvailable) { console.log(`Model ${ollamaModelName} not found. 
Pulling it now...`) - const pullStream = await ollama.pull({ model: ollamaModelName, stream: true }) - for await (const part of pullStream) { - console.log(`Pulling ${ollamaModelName}: ${part.status}`) + try { + const pullStream = await client.pull({ model: ollamaModelName, stream: true }) + for await (const part of pullStream) { + console.log(`Pulling ${ollamaModelName}: ${part.status}`) + } + console.log(`Model ${ollamaModelName} successfully pulled.`) + } catch (pullError) { + console.error(`Error pulling model ${ollamaModelName}: ${pullError.message}`) + throw pullError } - console.log(`Model ${ollamaModelName} successfully pulled.`) } // Call the Ollama chat API - const response = await ollama.chat({ + const response = await client.chat({ model: ollamaModelName, messages: [{ role: 'user', content: promptAndTranscript }], }) @@ -62,10 +76,9 @@ export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLA // Write the response to the output file await writeFile(tempPath, assistantReply) - console.log(`\nTranscript saved to:\n - ${tempPath}`) - console.log(`\nModel used:\n - ${ollamaModelName}\n`) + console.log(`\nResponse saved to ${tempPath}`) } catch (error) { - console.error('Error in callLlama:', error) + console.error(`Error in callOllama: ${error.message}`) throw error } } \ No newline at end of file diff --git a/src/llms/ollamaClient.js b/src/llms/ollamaClient.js deleted file mode 100644 index 64037c5..0000000 --- a/src/llms/ollamaClient.js +++ /dev/null @@ -1,92 +0,0 @@ -// src/llms/ollamaClient.js - -import { writeFile } from 'node:fs/promises' -import { Ollama } from 'ollama' - -/** @import { LLMFunction, LlamaModelType } from '../types.js' */ - -/** - * Map of model identifiers to their corresponding names in Ollama - * @type {Record} - */ -const ollamaModels = { - LLAMA_3_2_1B_MODEL: 'llama3.2:1b', - LLAMA_3_2_3B_MODEL: 'llama3.2:3b', - LLAMA_3_1_8B_MODEL: 'llama3.1:8b', - GEMMA_2_2B_MODEL: 'gemma2:2b', - GEMMA_2_9B_MODEL: 'gemma2:9b', - PHI_3_5_MODEL: 'phi3.5:3.8b', - QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', - QWEN_2_5_3B_MODEL: 'qwen2.5:3b', - QWEN_2_5_7B_MODEL: 'qwen2.5:7b', -} - -/** - * Main function to call the Llama model using the Ollama library. - * This function initializes the client, checks if the model is available, - * pulls it if necessary, and then proceeds with the chat. - * @type {LLMFunction} - * @param {string} promptAndTranscript - The combined prompt and transcript content. - * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during processing. 
- */ -export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { - const ollamaHost = process.env.OLLAMA_HOST || '127.0.0.1' - const ollamaPort = process.env.OLLAMA_PORT || 11434 - const baseUrl = `http://${ollamaHost}:${ollamaPort}` - - console.log(`Initializing Ollama client with base URL: ${baseUrl}`) - const ollamaClient = new Ollama({ host: baseUrl }) - - try { - // Test connection to Ollama server - console.log('Testing connection to Ollama server...') - await ollamaClient.list() - console.log('Successfully connected to Ollama server.') - - // Map the model name to the Ollama model identifier - const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' - console.log(`Using Ollama model: ${ollamaModelName}`) - - // Check if the model is available - console.log('Checking model availability...') - const models = await ollamaClient.list() - const isAvailable = models.models.some(model => model.name === ollamaModelName) - - // If the model is not available, pull it - if (!isAvailable) { - console.log(`Model ${ollamaModelName} not found. Pulling it now...`) - const pullStream = await ollamaClient.pull({ model: ollamaModelName, stream: true }) - for await (const part of pullStream) { - console.log(`Pulling ${ollamaModelName}: ${part.status}`) - } - console.log(`Model ${ollamaModelName} successfully pulled.`) - } else { - console.log(`Model ${ollamaModelName} is available.`) - } - - // Call the Ollama chat API - console.log('Sending request to Ollama chat API...') - const response = await ollamaClient.chat({ - model: ollamaModelName, - messages: [{ role: 'user', content: promptAndTranscript }], - }) - - // Extract the assistant's reply - const assistantReply = response.message.content - - // Write the response to the output file - console.log(`Writing response to file: ${tempPath}`) - await writeFile(tempPath, assistantReply) - console.log(`\nTranscript saved to:\n - ${tempPath}`) - console.log(`\nModel used:\n - ${ollamaModelName}\n`) - } catch (error) { - console.error('Error in callOllama:', error) - if (error.code === 'ECONNREFUSED') { - console.error(`Failed to connect to Ollama server at ${baseUrl}. Please ensure it's running and accessible.`) - } - throw error - } -} \ No newline at end of file diff --git a/src/llms/ollamaOAI.js b/src/llms/ollamaOAI.js deleted file mode 100644 index 1ed3dfb..0000000 --- a/src/llms/ollamaOAI.js +++ /dev/null @@ -1,96 +0,0 @@ -// src/llms/ollamaOAI.js - -import { writeFile } from 'node:fs/promises' -import OpenAI from 'openai' - -/** @import { LLMFunction, LlamaModelType } from '../types.js' */ - -/** - * Map of model identifiers to their corresponding names in Ollama - * @type {Record} - */ -const ollamaModels = { - LLAMA_3_2_1B_MODEL: 'llama3.2:1b', - LLAMA_3_2_3B_MODEL: 'llama3.2:3b', - LLAMA_3_1_8B_MODEL: 'llama3.1:8b', - GEMMA_2_2B_MODEL: 'gemma2:2b', - GEMMA_2_9B_MODEL: 'gemma2:9b', - PHI_3_5_MODEL: 'phi3.5:3.8b', - QWEN_2_5_1B_MODEL: 'qwen2.5:1.5b', - QWEN_2_5_3B_MODEL: 'qwen2.5:3b', - QWEN_2_5_7B_MODEL: 'qwen2.5:7b', -} - -const ollamaBaseUrl = 'http://localhost:11434' -const openai = new OpenAI({ - baseURL: `${ollamaBaseUrl}/v1/`, - apiKey: 'ollama', // required but ignored -}) - -/** @type {LLMFunction} */ -/** - * Main function to call the Llama model using the Ollama OpenAI compatibility layer. - * This function checks if the model is available, pulls it if necessary, - * and then proceeds with the chat. - * @param {string} promptAndTranscript - The combined prompt and transcript content. 
- * @param {string} tempPath - The temporary file path to write the LLM output. - * @param {LlamaModelType} [modelName='LLAMA_3_2_1B_MODEL'] - The name of the model to use. - * @returns {Promise} - * @throws {Error} - If an error occurs during processing. - */ -export async function callOllama(promptAndTranscript, tempPath, modelName = 'LLAMA_3_2_1B_MODEL') { - try { - // Map the model name to the Ollama model identifier - const ollamaModelName = ollamaModels[modelName] || 'llama3.2:1b' - - // Check if the model is available - const modelsResponse = await fetch(`${ollamaBaseUrl}/api/tags`) - const models = await modelsResponse.json() - const isAvailable = models.models.some(model => model.name === ollamaModelName) - - // If the model is not available, pull it - if (!isAvailable) { - console.log(`Model ${ollamaModelName} not found. Pulling it now...`) - const pullResponse = await fetch(`${ollamaBaseUrl}/api/pull`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name: ollamaModelName }), - }) - - const reader = pullResponse.body.getReader() - const decoder = new TextDecoder() - - while (true) { - const { done, value } = await reader.read() - if (done) break - const chunk = decoder.decode(value) - const lines = chunk.split('\n') - for (const line of lines) { - if (line.trim()) { - const data = JSON.parse(line) - console.log(`Pulling ${ollamaModelName}: ${data.status}`) - } - } - } - - console.log(`Model ${ollamaModelName} successfully pulled.`) - } - - // Call the Ollama chat API using OpenAI client - const response = await openai.chat.completions.create({ - model: ollamaModelName, - messages: [{ role: 'user', content: promptAndTranscript }], - }) - - // Extract the assistant's reply - const assistantReply = response.choices[0].message.content - - // Write the response to the output file - await writeFile(tempPath, assistantReply) - console.log(`\nTranscript saved to:\n - ${tempPath}`) - console.log(`\nModel used:\n - ${ollamaModelName}\n`) - } catch (error) { - console.error('Error in callOllama:', error) - throw error - } -} \ No newline at end of file diff --git a/src/transcription/assembly.js b/src/transcription/assembly.js index aa444c6..4dad688 100644 --- a/src/transcription/assembly.js +++ b/src/transcription/assembly.js @@ -17,7 +17,7 @@ import { AssemblyAI } from 'assemblyai' export async function callAssembly(finalPath, transcriptOpt, options) { // Check if the ASSEMBLY_API_KEY environment variable is set if (!env.ASSEMBLY_API_KEY) { - throw new Error('ASSEMBLY_API_KEY environment variable is not set.') + throw new Error('ASSEMBLY_API_KEY environment variable is not set. 
Please set it to your AssemblyAI API key.') } // Initialize the AssemblyAI client with API key from environment variables @@ -79,7 +79,7 @@ export async function callAssembly(finalPath, transcriptOpt, options) { return txtContent } catch (error) { // Log any errors that occur during the transcription process - console.error('Error processing the transcription:', error) + console.error(`Error processing the transcription: ${error.message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/deepgram.js b/src/transcription/deepgram.js index 9a6ff3d..7621085 100644 --- a/src/transcription/deepgram.js +++ b/src/transcription/deepgram.js @@ -14,7 +14,7 @@ import { createClient } from '@deepgram/sdk' export async function callDeepgram(input, id) { // Check if the DEEPGRAM_API_KEY environment variable is set if (!env.DEEPGRAM_API_KEY) { - throw new Error('DEEPGRAM_API_KEY environment variable is not set.') + throw new Error('DEEPGRAM_API_KEY environment variable is not set. Please set it to your Deepgram API key.') } // Initialize the Deepgram client with the API key from environment variables @@ -48,7 +48,7 @@ export async function callDeepgram(input, id) { console.log(`\nTranscript saved:\n - ${id}.txt`) } catch (error) { // Log any errors that occur during the transcription process - console.error('Error processing the transcription:', error) + console.error(`Error processing the transcription: ${error.message}`) throw error // Re-throw the error for handling in the calling function } } \ No newline at end of file diff --git a/src/transcription/whisper.js b/src/transcription/whisper.js index 0a1ffdf..5bc3168 100644 --- a/src/transcription/whisper.js +++ b/src/transcription/whisper.js @@ -14,12 +14,12 @@ const execPromise = promisify(exec) * @type {Record} */ const WHISPER_MODELS = { - 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', - 'base': 'ggml-base.bin', 'base.en': 'ggml-base.en.bin', - 'small': 'ggml-small.bin', 'small.en': 'ggml-small.en.bin', - 'medium': 'ggml-medium.bin', 'medium.en': 'ggml-medium.en.bin', + tiny: 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', + base: 'ggml-base.bin', 'base.en': 'ggml-base.en.bin', + small: 'ggml-small.bin', 'small.en': 'ggml-small.en.bin', + medium: 'ggml-medium.bin', 'medium.en': 'ggml-medium.en.bin', 'large-v1': 'ggml-large-v1.bin', 'large-v2': 'ggml-large-v2.bin', - 'large': 'ggml-large-v2.bin', + large: 'ggml-large-v2.bin', } /** @@ -32,34 +32,42 @@ const WHISPER_MODELS = { */ export async function callWhisper(finalPath, transcriptOpt, options) { try { - /** @type {WhisperModelType} */ - const whisperModel = options.whisper || options.whisperDocker || 'base' - if (!(whisperModel in WHISPER_MODELS)) { - throw new Error(`Unknown model type: ${whisperModel}`) - } + // Get the whisper model from options or use 'base' as default + const whisperModel = options.whisperModel || 'base' + + // Check if the selected model is valid + if (!(whisperModel in WHISPER_MODELS)) throw new Error(`Unknown model type: ${whisperModel}`) + + // Get the model file name const modelName = WHISPER_MODELS[whisperModel] + + // Adjust download model name for 'large' model + const downloadModelName = whisperModel === 'large' ? 
'large-v2' : whisperModel - // Call the appropriate Whisper function based on the transcription service - if (transcriptOpt === 'whisperDocker') { - await callWhisperDocker(finalPath, modelName, whisperModel) - } else { - await callWhisperMain(finalPath, modelName, whisperModel) - } + // Call appropriate Whisper function based on transcriptOpt + await (transcriptOpt === 'whisperDocker' ? callWhisperDocker : callWhisperMain)(finalPath, modelName, downloadModelName) - // Read, process, and format the generated LRC file + // Read the generated LRC file const lrcContent = await readFile(`${finalPath}.lrc`, 'utf8') + + // Process and format the LRC content const txtContent = lrcContent.split('\n') - .filter((line) => !line.startsWith('[by:whisper.cpp]')) - .map((line) => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) + .filter(line => !line.startsWith('[by:whisper.cpp]')) // Remove whisper.cpp attribution + .map(line => line.replace(/\[(\d{2,3}):(\d{2})\.(\d{2})\]/g, (_, p1, p2) => `[${p1}:${p2}]`)) // Simplify timestamp format .join('\n') // Write the formatted content to a text file await writeFile(`${finalPath}.txt`, txtContent) - console.log(`Transcript transformation completed:\n - ${finalPath}.txt`) + + // Log completion message + console.log(` - Transcript transformation completed at ${finalPath}.txt`) + + // Return the processed content return txtContent } catch (error) { + // Log any errors and exit the process console.error('Error in callWhisper:', error) - throw error + process.exit(1) } } @@ -71,46 +79,32 @@ export async function callWhisper(finalPath, transcriptOpt, options) { * @returns {Promise} * @throws {Error} - If an error occurs during Docker transcription. */ -async function callWhisperDocker(finalPath, modelName, whisperModel) { +async function callWhisperDocker(finalPath, modelName, downloadModelName) { + // Define constants for Docker setup const WHISPER_CONTAINER_NAME = 'autoshow-whisper-1' const CONTENT_DIR = '/app/content' const MODELS_DIR = '/app/models' const modelPathContainer = `${MODELS_DIR}/${modelName}` try { - // Check if the Whisper container is running, start it if not - try { - await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) - console.log('\nWhisper container is already running.') - } catch { - console.log('\nWhisper container is not running. 
Starting it...') - await execPromise('docker-compose up -d whisper') - console.log('Whisper container started successfully.') - } + // Check if Whisper container is running, start it if not + await execPromise(`docker ps | grep ${WHISPER_CONTAINER_NAME}`) + .catch(() => execPromise('docker-compose up -d whisper')) // Check if the model exists in the container, download if not - try { - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) - console.log(`\nWhisper.cpp ${whisperModel} model found:`) - console.log(` - ${modelName} model selected\n - Model located at ${modelPathContainer}`) - } catch { - console.log(`\nWhisper.cpp ${whisperModel} model not found in container:`) - console.log(` - ${modelName} model selected\n - Model downloading to ${modelPathContainer}`) - await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${whisperModel}`) - console.log(` - Model downloaded successfully`) - } - - // Execute Whisper transcription in the Docker container + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} test -f ${modelPathContainer}`) + .catch(() => execPromise(`docker exec ${WHISPER_CONTAINER_NAME} ${MODELS_DIR}/download-ggml-model.sh ${downloadModelName}`)) + + // Get the base filename const fileName = basename(finalPath) - await execPromise( - `docker exec ${WHISPER_CONTAINER_NAME} /app/main \ - -m ${modelPathContainer} \ - -f ${join(CONTENT_DIR, `${fileName}.wav`)} \ - -of ${join(CONTENT_DIR, fileName)} \ - --output-lrc` - ) - console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) + + // Execute Whisper transcription in Docker + await execPromise(`docker exec ${WHISPER_CONTAINER_NAME} /app/main -m ${modelPathContainer} -f ${join(CONTENT_DIR, `${fileName}.wav`)} -of ${join(CONTENT_DIR, fileName)} --output-lrc`) + + // Log completion message + console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) } catch (error) { + // Log any errors and re-throw console.error('Error in callWhisperDocker:', error) throw error } @@ -124,40 +118,31 @@ async function callWhisperDocker(finalPath, modelName, whisperModel) { * @returns {Promise} * @throws {Error} - If an error occurs during transcription. */ -async function callWhisperMain(finalPath, modelName, whisperModel) { +async function callWhisperMain(finalPath, modelName, downloadModelName) { + // Define the path for the Whisper model const modelPath = `./whisper.cpp/models/${modelName}` try { // Check if whisper.cpp directory exists, clone and build if not await access('./whisper.cpp').catch(async () => { - console.log('\nwhisper.cpp directory not found. 
Cloning and building whisper.cpp...') - // Clone the whisper.cpp repository - await execPromise(`git clone https://github.com/ggerganov/whisper.cpp.git`) - // Build the project - await execPromise(`make -C whisper.cpp`) - // Copy the Dockerfile (adjust the path as needed) - await execPromise(`cp .github/whisper.Dockerfile whisper.cpp/Dockerfile`) - console.log('whisper.cpp cloned and built successfully.') + // Clone, build, and setup whisper.cpp + await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp && cp .github/whisper.Dockerfile whisper.cpp/Dockerfile') }) - console.log('\nwhisper.cpp directory found.') - + // Check if the model exists locally, download if not await access(modelPath).catch(async () => { - console.log(`\nWhisper.cpp ${whisperModel} model not found:`) - console.log(` - ${modelName} model selected\n - Model downloading to ${modelPath}`) - await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - console.log(` - Model downloaded successfully`) + // Download the model + await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${downloadModelName}`) }) - console.log(`\nWhisper.cpp ${whisperModel} model found:`) - console.log(` - ${modelName} model selected\n - Model located at ${modelPath}`) - - // Proceed with transcription - await execPromise( - `./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc` - ) - console.log(`\nTranscript LRC file completed:\n - ${finalPath}.lrc`) + + // Execute Whisper transcription + await execPromise(`./whisper.cpp/main -m "whisper.cpp/models/${modelName}" -f "${finalPath}.wav" -of "${finalPath}" --output-lrc`) + + // Log completion message + console.log(` - Transcript LRC file completed at ${finalPath}.lrc`) } catch (error) { + // Log any errors and re-throw console.error('Error in callWhisperMain:', error) throw error - } + } } \ No newline at end of file diff --git a/src/types.js b/src/types.js index 9d30d56..1323706 100644 --- a/src/types.js +++ b/src/types.js @@ -13,10 +13,9 @@ * @property {string} [file] - Local audio or video file path to process. * @property {string} [rss] - URL of the podcast RSS feed to process. * @property {string[]} [item] - Specific items (audio URLs) from the RSS feed to process. - * @property {boolean} [info] - Flag to generate JSON file with RSS feed information instead of processing items + * @property {boolean} [info] - Flag to generate JSON file with RSS feed information instead of processing items. * @property {boolean} [noCleanUp] - Flag to indicate whether to keep temporary files after processing. - * @property {WhisperModelType} [whisper] - Whisper model type to use for transcription (e.g., 'tiny', 'base'). - * @property {WhisperModelType} [whisperDocker] - Whisper model type to use in Docker for transcription. + * @property {WhisperModelType} [whisperModel] - The Whisper model to use (e.g., 'tiny', 'base'). * @property {boolean} [deepgram] - Flag to use Deepgram for transcription. * @property {boolean} [assembly] - Flag to use AssemblyAI for transcription. * @property {boolean} [speakerLabels] - Flag to use speaker labels in AssemblyAI transcription. @@ -33,6 +32,7 @@ * @property {string} [llamaModel] - Specific Llama model to use. * @property {number} [skip] - Number of items to skip in RSS feed processing. * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). 
+ * @property {boolean} [interactive] - Whether to run in interactive mode. */ /** @@ -56,6 +56,7 @@ * @property {boolean} [noCleanUp] - Whether to keep temporary files after processing. * @property {string} [order] - Order in which to process RSS feed items ('newest' or 'oldest'). * @property {number} [skip] - Number of items to skip in RSS feed processing. + * @property {boolean} [confirmAction] - Whether to proceed with the action. */ /** @@ -169,7 +170,7 @@ */ /** - * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output + * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. * @typedef {Object} PromptSection * @property {string} instruction - The instructions for the section. * @property {string} example - An example output for the section. @@ -214,14 +215,14 @@ */ /** - * Define all available LLM models - * @typedef {'GPT_4o_MINI' | 'GPT_4o' | 'GPT_4_TURBO' | 'GPT_4'} ChatGPTModelType - Define available GPT models - * @typedef {'CLAUDE_3_5_SONNET' | 'CLAUDE_3_OPUS' | 'CLAUDE_3_SONNET' | 'CLAUDE_3_HAIKU'} ClaudeModelType - Define available Claude models - * @typedef {'COMMAND_R' | 'COMMAND_R_PLUS'} CohereModelType - Define available Cohere models - * @typedef {'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'} GeminiModelType - Define available Gemini models - * @typedef {'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'} MistralModelType - Define available Mistral AI models - * @typedef {'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'} OctoModelType - Define available OctoAI models - * @typedef {'LLAMA_3_1_8B_Q4_MODEL' | 'LLAMA_3_1_8B_Q6_MODEL' | 'GEMMA_2_2B_Q4_MODEL' | 'GEMMA_2_2B_Q6_MODEL' | 'TINY_LLAMA_1B_Q4_MODEL' | 'TINY_LLAMA_1B_Q6_MODEL'} LlamaModelType - Define local model configurations + * Define all available LLM models. + * @typedef {'GPT_4o_MINI' | 'GPT_4o' | 'GPT_4_TURBO' | 'GPT_4'} ChatGPTModelType - Define available GPT models. + * @typedef {'CLAUDE_3_5_SONNET' | 'CLAUDE_3_OPUS' | 'CLAUDE_3_SONNET' | 'CLAUDE_3_HAIKU'} ClaudeModelType - Define available Claude models. + * @typedef {'COMMAND_R' | 'COMMAND_R_PLUS'} CohereModelType - Define available Cohere models. + * @typedef {'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'} GeminiModelType - Define available Gemini models. + * @typedef {'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'} MistralModelType - Define available Mistral AI models. + * @typedef {'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'} OctoModelType - Define available OctoAI models. + * @typedef {'LLAMA_3_1_8B_Q4_MODEL' | 'LLAMA_3_1_8B_Q6_MODEL' | 'GEMMA_2_2B_Q4_MODEL' | 'GEMMA_2_2B_Q6_MODEL' | 'TINY_LLAMA_1B_Q4_MODEL' | 'TINY_LLAMA_1B_Q6_MODEL'} LlamaModelType - Define local model configurations. */ /** diff --git a/src/utils/checkDependencies.js b/src/utils/checkDependencies.js new file mode 100644 index 0000000..0ac97df --- /dev/null +++ b/src/utils/checkDependencies.js @@ -0,0 +1,23 @@ +// src/utils/checkDependencies.js + +import { execFile } from 'node:child_process' +import { promisify } from 'node:util' + +const execFilePromise = promisify(execFile) + +/** + * Check if required dependencies are installed. + * @param {string[]} dependencies - List of command-line tools to check. 
+ * @returns {Promise} + */ +export async function checkDependencies(dependencies) { + for (const command of dependencies) { + try { + await execFilePromise(command, ['--version']) + } catch (error) { + throw new Error( + `Dependency '${command}' is not installed or not found in PATH. Please install it to proceed.` + ) + } + } +} \ No newline at end of file diff --git a/src/utils/cleanUpFiles.js b/src/utils/cleanUpFiles.js index 7a15252..fd9a694 100644 --- a/src/utils/cleanUpFiles.js +++ b/src/utils/cleanUpFiles.js @@ -13,15 +13,15 @@ export async function cleanUpFiles(id) { const extensions = ['.wav', '.txt', '.md', '.lrc'] // Log the start of the cleanup process - console.log(`\nTemporary files removed:`) + console.log('\nStep 5 - Cleaning up temporary files...') for (const ext of extensions) { try { await unlink(`${id}${ext}`) - console.log(` - ${id}${ext}`) + console.log(` - Deleted: ${id}${ext}`) } catch (error) { if (error.code !== 'ENOENT') { - console.error(`Error deleting file ${id}${ext}:`, error) + console.error(`Error deleting file ${id}${ext}: ${error.message}`) } // If the file does not exist, silently continue } diff --git a/src/utils/downloadAudio.js b/src/utils/downloadAudio.js index c644857..39884b8 100644 --- a/src/utils/downloadAudio.js +++ b/src/utils/downloadAudio.js @@ -1,5 +1,6 @@ // src/utils/downloadAudio.js +import { checkDependencies } from './checkDependencies.js' import { exec, execFile } from 'node:child_process' import { promisify } from 'node:util' import { readFile, access } from 'node:fs/promises' @@ -20,8 +21,12 @@ const execPromise = promisify(exec) */ export async function downloadAudio(url, filename) { try { + // Check for required dependencies + await checkDependencies(['yt-dlp']) + // Set the final path for the downloaded file const finalPath = `content/${filename}` + console.log('\nStep 2 - Downloading audio...') // Execute yt-dlp to download the audio const { stderr } = await execFilePromise('yt-dlp', [ @@ -32,18 +37,20 @@ export async function downloadAudio(url, filename) { '--postprocessor-args', 'ffmpeg:-ar 16000 -ac 1', '--no-playlist', '-o', `${finalPath}.%(ext)s`, - url + url, ]) // Log any errors from yt-dlp - if (stderr) console.error('yt-dlp errors:', stderr) + if (stderr) { + console.error(`yt-dlp warnings: ${stderr}`) + } // Construct the path of the downloaded file const downloadedFile = `${finalPath}.wav` - console.log(`WAV file downloaded:\n - ${downloadedFile}`) + console.log(` - ${downloadedFile}\n - Audio downloaded successfully.`) return downloadedFile } catch (error) { - console.error('Error during audio download:', error) + console.error(`Error downloading audio: ${error.message}`) throw error } } @@ -59,7 +66,7 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { // Define supported audio and video formats /** @type {Set} */ const supportedFormats = new Set([ - 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm' + 'wav', 'mp3', 'm4a', 'aac', 'ogg', 'flac', 'mp4', 'mkv', 'avi', 'mov', 'webm', ]) try { // Check if the file exists @@ -67,9 +74,6 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { // Read the file into a buffer const buffer = await readFile(filePath) - console.log( - `File read successfully. Buffer length: ${buffer.length}\nDetermining file type...` - ) // Determine the file type const fileType = await fileTypeFromBuffer(buffer) @@ -78,22 +82,19 @@ export async function downloadFileAudio(filePath, sanitizedFilename) { fileType ? 
`Unsupported file type: ${fileType.ext}` : 'Unable to determine file type' ) } - console.log(`Detected file type: ${fileType.ext}`) + console.log(`\nStep 2 - File read successfully and type detected as ${fileType.ext}, converting to WAV...`) const outputPath = `content/${sanitizedFilename}.wav` - // If the file is not already a WAV, convert it - if (fileType.ext !== 'wav') { - await execPromise( - `${ffmpeg} -i "${filePath}" -acodec pcm_s16le -ar 16000 -ac 1 "${outputPath}"` - ) - console.log(`Converted ${filePath} to ${outputPath}`) - } else { - // If it's already a WAV, just copy it - await execPromise(`cp "${filePath}" "${outputPath}"`) - } + + // Convert the file to WAV format + await execPromise( + `${ffmpeg} -i "${filePath}" -ar 16000 -ac 1 -vn "${outputPath}"` + ) + console.log(` - ${outputPath}\n - File converted to WAV format successfully.`) + return outputPath } catch (error) { - console.error('Error in downloadFileAudio:', error.message) + console.error(`Error processing local file: ${error.message}`) throw error } } \ No newline at end of file diff --git a/src/utils/generateMarkdown.js b/src/utils/generateMarkdown.js index 9b1a48c..419422b 100644 --- a/src/utils/generateMarkdown.js +++ b/src/utils/generateMarkdown.js @@ -1,5 +1,6 @@ // src/utils/generateMarkdown.js +import { checkDependencies } from './checkDependencies.js' import { execFile } from 'node:child_process' import { promisify } from 'node:util' import { writeFile } from 'node:fs/promises' @@ -16,7 +17,11 @@ const execFilePromise = promisify(execFile) * @returns {Promise} - The video metadata. */ export async function extractVideoMetadata(url) { + console.log('\nStep 0 - Generating metadata...') try { + // Check for required dependencies + await checkDependencies(['yt-dlp']) + const { stdout } = await execFilePromise('yt-dlp', [ '--restrict-filenames', '--print', '%(webpage_url)s', @@ -25,23 +30,28 @@ export async function extractVideoMetadata(url) { '--print', '%(title)s', '--print', '%(upload_date>%Y-%m-%d)s', '--print', '%(thumbnail)s', - url + url, ]) const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout.trim().split('\n') + // Ensure all metadata is present + if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) { + throw new Error('Incomplete metadata received from yt-dlp.') + } + return { showLink, channel, channelURL, title, - description: "", + description: '', publishDate, - coverImage + coverImage, } } catch (error) { - console.error(`Error extracting metadata for ${url}:`, error) - return null + console.error(`Error extracting metadata for ${url}: ${error.message}`) + throw error } } @@ -53,17 +63,12 @@ export async function extractVideoMetadata(url) { */ export async function generateRSSMarkdown(item) { try { + console.log('\nStep 1 - Generating RSS markdown...') // Destructure the item object const { publishDate, title, coverImage, showLink, channel, channelURL } = item // Sanitize the title for use in the filename - const sanitizedTitle = title - .replace(/[^\w\s-]/g, '') - .trim() - .replace(/[\s_]+/g, '-') - .replace(/-+/g, '-') - .toLowerCase() - .slice(0, 200) + const sanitizedTitle = sanitizeTitle(title) // Construct the filename, path, and front matter for the markdown file const filename = `${publishDate}-${sanitizedTitle}` @@ -82,10 +87,10 @@ export async function generateRSSMarkdown(item) { // Write the front matter to the markdown file await writeFile(`${finalPath}.md`, frontMatter) - console.log(`\nInitial markdown file created:\n - 
${finalPath}.md`) + console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) return { frontMatter, finalPath, filename } } catch (error) { - console.error('Error generating markdown:', error) + console.error(`Error generating markdown for RSS item: ${error.message}`) throw error } } @@ -98,29 +103,15 @@ export async function generateRSSMarkdown(item) { */ export async function generateFileMarkdown(filePath) { try { + console.log('\nStep 1 - Generating file markdown...') // Extract the original filename from the full file path const originalFilename = basename(filePath) - // Get the file extension - const fileExtension = extname(originalFilename) - // Remove the file extension from the original filename - const filenameWithoutExt = originalFilename.slice(0, -fileExtension.length) + const filenameWithoutExt = originalFilename.replace(extname(originalFilename), '') // Sanitize the filename - const sanitizedFilename = filenameWithoutExt - // Replace any character that's not alphanumeric, whitespace, or hyphen with a hyphen - .replace(/[^\w\s-]/g, '-') - // Trim whitespace from both ends - .trim() - // Replace any sequence of whitespace or underscores with a single hyphen - .replace(/[\s_]+/g, '-') - // Replace any sequence of multiple hyphens with a single hyphen - .replace(/-+/g, '-') - // Convert to lowercase - .toLowerCase() - // Limit the length to 200 characters - .slice(0, 200) + const sanitizedFilename = sanitizeTitle(filenameWithoutExt) // Construct the final path for the markdown file const finalPath = `content/${sanitizedFilename}` @@ -142,13 +133,13 @@ export async function generateFileMarkdown(filePath) { await writeFile(`${finalPath}.md`, frontMatter) // Log the creation of the markdown file - console.log(`\nInitial markdown file created:\n - ${finalPath}.md`) + console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) // Return an object with the generated data return { frontMatter, finalPath, filename: sanitizedFilename } } catch (error) { // Log any errors that occur during the process - console.error('Error generating markdown for file:', error) + console.error(`Error generating markdown for file: ${error.message}`) // Re-throw the error to be handled by the calling function throw error } @@ -162,6 +153,10 @@ export async function generateFileMarkdown(filePath) { */ export async function generateMarkdown(url) { try { + console.log('\nStep 1 - Generating video markdown...') + // Check for required dependencies + await checkDependencies(['yt-dlp']) + // Execute yt-dlp to get video information const { stdout } = await execFilePromise('yt-dlp', [ '--restrict-filenames', @@ -171,7 +166,7 @@ export async function generateMarkdown(url) { '--print', '%(webpage_url)s', '--print', '%(channel)s', '--print', '%(uploader_url)s', - url + url, ]) // Parse the output from yt-dlp @@ -179,19 +174,13 @@ export async function generateMarkdown(url) { formattedDate, title, thumbnail, webpage_url, channel, uploader_url ] = stdout.trim().split('\n') - // Check for undefined variables + // Ensure all metadata is present if (!formattedDate || !title || !thumbnail || !webpage_url || !channel || !uploader_url) { - throw new Error('Missing video metadata from yt-dlp output') + throw new Error('Incomplete metadata received from yt-dlp.') } // Sanitize the title for use in the filename - const sanitizedTitle = title - .replace(/[^\w\s-]/g, '') - .trim() - .replace(/[\s_]+/g, '-') - .replace(/-+/g, '-') - .toLowerCase() - .slice(0, 200) + const 
sanitizedTitle = sanitizeTitle(title) // Construct the filename, path, and front matter for the markdown file const filename = `${formattedDate}-${sanitizedTitle}` @@ -210,12 +199,25 @@ export async function generateMarkdown(url) { // Write the front matter to the markdown file await writeFile(`${finalPath}.md`, frontMatter) - console.log( - `\nFrontmatter created:\n\n${frontMatter}\nInitial markdown file created:\n - ${finalPath}.md` - ) + console.log(` - ${finalPath}.md\n - Front matter successfully created and saved.`) return { frontMatter, finalPath, filename } } catch (error) { - console.error('Error generating markdown:', error) + console.error(`Error generating markdown for video: ${error.message}`) throw error } +} + +/** + * Sanitize the title to create a safe filename. + * @param {string} title - The title to sanitize. + * @returns {string} - The sanitized title. + */ +function sanitizeTitle(title) { + return title + .replace(/[^\w\s-]/g, '') + .trim() + .replace(/[\s_]+/g, '-') + .replace(/-+/g, '-') + .toLowerCase() + .slice(0, 200) } \ No newline at end of file diff --git a/src/utils/runLLM.js b/src/utils/runLLM.js index 7bb3711..0d60a1c 100644 --- a/src/utils/runLLM.js +++ b/src/utils/runLLM.js @@ -36,36 +36,41 @@ const llmFunctions = { */ export async function runLLM(finalPath, frontMatter, llmOpt, options) { try { - // Read the transcript file, generate the prompt, and combine it with the transcript + // Read the transcript file const tempTranscript = await readFile(`${finalPath}.txt`, 'utf8') const transcript = `## Transcript\n\n${tempTranscript}` - const promptAndTranscript = `${generatePrompt(options.prompt)}${transcript}` - + + // Generate the prompt + const prompt = generatePrompt(options.prompt) + const promptAndTranscript = `${prompt}${transcript}` + if (llmOpt) { - /** Get the appropriate LLM function based on the option - * @type {LLMFunction} - */ + console.log(`\nStep 4 - Processing with ${llmOpt} Language Model...`) + /** Get the appropriate LLM function based on the option + * @type {LLMFunction} + */ const llmFunction = llmFunctions[llmOpt] - if (!llmFunction) throw new Error(`Invalid LLM option: ${llmOpt}`) - + if (!llmFunction) { + throw new Error(`Invalid LLM option: ${llmOpt}`) + } // Set up a temporary file path and call the LLM function const tempPath = `${finalPath}-${llmOpt}-temp.md` await llmFunction(promptAndTranscript, tempPath, options[llmOpt]) - - // Read generated content and write frontmatter, show notes, and transcript to final markdown file + console.log(` - Transcript saved to temporary file at ${tempPath}`) + // Read generated content and write front matter, show notes, and transcript to final markdown file const showNotes = await readFile(tempPath, 'utf8') await writeFile(`${finalPath}-${llmOpt}-shownotes.md`, `${frontMatter}\n${showNotes}\n${transcript}`) - // Remove the temporary file await unlink(tempPath) - console.log(`Updated markdown file:\n - ${finalPath}-${llmOpt}-shownotes.md`) + console.log(` - ${finalPath}-${llmOpt}-shownotes.md\n - Generated show notes saved to markdown file.`) } else { + console.log('\nStep 4 - No LLM selected, skipping processing...') // If no LLM is selected, just write the prompt and transcript await writeFile(`${finalPath}-prompt.md`, `${frontMatter}\n${promptAndTranscript}`) - console.log(`\nFinal markdown file with prompt:\n - ${finalPath}-prompt.md`) + console.log(` - ${finalPath}-prompt.md\n - Prompt and transcript saved to markdown file.`) } } catch (error) { - console.error('Error running LLM:', 
error) + console.error(`Error running Language Model: ${error.message}`) throw error } } \ No newline at end of file diff --git a/src/utils/runTranscription.js b/src/utils/runTranscription.js index d5a2050..37d95fb 100644 --- a/src/utils/runTranscription.js +++ b/src/utils/runTranscription.js @@ -18,7 +18,7 @@ import { callAssembly } from '../transcription/assembly.js' */ export async function runTranscription( finalPath, - transcriptOpt, + transcriptOpt = 'whisper', options = {}, frontMatter = '' ) { @@ -28,27 +28,25 @@ export async function runTranscription( // Choose the transcription service based on the provided option switch (transcriptOpt) { case 'deepgram': - // Use Deepgram for transcription and read the transcription result + console.log('\nStep 3 - Using Deepgram for transcription...') await callDeepgram(`${finalPath}.wav`, finalPath) txtContent = await readFile(`${finalPath}.txt`, 'utf8') break case 'assembly': - // Use AssemblyAI for transcription and pass options + console.log('\nStep 3 - Using AssemblyAI for transcription...') txtContent = await callAssembly(finalPath, transcriptOpt, options) break case 'whisperDocker': case 'whisper': - // Use Whisper (either local or Docker version) for transcription + console.log('\nStep 3 - Using Whisper for transcription...') txtContent = await callWhisper(finalPath, transcriptOpt, options) break default: - // If no service is specified, default to Whisper - console.log('No transcription service specified, defaulting to Whisper') - txtContent = await callWhisper(finalPath, transcriptOpt, options) - break + console.error(`Error: Unsupported transcription option '${transcriptOpt}'.`) + throw new Error('Unsupported transcription option.') } let mdContent = frontMatter @@ -57,10 +55,11 @@ export async function runTranscription( const existingContent = await readFile(`${finalPath}.md`, 'utf8') mdContent += existingContent } catch (error) { - // If the file doesn't exist, ignore the error if (error.code !== 'ENOENT') { - throw error // Re-throw if it's not a 'file not found' error + console.error(`Error reading markdown file: ${error.message}`) + throw error } + // If the file does not exist, proceed without appending } // Combine existing markdown content with the transcript @@ -68,13 +67,11 @@ export async function runTranscription( // Write final markdown file, including existing content and the new transcript await writeFile(`${finalPath}.md`, finalContent) - console.log(`Markdown file with frontmatter and transcript:\n - ${finalPath}.md`) + console.log(` - Markdown file updated with transcript at ${finalPath}.md`) - // Return final content including the original markdown and transcript return finalContent } catch (error) { - // Log any errors that occur during the transcription process - console.error('Error in runTranscription:', error) - throw error // Re-throw the error for handling by the calling function + console.error(`Error in transcription process: ${error.message}`) + throw error } } \ No newline at end of file
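The refactored `callOllama` above resolves its endpoint from the `OLLAMA_HOST` and `OLLAMA_PORT` environment variables before listing, pulling, and chatting with the selected model. The sketch below is illustrative only and is not part of the diff: the prompt text and output path are placeholder values, and the model key is one of the entries from the `ollamaModels` map used in these files.

```js
// Illustrative sketch only (not part of the diff): exercising the refactored callOllama.
// Assumes Node runs this as an ES module from the repo root; the prompt and output path are placeholders.
import { callOllama } from './src/llms/ollama.js'

// callOllama falls back to these values when the variables are unset
process.env.OLLAMA_HOST = 'localhost'
process.env.OLLAMA_PORT = '11434'

// Combined prompt and transcript, mirroring the shape assembled by runLLM
const promptAndTranscript = 'Summarize the key points.\n\n## Transcript\n\nHello and welcome to the show.'

// Writes the assistant's reply to the temporary markdown file passed as the second argument
await callOllama(promptAndTranscript, 'content/example-ollama-temp.md', 'LLAMA_3_2_1B_MODEL')
```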