Skip to content

Commit

Permalink
Merge pull request #41 from ajcwebdev/docker
Browse files Browse the repository at this point in the history
SQLite and Frontend Persistence
  • Loading branch information
ajcwebdev authored Nov 18, 2024
2 parents b7620bf + 4155122 commit 6f0fb8a
Show file tree
Hide file tree
Showing 44 changed files with 991 additions and 409 deletions.
18 changes: 18 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# .dockerignore

# Ignore node_modules (should be installed inside the container)
node_modules

# Ignore content directory (will be mounted as a volume)
content

# Ignore whisper.cpp directory (not needed for autoshow build)
whisper.cpp

# Ignore git metadata
.git
.gitignore

# Ignore logs and temporary files
*.log
tmp/
37 changes: 23 additions & 14 deletions .github/whisper.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,30 +1,39 @@
# .github/whisper.cpp/Dockerfile
# .github/whisper.Dockerfile

FROM --platform=linux/arm64 ubuntu:22.04 AS build
# Use the Ubuntu 22.04 base image for the build stage
FROM ubuntu:22.04 AS build

# Set the working directory
WORKDIR /app

# Install build dependencies
RUN apt-get update && \
apt-get install -y build-essential libopenblas-dev pkg-config \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
apt-get install -y build-essential libopenblas-dev pkg-config git wget && \
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY .. .
# Clone the whisper.cpp repository into the container
RUN git clone https://github.com/ggerganov/whisper.cpp.git .

ENV CFLAGS="-march=armv8-a"
ENV CXXFLAGS="-march=armv8-a"
# Build the whisper.cpp project with OpenBLAS support
RUN make clean && make GGML_OPENBLAS=1

RUN make clean

RUN make GGML_OPENBLAS=1

FROM --platform=linux/arm64 ubuntu:22.04 AS runtime
# Use the Ubuntu 22.04 base image for the runtime stage
FROM ubuntu:22.04 AS runtime

# Set the working directory
WORKDIR /app

# Install runtime dependencies
RUN apt-get update && \
apt-get install -y curl ffmpeg libopenblas-dev \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
apt-get install -y curl ffmpeg libopenblas-dev git wget && \
rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Copy the built binaries and scripts from the build stage
COPY --from=build /app /app

# Ensure that the main executable and scripts have execute permissions
RUN chmod +x /app/main && \
chmod +x /app/models/download-ggml-model.sh

# Set the entrypoint to bash
ENTRYPOINT [ "bash", "-c" ]
16 changes: 16 additions & 0 deletions .github/whisper.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# .github/whisper.dockerignore

# Ignore models directory (will be mounted as a volume)
models

# Ignore git metadata
.git
.gitignore

# Ignore build artifacts
build/
bin/

# Ignore any logs or temporary files
*.log
tmp/
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ NEW.md
TODO.md
nemo_msdd_configs
temp_outputs
tsconfig.tsbuildinfo
tsconfig.tsbuildinfo
show_notes.db
22 changes: 15 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Dockerfile

FROM node:20
# Use the official Node.js 22 image as the base
FROM node:22

# Install ffmpeg, git, make, and curl
# Install necessary packages
RUN apt-get update && apt-get install -y ffmpeg git make curl docker.io

# Set the working directory
WORKDIR /usr/src/app

# Install yt-dlp
Expand All @@ -13,16 +16,21 @@ RUN curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o
# Install tsx globally
RUN npm install -g tsx

# Copy package.json, package-lock.json, and install dependencies
# Copy only package.json and package-lock.json to install dependencies
COPY package*.json ./

# Install Node.js dependencies
RUN npm ci

# Copy the rest of the application and create a directory for content
COPY . .
RUN mkdir -p /usr/src/app/content
# Copy the application source code (excluding files specified in .dockerignore)
COPY src ./src
COPY packages ./packages
COPY docker-entrypoint.sh ./

# Make sure the entrypoint script is executable and set the entrypoint
# Ensure the entrypoint script is executable
RUN chmod +x /usr/src/app/docker-entrypoint.sh

# Set the entrypoint
ENTRYPOINT ["/usr/src/app/docker-entrypoint.sh"]

# Default command (can be overridden)
Expand Down
47 changes: 44 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,43 +1,84 @@
# docker-compose.yml

services:
# Main application service
autoshow:
# Build configuration for this service
build:
# Use the current directory as build context
context: .
# Use the Dockerfile in the root directory
dockerfile: Dockerfile
# Mount these directories/files from host to container
volumes:
# Share content directory between host and container
- ./content:/usr/src/app/content
# Mount environment variables file
- ./.env:/usr/src/app/.env
# Allow container to interact with Docker daemon
- /var/run/docker.sock:/var/run/docker.sock
# Specify services that must start before this one
depends_on:
# Depends on the whisper service
- whisper
# Depends on the ollama service
- ollama
# Set environment variables for this container
environment:
# Configure Ollama host to point to the ollama service
- OLLAMA_HOST=ollama
# Connect to the autoshownet network
networks:
- autoshownet

# Speech-to-text service using whisper.cpp
whisper:
# Build configuration for whisper service
build:
# Use whisper.cpp directory as build context
context: ./whisper.cpp
# Use the Dockerfile in the whisper.cpp directory
dockerfile: Dockerfile
# Mount these directories between host and container
volumes:
# Share content directory
- ./content:/app/content
- ./whisper.cpp/models:/app/models
# Use a named volume for models
- whisper-models:/app/models
# Keep container running (placeholder command)
command: tail -f /dev/null
# Allocate a pseudo-TTY
tty: true
# Keep STDIN open
stdin_open: true
# Connect to the autoshownet network
networks:
- autoshownet

# Large language model service
ollama:
# Use the official Ollama image
image: ollama/ollama
# Mount these volumes
volumes:
- ollama:/root/.ollama
# Use a named volume for models
- ollama-models:/root/.ollama
# Set environment variables
environment:
# Make Ollama accessible on all network interfaces
- OLLAMA_HOST=0.0.0.0
# Connect to the autoshownet network
networks:
- autoshownet

# Define networks used by the services
networks:
# Custom network for internal communication
autoshownet:
# Use bridge network driver (standard Docker network type)
driver: bridge

# Define named volumes used by the services
volumes:
ollama:
ollama-models:
whisper-models:
2 changes: 1 addition & 1 deletion docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

# docker-entrypoint.sh
# Run the autoshow command with all arguments passed to the container
exec tsx --env-file=.env --no-warnings src/autoshow.ts "$@"
exec tsx --env-file=.env --no-warnings --experimental-sqlite src/autoshow.ts "$@"
10 changes: 10 additions & 0 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,16 @@ This will start `whisper.cpp`, Ollama, and the AutoShow Commander CLI in their o
npm run docker-up
```

Inspect various aspects of the containers, images, and volumes:

```bash
docker images && docker ps -a && docker system df -v && docker volume ls
docker volume inspect autoshow_ollama
du -sh ./whisper.cpp/models
docker history autoshow-autoshow:latest
docker history autoshow-whisper:latest
```

Replace `as` with `docker` to run most of the other commands explained in this document. The Docker setup does not support all options at this time, notably `--whisperPython` and `--whisperDiarization`.

```bash
Expand Down
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"autoshow": "./dist/autoshow.js"
},
"scripts": {
"tsx:base": "tsx --env-file=.env --no-warnings",
"tsx:base": "tsx --env-file=.env --no-warnings --experimental-sqlite",
"setup": "bash ./scripts/setup.sh",
"setup-python": "bash ./scripts/setup-python.sh",
"autoshow": "npm run tsx:base -- src/autoshow.ts",
Expand All @@ -38,7 +38,7 @@
"docker-up": "docker compose up --build -d --remove-orphans --no-start",
"ds": "docker compose images && docker compose ls",
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
"serve": "npm run tsx:base -- --watch packages/server/index.ts",
"serve": "npm run tsx:base -- --watch --experimental-sqlite packages/server/index.ts",
"fetch-local": "npm run tsx:base -- packages/server/tests/fetch-local.ts",
"fetch-all": "npm run tsx:base -- packages/server/tests/fetch-all.ts",
"t": "npm run test-local",
Expand All @@ -65,11 +65,13 @@
"fastify": "5.0.0",
"file-type": "19.6.0",
"inquirer": "12.0.1",
"js-yaml": "^4.1.0",
"ollama": "0.5.9",
"openai": "4.68.4"
},
"devDependencies": {
"@types/inquirer": "9.0.7",
"@types/js-yaml": "^4.0.9",
"@types/node": "22.8.1",
"tsx": "4.19.1",
"typedoc": "0.26.10",
Expand Down
16 changes: 16 additions & 0 deletions packages/server/db.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// packages/server/db.ts

import { DatabaseSync } from 'node:sqlite'

// Initialize the database connection
// Uses Node's built-in SQLite driver (node:sqlite), which is experimental and
// requires the --experimental-sqlite flag (supplied via the project's tsx:base
// script and docker-entrypoint.sh). The database file is created in the
// process working directory if it does not already exist.
export const db = new DatabaseSync('show_notes.db', { open: true })

// Create the show_notes table if it doesn't exist
// Runs once at module import time; id is the AUTOINCREMENT primary key used
// by the /show-notes/:id route, and date is stored as TEXT (used for ORDER BY
// in the list route — assumes a lexicographically sortable format, TODO confirm).
db.exec(`
CREATE TABLE IF NOT EXISTS show_notes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
date TEXT NOT NULL,
content TEXT NOT NULL
)
`)
4 changes: 4 additions & 0 deletions packages/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import { handleFileRequest } from './routes/file.js'
import { handleRSSRequest } from './routes/rss.js'
import { l } from '../../src/globals.js'
import { env } from 'node:process'
import { getShowNotes } from './routes/showNotes.js'
import { getShowNote } from './routes/showNote.js'

// Set the port from environment variable or default to 3000
const port = Number(env.PORT) || 3000
Expand Down Expand Up @@ -37,6 +39,8 @@ async function start() {
fastify.post('/urls', handleURLsRequest)
fastify.post('/file', handleFileRequest)
fastify.post('/rss', handleRSSRequest)
fastify.get('/show-notes', getShowNotes)
fastify.get('/show-notes/:id', getShowNote)

try {
// Start the server and listen on the specified port
Expand Down
19 changes: 19 additions & 0 deletions packages/server/routes/showNote.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// packages/server/routes/showNote.ts

import { db } from '../db.js'

/**
 * Fastify handler for GET /show-notes/:id.
 *
 * Looks up a single show note by its integer primary key and sends it as
 * `{ showNote }`. Responds 400 for a non-numeric id, 404 when no row matches,
 * and 500 on unexpected database errors.
 */
export const getShowNote = async (request, reply) => {
  try {
    const { id } = request.params
    // Validate up front: a non-numeric id can never match the INTEGER primary
    // key, so answer with a clear 400 instead of a misleading 404.
    const numericId = Number(id)
    if (!Number.isInteger(numericId)) {
      reply.status(400).send({ error: 'Invalid show note id' })
      return
    }
    // Fetch the show note from the database
    const showNote = db.prepare(`SELECT * FROM show_notes WHERE id = ?`).get(numericId)
    if (showNote) {
      reply.send({ showNote })
    } else {
      reply.status(404).send({ error: 'Show note not found' })
    }
  } catch (error) {
    console.error('Error fetching show note:', error)
    reply.status(500).send({ error: 'An error occurred while fetching the show note' })
  }
}
14 changes: 14 additions & 0 deletions packages/server/routes/showNotes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// packages/server/routes/showNotes.ts

import { db } from '../db.js'

/**
 * Fastify handler for GET /show-notes.
 *
 * Returns every show note as `{ showNotes }`, most recent date first;
 * responds 500 if the database query fails.
 */
export const getShowNotes = async (request, reply) => {
  try {
    // Pull all rows, newest first
    const rows = db
      .prepare(`SELECT * FROM show_notes ORDER BY date DESC`)
      .all()
    reply.send({ showNotes: rows })
  } catch (err) {
    console.error('Error fetching show notes:', err)
    reply.status(500).send({ error: 'An error occurred while fetching show notes' })
  }
}
8 changes: 5 additions & 3 deletions packages/server/routes/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ export const handleVideoRequest = async (
options,
})

// Call processVideo with the mapped options and extracted URL
await processVideo(options, youtubeUrl, llmServices, transcriptServices)
// Call processVideo and get the content
const content = await processVideo(options, youtubeUrl, llmServices, transcriptServices)

l('\nprocessVideo completed successfully')
reply.send({ message: 'Video processed successfully.' })

// Send the content back in the response
reply.send({ content })
} catch (error) {
err('Error processing video:', error)
reply.status(500).send({ error: 'An error occurred while processing the video' })
Expand Down
10 changes: 5 additions & 5 deletions packages/server/utils/reqToOpts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ export function reqToOpts(requestData: any): {

// Set transcript options based on the selected service
if (transcriptServices === 'whisper') {
// Set the Whisper model or default to 'base'
options.whisper = requestData.whisperModel || 'base'
// Set the Whisper model or default to 'large-v3-turbo'
options.whisper = requestData.whisperModel || 'large-v3-turbo'
} else if (transcriptServices === 'whisperDocker') {
options.whisperDocker = requestData.whisperModel || 'base'
options.whisperDocker = requestData.whisperModel || 'large-v3-turbo'
} else if (transcriptServices === 'whisperPython') {
options.whisperPython = requestData.whisperModel || 'base'
options.whisperPython = requestData.whisperModel || 'large-v3-turbo'
} else if (transcriptServices === 'whisperDiarization') {
options.whisperDiarization = requestData.whisperModel || 'base'
options.whisperDiarization = requestData.whisperModel || 'large-v3-turbo'
} else if (transcriptServices === 'deepgram') {
options.deepgram = true
} else if (transcriptServices === 'assembly') {
Expand Down
Loading

0 comments on commit 6f0fb8a

Please sign in to comment.