feat: cuda support

doppeltilde · Jun 11, 2024 · e9ce750 · e9ce750
1 parent 4cd6c98
commit e9ce750
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 2 deletions.
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -18,6 +18,13 @@ permissions:
 jobs:
     build_and_publish:
         runs-on: ubuntu-latest
+        strategy:  # the job runs once per entry listed under include
+          matrix:
+            include:
+              - docker_file: Dockerfile  # consumed by the build step's `file:` input
+                label: "latest"  # tag of the CPU image
+              - docker_file: Dockerfile.cuda  # variant based on nvidia/cuda
+                label: "latest-cuda"  # tag of the NVIDIA GPU image
         steps:
             - uses: actions/checkout@v4
 
@@ -29,6 +36,13 @@ jobs:
               uses: docker/metadata-action@v5
               with:
                 images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+                tags: |  # per-variant suffix keeps CUDA tags distinct from CPU tags
+                  type=ref,event=branch,suffix=${{ matrix.label == 'latest-cuda' && '-cuda' || '' }}
+                  type=ref,event=pr,suffix=${{ matrix.label == 'latest-cuda' && '-cuda' || '' }}
+                  type=ref,event=tag,suffix=${{ matrix.label == 'latest-cuda' && '-cuda' || '' }}
+                  type=sha,prefix=sha-,suffix=${{ matrix.label == 'latest-cuda' && '-cuda' || '' }}
+                labels: |  # custom labels are plain key=value lines
+                  image-type=${{ matrix.label }}
 
             - uses: docker/login-action@v3
               with:
@@ -40,7 +54,10 @@ jobs:
               uses: docker/build-push-action@v5
               with:
                 context: .
+                file: ${{ matrix.docker_file }}  # Dockerfile selected by the matrix entry
                 push: ${{ github.event_name != 'pull_request' }}
-                tags: ${{ steps.meta.outputs.tags }}
-                labels: ${{ steps.meta.outputs.labels }}
+                tags: |  # generated tags plus the variant's own moving tag
+                  ${{ steps.meta.outputs.tags }}
+                  ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.label }}
+                # labels: ${{ steps.meta.outputs.labels }}  (disabled: generated labels are not forwarded)
                 platforms: linux/amd64,linux/arm64
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
@@ -0,0 +1,8 @@
+FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
+WORKDIR /app
+COPY . /app
+RUN apt-get update && \
+    apt-get install -y python3-pip && rm -rf /var/lib/apt/lists/* && \
+    pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121
+CMD ["fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@
 ## Installation
 
 - For ease of use it's recommended to use the provided [docker-compose.yml](https://github.com/doppeltilde/automatic_speech_recognition/blob/main/docker-compose.yml).
+**CPU Support:** Use the `latest` tag.
 ```yml
 services:
   automatic_speech_recognition:
@@ -27,6 +28,34 @@ volumes:
   models:
 ```
 
+**NVIDIA GPU Support:** Use the `latest-cuda` tag.
+```yml
+services:
+  automatic_speech_recognition_cuda:
+    image: ghcr.io/doppeltilde/automatic_speech_recognition:latest-cuda
+    ports:
+      - "8000:8000"  # the image's CMD serves on port 8000
+    volumes:
+      - models:/root/.cache/huggingface/hub:rw  # named volume declared at the bottom
+    environment:  # names only; values are expected from the environment / `.env` file
+      - DEFAULT_ASR_MODEL_NAME
+      - COMPUTE_TYPE
+      - USE_API_KEYS
+      - API_KEYS
+    restart: unless-stopped
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all  # reserve every NVIDIA GPU rather than a fixed number
+              capabilities: [ gpu ]
+
+volumes:
+  models:
+```
+
+
 - Create a `.env` file and set the preferred values.
 ```sh
 DEFAULT_ASR_MODEL_NAME=base