Skip to content

Commit

Permalink
ci: added eval ci workflow, build eval-full in docker job
Browse files Browse the repository at this point in the history
  • Loading branch information
ErikBjare committed Sep 28, 2024
1 parent f42be8f commit 5a54f3e
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 1 deletion.
17 changes: 16 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
context: .
push: true

# Now the eval image
# Now the base eval image
- name: Build and push Docker image (eval)
uses: docker/build-push-action@v3
with:
Expand All @@ -130,6 +130,21 @@ jobs:
context: .
push: true

# Now the full eval image
# NOTE: takes a long time and leads to a big (1GB+) image
#- name: Build and push Docker image (eval-full)
# uses: docker/build-push-action@v3
# with:
# tags: |
# ghcr.io/${{ env.OWNER_LC }}/gptme-eval-full:latest
# ghcr.io/${{ env.OWNER_LC }}/gptme-eval-full:${{ env.SANITIZED_REF_NAME }}
# file: ./scripts/Dockerfile.eval
# context: .
# push: true
# build-args: |
# RUST=yes
# BROWSER=yes

lint:
runs-on: ubuntu-latest
steps:
Expand Down
110 changes: 110 additions & 0 deletions .github/workflows/eval.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
name: Evals

on:
schedule:
- cron: '0 2 * * *' # Run daily at 2 AM UTC
workflow_dispatch:
inputs:
suite:
description: 'Eval suite to run (leave empty for basic suite)'
required: false
default: ''
models:
description: 'Comma-separated list of models to evaluate'
required: false
default: ''
timeout:
description: 'Timeout for each eval in seconds'
required: false
default: '60'
image-suffix:
description: 'Suffix to use for docker image, e.g. -full'
required: false
default: ''

jobs:
run-evals:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: eval-results

- name: Log in to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set environment variables
run: |
echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV}
env:
OWNER: '${{ github.repository_owner }}'

- name: Pull Docker image
run: |
docker pull ghcr.io/${{ env.OWNER_LC }}/gptme-eval${{ github.event.inputs.image-suffix }}:latest
- name: Write gptme config.toml
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
run: |
mkdir -p ~/.config/gptme
cat > ~/.config/gptme/config.toml << EOF
[prompt]
about_user = "I am a curious human programmer."
response_preference = "Don't explain basic concepts"
[env]
OPENAI_API_KEY = "$OPENAI_API_KEY"
ANTHROPIC_API_KEY = "$ANTHROPIC_API_KEY"
OPENROUTER_API_KEY = "$OPENROUTER_API_KEY"
EOF
cat ~/.config/gptme/config.toml
chmod -R o=rwx ~/.config/gptme
- name: Run evals
run: |
SUITE="${{ github.event.inputs.suite }}"
MODELS="${{ github.event.inputs.models }}"
TIMEOUT="${{ github.event.inputs.timeout }}"
MODEL_ARGS=""
if [ -n "$MODELS" ]; then
IFS=',' read -ra MODEL_ARRAY <<< "$MODELS"
for MODEL in "${MODEL_ARRAY[@]}"; do
MODEL_ARGS="$MODEL_ARGS -m $MODEL"
done
fi
echo "Running evals for suite: $SUITE"
echo "Models: ${MODELS:-default}"
RESULTS_DIR=$(pwd)/eval_results
echo "Results dir: $RESULTS_DIR"
mkdir -p $RESULTS_DIR
chmod -R o=rwx $RESULTS_DIR # make sure docker can read/write the dir
docker run \
-v ~/.config/gptme:/home/appuser/.config/gptme \
-v $RESULTS_DIR:/app/eval_results \
ghcr.io/${{ env.OWNER_LC }}/gptme-eval${{ github.event.inputs.image-suffix }}:latest \
$SUITE \
--timeout $TIMEOUT $MODEL_ARGS
- name: Configure Git
run: |
git config --local user.email "[email protected]"
git config --local user.name "GitHub Action"
- name: Update eval results
run: |
git add -f eval_results
git commit -m "chore: add eval results for run ${{ github.run_number }} [skip ci]" || echo "No changes to commit"
git push origin eval-results

0 comments on commit 5a54f3e

Please sign in to comment.