-
-
Notifications
You must be signed in to change notification settings - Fork 164
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: added eval ci workflow, build eval-full in docker job
- Loading branch information
Showing
2 changed files
with
126 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
name: Evals | ||
|
||
on: | ||
schedule: | ||
- cron: '0 2 * * *' # Run daily at 2 AM UTC | ||
workflow_dispatch: | ||
inputs: | ||
suite: | ||
description: 'Eval suite to run (leave empty for basic suite)' | ||
required: false | ||
default: '' | ||
models: | ||
description: 'Comma-separated list of models to evaluate' | ||
required: false | ||
default: '' | ||
timeout: | ||
description: 'Timeout for each eval in seconds' | ||
required: false | ||
default: '60' | ||
image-suffix: | ||
description: 'Suffix to use for docker image, e.g. -full' | ||
required: false | ||
default: '' | ||
|
||
jobs: | ||
run-evals: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout code | ||
uses: actions/checkout@v3 | ||
with: | ||
ref: eval-results | ||
|
||
- name: Log in to GitHub Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ghcr.io | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
- name: Set environment variables | ||
run: | | ||
echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV} | ||
env: | ||
OWNER: '${{ github.repository_owner }}' | ||
|
||
- name: Pull Docker image | ||
run: | | ||
docker pull ghcr.io/${{ env.OWNER_LC }}/gptme-eval${{ github.event.inputs.image-suffix }}:latest | ||
- name: Write gptme config.toml | ||
shell: bash | ||
env: | ||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | ||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} | ||
run: | | ||
mkdir -p ~/.config/gptme | ||
cat > ~/.config/gptme/config.toml << EOF | ||
[prompt] | ||
about_user = "I am a curious human programmer." | ||
response_preference = "Don't explain basic concepts" | ||
[env] | ||
OPENAI_API_KEY = "$OPENAI_API_KEY" | ||
ANTHROPIC_API_KEY = "$ANTHROPIC_API_KEY" | ||
OPENROUTER_API_KEY = "$OPENROUTER_API_KEY" | ||
EOF | ||
cat ~/.config/gptme/config.toml | ||
chmod -R o=rwx ~/.config/gptme | ||
- name: Run evals | ||
run: | | ||
SUITE="${{ github.event.inputs.suite }}" | ||
MODELS="${{ github.event.inputs.models }}" | ||
TIMEOUT="${{ github.event.inputs.timeout }}" | ||
MODEL_ARGS="" | ||
if [ -n "$MODELS" ]; then | ||
IFS=',' read -ra MODEL_ARRAY <<< "$MODELS" | ||
for MODEL in "${MODEL_ARRAY[@]}"; do | ||
MODEL_ARGS="$MODEL_ARGS -m $MODEL" | ||
done | ||
fi | ||
echo "Running evals for suite: $SUITE" | ||
echo "Models: ${MODELS:-default}" | ||
RESULTS_DIR=$(pwd)/eval_results | ||
echo "Results dir: $RESULTS_DIR" | ||
mkdir -p $RESULTS_DIR | ||
chmod -R o=rwx $RESULTS_DIR # make sure docker can read/write the dir | ||
docker run \ | ||
-v ~/.config/gptme:/home/appuser/.config/gptme \ | ||
-v $RESULTS_DIR:/app/eval_results \ | ||
ghcr.io/${{ env.OWNER_LC }}/gptme-eval${{ github.event.inputs.image-suffix }}:latest \ | ||
$SUITE \ | ||
--timeout $TIMEOUT $MODEL_ARGS | ||
- name: Configure Git | ||
run: | | ||
git config --local user.email "[email protected]" | ||
git config --local user.name "GitHub Action" | ||
- name: Update eval results | ||
run: | | ||
git add -f eval_results | ||
git commit -m "chore: add eval results for run ${{ github.run_number }} [skip ci]" || echo "No changes to commit" | ||
git push origin eval-results |