# Skip to content

# Evaluation

# Evaluation #5

# Workflow file for this run

# This workflow runs the project's evaluation suite (`make evals`) once per day.
name: Evaluation

on:
  schedule:
    # Cron schedules are evaluated in UTC: 14:37 UTC is 7:37 AM Pacific during
    # daylight saving time (PDT) and 6:37 AM during standard time (PST).
    - cron: "37 14 * * *"
  # Allows triggering the workflow manually in the GitHub UI.
  workflow_dispatch:

# If another run of this workflow starts on the same ref while an earlier one
# is still in progress, cancel the earlier run in favor of the newer one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  integration-tests:
    name: Evaluation
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so the version stays a string instead of being read as a float.
        python-version: ["3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          uv sync --all-extras --dev
          uv pip install -U pytest-asyncio vcrpy

      - name: Run integration tests
        env:
          # API credentials are read from repository secrets.
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }}
          # Environment variables are strings; quote to avoid boolean typing.
          LANGSMITH_TRACING: "true"
        run: |
          make evals