# Evaluation #7
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow runs `make evals` for the current project once per day, and can
# also be started manually from the GitHub UI.
name: Evaluation

on:
  schedule:
    # Cron schedules are evaluated in UTC: 14:37 UTC is 7:37 AM Pacific during
    # daylight time (PDT) and 6:37 AM Pacific during standard time (PST).
    - cron: "37 14 * * *"
  workflow_dispatch:  # Allows triggering the workflow manually in GitHub UI

# If another run in the same group (same workflow and ref) starts while this
# workflow is still running, cancel the earlier run in favor of the next run.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  integration-tests:
    name: Evaluation
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so the version stays a string rather than a float.
        python-version: ["3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          uv sync --all-extras --dev
          uv pip install -U pytest-asyncio vcrpy
      - name: Run integration tests
        env:
          # API keys are read from repository secrets, never hard-coded here.
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }}
          # Environment values are strings; quote to make that explicit.
          LANGSMITH_TRACING: "true"
        run: |
          make evals