Evaluation #5

	# This workflow runs the project's evaluation suite (`make evals`) once per
	# day on a schedule, and on demand via manual dispatch.

	name: Evaluation

	on:
	  schedule:
	    # Cron schedules are evaluated in UTC: 14:37 UTC is 7:37 AM Pacific
	    # Daylight Time (6:37 AM during Pacific Standard Time).
	    - cron: "37 14 * * *"
	  workflow_dispatch: # Allows triggering the workflow manually in GitHub UI

	# Runs are grouped by workflow name and ref. If another run in the same
	# group starts while one is still in progress, the earlier run is cancelled
	# in favor of the newer one.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	jobs:
	  integration-tests:
	    name: Evaluation
	    strategy:
	      matrix:
	        os: [ubuntu-latest]
	        # Quoted so the version stays a string rather than a float.
	        python-version: ["3.12"]
	    runs-on: ${{ matrix.os }}
	    steps:
	      - uses: actions/checkout@v4
	      - name: Install uv
	        uses: astral-sh/setup-uv@v4
	      - name: Set up Python ${{ matrix.python-version }}
	        run: uv python install ${{ matrix.python-version }}
	      - name: Install dependencies
	        run: |
	          uv sync --all-extras --dev
	          uv pip install -U pytest-asyncio vcrpy
	      - name: Run integration tests
	        env:
	          # API credentials are read from repository secrets.
	          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
	          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
	          LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }}
	          # Environment variables are strings; quote to avoid a YAML boolean.
	          LANGSMITH_TRACING: "true"
	        run: |
	          make evals

Provide feedback