diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 113733a84..2bdf4e7b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/checkout@v3 - uses: pnpm/action-setup@v3 with: - version: 9.5.0 + version: 10.33.0 - name: Clone langfuse server run: | diff --git a/CLAUDE.md b/CLAUDE.md index 6dc8afbb2..51f3eaeeb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,6 +9,7 @@ This is the Langfuse Python SDK, a client library for accessing the Langfuse obs ## Development Commands ### Setup + ```bash # Install Poetry plugins (one-time setup) poetry self add poetry-dotenv-plugin @@ -21,6 +22,7 @@ poetry run pre-commit install ``` ### Testing + ```bash # Run all tests with verbose output poetry run pytest -s -v --log-cli-level=INFO @@ -33,6 +35,7 @@ poetry run pytest -s -v --log-cli-level=INFO -n auto ``` ### Code Quality + ```bash # Format code with Ruff poetry run ruff format . @@ -48,6 +51,7 @@ poetry run pre-commit run --all-files ``` ### Building and Releasing + ```bash # Build the package locally (for testing) poetry build @@ -57,6 +61,7 @@ poetry run pdoc -o docs/ --docformat google --logo "https://langfuse.com/langfus ``` Releases are automated via GitHub Actions. To release: + 1. Go to Actions > "Release Python SDK" workflow 2. Click "Run workflow" 3. 
Select version bump type (patch/minor/major/prerelease) @@ -89,6 +94,7 @@ The workflow handles versioning, building, PyPI publishing (via OIDC), and GitHu ### Key Design Patterns The SDK is built on OpenTelemetry for observability, using: + - Spans for tracing LLM operations - Attributes for metadata (see `LangfuseOtelSpanAttributes`) - Resource management for efficient batching and flushing @@ -98,6 +104,7 @@ The client follows an async-first design with automatic batching of events and b ## Configuration Environment variables (defined in `_client/environment_variables.py`): + - `LANGFUSE_PUBLIC_KEY` / `LANGFUSE_SECRET_KEY`: API credentials - `LANGFUSE_HOST`: API endpoint (defaults to https://cloud.langfuse.com) - `LANGFUSE_DEBUG`: Enable debug logging @@ -127,9 +134,11 @@ The `langfuse/api/` directory is auto-generated from the Langfuse OpenAPI specif ## Testing Guidelines ### Approach to Test Changes + - Don't remove functionality from existing unit tests just to make tests pass. Only change the test, if underlying code changes warrant a test change. 
## Python Code Rules ### Exception Handling + - Exception must not use an f-string literal, assign to variable first diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index a8781c9b9..9beed5f67 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -1818,7 +1818,7 @@ def create_score( try: new_body = ScoreBody( id=score_id, - session_id=session_id, + sessionId=session_id, datasetRunId=dataset_run_id, traceId=trace_id, observationId=observation_id, diff --git a/langfuse/_utils/request.py b/langfuse/_utils/request.py index 104552ee7..402d0b5a7 100644 --- a/langfuse/_utils/request.py +++ b/langfuse/_utils/request.py @@ -48,9 +48,8 @@ def generate_headers(self) -> dict: def batch_post(self, **kwargs: Any) -> httpx.Response: """Post the `kwargs` to the batch API endpoint for events""" - logger.debug("uploading data: %s", kwargs) - res = self.post(**kwargs) + return self._process_response( res, success_message="data uploaded successfully", return_json=False ) diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py index 0cd4dd133..373461c51 100644 --- a/langfuse/langchain/CallbackHandler.py +++ b/langfuse/langchain/CallbackHandler.py @@ -1057,10 +1057,10 @@ def _convert_message_to_dict(self, message: BaseMessage) -> Dict[str, Any]: and len(message.tool_calls) > 0 ): message_dict["tool_calls"] = message.tool_calls - + if ( - hasattr(message, "invalid_tool_calls") - and message.invalid_tool_calls is not None + hasattr(message, "invalid_tool_calls") + and message.invalid_tool_calls is not None and len(message.invalid_tool_calls) > 0 ): message_dict["invalid_tool_calls"] = message.invalid_tool_calls diff --git a/tests/test_core_sdk.py b/tests/test_core_sdk.py index 10a6b0d80..91064de23 100644 --- a/tests/test_core_sdk.py +++ b/tests/test_core_sdk.py @@ -118,6 +118,49 @@ def test_invalid_score_data_does_not_raise_exception(): # We can't assert queue size in OTEL implementation, but we can verify it 
completes without exception
 
 
+def test_create_session_score():
+    langfuse = Langfuse()
+
+    session_id = "my-session"
+
+    # Open a span and propagate trace-level attributes (incl. session id) inside it
+    with langfuse.start_as_current_observation(name="test-span"):
+        with propagate_attributes(
+            trace_name="this-is-so-great-new",
+            user_id="test",
+            metadata={"test": "test"},
+            session_id=session_id,
+        ):
+            pass
+
+    # Flush, then pause; presumably gives the server time to ingest (TODO confirm)
+    langfuse.flush()
+    sleep(2)
+
+    # Create a numeric score tied to the session only (no trace/observation id)
+    score_id = create_uuid()
+
+    langfuse.create_score(
+        score_id=score_id,
+        session_id=session_id,
+        name="this-is-a-score",
+        value=1,
+    )
+
+    # Flush, then pause again before reading the score back
+    langfuse.flush()
+    sleep(2)
+
+    # Retrieve the score by the id it was created with
+    score = langfuse.api.scores.get_by_id(score_id)
+
+    # Verify the score exists and kept its value, data type, and session id
+    assert score is not None
+    assert score.value == 1
+    assert score.data_type == "NUMERIC"
+    assert score.session_id == session_id
+
+
 def test_create_numeric_score():
     langfuse = Langfuse()
     api_wrapper = LangfuseAPI()