Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public List<ServiceBinding> getServiceBindings() throws ServiceBindingAccessExce
throw new ServiceBindingAccessException("Failed to load service key from environment", e);
}
if (serviceKey == null) {
log.debug("No service key found in environment variable {}", ENV_VAR_KEY);
log.debug("No service key found in environment variable {}.", ENV_VAR_KEY);
return List.of();
}
final String logMessage =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void reloadDeployments(@Nonnull final String resourceGroup) {
try {
val apiClient = new DeploymentApi(service);
val deployments = new HashSet<>(apiClient.query(resourceGroup).getResources());
log.info("Found {} deployments in resource group '{}'", deployments.size(), resourceGroup);
log.info("Found {} deployments in resource group '{}'.", deployments.size(), resourceGroup);
cache.put(resourceGroup, deployments);
} catch (final RuntimeException e) {
throw new DeploymentResolutionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ private T parseSuccess(@Nonnull final ClassicHttpResponse response) throws E {
try {
return objectMapper.readValue(content, successType);
} catch (final JsonProcessingException e) {
log.error("Failed to parse response to type {}", successType);
log.error("Failed to parse response to type {}.", successType);
throw exceptionFactory.build("Failed to parse response", e).setHttpResponse(response);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public Stream<D> handleStreamingResponse(@Nonnull final ClassicHttpResponse resp
}
return objectMapper.treeToValue(jsonNode, successType);
} catch (final IOException e) {
log.error("Failed to parse delta chunk to type {}", successType);
log.error("Failed to parse delta chunk to type {}.", successType);
final String message = "Failed to parse delta chunk";
throw exceptionFactory.build(message, e).setHttpResponse(response);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class IterableStreamConverter<T> implements Iterator<T> {
static final int BUFFER_SIZE = 8192;

private static final String ERR_CONTENT = "Failed to read response content.";
private static final String ERR_INTERRUPTED = "Parsing response content was interrupted";
private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored)";
private static final String ERR_INTERRUPTED = "Parsing response content was interrupted.";
private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored).";

/** Read next entry for Stream or {@code null} when no further entry can be read. */
private final Callable<T> readHandler;
Expand Down Expand Up @@ -70,7 +70,7 @@ public boolean hasNext() {
} catch (final Exception e) {
isDone = true;
stopHandler.run();
log.debug("Reading next element failed with error {})", e.getClass().getSimpleName());
log.debug("Reading next element failed with error {}.", e.getClass().getSimpleName());
throw errorHandler.apply(e);
}
return !isDone;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
@Beta
public class RequestLogContext {

private static void setRequestId(@Nonnull final String requestId) {
MDC.put(MdcKeys.REQUEST_ID, requestId);
private static void setCallId(@Nonnull final String callId) {
MDC.put(MdcKeys.CALL_ID, callId);
}

/**
Expand Down Expand Up @@ -65,7 +65,7 @@ public static void setService(@Nonnull final Service service) {

/** Clear all MDC request context information. */
public static void clear() {
MDC.remove(MdcKeys.REQUEST_ID);
MDC.remove(MdcKeys.CALL_ID);
MDC.remove(MdcKeys.ENDPOINT);
MDC.remove(MdcKeys.DESTINATION);
MDC.remove(MdcKeys.MODE);
Expand All @@ -74,13 +74,13 @@ public static void clear() {

/** Log the start of a request with generated request ID. */
public static void logRequestStart() {
val reqId = UUID.randomUUID().toString().substring(0, 8);
RequestLogContext.setRequestId(reqId);
val callId = UUID.randomUUID().toString().substring(0, 8);
RequestLogContext.setCallId(callId);

val message = "[reqId={}] Starting {} {} request to {}, destination={}";
val message = "[callId={}] Starting {} {} request to {}, destination={}.";
log.debug(
message,
reqId,
callId,
MDC.get(MdcKeys.SERVICE),
MDC.get(MdcKeys.MODE),
MDC.get(MdcKeys.ENDPOINT),
Expand All @@ -105,13 +105,13 @@ public static void logResponseSuccess(@Nonnull final ClassicHttpResponse respons
.filter(length -> length >= 0)
.map(length -> "%.1fKB".formatted(length / 1024.0))
.orElse("unknown");
val message = "[reqId={}] {} request completed successfully with duration={}, size={}.";
log.debug(message, MDC.get(MdcKeys.REQUEST_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
val message = "[callId={}] {} request completed successfully with duration={}, size={}.";
log.debug(message, MDC.get(MdcKeys.CALL_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
}

@UtilityClass
private static class MdcKeys {
private static final String REQUEST_ID = "reqId";
private static final String CALL_ID = "callId";
private static final String ENDPOINT = "endpoint";
private static final String DESTINATION = "destination";
private static final String MODE = "mode";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void testLinesThrows() {
final var sut = IterableStreamConverter.lines(response, new TestClientExceptionFactory());
assertThatThrownBy(sut::count)
.isInstanceOf(TestClientException.class)
.hasMessage("Parsing response content was interrupted")
.hasMessage("Parsing response content was interrupted.")
.cause()
.isInstanceOf(IOException.class)
.hasMessage("Ups!");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ void testRequestLogContextLifecycle() {
assertThat(MDC.get("destination")).isEqualTo("http://localhost:8000");

RequestLogContext.logRequestStart();
assertThat(MDC.get("reqId")).isNotNull().hasSize(8);
assertThat(MDC.get("callId")).isNotNull().hasSize(8);

RequestLogContext.clear();
assertThat(MDC.get("service")).isNull();
assertThat(MDC.get("mode")).isNull();
assertThat(MDC.get("endpoint")).isNull();
assertThat(MDC.get("destination")).isNull();
assertThat(MDC.get("reqId")).isNull();
assertThat(MDC.get("callId")).isNull();
assertThat(MDC.get("consumer-key")).isEqualTo("consumer-value");
}
}
192 changes: 192 additions & 0 deletions docs/adrs/006-logging-strategy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Logging Strategy

## Status

Accepted

## Context

The AI SDK identified some challenges with debugging and problem resolution that can be addressed with better logging.
Typically, users have to enable wire logs for troubleshooting which, along with helpful logs, surfaces a large volume of unrelated ones.
Additionally, we identified the need to improve visibility into the application progress to the user.

Key drivers for this decision include:

- **Debugging difficulties**: Limited visibility into request flows.
- **Security risks**: Accidental logging of sensitive data.
- **Poor user experience**: Lack of insight into application progress.
- **Troubleshooting dependency**: Reliance on external parties for critical details.

## Decision

We will implement and enforce comprehensive logging guidelines that prioritize **debugging capability** and **user visibility**.
This approach mandates descriptive, human-readable logs with structured request tracking through Mapped Diagnostic Context (MDC).
The SDK uses SLF4J API for all logging statements.

## Guidelines

### 1. Log Content and Security

* **Do not log sensitive information.**
Never log full request or response bodies.
Ensure that personally identifiable or confidential data — such as names, IDs, tokens, or payload content — is always excluded from logs.

* **Write concise and relevant logs.**
Every log must convey meaningful information.
Avoid verbose, repetitive, or purely cosmetic details.

* **Use descriptive, human-readable and standard formats.**
- Logs must be clear enough for a developer to understand what happened without checking the code.
- Start log messages with a capital letter (exceptions include case-sensitive identifiers) and end with a period (`.`).
- Avoid newlines (`\n`) and emojis within log messages to prevent parsing and encoding concerns.
- Use the `metric=value` pattern to include structured details with extensibility in mind.

```
[callId=e3eaa45c] OpenAI request completed successfully with duration=1628ms, responseSize=1.2KB.
```

* **Correlate logs.**
Include a call identifier (e.g., `callId`) in outgoing per-request logs to assist with correlation and debugging.
Throughout this document, the term 'request' refers to a single SDK operation that calls an AI service (e.g., `OrchestrationClient.chatCompletion()`, `OpenAiClient.embed()`), distinct from HTTP requests to the user's application.

* **Exception logging.**
When logging exceptions, use standard logging methods (e.g., `log.debug("Connection lost.", exception)`) rather than serializing exception objects.
Exception objects may contain custom fields with sensitive data that could be exposed through JSON serialization or custom `toString()` implementations.

* **Logging framework**
Write all logging statements against the SLF4J API.
The AI SDK relies on `logback-classic` as the provided runtime implementation.

---

### 2. Log Levels and Scope

* **Per-request logs.**
Keep per-request logs **below INFO level** (e.g., DEBUG or TRACE) to prevent cluttering normal application output.

* **Application runtime logs.**
Prefer **INFO level** only for one-time or startup/shutdown logs that occur once per application run.

* **Avoid unnecessary warnings.**
Use the WARN level only for actionable or genuinely concerning conditions.
Expected transient states (retries, fallbacks, cache misses) should not generate a warning.

* **Explicit request logging.**
Always log at **request start** to provide immediate visibility that an operation has begun.
This helps users understand that their request is being processed even before a result is available.
Do not rely solely on response-time logging — requests may fail, hang, or take long durations.
This approach also avoids the need for stack-trace investigation when surface error responses are ambiguous.

```
[callId=e3eaa45c] Starting OpenAI synchronous request to /v1/chat/completions, destination=<some-uri>.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Comment/Question)

@MatKuhr quick check - destination (uri) is not-sensitive information for logging, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes URL should be fine 👍🏻

```

* **Performance-aware logging.**
If a log statement requires computation (such as object creation or method invocation), guard it with a log-level check to prevent performance degradation.

``` java
Optional<Destination> maybeDestination;
Destination fallbackDestination = /* existing instance */;

// Bad: Creates objects or invokes methods even when logging is disabled
log.debug("Destination: {}", maybeDestination.toString());
log.debug("Destination: {}", maybeDestination.orElseGet(() -> new Destination()));
log.debug("Destination: {}", maybeDestination.orElseGet(this::getFallback));
log.debug("Destination: {}", maybeDestination.orElse(getFallback()));

// Good: No additional computation
log.debug("Destination: {}", maybeDestination);
log.debug("Destination: {}", maybeDestination.orElse(fallbackDestination));

// Good: Guard object creation
if (log.isDebugEnabled()) {
log.debug("Destination: {}", maybeDestination.orElse(getFallback()));
}

// Exception: Singletons require no guarding (no object creation)
Optional<List<Destination>> maybeDestinations;
log.debug("Destinations: {}", maybeDestinations.orElse(Collections.emptyList()));
```

---

### 3. MDC (Mapped Diagnostic Context)

* **Purpose and usage.**
MDC is used to carry contextual information (e.g., `callId`, `endpoint`, `service`) across execution blocks within the same thread.

* **Setting and clearing context.**
Set MDC values deliberately and close to their scope of relevance.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MDC uses ThreadLocal variables internally, which will get lost when creating new Threads, either explicitly or implicitly via Cloud SDK resilience.

In addition, please keep in mind that the application developer may also use MDC. So the SDK should not erase anything that is set by app developers.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. About using MDC across threads, I have outlined the issue with an example
  2. Also there is a following section that mentions with example on cautiously clearing the MDC.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not 100% sure about the correctness of the example about incorrect usage of MDC. Just let me know if I missed something.

Copy link
Contributor

@newtork newtork Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To highlight @MatKuhr point; in future we way need a logic that propagates the MDC fields across threads.

For example, if we ever decide to introduce Resilience:

ResilienceConfiguration configuration =
    ResilienceConfiguration.of("orchestration-deployment-id")
        .timeLimiterConfiguration(
            ResilienceConfiguration.TimeLimiterConfiguration.of(Duration.ofSeconds(5)));

String deploymentId = ResilienceDecorator.executeSupplier(() -> getOrchestrationDeploymentId(), configuration);

When we implement a method getOrchestrationDeploymentId we will not have the MDC fields.
If you want, we can draft a best practice for this case.


Unfortunately you cannot declare this an edge-case or bad-practice by nature, we need a proper solution for this.
I think chances are very high, we need this soon.

Copy link
Member Author

@rpanackal rpanackal Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I modified the point to not imply that logging within callback (using MDC) as a "bad practice". And also added a example for propagating MDC context between threads.

Per-request MDC context must be cleared appropriately when the response completes.
Avoid setting per-request values in long-lived objects that outlive the request lifecycle, as this can result in corrupted or incomplete log context.

* **Centralized MDC management.**
Avoid using magic strings for MDC keys or values.
Define them in a dedicated structure or utility (e.g., `RequestLogContext` class) to ensure discoverability and prevent errors during refactoring.
```java
// Bad: Magic strings scattered in code
MDC.put("service", "OpenAI");
log.debug("Service {}", MDC.get("service"));

// Good: Centralized in utility class
RequestLogContext.setService(Service.OPENAI);
log.debug("Service {}", RequestLogContext.get(MdcKeys.SERVICE));
```

* **Granular clearing only.**
Never clear the entire MDC context, as this will remove entries set by the application developer or other libraries.
Instead, remove entries key-by-key to preserve unrelated context items that may remain valid for longer periods.
```java
// Bad: Risk clearing useful context entries from other components
MDC.clear();

// Good: Clear only your own entries, centralized in utility class
class RequestLogContext {
//...
static void clear(){
MDC.remove(MdcKeys.CALL_ID);
MDC.remove(MdcKeys.SERVICE);
}
}
```

* **Responsibility and ownership.**
The component or class that sets MDC context values is also responsible for clearing them.
This maintains clarity and ensures proper lifecycle management.

* **Safe consumption.**
Since MDC uses `ThreadLocal` storage, any new thread (created implicitly or explicitly) will not have access to the parent thread's MDC context.
Always audit for thread switches through async operations, resilience patterns etc., as these may lead to corrupted logs due to invalid MDC.

```java
// Thread A
RequestLogContext.setCallId("abc123");
log.debug("[callId={}] Starting request", RequestLogContext.get(MdcKeys.CALL_ID));

// Problem: Async callback runs in Thread B without original MDC context
client.executeAsync(() -> {
// Thread B: RequestLogContext.get(MdcKeys.CALL_ID) returns null
log.debug("[callId={}] Processing", RequestLogContext.get(MdcKeys.CALL_ID));
});
```

To maintain logging context across thread boundaries, manually propagate the MDC context:

```java
// Capture parent thread's MDC context
Map<String, String> context = MDC.getCopyOfContextMap();

client.executeAsync(() -> {
// Restore the captured context in new thread
MDC.setContextMap(context);
// Thread B: RequestLogContext.get(MdcKeys.CALL_ID) returns abc123
log.debug("[callId={}] Processing", RequestLogContext.get(MdcKeys.CALL_ID));
});
```
---

### 4. Logging Boundaries and Generation

* **Deliberate logging boundaries.**
Generated code (such as those in modules like *document-grounding* or *prompt-registry*) should log minimally or preferably be avoided entirely.
Logging should be centralized in higher-level components to maintain consistency and reduce noise.
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ public List<OpenAiToolMessage> executeTools() {
final var tools = Optional.ofNullable(originalRequest.getToolsExecutable()).orElseGet(List::of);
if (log.isDebugEnabled() && !tools.isEmpty()) {
final var toolNames = tools.stream().map(OpenAiTool::getName).toList();
log.debug("Executing {} tool call(s) - {}", toolNames.size(), toolNames);
log.debug("Executing {} tool call(s) - {}.", toolNames.size(), toolNames);
}
return OpenAiTool.execute(tools, getMessage());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ private static Map<OpenAiFunctionCall, Object> executeInternal(
if (toolCall instanceof OpenAiFunctionCall functionCall) {
final var tool = toolMap.get(functionCall.getName());
if (tool == null) {
log.warn("Tool not found for function call: {}", functionCall.getName());
log.warn("Tool not found for function call: {}.", functionCall.getName());
continue;
}
final var toolResult = executeFunction(tool, functionCall);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public ChatResponse call(@Nonnull final Prompt prompt) {
if (options != null && isInternalToolExecutionEnabled(options) && response.hasToolCalls()) {
val toolCalls =
response.getResult().getOutput().getToolCalls().stream().map(ToolCall::name).toList();
log.info("Executing {} tool call(s) - {}", toolCalls.size(), toolCalls);
log.info("Executing {} tool call(s) - {}.", toolCalls.size(), toolCalls);
val toolExecutionResult = toolCallingManager.executeToolCalls(prompt, response);
// Send the tool execution result back to the model.
log.debug("Re-invoking model with tool execution results.");
Expand Down Expand Up @@ -219,7 +219,7 @@ private static List<ChatCompletionTool> extractTools(final ToolCallingChatOption
val tool = new ChatCompletionTool().type(toolType).function(toolFunction);
tools.add(tool);
} catch (JsonProcessingException e) {
log.warn("Failed to add tool to the chat request: {}", e.getMessage());
log.warn("Failed to add tool to the chat request: {}.", e.getMessage());
}
}
return tools;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public ChatResponse call(@Nonnull final Prompt prompt) {
if (log.isDebugEnabled()) {
val tools = response.getResult().getOutput().getToolCalls();
val toolsStr = tools.stream().map(ToolCall::name).collect(Collectors.joining(", "));
log.debug("Executing {} tool call(s) - {}", tools.size(), toolsStr);
log.debug("Executing {} tool call(s) - {}.", tools.size(), toolsStr);
}

val toolExecutionResult = toolCallingManager.executeToolCalls(prompt, response);
Expand Down
Loading