Skip to content

Commit 27b7e99

Browse files
zigzagoglaforge
authored and committed
feat: add transcription in event
1 parent 88eb0f5 commit 27b7e99

File tree

7 files changed

+293
-4
lines changed

7 files changed

+293
-4
lines changed

core/src/main/java/com/google/adk/events/Event.java

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import com.google.genai.types.FunctionResponse;
3434
import com.google.genai.types.GenerateContentResponseUsageMetadata;
3535
import com.google.genai.types.GroundingMetadata;
36+
import com.google.genai.types.Transcription;
3637
import java.time.Instant;
3738
import java.util.List;
3839
import java.util.Objects;
@@ -42,6 +43,7 @@
4243
import org.jspecify.annotations.Nullable;
4344

4445
// TODO - b/413761119 update Agent.java when resolved.
46+
4547
/** Represents an event in a session. */
4648
@JsonDeserialize(builder = Event.Builder.class)
4749
public class Event extends JsonBaseModel {
@@ -64,6 +66,9 @@ public class Event extends JsonBaseModel {
6466
private @Nullable GroundingMetadata groundingMetadata;
6567
private @Nullable List<CustomMetadata> customMetadata;
6668
private @Nullable String modelVersion;
69+
private @Nullable Transcription inputTranscription;
70+
private @Nullable Transcription outputTranscription;
71+
6772
private long timestamp;
6873

6974
private Event() {}
@@ -266,6 +271,32 @@ public void setModelVersion(@Nullable String modelVersion) {
266271
this.modelVersion = modelVersion;
267272
}
268273

274+
/**
275+
* Input transcription. The transcription is independent of the model turn, which means it doesn’t
276+
* imply any ordering between transcription and model turn.
277+
*/
278+
@JsonProperty("inputTranscription")
279+
public Optional<Transcription> inputTranscription() {
280+
return Optional.ofNullable(inputTranscription);
281+
}
282+
283+
public void setInputTranscription(@Nullable Transcription inputTranscription) {
284+
this.inputTranscription = inputTranscription;
285+
}
286+
287+
/**
288+
* Output transcription. The transcription is independent of the model turn, which means it doesn’t
289+
* imply any ordering between transcription and model turn.
290+
*/
291+
@JsonProperty("outputTranscription")
292+
public Optional<Transcription> outputTranscription() {
293+
return Optional.ofNullable(outputTranscription);
294+
}
295+
296+
public void setOutputTranscription(@Nullable Transcription outputTranscription) {
297+
this.outputTranscription = outputTranscription;
298+
}
299+
269300
/** The timestamp of the event. */
270301
@JsonProperty("timestamp")
271302
public long timestamp() {
@@ -362,6 +393,8 @@ public static class Builder {
362393
private @Nullable GroundingMetadata groundingMetadata;
363394
private @Nullable List<CustomMetadata> customMetadata;
364395
private @Nullable String modelVersion;
396+
private @Nullable Transcription inputTranscription;
397+
private @Nullable Transcription outputTranscription;
365398
private @Nullable Long timestamp;
366399

367400
@JsonCreator
@@ -520,6 +553,20 @@ public Builder modelVersion(@Nullable String value) {
520553
return this;
521554
}
522555

556+
@CanIgnoreReturnValue
557+
@JsonProperty("inputTranscription")
558+
public Builder inputTranscription(@Nullable Transcription value) {
559+
this.inputTranscription = value;
560+
return this;
561+
}
562+
563+
@CanIgnoreReturnValue
564+
@JsonProperty("outputTranscription")
565+
public Builder outputTranscription(@Nullable Transcription value) {
566+
this.outputTranscription = value;
567+
return this;
568+
}
569+
523570
public Event build() {
524571
Event event = new Event();
525572
event.setId(id);
@@ -541,6 +588,8 @@ public Event build() {
541588
event.setModelVersion(modelVersion);
542589
event.setActions(actions().orElseGet(() -> EventActions.builder().build()));
543590
event.setTimestamp(timestamp().orElseGet(() -> Instant.now().toEpochMilli()));
591+
event.setInputTranscription(inputTranscription);
592+
event.setOutputTranscription(outputTranscription);
544593
return event;
545594
}
546595
}
@@ -575,7 +624,9 @@ public Builder toBuilder() {
575624
.branch(this.branch)
576625
.groundingMetadata(this.groundingMetadata)
577626
.customMetadata(this.customMetadata)
578-
.modelVersion(this.modelVersion);
627+
.modelVersion(this.modelVersion)
628+
.inputTranscription(this.inputTranscription)
629+
.outputTranscription(this.outputTranscription);
579630
if (this.timestamp != 0) {
580631
builder.timestamp(this.timestamp);
581632
}
@@ -608,7 +659,9 @@ public boolean equals(Object obj) {
608659
&& Objects.equals(branch, other.branch)
609660
&& Objects.equals(groundingMetadata, other.groundingMetadata)
610661
&& Objects.equals(customMetadata, other.customMetadata)
611-
&& Objects.equals(modelVersion, other.modelVersion);
662+
&& Objects.equals(modelVersion, other.modelVersion)
663+
&& Objects.equals(inputTranscription, other.inputTranscription)
664+
&& Objects.equals(outputTranscription, other.outputTranscription);
612665
}
613666

614667
@Override
@@ -637,6 +690,8 @@ public int hashCode() {
637690
groundingMetadata,
638691
customMetadata,
639692
modelVersion,
693+
inputTranscription,
694+
outputTranscription,
640695
timestamp);
641696
}
642697
}

core/src/main/java/com/google/adk/flows/llmflows/BaseLlmFlow.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,9 @@ private Flowable<Event> buildPostprocessingEvents(
677677
&& updatedResponse.errorCode().isEmpty()
678678
&& !updatedResponse.interrupted().orElse(false)
679679
&& !updatedResponse.turnComplete().orElse(false)
680-
&& updatedResponse.usageMetadata().isEmpty()) {
680+
&& updatedResponse.usageMetadata().isEmpty()
681+
&& updatedResponse.inputTranscription().isEmpty()
682+
&& updatedResponse.outputTranscription().isEmpty()) {
681683
return processorEvents;
682684
}
683685

@@ -740,7 +742,9 @@ private Event buildModelResponseEvent(
740742
.avgLogprobs(llmResponse.avgLogprobs().orElse(null))
741743
.finishReason(llmResponse.finishReason().orElse(null))
742744
.usageMetadata(llmResponse.usageMetadata().orElse(null))
743-
.modelVersion(llmResponse.modelVersion().orElse(null));
745+
.modelVersion(llmResponse.modelVersion().orElse(null))
746+
.inputTranscription(llmResponse.inputTranscription().orElse(null))
747+
.outputTranscription(llmResponse.outputTranscription().orElse(null));
744748

745749
Event event = eventBuilder.build();
746750

core/src/main/java/com/google/adk/models/GeminiLlmConnection.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ private static LlmResponse createServerContentResponse(LiveServerContent serverC
192192
.partial(serverContent.turnComplete().map(completed -> !completed).orElse(false))
193193
.turnComplete(serverContent.turnComplete().orElse(false))
194194
.interrupted(serverContent.interrupted().orElse(null))
195+
.inputTranscription(serverContent.inputTranscription().orElse(null))
196+
.outputTranscription(serverContent.outputTranscription().orElse(null))
195197
.build();
196198
}
197199

core/src/main/java/com/google/adk/models/LlmResponse.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.google.genai.types.GenerateContentResponsePromptFeedback;
3232
import com.google.genai.types.GenerateContentResponseUsageMetadata;
3333
import com.google.genai.types.GroundingMetadata;
34+
import com.google.genai.types.Transcription;
3435
import java.util.List;
3536
import java.util.Optional;
3637
import org.jspecify.annotations.Nullable;
@@ -115,6 +116,20 @@ public abstract class LlmResponse extends JsonBaseModel {
115116
@JsonProperty("modelVersion")
116117
public abstract Optional<String> modelVersion();
117118

119+
/**
120+
* Input transcription. The transcription is independent of the model turn, which means it doesn’t
121+
* imply any ordering between transcription and model turn.
122+
*/
123+
@JsonProperty("inputTranscription")
124+
public abstract Optional<Transcription> inputTranscription();
125+
126+
/**
127+
* Output transcription. The transcription is independent of the model turn, which means it doesn’t
128+
* imply any ordering between transcription and model turn.
129+
*/
130+
@JsonProperty("outputTranscription")
131+
public abstract Optional<Transcription> outputTranscription();
132+
118133
public abstract Builder toBuilder();
119134

120135
/** Builder for constructing {@link LlmResponse} instances. */
@@ -164,6 +179,12 @@ public abstract Builder usageMetadata(
164179
@JsonProperty("modelVersion")
165180
public abstract Builder modelVersion(@Nullable String modelVersion);
166181

182+
@JsonProperty("inputTranscription")
183+
public abstract Builder inputTranscription(@Nullable Transcription inputTranscription);
184+
185+
@JsonProperty("outputTranscription")
186+
public abstract Builder outputTranscription(@Nullable Transcription outputTranscription);
187+
167188
@CanIgnoreReturnValue
168189
public final Builder response(GenerateContentResponse response) {
169190
Optional<List<Candidate>> candidatesOpt = response.candidates();

core/src/test/java/com/google/adk/events/EventTest.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import com.google.genai.types.FunctionCall;
2727
import com.google.genai.types.GenerateContentResponseUsageMetadata;
2828
import com.google.genai.types.Part;
29+
import com.google.genai.types.Transcription;
2930
import java.time.Instant;
3031
import java.util.concurrent.ConcurrentHashMap;
3132
import org.junit.Test;
@@ -192,6 +193,81 @@ public void event_json_serialization_works() throws Exception {
192193
assertThat(deserializedEvent).isEqualTo(EVENT);
193194
}
194195

196+
@Test
197+
public void event_builder_with_transcriptions_works() {
198+
Transcription inputTranscription =
199+
Transcription.builder().text("user said hello").finished(true).build();
200+
Transcription outputTranscription =
201+
Transcription.builder().text("model said hi").finished(false).build();
202+
Event event =
203+
Event.builder()
204+
.id("event_id")
205+
.invocationId("invocation_id")
206+
.author("agent")
207+
.timestamp(123456789L)
208+
.inputTranscription(inputTranscription)
209+
.outputTranscription(outputTranscription)
210+
.build();
211+
212+
assertThat(event.inputTranscription()).hasValue(inputTranscription);
213+
assertThat(event.outputTranscription()).hasValue(outputTranscription);
214+
}
215+
216+
@Test
217+
public void event_transcriptions_empty_by_default() {
218+
Event event =
219+
Event.builder().id("event_id").invocationId("invocation_id").author("agent").build();
220+
221+
assertThat(event.inputTranscription()).isEmpty();
222+
assertThat(event.outputTranscription()).isEmpty();
223+
}
224+
225+
@Test
226+
public void event_equals_differentiates_transcriptions() {
227+
Transcription transcription = Transcription.builder().text("hello").finished(true).build();
228+
Event eventWithTranscription =
229+
Event.builder()
230+
.id("event_id")
231+
.invocationId("invocation_id")
232+
.author("agent")
233+
.timestamp(123456789L)
234+
.inputTranscription(transcription)
235+
.build();
236+
Event eventWithoutTranscription =
237+
Event.builder()
238+
.id("event_id")
239+
.invocationId("invocation_id")
240+
.author("agent")
241+
.timestamp(123456789L)
242+
.build();
243+
244+
assertThat(eventWithTranscription).isNotEqualTo(eventWithoutTranscription);
245+
}
246+
247+
@Test
248+
public void event_json_serialization_with_transcriptions_works() throws Exception {
249+
Transcription inputTranscription =
250+
Transcription.builder().text("user said hello").finished(true).build();
251+
Transcription outputTranscription =
252+
Transcription.builder().text("model said hi").finished(false).build();
253+
Event event =
254+
Event.builder()
255+
.id("event_id")
256+
.invocationId("invocation_id")
257+
.author("agent")
258+
.timestamp(123456789L)
259+
.inputTranscription(inputTranscription)
260+
.outputTranscription(outputTranscription)
261+
.build();
262+
263+
String json = event.toJson();
264+
Event deserialized = Event.fromJson(json);
265+
266+
assertThat(deserialized).isEqualTo(event);
267+
assertThat(deserialized.inputTranscription()).hasValue(inputTranscription);
268+
assertThat(deserialized.outputTranscription()).hasValue(outputTranscription);
269+
}
270+
195271
@Test
196272
public void finalResponse_returnsTrueIfNoToolCalls() {
197273
Event event =

core/src/test/java/com/google/adk/flows/llmflows/BaseLlmFlowTest.java

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import com.google.genai.types.FunctionDeclaration;
4444
import com.google.genai.types.GenerateContentResponseUsageMetadata;
4545
import com.google.genai.types.Part;
46+
import com.google.genai.types.Transcription;
4647
import io.opentelemetry.context.Context;
4748
import io.opentelemetry.context.ContextKey;
4849
import io.opentelemetry.context.Scope;
@@ -641,6 +642,94 @@ public void run_contextPropagation() {
641642
assertThat(events.get(0).content()).hasValue(content);
642643
}
643644

645+
@Test
646+
public void postprocess_onlyInputTranscription_returnsEvent() {
647+
Transcription inputTranscription =
648+
Transcription.builder().text("user said hello").finished(true).build();
649+
LlmResponse llmResponse = LlmResponse.builder().inputTranscription(inputTranscription).build();
650+
InvocationContext invocationContext =
651+
createInvocationContext(createTestAgent(createTestLlm(llmResponse)));
652+
BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors();
653+
Event baseEvent =
654+
Event.builder()
655+
.invocationId(invocationContext.invocationId())
656+
.author(invocationContext.agent().name())
657+
.build();
658+
659+
List<Event> events =
660+
baseLlmFlow
661+
.postprocess(
662+
invocationContext,
663+
baseEvent,
664+
LlmRequest.builder().build(),
665+
llmResponse,
666+
Context.current())
667+
.toList()
668+
.blockingGet();
669+
670+
assertThat(events).hasSize(1);
671+
Event event = getOnlyElement(events);
672+
assertThat(event.inputTranscription()).hasValue(inputTranscription);
673+
assertThat(event.outputTranscription()).isEmpty();
674+
}
675+
676+
@Test
677+
public void postprocess_onlyOutputTranscription_returnsEvent() {
678+
Transcription outputTranscription =
679+
Transcription.builder().text("model replied hi").finished(false).build();
680+
LlmResponse llmResponse =
681+
LlmResponse.builder().outputTranscription(outputTranscription).build();
682+
InvocationContext invocationContext =
683+
createInvocationContext(createTestAgent(createTestLlm(llmResponse)));
684+
BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors();
685+
Event baseEvent =
686+
Event.builder()
687+
.invocationId(invocationContext.invocationId())
688+
.author(invocationContext.agent().name())
689+
.build();
690+
691+
List<Event> events =
692+
baseLlmFlow
693+
.postprocess(
694+
invocationContext,
695+
baseEvent,
696+
LlmRequest.builder().build(),
697+
llmResponse,
698+
Context.current())
699+
.toList()
700+
.blockingGet();
701+
702+
assertThat(events).hasSize(1);
703+
Event event = getOnlyElement(events);
704+
assertThat(event.outputTranscription()).hasValue(outputTranscription);
705+
assertThat(event.inputTranscription()).isEmpty();
706+
}
707+
708+
@Test
709+
public void run_responseWithTranscriptions_propagatesTranscriptionsToEvent() {
710+
Transcription inputTranscription =
711+
Transcription.builder().text("user said hello").finished(true).build();
712+
Transcription outputTranscription =
713+
Transcription.builder().text("model replied hi").finished(true).build();
714+
Content content = Content.fromParts(Part.fromText("model replied hi"));
715+
LlmResponse llmResponse =
716+
LlmResponse.builder()
717+
.content(content)
718+
.inputTranscription(inputTranscription)
719+
.outputTranscription(outputTranscription)
720+
.build();
721+
TestLlm testLlm = createTestLlm(llmResponse);
722+
InvocationContext invocationContext = createInvocationContext(createTestAgent(testLlm));
723+
BaseLlmFlow baseLlmFlow = createBaseLlmFlowWithoutProcessors();
724+
725+
List<Event> events = baseLlmFlow.run(invocationContext).toList().blockingGet();
726+
727+
assertThat(events).hasSize(1);
728+
Event event = getOnlyElement(events);
729+
assertThat(event.inputTranscription()).hasValue(inputTranscription);
730+
assertThat(event.outputTranscription()).hasValue(outputTranscription);
731+
}
732+
644733
@Test
645734
public void postprocess_noResponseProcessors_onlyUsageMetadata_returnsEvent() {
646735
GenerateContentResponseUsageMetadata usageMetadata =

0 commit comments

Comments
 (0)