Skip to content

Commit

Permalink
Merge pull request #96 from AssemblyAI/niels/add-realtime-encoding
Browse files Browse the repository at this point in the history
feat(streaming): Add realtime encoding
  • Loading branch information
Swimburger committed Apr 15, 2024
2 parents 5306d12 + dfa6f56 commit 689508f
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
2 changes: 2 additions & 0 deletions sample-app/src/main/java/sample/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.assemblyai.api.resources.files.types.UploadedFile;
import com.assemblyai.api.resources.lemur.requests.LemurTaskParams;
import com.assemblyai.api.resources.lemur.types.LemurTaskResponse;
import com.assemblyai.api.resources.realtime.types.AudioEncoding;
import com.assemblyai.api.resources.transcripts.requests.*;
import com.assemblyai.api.resources.transcripts.types.*;
import java.io.File;
Expand Down Expand Up @@ -87,6 +88,7 @@ public static void main(String... args) throws IOException, InterruptedException

RealtimeTranscriber realtimeTranscriber = RealtimeTranscriber.builder()
.apiKey(System.getenv("ASSEMBLYAI_API_KEY"))
.encoding(AudioEncoding.PCM_S16LE)
.onSessionBegins(System.out::println)
.onPartialTranscript(System.out::println)
.onFinalTranscript(System.out::println)
Expand Down
27 changes: 20 additions & 7 deletions src/main/java/com/assemblyai/api/RealtimeTranscriber.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
package com.assemblyai.api;

import com.assemblyai.api.core.ObjectMappers;
import com.assemblyai.api.resources.realtime.types.FinalTranscript;
import com.assemblyai.api.resources.realtime.types.PartialTranscript;
import com.assemblyai.api.resources.realtime.types.RealtimeError;
import com.assemblyai.api.resources.realtime.types.RealtimeMessage;
import com.assemblyai.api.resources.realtime.types.RealtimeTranscript;
import com.assemblyai.api.resources.realtime.types.SessionBegins;
import com.assemblyai.api.resources.realtime.types.SessionTerminated;
import com.assemblyai.api.resources.realtime.types.*;
import com.fasterxml.jackson.core.JsonProcessingException;

import java.io.IOException;
Expand All @@ -33,6 +27,7 @@ public final class RealtimeTranscriber implements AutoCloseable {
private static final OkHttpClient OK_HTTP_CLIENT = new OkHttpClient.Builder().build();
private final String apiKey;
private final int sampleRate;
private final AudioEncoding encoding;
private final boolean disablePartialTranscripts;
private final Optional<List<String>> wordBoost;
private final Optional<Integer> endUtteranceSilenceThreshold;
Expand All @@ -48,6 +43,7 @@ public final class RealtimeTranscriber implements AutoCloseable {
private RealtimeTranscriber(
String apiKey,
int sampleRate,
AudioEncoding encoding,
boolean disablePartialTranscripts,
Optional<List<String>> wordBoost,
Optional<Integer> endUtteranceSilenceThreshold,
Expand All @@ -59,6 +55,7 @@ private RealtimeTranscriber(
BiConsumer<Integer, String> onClose) {
this.apiKey = apiKey;
this.sampleRate = sampleRate;
this.encoding = encoding;
this.disablePartialTranscripts = disablePartialTranscripts;
this.wordBoost = wordBoost;
this.endUtteranceSilenceThreshold = endUtteranceSilenceThreshold;
Expand All @@ -76,6 +73,9 @@ private RealtimeTranscriber(
*/
public void connect() {
String url = BASE_URL + "/v2/realtime/ws?sample_rate=" + sampleRate;
if (encoding != null) {
url += "&encoding=" + encoding;
}
if (disablePartialTranscripts) {
url += "&disable_partial_transcripts=true";
}
Expand Down Expand Up @@ -151,6 +151,7 @@ public static final class Builder {
private static final int DEFAULT_SAMPLE_RATE = 16_000;
private String apiKey;
private Integer sampleRate;
private AudioEncoding encoding;
private boolean disablePartialTranscripts;
private List<String> wordBoost;
private Optional<Integer> endUtteranceSilenceThreshold = Optional.empty();
Expand Down Expand Up @@ -183,6 +184,17 @@ public RealtimeTranscriber.Builder sampleRate(int sampleRate) {
return this;
}

/**
 * Configures the encoding of the audio that will be streamed to the realtime service.
 * <p>
 * The value is optional: when it is left unset ({@code null}), no {@code encoding}
 * query parameter is added to the WebSocket URL on connect.
 *
 * @param encoding the encoding of the audio data, or {@code null} to omit it
 * @return this builder, to allow call chaining
 */
public RealtimeTranscriber.Builder encoding(AudioEncoding encoding) {
    this.encoding = encoding;
    return this;
}

/**
* Disable partial transcripts.
*
Expand Down Expand Up @@ -301,6 +313,7 @@ public RealtimeTranscriber build() {
return new RealtimeTranscriber(
apiKey,
sampleRate == null ? DEFAULT_SAMPLE_RATE : sampleRate,
encoding,
disablePartialTranscripts,
Optional.ofNullable(wordBoost),
endUtteranceSilenceThreshold,
Expand Down

0 comments on commit 689508f

Please sign in to comment.