Skip to content

Commit 13137bf

Browse files
committed
feat(android): support transcribeData & transcribeFile with base64
1 parent bf8ba4e commit 13137bf

File tree

5 files changed

+104
-48
lines changed

5 files changed

+104
-48
lines changed

android/src/main/java/com/rnwhisper/AudioUtils.java

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,29 @@
22

33
import android.util.Log;
44

5-
import java.io.IOException;
6-
import java.io.FileReader;
75
import java.io.ByteArrayOutputStream;
86
import java.io.File;
97
import java.io.IOException;
108
import java.io.InputStream;
119
import java.nio.ByteBuffer;
1210
import java.nio.ByteOrder;
1311
import java.nio.ShortBuffer;
12+
import java.util.Base64;
13+
14+
import java.util.Arrays;
1415

1516
public class AudioUtils {
1617
private static final String NAME = "RNWhisperAudioUtils";
1718

18-
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
19-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
20-
byte[] buffer = new byte[1024];
21-
int bytesRead;
22-
while ((bytesRead = inputStream.read(buffer)) != -1) {
23-
baos.write(buffer, 0, bytesRead);
24-
}
25-
ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray());
19+
private static float[] bufferToFloatArray(byte[] buffer, Boolean cutHeader) {
20+
ByteBuffer byteBuffer = ByteBuffer.wrap(buffer);
2621
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
27-
byteBuffer.position(44);
2822
ShortBuffer shortBuffer = byteBuffer.asShortBuffer();
2923
short[] shortArray = new short[shortBuffer.limit()];
3024
shortBuffer.get(shortArray);
25+
if (cutHeader) {
26+
shortArray = Arrays.copyOfRange(shortArray, 44, shortArray.length);
27+
}
3128
float[] floatArray = new float[shortArray.length];
3229
for (int i = 0; i < shortArray.length; i++) {
3330
floatArray[i] = ((float) shortArray[i]) / 32767.0f;
@@ -36,4 +33,22 @@ public static float[] decodeWaveFile(InputStream inputStream) throws IOException
3633
}
3734
return floatArray;
3835
}
39-
}
36+
37+
public static float[] decodeWaveFile(InputStream inputStream) throws IOException {
38+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
39+
byte[] buffer = new byte[1024];
40+
int bytesRead;
41+
while ((bytesRead = inputStream.read(buffer)) != -1) {
42+
baos.write(buffer, 0, bytesRead);
43+
}
44+
return bufferToFloatArray(baos.toByteArray(), true);
45+
}
46+
47+
public static float[] decodeWaveData(String dataBase64) throws IOException {
48+
return bufferToFloatArray(Base64.getDecoder().decode(dataBase64), true);
49+
}
50+
51+
public static float[] decodePcmData(String dataBase64) {
52+
return bufferToFloatArray(Base64.getDecoder().decode(dataBase64), false);
53+
}
54+
}

android/src/main/java/com/rnwhisper/RNWhisper.java

Lines changed: 66 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.Random;
2020
import java.io.File;
2121
import java.io.FileInputStream;
22+
import java.io.InputStream;
2223
import java.io.PushbackInputStream;
2324

2425
public class RNWhisper implements LifecycleEventListener {
@@ -119,44 +120,16 @@ protected void onPostExecute(Integer id) {
119120
tasks.put(task, "initContext");
120121
}
121122

122-
public void transcribeFile(double id, double jobId, String filePath, ReadableMap options, Promise promise) {
123-
final WhisperContext context = contexts.get((int) id);
124-
if (context == null) {
125-
promise.reject("Context not found");
126-
return;
127-
}
128-
if (context.isCapturing()) {
129-
promise.reject("The context is in realtime transcribe mode");
130-
return;
131-
}
132-
if (context.isTranscribing()) {
133-
promise.reject("Context is already transcribing");
134-
return;
135-
}
123+
private AsyncTask transcribe(WhisperContext context, double jobId, final float[] audioData, final ReadableMap options, Promise promise) {
136124
AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
137125
private Exception exception;
138126

139127
@Override
140128
protected WritableMap doInBackground(Void... voids) {
141129
try {
142-
String waveFilePath = filePath;
143-
144-
if (filePath.startsWith("http://") || filePath.startsWith("https://")) {
145-
waveFilePath = downloader.downloadFile(filePath);
146-
}
147-
148-
int resId = getResourceIdentifier(waveFilePath);
149-
if (resId > 0) {
150-
return context.transcribeInputStream(
151-
(int) jobId,
152-
reactContext.getResources().openRawResource(resId),
153-
options
154-
);
155-
}
156-
157-
return context.transcribeInputStream(
130+
return context.transcribe(
158131
(int) jobId,
159-
new FileInputStream(new File(waveFilePath)),
132+
audioData,
160133
options
161134
);
162135
} catch (Exception e) {
@@ -175,7 +148,66 @@ protected void onPostExecute(WritableMap data) {
175148
tasks.remove(this);
176149
}
177150
}.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
178-
tasks.put(task, "transcribeFile-" + id);
151+
return task;
152+
}
153+
154+
public void transcribeFile(double id, double jobId, String filePathOrBase64, ReadableMap options, Promise promise) {
155+
final WhisperContext context = contexts.get((int) id);
156+
if (context == null) {
157+
promise.reject("Context not found");
158+
return;
159+
}
160+
if (context.isCapturing()) {
161+
promise.reject("The context is in realtime transcribe mode");
162+
return;
163+
}
164+
if (context.isTranscribing()) {
165+
promise.reject("Context is already transcribing");
166+
return;
167+
}
168+
169+
String waveFilePath = filePathOrBase64;
170+
try {
171+
if (filePathOrBase64.startsWith("http://") || filePathOrBase64.startsWith("https://")) {
172+
waveFilePath = downloader.downloadFile(filePathOrBase64);
173+
}
174+
175+
float[] audioData;
176+
int resId = getResourceIdentifier(waveFilePath);
177+
if (resId > 0) {
178+
audioData = AudioUtils.decodeWaveFile(reactContext.getResources().openRawResource(resId));
179+
} else if (filePathOrBase64.startsWith("data:audio/wav;base64,")) {
180+
audioData = AudioUtils.decodeWaveData(filePathOrBase64);
181+
} else {
182+
audioData = AudioUtils.decodeWaveFile(new FileInputStream(new File(waveFilePath)));
183+
}
184+
185+
AsyncTask task = transcribe(context, jobId, audioData, options, promise);
186+
tasks.put(task, "transcribeFile-" + id);
187+
} catch (Exception e) {
188+
promise.reject(e);
189+
}
190+
}
191+
192+
public void transcribeData(double id, double jobId, String dataBase64, ReadableMap options, Promise promise) {
193+
final WhisperContext context = contexts.get((int) id);
194+
if (context == null) {
195+
promise.reject("Context not found");
196+
return;
197+
}
198+
if (context.isCapturing()) {
199+
promise.reject("The context is in realtime transcribe mode");
200+
return;
201+
}
202+
if (context.isTranscribing()) {
203+
promise.reject("Context is already transcribing");
204+
return;
205+
}
206+
207+
float[] audioData = AudioUtils.decodePcmData(dataBase64);
208+
AsyncTask task = transcribe(context, jobId, audioData, options, promise);
209+
210+
tasks.put(task, "transcribeData-" + id);
179211
}
180212

181213
public void startRealtimeTranscribe(double id, double jobId, ReadableMap options, Promise promise) {
@@ -211,7 +243,7 @@ protected Void doInBackground(Void... voids) {
211243
context.stopTranscribe((int) jobId);
212244
AsyncTask completionTask = null;
213245
for (AsyncTask task : tasks.keySet()) {
214-
if (tasks.get(task).equals("transcribeFile-" + id)) {
246+
if (tasks.get(task).equals("transcribeFile-" + id) || tasks.get(task).equals("transcribeData-" + id)) {
215247
task.get();
216248
break;
217249
}
@@ -259,7 +291,7 @@ protected Void doInBackground(Void... voids) {
259291
context.stopCurrentTranscribe();
260292
AsyncTask completionTask = null;
261293
for (AsyncTask task : tasks.keySet()) {
262-
if (tasks.get(task).equals("transcribeFile-" + contextId)) {
294+
if (tasks.get(task).equals("transcribeFile-" + contextId) || tasks.get(task).equals("transcribeData-" + contextId)) {
263295
task.get();
264296
break;
265297
}

android/src/main/java/com/rnwhisper/WhisperContext.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ void onNewSegments(int nNew) {
332332
}
333333
}
334334

335-
public WritableMap transcribeInputStream(int jobId, InputStream inputStream, ReadableMap options) throws IOException, Exception {
335+
public WritableMap transcribe(int jobId, float[] audioData, ReadableMap options) throws IOException, Exception {
336336
if (isCapturing || isTranscribing) {
337337
throw new Exception("Context is already in capturing or transcribing");
338338
}
@@ -341,7 +341,6 @@ public WritableMap transcribeInputStream(int jobId, InputStream inputStream, Rea
341341
this.isTdrzEnable = options.hasKey("tdrzEnable") && options.getBoolean("tdrzEnable");
342342

343343
isTranscribing = true;
344-
float[] audioData = AudioUtils.decodeWaveFile(inputStream);
345344

346345
boolean hasProgressCallback = options.hasKey("onProgress") && options.getBoolean("onProgress");
347346
boolean hasNewSegmentsCallback = options.hasKey("onNewSegments") && options.getBoolean("onNewSegments");

android/src/newarch/java/com/rnwhisper/RNWhisperModule.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ public void transcribeFile(double id, double jobId, String filePath, ReadableMap
4747
rnwhisper.transcribeFile(id, jobId, filePath, options, promise);
4848
}
4949

50+
@ReactMethod
51+
public void transcribeData(double id, double jobId, String dataBase64, ReadableMap options, Promise promise) {
52+
rnwhisper.transcribeData(id, jobId, dataBase64, options, promise);
53+
}
54+
5055
@ReactMethod
5156
public void startRealtimeTranscribe(double id, double jobId, ReadableMap options, Promise promise) {
5257
rnwhisper.startRealtimeTranscribe(id, jobId, options, promise);

android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ public void transcribeFile(double id, double jobId, String filePath, ReadableMap
4747
rnwhisper.transcribeFile(id, jobId, filePath, options, promise);
4848
}
4949

50+
@ReactMethod
51+
public void transcribeData(double id, double jobId, String dataBase64, ReadableMap options, Promise promise) {
52+
rnwhisper.transcribeData(id, jobId, dataBase64, options, promise);
53+
}
54+
5055
@ReactMethod
5156
public void startRealtimeTranscribe(double id, double jobId, ReadableMap options, Promise promise) {
5257
rnwhisper.startRealtimeTranscribe(id, jobId, options, promise);

0 commit comments

Comments
 (0)