Skip to content

Commit 5cda9a2

Browse files
committed
feat(example): wip in TranscribeData example
1 parent 3f07980 commit 5cda9a2

File tree

8 files changed

+527
-25
lines changed

8 files changed

+527
-25
lines changed

example/ios/Podfile.lock

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,8 @@ PODS:
12301230
- React-logger (= 0.74.6)
12311231
- React-perflogger (= 0.74.6)
12321232
- React-utils (= 0.74.6)
1233+
- RNAudioPcmStream (1.1.4):
1234+
- React
12331235
- RNCClipboard (1.14.3):
12341236
- DoubleConversion
12351237
- glog
@@ -1404,6 +1406,7 @@ DEPENDENCIES:
14041406
- React-runtimescheduler (from `../node_modules/react-native/ReactCommon/react/renderer/runtimescheduler`)
14051407
- React-utils (from `../node_modules/react-native/ReactCommon/react/utils`)
14061408
- ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
1409+
- "RNAudioPcmStream (from `../node_modules/@fugood/react-native-audio-pcm-stream`)"
14071410
- "RNCClipboard (from `../node_modules/@react-native-clipboard/clipboard`)"
14081411
- RNFS (from `../node_modules/react-native-fs`)
14091412
- RNGestureHandler (from `../node_modules/react-native-gesture-handler`)
@@ -1528,6 +1531,8 @@ EXTERNAL SOURCES:
15281531
:path: "../node_modules/react-native/ReactCommon/react/utils"
15291532
ReactCommon:
15301533
:path: "../node_modules/react-native/ReactCommon"
1534+
RNAudioPcmStream:
1535+
:path: "../node_modules/@fugood/react-native-audio-pcm-stream"
15311536
RNCClipboard:
15321537
:path: "../node_modules/@react-native-clipboard/clipboard"
15331538
RNFS:
@@ -1600,6 +1605,7 @@ SPEC CHECKSUMS:
16001605
React-runtimescheduler: b63ebebd3e000e0ba4ac19ca69bdac071559ad57
16011606
React-utils: 2955bdc1b2ed495f14dc7d3bfbbb7e3624cfc0fc
16021607
ReactCommon: 5c504a77030c7ab89eee75b1725b80d8cee7f5d7
1608+
RNAudioPcmStream: d7491fdfe6bddcebd6ab325df8327014be16743f
16031609
RNCClipboard: 99d86f515e6262a8a1d0915f1f6e6b410698aa3a
16041610
RNFS: 4ac0f0ea233904cb798630b3c077808c06931688
16051611
RNGestureHandler: e723a54dfedabf2a6be36bbcb6c7d5c96de8a379

example/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
"version": "0.0.1",
44
"private": true,
55
"scripts": {
6+
"postinstall": "patch-package",
67
"android": "react-native run-android",
78
"ios": "react-native run-ios",
89
"start": "react-native start",
910
"pods": "pod-install --quiet"
1011
},
1112
"dependencies": {
13+
"@fugood/react-native-audio-pcm-stream": "^1.1.4",
1214
"@react-native-clipboard/clipboard": "^1.14.3",
1315
"@react-native/babel-preset": "0.74.88",
1416
"@react-native/metro-config": "^0.74.79",
@@ -26,6 +28,7 @@
2628
"devDependencies": {
2729
"@babel/core": "^7.23.9",
2830
"@babel/preset-env": "^7.24.4",
29-
"@babel/runtime": "^7.23.9"
31+
"@babel/runtime": "^7.23.9",
32+
"patch-package": "^8.0.0"
3033
}
3134
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
diff --git a/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts b/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
2+
index b846438..9e180cd 100644
3+
--- a/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
4+
+++ b/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
5+
@@ -1,4 +1,4 @@
6+
-declare module "react-native-live-audio-stream" {
7+
+declare module "@fugood/react-native-audio-pcm-stream" {
8+
export interface IAudioRecord {
9+
init: (options: Options) => void
10+
start: () => void

example/src/App.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { enableScreens } from 'react-native-screens'
88
import { NavigationContainer } from '@react-navigation/native'
99
import { createNativeStackNavigator } from '@react-navigation/native-stack'
1010
import Transcribe from './Transcribe'
11+
import TranscribeData from './TranscribeData'
1112
import Bench from './Bench'
1213

1314
enableScreens()
@@ -39,6 +40,12 @@ function HomeScreen({ navigation }: { navigation: any }) {
3940
>
4041
<Text style={styles.buttonText}>Transcribe Examples</Text>
4142
</TouchableOpacity>
43+
<TouchableOpacity
44+
style={styles.button}
45+
onPress={() => navigation.navigate('TranscribeData')}
46+
>
47+
<Text style={styles.buttonText}>Transcribe Data Example</Text>
48+
</TouchableOpacity>
4249
<TouchableOpacity
4350
style={styles.button}
4451
onPress={() => navigation.navigate('Bench')}
@@ -58,6 +65,7 @@ function App() {
5865
<Stack.Navigator>
5966
<Stack.Screen name="Home" component={HomeScreen} />
6067
<Stack.Screen name="Transcribe" component={Transcribe} />
68+
<Stack.Screen name="TranscribeData" component={TranscribeData} />
6169
<Stack.Screen name="Bench" component={Bench} />
6270
</Stack.Navigator>
6371
</NavigationContainer>

example/src/Transcribe.tsx

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import { initWhisper, libVersion, AudioSessionIos } from '../../src' // whisper.
1414
import type { WhisperContext } from '../../src'
1515
import { Button } from './Button'
1616
import contextOpts from './context-opts'
17-
import { createDir, fileDir, modelHost } from './util'
17+
import { createDir, fileDir, modelHost, toTimestamp } from './util'
1818

1919
const sampleFile = require('../assets/jfk.wav')
2020

@@ -52,32 +52,10 @@ const styles = StyleSheet.create({
5252
logText: { fontSize: 12, color: '#333' },
5353
})
5454

55-
function toTimestamp(t: number, comma = false) {
56-
let msec = t * 10
57-
const hr = Math.floor(msec / (1000 * 60 * 60))
58-
msec -= hr * (1000 * 60 * 60)
59-
const min = Math.floor(msec / (1000 * 60))
60-
msec -= min * (1000 * 60)
61-
const sec = Math.floor(msec / 1000)
62-
msec -= sec * 1000
63-
64-
const separator = comma ? ',' : '.'
65-
const timestamp = `${String(hr).padStart(2, '0')}:${String(min).padStart(
66-
2,
67-
'0',
68-
)}:${String(sec).padStart(2, '0')}${separator}${String(msec).padStart(
69-
3,
70-
'0',
71-
)}`
72-
73-
return timestamp
74-
}
75-
7655
const mode = process.env.NODE_ENV === 'development' ? 'debug' : 'release'
7756

7857
const recordFile = `${fileDir}/realtime.wav`
7958

80-
8159
const filterPath = (path: string) =>
8260
path.replace(RNFS.DocumentDirectoryPath, '<DocumentDir>')
8361

example/src/TranscribeData.tsx

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
import React, { useCallback, useEffect, useRef, useState } from 'react'
2+
import { StyleSheet, ScrollView, View, Text } from 'react-native'
3+
import LiveAudioStream from '@fugood/react-native-audio-pcm-stream'
4+
import { Buffer } from 'buffer'
5+
import RNFS from 'react-native-fs'
6+
import Sound from 'react-native-sound'
7+
import { initWhisper, libVersion } from '../../src'
8+
import type { WhisperContext } from '../../src'
9+
import { Button } from './Button'
10+
import contextOpts from './context-opts'
11+
import { createDir, fileDir } from './util'
12+
13+
// Static style table for the TranscribeData example screen.
const styles = StyleSheet.create({
  // Content container of the outer ScrollView
  scrollview: {
    flexGrow: 1,
    justifyContent: 'center',
  },
  // Centered column holding every control on the screen
  container: {
    flex: 1,
    alignItems: 'center',
    justifyContent: 'center',
    padding: 4,
  },
  // Horizontal row of buttons
  buttons: {
    flexDirection: 'row',
  },
  button: {
    margin: 4,
    backgroundColor: '#333',
    borderRadius: 4,
    padding: 8,
  },
  // Lighter background used for the secondary buttons
  buttonClear: {
    backgroundColor: '#888',
  },
  buttonText: {
    fontSize: 14,
    color: 'white',
    textAlign: 'center',
  },
  // Gray rounded box used for the log list and the transcription result
  logContainer: {
    backgroundColor: 'lightgray',
    padding: 8,
    width: '95%',
    borderRadius: 8,
    marginVertical: 8,
  },
  logText: {
    fontSize: 12,
    color: '#333',
  },
})
34+
35+
// Build flavour label appended to the timing messages shown on screen.
const mode = process.env.NODE_ENV !== 'development' ? 'release' : 'debug'

// Path handed to the audio stream as `wavFile`; also the file the
// "Play Recorded file" button tries to play.
const recordFile = `${fileDir}/record.wav`

// Options passed to LiveAudioStream.init(): 16 kHz, mono, 16-bit samples.
const audioOptions = {
  sampleRate: 16000,
  channels: 1,
  bitsPerSample: 16,
  // NOTE(review): presumably the Android-only audio source id
  // (6 = VOICE_RECOGNITION) - confirm against the library README.
  audioSource: 6,
  wavFile: recordFile,
  bufferSize: 16384, // 16 * 1024
}
46+
47+
/**
 * Example screen: records microphone audio through LiveAudioStream, keeps the
 * decoded chunks in memory and, when recording stops, sends them base64-encoded
 * to `WhisperContext.transcribeData`.
 *
 * The whisper context lives in a ref (it is not render data), while
 * `hasContext` mirrors its presence in state so the UI re-renders when the
 * context is created or released.
 */
export default function TranscribeData() {
  const whisperContextRef = useRef<WhisperContext | null>(null)
  // Ref writes do not trigger a render, so button state is driven by this flag.
  const [hasContext, setHasContext] = useState(false)
  const [logs, setLogs] = useState([`whisper.cpp version: ${libVersion}`])
  const [transcribeResult, setTranscribeResult] = useState<string | null>(null)
  const [isRecording, setIsRecording] = useState(false)
  // Mirror of `isRecording` that the unmount cleanup can read without a stale closure.
  const isRecordingRef = useRef(false)
  // Decoded chunks of the current recording; concatenated once on stop
  // instead of re-copying the whole recording for every incoming chunk.
  const recordedChunksRef = useRef<Buffer[]>([])

  /** Appends one line (arguments joined by spaces) to the on-screen log. */
  const log = useCallback((...messages: any[]) => {
    setLogs((prev) => [...prev, messages.join(' ')])
  }, [])

  useEffect(
    () => () => {
      if (isRecordingRef.current) {
        isRecordingRef.current = false
        // Best effort: the screen is gone, so there is nowhere to report a failure.
        Promise.resolve(LiveAudioStream.stop()).catch(() => {})
      }
      whisperContextRef.current?.release()
      whisperContextRef.current = null
    },
    [],
  )

  /** Releases any existing context, then loads the bundled model. */
  const initializeContext = async () => {
    try {
      const previous = whisperContextRef.current
      if (previous) {
        log('Found previous context')
        await previous.release()
        whisperContextRef.current = null
        setHasContext(false)
        log('Released previous context')
      }
      log('Initialize context...')
      const startTime = Date.now()
      const ctx = await initWhisper({
        filePath: require('../assets/ggml-base.bin'),
        ...contextOpts,
      })
      const endTime = Date.now()
      log('Loaded model, ID:', ctx.id)
      log('Loaded model in', endTime - startTime, `ms in ${mode} mode`)
      whisperContextRef.current = ctx
      setHasContext(true)
    } catch (error) {
      log('Error initializing context:', error)
    }
  }

  /** Releases the current context, if any. */
  const releaseContext = async () => {
    const ctx = whisperContextRef.current
    if (!ctx) return
    try {
      await ctx.release()
      whisperContextRef.current = null
      setHasContext(false)
      log('Released context')
    } catch (error) {
      log('Error releasing context:', error)
    }
  }

  /** Starts the audio stream and collects every decoded chunk in memory. */
  const startRecording = async () => {
    try {
      await createDir(log)
      recordedChunksRef.current = []

      LiveAudioStream.init(audioOptions)
      LiveAudioStream.on('data', (data: string) => {
        // Each event carries one base64-encoded chunk of audio data.
        recordedChunksRef.current.push(Buffer.from(data, 'base64'))
      })

      LiveAudioStream.start()
      isRecordingRef.current = true
      setIsRecording(true)
      log('Started recording...')
    } catch (error) {
      log('Error starting recording:', error)
    }
  }

  /** Stops the audio stream and transcribes the audio collected in memory. */
  const stopRecording = async () => {
    try {
      // The value resolved by stop() is not used; the audio comes from the
      // chunks collected by the 'data' listener.
      await LiveAudioStream.stop()
      isRecordingRef.current = false
      setIsRecording(false)
      log('Stopped recording')

      const chunks = recordedChunksRef.current
      if (chunks.length === 0) {
        log('No recorded data')
        return
      }
      // Read the ref at call time so a context released mid-recording is noticed.
      const ctx = whisperContextRef.current
      if (!ctx) {
        log('No context')
        return
      }

      // Join the in-memory chunks once and encode them for transcribeData
      const base64Data = Buffer.concat(chunks).toString('base64')
      log('Start transcribing...')

      const startTime = Date.now()
      const { promise } = await ctx.transcribeData(base64Data, {
        language: 'en',
        onProgress: (progress) => {
          log(`Transcribing progress: ${progress}%`)
        },
      })
      const { result } = await promise
      const endTime = Date.now()

      setTranscribeResult(
        `Transcribed result: ${result}\n` +
          `Transcribed in ${endTime - startTime}ms in ${mode} mode`,
      )
      log('Finished transcribing')
    } catch (error) {
      log('Error stopping recording:', error)
    }
  }

  /** Plays the file at `recordFile` if it exists. */
  const playRecordedFile = async () => {
    if (!(await RNFS.exists(recordFile))) {
      log('Recorded file does not exist')
      return
    }
    const player = new Sound(recordFile, '', (e) => {
      if (e) {
        log('error', e)
        return
      }
      player.play((success) => {
        if (success) {
          log('successfully finished playing')
        } else {
          log('playback failed due to audio decoding errors')
        }
        player.release()
      })
    })
  }

  // Keep the button enabled while recording so the stream can always be
  // stopped, even if the context was released in the meantime.
  const recordButtonDisabled = !hasContext && !isRecording

  return (
    <ScrollView
      contentInsetAdjustmentBehavior="automatic"
      contentContainerStyle={styles.scrollview}
    >
      <View style={styles.container}>
        <View style={styles.buttons}>
          <Button title="Initialize Context" onPress={initializeContext} />
        </View>

        <View style={styles.buttons}>
          <Button
            title={isRecording ? 'Stop Recording' : 'Start Recording'}
            onPress={isRecording ? stopRecording : startRecording}
            disabled={recordButtonDisabled}
          />
        </View>

        <View style={styles.logContainer}>
          {logs.map((msg, index) => (
            <Text key={index} style={styles.logText}>
              {msg}
            </Text>
          ))}
        </View>

        {transcribeResult && (
          <View style={styles.logContainer}>
            <Text style={styles.logText}>{transcribeResult}</Text>
          </View>
        )}

        <Button
          title="Release Context"
          style={styles.buttonClear}
          onPress={releaseContext}
        />

        <Button
          title="Clear Logs"
          style={styles.buttonClear}
          onPress={() => {
            setLogs([])
            setTranscribeResult(null)
          }}
        />

        <Button
          title="Play Recorded file"
          style={styles.buttonClear}
          onPress={playRecordedFile}
        />
      </View>
    </ScrollView>
  )
}

example/src/util.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,24 @@ export const createDir = async (log: any) => {
1212
await RNFS.mkdir(fileDir)
1313
}
1414
}
15+
16+
/**
 * Formats a time value as a zero-padded `HH:MM:SS.mmm` timestamp.
 *
 * @param t - Time in units of 10 ms (it is multiplied by 10 to obtain
 *   milliseconds). Fractional values are rounded to the nearest millisecond;
 *   negative or non-finite values are treated as 0.
 * @param comma - When true, separates seconds and milliseconds with `,`
 *   instead of `.`.
 * @returns The formatted timestamp, e.g. `00:00:01.000` for `t = 100`.
 */
export function toTimestamp(t: number, comma = false): string {
  const msPerSecond = 1000
  const msPerMinute = 60 * msPerSecond
  const msPerHour = 60 * msPerMinute

  // Work on a whole, non-negative number of milliseconds so that no decimal
  // point or minus sign can leak into the padded fields below. Rounding the
  // total first also lets 999.6 ms carry over into the seconds field.
  let msec = Number.isFinite(t) ? Math.max(0, Math.round(t * 10)) : 0

  const hr = Math.floor(msec / msPerHour)
  msec -= hr * msPerHour
  const min = Math.floor(msec / msPerMinute)
  msec -= min * msPerMinute
  const sec = Math.floor(msec / msPerSecond)
  msec -= sec * msPerSecond

  const separator = comma ? ',' : '.'
  const pad = (value: number, width: number) =>
    String(value).padStart(width, '0')

  return `${pad(hr, 2)}:${pad(min, 2)}:${pad(sec, 2)}${separator}${pad(msec, 3)}`
}

0 commit comments

Comments
 (0)