feat(example): wip in TranscribeData example

jhen0409 · jhen0409 · commit 5cda9a201b5b · 2024-11-08T14:35:54.000+08:00
diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
@@ -1230,6 +1230,8 @@ PODS:
     - React-logger (= 0.74.6)
     - React-perflogger (= 0.74.6)
     - React-utils (= 0.74.6)
+  - RNAudioPcmStream (1.1.4):
+    - React
   - RNCClipboard (1.14.3):
     - DoubleConversion
     - glog
@@ -1404,6 +1406,7 @@ DEPENDENCIES:
   - React-runtimescheduler (from `../node_modules/react-native/ReactCommon/react/renderer/runtimescheduler`)
   - React-utils (from `../node_modules/react-native/ReactCommon/react/utils`)
   - ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
+  - "RNAudioPcmStream (from `../node_modules/@fugood/react-native-audio-pcm-stream`)"
   - "RNCClipboard (from `../node_modules/@react-native-clipboard/clipboard`)"
   - RNFS (from `../node_modules/react-native-fs`)
   - RNGestureHandler (from `../node_modules/react-native-gesture-handler`)
@@ -1528,6 +1531,8 @@ EXTERNAL SOURCES:
     :path: "../node_modules/react-native/ReactCommon/react/utils"
   ReactCommon:
     :path: "../node_modules/react-native/ReactCommon"
+  RNAudioPcmStream:
+    :path: "../node_modules/@fugood/react-native-audio-pcm-stream"
   RNCClipboard:
     :path: "../node_modules/@react-native-clipboard/clipboard"
   RNFS:
@@ -1600,6 +1605,7 @@ SPEC CHECKSUMS:
   React-runtimescheduler: b63ebebd3e000e0ba4ac19ca69bdac071559ad57
   React-utils: 2955bdc1b2ed495f14dc7d3bfbbb7e3624cfc0fc
   ReactCommon: 5c504a77030c7ab89eee75b1725b80d8cee7f5d7
+  RNAudioPcmStream: d7491fdfe6bddcebd6ab325df8327014be16743f
   RNCClipboard: 99d86f515e6262a8a1d0915f1f6e6b410698aa3a
   RNFS: 4ac0f0ea233904cb798630b3c077808c06931688
   RNGestureHandler: e723a54dfedabf2a6be36bbcb6c7d5c96de8a379
diff --git a/example/package.json b/example/package.json
@@ -3,12 +3,14 @@
   "version": "0.0.1",
   "private": true,
   "scripts": {
+    "postinstall": "patch-package",
     "android": "react-native run-android",
     "ios": "react-native run-ios",
     "start": "react-native start",
     "pods": "pod-install --quiet"
   },
   "dependencies": {
+    "@fugood/react-native-audio-pcm-stream": "^1.1.4",
     "@react-native-clipboard/clipboard": "^1.14.3",
     "@react-native/babel-preset": "0.74.88",
     "@react-native/metro-config": "^0.74.79",
@@ -26,6 +28,7 @@
   "devDependencies": {
     "@babel/core": "^7.23.9",
     "@babel/preset-env": "^7.24.4",
-    "@babel/runtime": "^7.23.9"
+    "@babel/runtime": "^7.23.9",
+    "patch-package": "^8.0.0"
   }
 }
diff --git a/example/patches/@fugood+react-native-audio-pcm-stream+1.1.4.patch b/example/patches/@fugood+react-native-audio-pcm-stream+1.1.4.patch
@@ -0,0 +1,10 @@
+diff --git a/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts b/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
+index b846438..9e180cd 100644
+--- a/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
++++ b/node_modules/@fugood/react-native-audio-pcm-stream/index.d.ts
+@@ -1,4 +1,4 @@
+-declare module "react-native-live-audio-stream" {
++declare module "@fugood/react-native-audio-pcm-stream" {
+   export interface IAudioRecord {
+     init: (options: Options) => void
+     start: () => void
diff --git a/example/src/App.tsx b/example/src/App.tsx
@@ -8,6 +8,7 @@ import { enableScreens } from 'react-native-screens'
 import { NavigationContainer } from '@react-navigation/native'
 import { createNativeStackNavigator } from '@react-navigation/native-stack'
 import Transcribe from './Transcribe'
+import TranscribeData from './TranscribeData'
 import Bench from './Bench'
 
 enableScreens()
@@ -39,6 +40,12 @@ function HomeScreen({ navigation }: { navigation: any }) {
       >
         <Text style={styles.buttonText}>Transcribe Examples</Text>
       </TouchableOpacity>
+      <TouchableOpacity
+        style={styles.button}
+        onPress={() => navigation.navigate('TranscribeData')}
+      >
+        <Text style={styles.buttonText}>Transcribe Data Example</Text>
+      </TouchableOpacity>
       <TouchableOpacity
         style={styles.button}
         onPress={() => navigation.navigate('Bench')}
@@ -58,6 +65,7 @@ function App() {
         <Stack.Navigator>
           <Stack.Screen name="Home" component={HomeScreen} />
           <Stack.Screen name="Transcribe" component={Transcribe} />
+          <Stack.Screen name="TranscribeData" component={TranscribeData} />
           <Stack.Screen name="Bench" component={Bench} />
         </Stack.Navigator>
       </NavigationContainer>
diff --git a/example/src/Transcribe.tsx b/example/src/Transcribe.tsx
@@ -14,7 +14,7 @@ import { initWhisper, libVersion, AudioSessionIos } from '../../src' // whisper.
 import type { WhisperContext } from '../../src'
 import { Button } from './Button'
 import contextOpts from './context-opts'
-import { createDir, fileDir, modelHost } from './util'
+import { createDir, fileDir, modelHost, toTimestamp } from './util'
 
 const sampleFile = require('../assets/jfk.wav')
 
@@ -52,32 +52,10 @@ const styles = StyleSheet.create({
   logText: { fontSize: 12, color: '#333' },
 })
 
-function toTimestamp(t: number, comma = false) {
-  let msec = t * 10
-  const hr = Math.floor(msec / (1000 * 60 * 60))
-  msec -= hr * (1000 * 60 * 60)
-  const min = Math.floor(msec / (1000 * 60))
-  msec -= min * (1000 * 60)
-  const sec = Math.floor(msec / 1000)
-  msec -= sec * 1000
-
-  const separator = comma ? ',' : '.'
-  const timestamp = `${String(hr).padStart(2, '0')}:${String(min).padStart(
-    2,
-    '0',
-  )}:${String(sec).padStart(2, '0')}${separator}${String(msec).padStart(
-    3,
-    '0',
-  )}`
-
-  return timestamp
-}
-
 const mode = process.env.NODE_ENV === 'development' ? 'debug' : 'release'
 
 const recordFile = `${fileDir}/realtime.wav`
 
-
 const filterPath = (path: string) =>
   path.replace(RNFS.DocumentDirectoryPath, '<DocumentDir>')
 
diff --git a/example/src/TranscribeData.tsx b/example/src/TranscribeData.tsx
@@ -0,0 +1,225 @@
+import React, { useCallback, useEffect, useRef, useState } from 'react'
+import { StyleSheet, ScrollView, View, Text } from 'react-native'
+import LiveAudioStream from '@fugood/react-native-audio-pcm-stream'
+import { Buffer } from 'buffer'
+import RNFS from 'react-native-fs'
+import Sound from 'react-native-sound'
+import { initWhisper, libVersion } from '../../src'
+import type { WhisperContext } from '../../src'
+import { Button } from './Button'
+import contextOpts from './context-opts'
+import { createDir, fileDir } from './util'
+
+const styles = StyleSheet.create({ // style sheet for the TranscribeData screen
+  scrollview: { flexGrow: 1, justifyContent: 'center' },
+  container: {
+    flex: 1,
+    alignItems: 'center',
+    justifyContent: 'center',
+    padding: 4,
+  },
+  buttons: { flexDirection: 'row' },
+  button: { margin: 4, backgroundColor: '#333', borderRadius: 4, padding: 8 },
+  buttonClear: { backgroundColor: '#888' },
+  buttonText: { fontSize: 14, color: 'white', textAlign: 'center' },
+  logContainer: {
+    backgroundColor: 'lightgray',
+    padding: 8,
+    width: '95%',
+    borderRadius: 8,
+    marginVertical: 8,
+  },
+  logText: { fontSize: 12, color: '#333' },
+})
+
+const mode = process.env.NODE_ENV === 'development' ? 'debug' : 'release' // build-mode label appended to the timing messages
+const recordFile = `${fileDir}/record.wav` // given to the recorder as `wavFile`; also the file the play button opens
+
+const audioOptions = { // options handed to LiveAudioStream.init()
+  sampleRate: 16000,
+  channels: 1,
+  bitsPerSample: 16,
+  audioSource: 6, // NOTE(review): presumably an Android audio-source id — confirm against the recorder library docs
+  wavFile: recordFile,
+  bufferSize: 16 * 1024,
+}
+
+/**
+ * Example screen that records audio through LiveAudioStream, keeps the
+ * received chunks in memory and, when recording stops, passes them
+ * (base64-encoded) to `transcribeData` on the current whisper context.
+ */
+export default function TranscribeData() {
+  // The ref lets the unmount cleanup reach the latest context; the state copy
+  // drives rendering and the handlers, because mutating a ref never triggers a
+  // re-render and the record button could otherwise stay disabled after init.
+  const whisperContextRef = useRef<WhisperContext | null>(null)
+  const [whisperContext, setWhisperContext] = useState<WhisperContext | null>(
+    null,
+  )
+  const [logs, setLogs] = useState([`whisper.cpp version: ${libVersion}`])
+  const [transcribeResult, setTranscribeResult] = useState<string | null>(null)
+  const [isRecording, setIsRecording] = useState(false)
+  // Chunks received from the 'data' event since the current recording started
+  const recordedChunksRef = useRef<Buffer[]>([])
+
+  // Append one line (the space-joined arguments) to the on-screen log
+  const log = useCallback((...messages: unknown[]) => {
+    setLogs((prev) => [...prev, messages.join(' ')])
+  }, [])
+
+  // Store (or clear) the active context in both the ref and the state
+  const updateContext = useCallback((ctx: WhisperContext | null) => {
+    whisperContextRef.current = ctx
+    setWhisperContext(ctx)
+  }, [])
+
+  // Release the context when the screen unmounts
+  useEffect(
+    () => () => {
+      whisperContextRef.current?.release()
+      whisperContextRef.current = null
+    },
+    [],
+  )
+
+  const startRecording = async () => {
+    try {
+      await createDir(log)
+      recordedChunksRef.current = []
+
+      LiveAudioStream.init(audioOptions)
+      // Keep every base64-decoded chunk and join them once on stop, instead of
+      // re-copying the whole recording on each 'data' event
+      LiveAudioStream.on('data', (data: string) => {
+        recordedChunksRef.current.push(Buffer.from(data, 'base64'))
+      })
+
+      LiveAudioStream.start()
+      setIsRecording(true)
+      log('Started recording...')
+    } catch (error) {
+      log('Error starting recording:', error)
+    }
+  }
+
+  const stopRecording = async () => {
+    try {
+      await LiveAudioStream.stop()
+      setIsRecording(false)
+      log('Stopped recording')
+
+      const chunks = recordedChunksRef.current
+      if (chunks.length === 0) return log('No recorded data')
+      if (!whisperContext) return log('No context')
+
+      // Encode the bytes captured in memory (not the wav file) as base64
+      const base64Data = Buffer.concat(chunks).toString('base64')
+      log('Start transcribing...')
+
+      const startTime = Date.now()
+      const { promise } = await whisperContext.transcribeData(base64Data, {
+        language: 'en',
+        onProgress: (progress) => {
+          log(`Transcribing progress: ${progress}%`)
+        },
+      })
+      const { result } = await promise
+      const endTime = Date.now()
+
+      setTranscribeResult(
+        `Transcribed result: ${result}\n` +
+          `Transcribed in ${endTime - startTime}ms in ${mode} mode`,
+      )
+      log('Finished transcribing')
+    } catch (error) {
+      // Covers failures from stopping the stream as well as from transcribing
+      log('Error stopping recording:', error)
+    }
+  }
+
+  return (
+    <ScrollView
+      contentInsetAdjustmentBehavior="automatic"
+      contentContainerStyle={styles.scrollview}
+    >
+      <View style={styles.container}>
+        <View style={styles.buttons}>
+          <Button
+            title="Initialize Context"
+            onPress={async () => {
+              if (whisperContext) {
+                log('Found previous context')
+                await whisperContext.release()
+                updateContext(null)
+                log('Released previous context')
+              }
+              log('Initialize context...')
+              const startTime = Date.now()
+              const ctx = await initWhisper({
+                filePath: require('../assets/ggml-base.bin'),
+                ...contextOpts,
+              })
+              const endTime = Date.now()
+              log('Loaded model, ID:', ctx.id)
+              log('Loaded model in', endTime - startTime, `ms in ${mode} mode`)
+              updateContext(ctx)
+            }}
+          />
+        </View>
+
+        <View style={styles.buttons}>
+          <Button
+            title={isRecording ? 'Stop Recording' : 'Start Recording'}
+            onPress={isRecording ? stopRecording : startRecording}
+            disabled={!whisperContext}
+          />
+        </View>
+
+        <View style={styles.logContainer}>
+          {logs.map((msg, index) => (
+            <Text key={index} style={styles.logText}>
+              {msg}
+            </Text>
+          ))}
+        </View>
+
+        {transcribeResult && (
+          <View style={styles.logContainer}>
+            <Text style={styles.logText}>{transcribeResult}</Text>
+          </View>
+        )}
+
+        <Button
+          title="Release Context"
+          style={styles.buttonClear}
+          onPress={async () => {
+            if (!whisperContext) return
+            await whisperContext.release()
+            updateContext(null)
+            log('Released context')
+          }}
+        />
+
+        <Button
+          title="Clear Logs"
+          style={styles.buttonClear}
+          onPress={() => {
+            setLogs([])
+            setTranscribeResult(null)
+          }}
+        />
+
+        <Button
+          title="Play Recorded file"
+          style={styles.buttonClear}
+          onPress={async () => {
+            if (!(await RNFS.exists(recordFile))) {
+              log('Recorded file does not exist')
+              return
+            }
+            // The player is released once playback ends, whether or not it
+            // succeeded
+            const player = new Sound(recordFile, '', (e) => {
+              if (e) {
+                log('error', e)
+                return
+              }
+              player.play((success) => {
+                if (success) {
+                  log('successfully finished playing')
+                } else {
+                  log('playback failed due to audio decoding errors')
+                }
+                player.release()
+              })
+            })
+          }}
+        />
+      </View>
+    </ScrollView>
+  )
+}
diff --git a/example/src/util.ts b/example/src/util.ts
@@ -12,3 +12,24 @@ export const createDir = async (log: any) => {
     await RNFS.mkdir(fileDir)
   }
 }
+
+/**
+ * Format `t`, a count of 10 ms units, as `HH:MM:SS.mmm`.
+ *
+ * @param t duration; it is multiplied by 10 to obtain milliseconds
+ * @param comma when true the milliseconds are separated by ',' instead of '.'
+ * @returns the formatted timestamp string
+ */
+export function toTimestamp(t: number, comma = false) {
+  const MS_PER_SECOND = 1000
+  const MS_PER_MINUTE = MS_PER_SECOND * 60
+  const MS_PER_HOUR = MS_PER_SECOND * 60 * 60
+  const pad = (value: number, width: number) =>
+    String(value).padStart(width, '0')
+
+  // Peel off hours, then minutes, then seconds; the rest is milliseconds
+  const totalMs = t * 10
+  const hours = Math.floor(totalMs / MS_PER_HOUR)
+  const afterHours = totalMs - hours * MS_PER_HOUR
+  const minutes = Math.floor(afterHours / MS_PER_MINUTE)
+  const afterMinutes = afterHours - minutes * MS_PER_MINUTE
+  const seconds = Math.floor(afterMinutes / MS_PER_SECOND)
+  const millis = afterMinutes - seconds * MS_PER_SECOND
+
+  const clock = [pad(hours, 2), pad(minutes, 2), pad(seconds, 2)].join(':')
+  return `${clock}${comma ? ',' : '.'}${pad(millis, 3)}`
+}
diff --git a/example/yarn.lock b/example/yarn.lock