From 52cb13ff678afd06153c8fd3bf46e5f72212d4a7 Mon Sep 17 00:00:00 2001 From: hlorenzi Date: Sun, 10 Mar 2024 23:01:42 -0300 Subject: [PATCH] add recorder --- src/AnalysisChart.tsx | 8 +- src/RecordingPanel.tsx | 278 ++++++++++++++++++++++++++++++++++++++++ src/VowelChart.tsx | 6 +- src/audioWorklet.ts | 66 ++++++++++ src/common.ts | 15 +++ src/formantExtractor.ts | 84 ++++++++++-- src/main.tsx | 46 ++++--- src/vowelSynth.ts | 111 ++++++++++++++-- src/wavEncode.ts | 85 ++++++++++++ webpack.config.cjs | 1 + 10 files changed, 653 insertions(+), 47 deletions(-) create mode 100644 src/RecordingPanel.tsx create mode 100644 src/audioWorklet.ts create mode 100644 src/common.ts create mode 100644 src/wavEncode.ts diff --git a/src/AnalysisChart.tsx b/src/AnalysisChart.tsx index 3d9d5a5..d32d7c1 100644 --- a/src/AnalysisChart.tsx +++ b/src/AnalysisChart.tsx @@ -20,9 +20,9 @@ export function AnalysisChart(props: { return } @@ -107,7 +107,7 @@ function draw( } canvasCtx.stroke() - const timeData = synth.getAnalyserTimeData() + const timeData = synth.getWaveformLatest(Math.floor(synth.ctx.sampleRate * 0.025)) canvasCtx.lineWidth = 2 canvasCtx.strokeStyle = "#000" diff --git a/src/RecordingPanel.tsx b/src/RecordingPanel.tsx new file mode 100644 index 0000000..f75de59 --- /dev/null +++ b/src/RecordingPanel.tsx @@ -0,0 +1,278 @@ +import * as Solid from "solid-js" +import { VowelSynth } from "./vowelSynth.ts" +import * as Data from "./data.ts" +import * as Common from "./common.ts" +import * as Wav from "./wavEncode.ts" +//import * as Styled from "solid-styled-components" + + +export function RecordingPanel(props: { + synth: VowelSynth, +}) +{ + const state: State = { + canvas: undefined!, + ctx: undefined!, + synth: props.synth, + + sampleBuffer: new Float32Array(props.synth.recordingBuffer.length), + + recording: false, + recordingIndex: 0, + waveformBufferIndex: 0, + + playing: false, + playingIndex: 0, + } + + + Solid.onMount(() => { + state.ctx = state.canvas.getContext("2d")! + + window.requestAnimationFrame(() => draw(state)) + }) + + + return <> + +
+ + { " " } + + { " " } + + { " " } + + +} + + +interface State +{ + canvas: HTMLCanvasElement + ctx: CanvasRenderingContext2D + synth: VowelSynth + + /// Each sample is in the range [-1, 1]. + sampleBuffer: Float32Array + + recording: boolean + recordingIndex: number + waveformBufferIndex: number + + playing: boolean + playingIndex: number +} + + +function recordingToggle(state: State) +{ + if (state.recording) + { + recordingFinish(state) + return + } + + state.sampleBuffer.fill(0) + state.recording = true + state.recordingIndex = 0 + state.waveformBufferIndex = state.synth.waveformBufferIndex + + window.requestAnimationFrame(() => recordingFrame(state)) +} + + +function recordingFinish(state: State) +{ + state.recording = false + state.synth.recordingBuffer.copyToChannel(state.sampleBuffer, 0, 0) +} + + +function recordingFrame(state: State) +{ + if (!state.recording) + return + + const waveform = state.synth + .getWaveform(state.waveformBufferIndex) + .slice(0, state.sampleBuffer.length - state.recordingIndex) + + state.sampleBuffer.set(waveform, state.recordingIndex) + state.recordingIndex += waveform.length + state.waveformBufferIndex = state.synth.waveformBufferIndex + + if (state.recordingIndex >= state.sampleBuffer.length) + { + recordingFinish(state) + return + } + + window.requestAnimationFrame(() => recordingFrame(state)) +} + + +function play(state: State) +{ + state.playing = true + state.playingIndex = 0 + state.synth.playRecording() + + window.requestAnimationFrame(() => playFrame(state)) +} + + +function playFrame(state: State) +{ + if (!state.playing) + return + + const timeDelta = state.synth.ctx.currentTime - state.synth.recordingPlaybackStartTime + state.playingIndex = timeDelta * state.synth.ctx.sampleRate + + if (state.playingIndex >= state.recordingIndex) + { + state.playing = false + return + } + + window.requestAnimationFrame(() => playFrame(state)) +} + + +function exportWav(state: State) +{ + const data = Wav.encode( + state.sampleBuffer.slice(0, state.recordingIndex), + state.synth.ctx.sampleRate) + + const blob = new Blob([data], { type: "octet/stream" }) + const url = window.URL.createObjectURL(blob) + + const element = document.createElement("a") + element.setAttribute("href", url) + element.setAttribute("download", "recording.wav") + + element.style.display = "none" + document.body.appendChild(element) + element.click() + document.body.removeChild(element) +} + + +async function importWav(state: State) +{ + const handles = await (window as any).showOpenFilePicker({ + multiple: false, + types: [{ + description: "Audio files", + accept: { + "audio/wav": [".wav"], + }, + }] + }) + const handle = handles[0] + const file: File = await handle.getFile() + const bytes = await file.arrayBuffer() + const buffer = await state.synth.ctx.decodeAudioData(bytes) + + const waveform = buffer + .getChannelData(0) + .slice(0, state.sampleBuffer.length) + + state.sampleBuffer.fill(0) + state.sampleBuffer.set(waveform, 0) + state.recordingIndex = waveform.length + recordingFinish(state) +} + + +function draw(state: State) +{ + window.requestAnimationFrame(() => draw(state)) + + Common.canvasResize(state.canvas) + + const w = state.canvas.width + const h = state.canvas.height + + state.ctx.save() + state.ctx.translate(0.5, 0.5) + + state.ctx.fillStyle = "#eee" + state.ctx.fillRect(0, 0, w, h) + + const sampleIndexToX = (index: number) => { + const margin = 20 + return (index / state.sampleBuffer.length) * (w - margin * 2) + margin + } + + const sampleAmplitudeToY = (amplitude: number) => { + const margin = 10 + return h / 2 - amplitude * (h / 2 - margin) + } + + + // Draw boundaries + state.ctx.strokeStyle = "#000" + state.ctx.lineWidth = 1 + state.ctx.beginPath() + state.ctx.moveTo(sampleIndexToX(0), 0) + state.ctx.lineTo(sampleIndexToX(0), h) + state.ctx.moveTo(sampleIndexToX(state.sampleBuffer.length), 0) + state.ctx.lineTo(sampleIndexToX(state.sampleBuffer.length), h) + state.ctx.stroke() + + // Draw waveform + state.ctx.strokeStyle = "#000" + state.ctx.lineWidth = 1 + state.ctx.beginPath() + state.ctx.moveTo(sampleIndexToX(0), sampleAmplitudeToY(0)) + for (let i = 0; i < state.recordingIndex; i++) + { + const amplitude = state.sampleBuffer.at(i) ?? 0 + state.ctx.lineTo(sampleIndexToX(i), sampleAmplitudeToY(amplitude)) + } + state.ctx.lineTo(sampleIndexToX(state.recordingIndex), sampleAmplitudeToY(0)) + state.ctx.lineTo(sampleIndexToX(state.sampleBuffer.length), sampleAmplitudeToY(0)) + state.ctx.stroke() + + // Draw recording head + if (state.recording) + { + state.ctx.strokeStyle = "#f00" + state.ctx.lineWidth = 2 + state.ctx.beginPath() + state.ctx.moveTo(sampleIndexToX(state.recordingIndex), 0) + state.ctx.lineTo(sampleIndexToX(state.recordingIndex), h) + state.ctx.stroke() + } + + // Draw playing head + if (state.playing) + { + state.ctx.strokeStyle = "#00f" + state.ctx.lineWidth = 2 + state.ctx.beginPath() + state.ctx.moveTo(sampleIndexToX(state.playingIndex), 0) + state.ctx.lineTo(sampleIndexToX(state.playingIndex), h) + state.ctx.stroke() + } + + state.ctx.restore() +} \ No newline at end of file diff --git a/src/VowelChart.tsx b/src/VowelChart.tsx index 940e79c..63fecee 100644 --- a/src/VowelChart.tsx +++ b/src/VowelChart.tsx @@ -42,9 +42,9 @@ export function VowelChart(props: { return } diff --git a/src/audioWorklet.ts b/src/audioWorklet.ts new file mode 100644 index 0000000..af3fcc2 --- /dev/null +++ b/src/audioWorklet.ts @@ -0,0 +1,66 @@ +interface AudioWorkletProcessor { + readonly port: MessagePort; +} + +interface AudioWorkletProcessorImpl extends AudioWorkletProcessor { + process( + inputs: Float32Array[][], + outputs: Float32Array[][], + parameters: Record + ): boolean; +} + +declare var AudioWorkletProcessor: { + prototype: AudioWorkletProcessor; + new (options?: AudioWorkletNodeOptions): AudioWorkletProcessor; +}; + +type AudioParamDescriptor = { + name: string, + automationRate: AutomationRate, + minValue: number, + maxValue: number, + defaultValue: number +} + +interface AudioWorkletProcessorConstructor { + new (options?: AudioWorkletNodeOptions): AudioWorkletProcessorImpl; + parameterDescriptors?: AudioParamDescriptor[]; +} + +declare function registerProcessor( + name: string, + processorCtor: AudioWorkletProcessorConstructor, +): void; + + +const buffer = new Float32Array(1024) + + +class WaveformCaptureProcessor extends AudioWorkletProcessor +{ + constructor() + { + super() + } + + + process( + inputs: Float32Array[][], + outputs: Float32Array[][], + parameters: Record) + { + const data = inputs[0][0] + for (let i = 0; i < data.length; i++) + buffer[i] = data[i] + + this.port.postMessage({ + buffer, + length: data.length + }) + + return true + } +} + +registerProcessor("waveformCapture", WaveformCaptureProcessor) \ No newline at end of file diff --git a/src/common.ts b/src/common.ts new file mode 100644 index 0000000..6d084aa --- /dev/null +++ b/src/common.ts @@ -0,0 +1,15 @@ +export function canvasResize(canvas: HTMLCanvasElement) +{ + const pixelRatio = window.devicePixelRatio || 1 + + const rect = canvas.getBoundingClientRect() + const w = + Math.round(pixelRatio * rect.right) - + Math.round(pixelRatio * rect.left) + const h = + Math.round(pixelRatio * rect.bottom) - + Math.round(pixelRatio * rect.top) + + canvas.width = w + canvas.height = h +} \ No newline at end of file diff --git a/src/formantExtractor.ts b/src/formantExtractor.ts index e6acc16..243fdc4 100644 --- a/src/formantExtractor.ts +++ b/src/formantExtractor.ts @@ -3,34 +3,38 @@ import { findRoots, Complex } from "./roots.ts" // From: https://www.mathworks.com/help/signal/ug/formant-estimation-with-lpc-coefficients.html +// From: https://github.com/praat/praat/blob/master/fon/Sound_to_Formant.cpp export function extractFormants( + /// An array of samples in the range [-1, 1] sample: Float32Array, + /// The sampling frequency in hertz samplingFrequency: number) + /// Returns the frequencies of the formants in hertz : number[] { const sampleWindowed = - sample.map((s, i) => s * hammingWindow(i, sample.length)) + //sample.map((s, i) => s * hammingWindow(i, sample.length)) + sample.map((s, i) => s * praatGaussianWindow(i, sample.length)) const sampleFiltered = - preemphasisFilter(sampleWindowed) + //preemphasisFilter(sampleWindowed) + praatPreemphasis(sampleWindowed, samplingFrequency) const lpc = - forwardLinearPrediction(sampleFiltered, 8) + forwardLinearPrediction(sampleFiltered, 10) const roots = findRoots(lpc) .filter(c => c.imag >= 0) + .map(c => praatFixRootToUnitCircle(c)) const angles = roots .map(c => Math.atan2(c.imag, c.real)) const frequencies = angles - .map(a => a * (samplingFrequency / (2 * Math.PI))) - - const complexMagnitude = - (c: Complex) => Math.sqrt(c.real * c.real + c.imag * c.imag) + .map(a => a * samplingFrequency / 2 / Math.PI) const bandwidths = roots - .map(r => -0.5 * (samplingFrequency / (2 * Math.PI)) * Math.log(complexMagnitude(r))) + .map(r => -Math.log(complexMagnitude(r)) * samplingFrequency / 2 / Math.PI) const formants = [] for (let i = 0; i < angles.length; i++) @@ -46,6 +50,31 @@ export function extractFormants( } +function complexMagnitude(c: Complex): number +{ + return Math.sqrt(c.real * c.real + c.imag * c.imag) +} + + +function complexConjugate(c: Complex): Complex +{ + return { + real: c.real, + imag: -c.imag, + } +} + + +function complexDivide(a: Complex, b: Complex): Complex +{ + const d = (b.imag * b.imag + b.real * b.real) + return { + real: (a.real * b.real - a.imag * b.imag) / d, + imag: (a.imag * b.real - a.real * b.imag) / d, + } +} + + function hammingWindow( n: number, nMax: number) @@ -55,6 +84,17 @@ function hammingWindow( } +function praatGaussianWindow( + n: number, + nMax: number) +{ + const nMid = 0.5 * (nMax + 1) + const edge = Math.exp(-12.0) + return (Math.exp(-48.0 * (n - nMid) * (n - nMid) / (nMax + 1) / (nMax + 1)) - edge) / + (1.0 - edge) +} + + function preemphasisFilter( array: Float32Array) { @@ -69,4 +109,32 @@ function preemphasisFilter( result[i] = a0 * array[i] - a1 * result[i - 1] return result +} + + +function praatPreemphasis( + array: Float32Array, + samplingFrequency: number) +{ + const result = new Float32Array(array) + + const frequency = 50 + const dx = 1 / samplingFrequency + const preEmphasis = Math.exp(-2.0 * Math.PI * frequency * dx) + + for (let i = array.length - 1; i >= 2; i--) + result[i] -= preEmphasis * result[i - 1] + + return result +} + + +function praatFixRootToUnitCircle(root: Complex) +{ + if (complexMagnitude(root) <= 1) + return root + + return complexDivide( + { imag: 0, real: 1 }, + complexConjugate(root)) } \ No newline at end of file diff --git a/src/main.tsx b/src/main.tsx index 8427cb8..fddaf43 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -5,32 +5,36 @@ import * as Styled from "solid-styled-components" import { VowelSynth } from "./vowelSynth.ts" import { VowelChart } from "./VowelChart.tsx" import { AnalysisChart } from "./AnalysisChart.tsx" +import { RecordingPanel } from "./RecordingPanel.tsx" function Page() { - const synth = new VowelSynth() + const [synth, setSynth] = Solid.createSignal(undefined!) + VowelSynth.create().then(setSynth) - return
-
-
- Click and drag to synthesize vowel sounds via formant frequencies.

- The blue bars on the bottom chart shows formant frequencies extracted from the waveform data. (Not working properly) -
-
- -
- -
- -
+ return +
+ Click and drag to synthesize vowel sounds via formant frequencies.

+ The blue bars on the bottom chart shows formant frequencies extracted from the waveform data. (Not working properly) +
+
+ +
+ +
+ + { " " } + +
+
} diff --git a/src/vowelSynth.ts b/src/vowelSynth.ts index 51bae17..d8256fc 100644 --- a/src/vowelSynth.ts +++ b/src/vowelSynth.ts @@ -1,3 +1,6 @@ +const recordingBufferLength = 80000 + + export class VowelSynth { ctx: AudioContext @@ -11,20 +14,43 @@ export class VowelSynth nodeAnalyserTimeDomainData: Float32Array nodeMicSrc?: MediaStreamAudioSourceNode nodeMicFilter: BiquadFilterNode + recordingBuffer: AudioBuffer + nodeRecordingSrc?: AudioBufferSourceNode + recordingPlaybackStartTime: number + waveformCaptureNode: AudioWorkletNode + waveformBuffer: Float32Array + waveformBufferReturn: Float32Array + waveformBufferIndex: number + + + static async create(): Promise + { + const ctx = new AudioContext() + await ctx.audioWorklet.addModule("build/audioWorklet.js") + return new VowelSynth(ctx) + } + - - constructor() + private constructor(ctx: AudioContext) { - this.ctx = new AudioContext() + this.ctx = ctx + this.waveformCaptureNode = new AudioWorkletNode(this.ctx, "waveformCapture") + this.waveformCaptureNode.port.onmessage = (ev) => this.captureWaveform(ev) + this.waveformCaptureNode.connect(this.ctx.destination) + this.nodeAnalyser = this.ctx.createAnalyser() this.nodeAnalyser.fftSize = 2048 this.nodeAnalyser.maxDecibels = -20 this.nodeAnalyser.minDecibels = -90 this.nodeAnalyser.smoothingTimeConstant = 0.8 - this.nodeAnalyserData = new Uint8Array(this.nodeAnalyser.frequencyBinCount) - this.nodeAnalyserTimeDomainData = new Float32Array(this.nodeAnalyser.frequencyBinCount) + this.nodeAnalyserData = new Uint8Array(this.nodeAnalyser.fftSize) + this.nodeAnalyserTimeDomainData = new Float32Array(this.nodeAnalyser.fftSize) + this.nodeAnalyser.connect(this.waveformCaptureNode) + + this.waveformBuffer = new Float32Array(80000) + this.waveformBufferIndex = 0 this.nodeMicFilter = this.ctx.createBiquadFilter() this.nodeMicFilter.type = "lowpass" @@ -33,8 +59,8 @@ export class VowelSynth this.nodeMicFilter.connect(this.nodeAnalyser) - this.nodeFormant1Gain = this.ctx.createGain() - this.nodeFormant1Gain.gain.value = 0 + this.nodeFormant1Gain = this.ctx.createGain() + this.nodeFormant1Gain.gain.value = 0 this.nodeFormant1Gain.connect(this.nodeAnalyser) this.nodeFormant1Gain.connect(this.ctx.destination) @@ -45,8 +71,8 @@ export class VowelSynth this.nodeFormant1Filter.connect(this.nodeFormant1Gain) - this.nodeFormant2Gain = this.ctx.createGain() - this.nodeFormant2Gain.gain.value = 0 + this.nodeFormant2Gain = this.ctx.createGain() + this.nodeFormant2Gain.gain.value = 0 this.nodeFormant2Gain.connect(this.nodeAnalyser) this.nodeFormant2Gain.connect(this.ctx.destination) @@ -63,6 +89,10 @@ export class VowelSynth this.nodeSource.connect(this.nodeFormant1Filter) this.nodeSource.connect(this.nodeFormant2Filter) this.nodeSource.start() + + + this.recordingBuffer = this.ctx.createBuffer(1, recordingBufferLength, this.ctx.sampleRate) + this.recordingPlaybackStartTime = 0 } @@ -74,8 +104,9 @@ export class VowelSynth setGain(gain: number) { - this.nodeFormant1Gain.gain.value = gain * 0.25 - this.nodeFormant2Gain.gain.value = gain * 0.25 * 0.8 + const generalGain = 0.5 + this.nodeFormant1Gain.gain.value = gain * generalGain + this.nodeFormant2Gain.gain.value = gain * generalGain * 0.8 } @@ -86,6 +117,46 @@ export class VowelSynth } + captureWaveform(ev: MessageEvent) + { + const buffer = ev.data.buffer as Float32Array + const length = ev.data.length as number + //console.log("captureWaveform", this.waveformBufferIndex, buffer, length) + + for (let i = 0; i < length; i++) + { + const outputIndex = (this.waveformBufferIndex + i) % this.waveformBuffer.length + this.waveformBuffer[outputIndex] = buffer[i] + } + + this.waveformBufferIndex = (this.waveformBufferIndex + length) % this.waveformBuffer.length + } + + + getWaveform(fromSampleIndex: number): Float32Array + { + const length = Math.max(0, + fromSampleIndex > this.waveformBufferIndex ? + this.waveformBuffer.length - fromSampleIndex + this.waveformBufferIndex : + this.waveformBufferIndex - fromSampleIndex) + + //console.log("getWaveform", fromSampleIndex, length) + const result = new Float32Array(length) + + for (let i = 0; i < length; i++) + result[i] = this.waveformBuffer[(fromSampleIndex + i) % this.waveformBuffer.length] + + return result + } + + + getWaveformLatest(latestNumSamples: number): Float32Array + { + return this.getWaveform( + (this.waveformBufferIndex + this.waveformBuffer.length - latestNumSamples) % this.waveformBuffer.length) + } + + getAnalyserData(): Uint8Array { this.nodeAnalyser.getByteFrequencyData(this.nodeAnalyserData) @@ -100,6 +171,24 @@ export class VowelSynth } + playRecording() + { + if (this.nodeRecordingSrc) + { + this.nodeRecordingSrc.stop() + this.nodeRecordingSrc.disconnect() + this.nodeRecordingSrc = undefined + } + + this.nodeRecordingSrc = this.ctx.createBufferSource() + this.nodeRecordingSrc.buffer = this.recordingBuffer + this.nodeRecordingSrc.connect(this.nodeAnalyser) + this.nodeRecordingSrc.connect(this.ctx.destination) + this.nodeRecordingSrc.start() + this.recordingPlaybackStartTime = this.ctx.currentTime + } + + async openMic() { if (this.nodeMicSrc) diff --git a/src/wavEncode.ts b/src/wavEncode.ts new file mode 100644 index 0000000..7a08e29 --- /dev/null +++ b/src/wavEncode.ts @@ -0,0 +1,85 @@ +// From https://github.com/Jam3/audiobuffer-to-wav +// with modifications + + +export function encode( + samples: Float32Array, + sampleRate: number) + : ArrayBuffer +{ + const format = 1 // or 3 + const bitDepth = 16 // or 32 + + return encodeWAV(samples, format, sampleRate, 1, bitDepth) +} + + +function encodeWAV( + samples: Float32Array, + format: number, + sampleRate: number, + numChannels: number, + bitDepth: number) +{ + var bytesPerSample = bitDepth / 8 + var blockAlign = numChannels * bytesPerSample + + var buffer = new ArrayBuffer(44 + samples.length * bytesPerSample) + var view = new DataView(buffer) + + /* RIFF identifier */ + writeString(view, 0, 'RIFF') + /* RIFF chunk length */ + view.setUint32(4, 36 + samples.length * bytesPerSample, true) + /* RIFF type */ + writeString(view, 8, 'WAVE') + /* format chunk identifier */ + writeString(view, 12, 'fmt ') + /* format chunk length */ + view.setUint32(16, 16, true) + /* sample format (raw) */ + view.setUint16(20, format, true) + /* channel count */ + view.setUint16(22, numChannels, true) + /* sample rate */ + view.setUint32(24, sampleRate, true) + /* byte rate (sample rate * block align) */ + view.setUint32(28, sampleRate * blockAlign, true) + /* block align (channel count * bytes per sample) */ + view.setUint16(32, blockAlign, true) + /* bits per sample */ + view.setUint16(34, bitDepth, true) + /* data chunk identifier */ + writeString(view, 36, 'data') + /* data chunk length */ + view.setUint32(40, samples.length * bytesPerSample, true) + if (format === 1) // Raw PCM + floatTo16BitPCM(view, 44, samples) + else + writeFloat32(view, 44, samples) + + return buffer +} + + +function writeFloat32(output: DataView, offset: number, input: Float32Array) +{ + for (var i = 0; i < input.length; i++, offset += 4) + output.setFloat32(offset, input[i], true) +} + + +function floatTo16BitPCM(output: DataView, offset: number, input: Float32Array) +{ + for (var i = 0; i < input.length; i++, offset += 2) + { + const s = Math.max(-1, Math.min(1, input[i])) + output.setInt16(offset, s * 0x7fff, true) + } +} + +function writeString(view: DataView, offset: number, string: string) +{ + for (var i = 0; i < string.length; i++) + view.setUint8(offset + i, string.charCodeAt(i)) +} \ No newline at end of file diff --git a/webpack.config.cjs b/webpack.config.cjs index d7502d3..f11a6e6 100644 --- a/webpack.config.cjs +++ b/webpack.config.cjs @@ -6,6 +6,7 @@ module.exports = { entry: { main: path.resolve(__dirname, "src/main.tsx"), + audioWorklet: path.resolve(__dirname, "src/audioWorklet.ts"), }, output: {