Skip to content

Commit

Permalink
Merge pull request #111 from one-click-studio/108-rework-audioactivitys-logic
Browse files Browse the repository at this point in the history

108 rework audioactivitys logic
  • Loading branch information
tillderoquefeuil authored Feb 14, 2024
2 parents 11fd360 + e0142b1 commit 033a55a
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 151 deletions.
230 changes: 94 additions & 136 deletions src/main/modules/audioActivity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,19 @@ import { Thresholds } from "@src/types/protocol"

const logger = getLogger('audio Activity')

interface Device {
id: number
data: AudioDevice
}

// Per-channel outcome of one processing pass, keyed by channel id.
// `volume` is the value forwarded to the onAudio callback for that channel.
// `speaking` is true/false when the stored speaking state should be overwritten
// with that value, and undefined when the previously stored state is kept.
type ProcessedChannel = Map<number, {volume:number, speaking:boolean|undefined}>

const TIME_TO_WATCH = 30 * 1000

export class AudioActivity {
private _speaking: boolean[]
private _consecutive: number[]

private _speakingThreshold = 3
private _silenceThreshold = 10
private _speakingThreshold = 5
private _silenceThreshold = 20
private _buffer: AudioFeedback[] = []

private _vadThreshold = 0.75
private _minVolume = -35

private _bufferLength: number = 0

private _sharedState: unknown | undefined

private _deviceName: string
Expand All @@ -44,7 +36,6 @@ export class AudioActivity {
private _onAudio: (speaking: boolean, channel: number, volume: number) => void

private _restarting: boolean = false
private _lastProcess: number = 0
private _volumes: number[] = []
private _volFrameLength = 0

Expand Down Expand Up @@ -72,8 +63,9 @@ export class AudioActivity {
if (options.record) this._record = options.record
if (options.gain) this._gain = options.gain

this._speaking = Array(options.channels.length).fill(false)
this._consecutive = Array(options.channels.length).fill(0)
const maxChannel = options.channels[options.channels.length-1]+1
this._speaking = Array(maxChannel).fill(false)
this._consecutive = Array(maxChannel).fill(0)
this._isOpen = false

this._device = getAudioDevice(this._deviceName, options.host)
Expand Down Expand Up @@ -135,28 +127,12 @@ export class AudioActivity {
return true
}

public async init() {
if (!this._device) return

// 30 * 1000 milliseconds / (( samplerate * bitrate) / (framesPerBuffer * channels * 2))
// this._volFrameLength = Math.round(TIME_TO_WATCH/((this._sampleRate*8)/(this._framesPerBuffer*this._device.data.inputChannels*2)))

// setInterval(() => {
// if (this._isOpen && Date.now() - this._lastProcess > TIME_TO_WATCH) {
// logger.error('Audio stream not processing, restarting')
// this.restart()
// }
// }, TIME_TO_WATCH/2)
}

public async start() {
if (!this._device) return

this.init()
logger.info(this._device)
logger.debug(this._gain)


this._sharedState = audioManager.start(
this._device.name,
this._device.host,
Expand Down Expand Up @@ -210,145 +186,127 @@ export class AudioActivity {
return path.join(dir, `audio-${datestring}-${deviceName}.wav`)
}

private shouldRestart(): boolean {
const sum = this._volumes.reduce((a, b) => a + b, 0)
if (sum === 0 && this._volumes.length >= this._volFrameLength) return true
return false
}


async process(data: AudioFeedback[]) {
private async process(data: AudioFeedback[]) {
if (!this._device) return

const processed: ProcessedChannel = new Map()
this._lastProcess = Date.now()
for (let channelId of this._channels) {
this._buffer.push(data[channelId])
}

// let sumVolume = 0
for (let i = 0; i < data.length; i++) {
const shortIndex = this.shortIndex(i)
if (shortIndex === -1) continue
const bufferLength = (this._speakingThreshold+2)*this._channels.length
if (this._buffer.length > bufferLength) {
this._buffer = this._buffer.slice(this._buffer.length - bufferLength)
}
if (this._buffer.length < bufferLength) {
return
}

const speaking = await this.processChannel(data[i]);
const processed: ProcessedChannel = new Map()
let start = this._speaking.reduce((p, v) => (p||v? true : false), false)
for (let channelId of this._channels) {
const filtred = this._buffer.filter((d) => d.channelId === channelId)
if (!start) {
const sf = this.speakingFrames(filtred.slice(0, this._speakingThreshold))
if (sf === this._speakingThreshold) start = true
}

// sumVolume += volume
processed.set(i, {
speaking,
volume: data[i].volume,
let isSpeaking = this.isSpeaking(channelId, filtred)
processed.set(channelId, {
speaking: isSpeaking,
volume: data[channelId].volume,
})
}
// this._volumes.push(sumVolume)
// this._volumes = this._volumes.slice(-this._volFrameLength)

// if (this.shouldRestart()) return this.restart()

if (this.tooManySpeakers(processed)){
const channelId = this.chooseSpeaker(processed)

for (let i = 0; i < this._channels.length; i++) {
const cId = this.longIndex(i)
const channel = processed.get(cId)
if (!channel || cId === channelId) continue
if (channel.speaking !== true && !this._speaking[i]) continue

channel.speaking = this._speaking[i]? false : undefined
if (start) {
let speakers = this.getSpeakers(processed)
if (speakers.length > 1) {
const loudestChanId = this.getLoudestSpeakers(speakers)

for (let channelId of speakers) {
if (channelId !== loudestChanId) {
const speaking = this._speaking[channelId]? false : undefined
processed.set(channelId, {
speaking: speaking,
volume: data[channelId].volume,
})
}
}
}
}

for (let i = 0; i < this._channels.length; i++) {
const cId = this.longIndex(i)
const channel = processed.get(cId)
if (!channel) continue
for (let channelId of this._channels) {
const c = processed.get(channelId)
if (!c) continue

if (channel.speaking !== undefined) this._speaking[i] = channel.speaking
this._onAudio(this._speaking[i], cId, channel.volume)
if (c.speaking !== undefined) this._speaking[channelId] = c.speaking
if (!start) this._speaking[channelId] = false
this._onAudio(this._speaking[channelId], channelId, c.volume)
}
}

private tooManySpeakers(processed: ProcessedChannel): boolean {
let speaking = 0
for (let i = 0; i < this._channels.length; i++) {
const cId = this.longIndex(i)
const channel = processed.get(cId)
if (channel?.speaking || (channel?.speaking === undefined && this._speaking[i])) {
speaking++
private speakingFrames(frames: AudioFeedback[]): number {
return frames.reduce((total, d) => {
if (d.speakingProb >= this._vadThreshold) {
total += 1
}
}

return (speaking > 1)
return total
}, 0)
}

private chooseSpeaker(processed: ProcessedChannel): number {
const wasSpeaking = this.wasSpeaking(processed)
if (wasSpeaking !== -1) return wasSpeaking
private avgDecibels(frames: AudioFeedback[]): number {
const totalNoiseEnergy = frames
.map((d) => Math.pow(10, d.volume/10))
.reduce((total, v) => total + v, 0)

return this.getLoudestChannel(processed)
return 10*Math.log10(totalNoiseEnergy/frames.length)
}

private wasSpeaking(processed: ProcessedChannel): number {
for (let i = 0; i < this._channels.length; i++) {
const cId = this.longIndex(i)
const channel = processed.get(cId)

if ((channel?.speaking || channel?.speaking === undefined) && this._speaking[i]) {
return cId
}
}

return -1
}

private getLoudestChannel(processed: ProcessedChannel): number {
let maxVolume = 0
let maxChannel = 0
for (let i = 0; i < this._channels.length; i++) {
const cId = this.longIndex(i)
const channel = processed.get(cId)
if (channel?.volume && channel.volume > maxVolume) {
maxVolume = channel.volume
maxChannel = cId
}
}
return maxChannel
}
private isSpeaking(channelId: number, frames: AudioFeedback[]): boolean | undefined {
const LIMIT = 30
let isSpeaking: boolean | undefined = (this.speakingFrames(frames) >= this._speakingThreshold)
const toAdd = isSpeaking? 1 : -1

// channels : [1, 2, 4]
// inputChannels : 6
// buffers : [null, buf, buf, null, buf, null]
// reset consecutive if the sign changes
if (this._consecutive[channelId] * toAdd < 0) this._consecutive[channelId] = 0
if (-LIMIT < this._consecutive[channelId] && this._consecutive[channelId] < LIMIT) this._consecutive[channelId] += toAdd

// index 1 -> 0
private shortIndex(index: number): number {
return this._channels.indexOf(index)
}
if (!this._speaking[channelId] && isSpeaking) {
isSpeaking = true
} else if (this._speaking[channelId] && this._consecutive[channelId] < this._silenceThreshold*-1) {
isSpeaking = false
} else {
isSpeaking = undefined
}

// index 0 -> 1
private longIndex(index: number): number {
return this._channels[index]
return isSpeaking
}

private async processChannel(data: AudioFeedback): Promise<boolean|undefined> {
let isSpeaking = false
const index = this.shortIndex(data.channelId)
private getSpeakers(processed: ProcessedChannel): number[] {
let speakers: number[] = []
for (let channelId of this._channels) {
const c = processed.get(channelId)
if (!c) continue

if (data.volume > this._minVolume) {
if (data.speakingProb > this._vadThreshold) {
isSpeaking = true
if (c.speaking || (c.speaking === undefined && this._speaking[channelId])) {
speakers.push(channelId)
}
}

const toAdd = isSpeaking? 1 : -1

if (this._consecutive[index] * toAdd < 0)
this._consecutive[index] = 0

this._consecutive[index] += toAdd
return speakers
}

if (!this._speaking[index] && this._consecutive[index] > this._speakingThreshold) {
return true
} else if (this._speaking[index] && this._consecutive[index] < this._silenceThreshold*-1) {
return false
private getLoudestSpeakers(speakers: number[]): number {
const loudestChan = {channelId: -1, volume: -100}
for (let channelId of speakers) {
const filtred = this._buffer.filter((d) => d.channelId === channelId)
const avg = this.avgDecibels(filtred)
if (avg > loudestChan.volume) {
loudestChan.channelId = channelId
loudestChan.volume = avg
}
}

return undefined
return loudestChan.channelId
}

}
Expand Down
16 changes: 8 additions & 8 deletions src/main/servers/ObsServer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ export class ObsServer extends Server {
}

async connect(connection?: Connection, once: boolean = false) {
if (this._expo.getAttempts() === -1) {
this._expo.reset()
}

if (connection) {
this.obsConfig = connection
}
Expand All @@ -49,7 +53,6 @@ export class ObsServer extends Server {

override async clean() {
this._expo.stop()
this._expo.reset()

if (this.websocket){
this.websocket.removeAllListeners()
Expand Down Expand Up @@ -80,10 +83,13 @@ export class ObsServer extends Server {
private async websocketConnection() {
this.websocket.removeAllListeners()

this.websocket.once('ConnectionError', (err) => this.logger.error('socket error', err))
this.websocket.once('ConnectionError', (err) => this.logger.error('ConnectionError', err))

this.websocket.once('ConnectionClosed', () => {
this.logger.error('ConnectionClosed')

if (!this.tryToConnectOnce) {
this.logger.info(`reconnection try in ${this._expo.humanTimeout()}`)
this._expo.reconnectAfterError(() => { this.connect() })
}
this.reachable$.next(false)
Expand All @@ -92,17 +98,11 @@ export class ObsServer extends Server {
try {
await this.websocket.connect('ws://'+this.obsConfig?.ip, this.obsConfig?.password, { rpcVersion:1 })

this._expo.reset()
await this.initWebsocket()
this.isTryingToConnect = false
this.reachable$.next(true)
} catch (err) {
this.logger.error('obs connect error', err)
this.logger.error(JSON.stringify(err))
if (!this.tryToConnectOnce) {
this.logger.info(`reconnection try in ${this._expo.humanTimeout()}`)
this._expo.reconnectAfterError(() => { this.connect() })
}
}

}
Expand Down
2 changes: 2 additions & 0 deletions src/main/utils/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export class expoAttempt {
}

reconnectAfterError(callback: ()=>void): void {
if (this.attempts === -1) return

this.incrementAttempts()
this.timeout = setTimeout(() => {
callback()
Expand Down
Loading

0 comments on commit 033a55a

Please sign in to comment.