Skip to content

Commit

Permalink
Fix some bugs (#1199)
Browse files Browse the repository at this point in the history
* refactor echogarden & log

* more logs

* fix audio/video create with compressing config

* upgrade echogarden
  • Loading branch information
an-lee authored Nov 25, 2024
1 parent 15746d4 commit 3c337c2
Show file tree
Hide file tree
Showing 8 changed files with 773 additions and 176 deletions.
2 changes: 1 addition & 1 deletion enjoy/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
"dayjs": "^1.11.13",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"echogarden": "2.0.3",
"echogarden": "^2.0.7",
"electron-context-menu": "^4.0.4",
"electron-log": "^5.2.2",
"electron-settings": "^4.0.4",
Expand Down
1 change: 1 addition & 0 deletions enjoy/src/main/db/handlers/audios-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class AudiosHandler {
compressing?: boolean;
} = {}
) {
logger.info("Creating audio...", { uri, params });
let file = uri;
let source;
if (uri.startsWith("http")) {
Expand Down
1 change: 1 addition & 0 deletions enjoy/src/main/db/handlers/videos-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class VideosHandler {
compressing?: boolean;
} = {}
) {
logger.info("Creating video...", { uri, params });
let file = uri;
let source;
if (uri.startsWith("http")) {
Expand Down
65 changes: 55 additions & 10 deletions enjoy/src/main/echogarden.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import {
type Timeline,
type TimelineEntry,
} from "echogarden/dist/utilities/Timeline.d.js";
import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";
import { ensureAndGetPackagesDir } from "echogarden/dist/utilities/PackageManager.js";
import path from "path";
import log from "@main/logger";
Expand Down Expand Up @@ -71,7 +70,12 @@ class EchogardenWrapper {
// Set the whisper executable path for macOS
if (process.platform === "darwin") {
options.whisperCpp = options.whisperCpp || {};
options.whisperCpp.executablePath = path.join(__dirname, "lib", "whisper", "main");
options.whisperCpp.executablePath = path.join(
__dirname,
"lib",
"whisper",
"main"
);
}

// Call the original recognize function
Expand All @@ -84,8 +88,46 @@ class EchogardenWrapper {
.catch(reject);
});
};
this.align = Echogarden.align;
this.alignSegments = Echogarden.alignSegments;
/**
 * Wraps `Echogarden.align` so that a failure surfacing as a process-level
 * `unhandledRejection` while the alignment is in flight also rejects the
 * returned promise.
 *
 * NOTE(review): presumably some echogarden failures escape the returned
 * promise and only show up as unhandled rejections — confirm. The listener
 * is process-wide, so an unrelated unhandled rejection raised during the
 * call will reject this promise as well.
 *
 * @param input - Audio input forwarded unchanged to `Echogarden.align`.
 * @param transcript - Transcript forwarded unchanged to `Echogarden.align`.
 * @param options - Alignment options forwarded unchanged.
 * @returns A promise settling with the alignment result, or rejecting with
 *   either the alignment error or the unhandled-rejection reason.
 */
this.align = (input, transcript, options) => {
  return new Promise((resolve, reject) => {
    const detach = () => {
      process.removeListener("unhandledRejection", onUnhandledRejection);
    };

    const onUnhandledRejection = (reason: unknown) => {
      // One-shot: detach as soon as it fires.
      detach();
      reject(reason);
    };

    // Add temporary unhandledRejection listener
    process.on("unhandledRejection", onUnhandledRejection);

    Echogarden.align(input, transcript, options)
      .then(resolve, reject)
      // Always detach, on success AND on failure. Previously the listener
      // was left attached when the call rejected, leaking one listener per
      // failed alignment.
      .finally(detach);
  });
};
/**
 * Wraps `Echogarden.alignSegments` so that a failure surfacing as a
 * process-level `unhandledRejection` while the call is in flight also
 * rejects the returned promise.
 *
 * NOTE(review): presumably some echogarden failures escape the returned
 * promise and only show up as unhandled rejections — confirm. The listener
 * is process-wide, so an unrelated unhandled rejection raised during the
 * call will reject this promise as well.
 *
 * @param input - Audio input forwarded unchanged to `Echogarden.alignSegments`.
 * @param timeline - Timeline forwarded unchanged to `Echogarden.alignSegments`.
 * @param options - Alignment options forwarded unchanged.
 * @returns A promise settling with the alignment result, or rejecting with
 *   either the alignment error or the unhandled-rejection reason.
 */
this.alignSegments = (input, timeline, options) => {
  return new Promise((resolve, reject) => {
    const detach = () => {
      process.removeListener("unhandledRejection", onUnhandledRejection);
    };

    const onUnhandledRejection = (reason: unknown) => {
      // One-shot: detach as soon as it fires.
      detach();
      reject(reason);
    };

    // Add temporary unhandledRejection listener
    process.on("unhandledRejection", onUnhandledRejection);

    Echogarden.alignSegments(input, timeline, options)
      .then(resolve, reject)
      // Always detach, on success AND on failure. Previously the listener
      // was left attached when the call rejected, leaking one listener per
      // failed alignment.
      .finally(detach);
  });
};
this.denoise = Echogarden.denoise;
this.encodeRawAudioToWave = encodeRawAudioToWave;
this.decodeWaveToRawAudio = decodeWaveToRawAudio;
Expand All @@ -105,13 +147,13 @@ class EchogardenWrapper {
},
whisperCpp: {
model: "tiny.en",
}
},
}
) {
const sampleFile = path.join(__dirname, "samples", "jfk.wav");

try {
logger.info("check:", options);
logger.info("echogarden-check:", options);
const result = await this.recognize(sampleFile, options);
logger.info("transcript:", result?.transcript);
fs.writeJsonSync(
Expand All @@ -138,6 +180,7 @@ class EchogardenWrapper {
* @returns A promise that resolves to the enjoy:// protocol URL of the transcoded WAV file.
*/
async transcode(url: string, sampleRate = 16000): Promise<string> {
logger.info("echogarden-transcode:", url, sampleRate);
const filePath = enjoyUrlToPath(url);
const rawAudio = await this.ensureRawAudio(filePath, sampleRate);
const audioBuffer = this.encodeRawAudioToWave(rawAudio);
Expand All @@ -152,7 +195,7 @@ class EchogardenWrapper {
ipcMain.handle(
"echogarden-recognize",
async (_event, url: string, options: RecognitionOptions) => {
logger.debug("echogarden-recognize:", options);
logger.info("echogarden-recognize:", options);
try {
const input = enjoyUrlToPath(url);
return await this.recognize(input, options);
Expand All @@ -171,7 +214,7 @@ class EchogardenWrapper {
transcript: string,
options: AlignmentOptions
) => {
logger.debug("echogarden-align:", transcript, options);
logger.info("echogarden-align:", options);
try {
return await this.align(input, transcript, options);
} catch (err) {
Expand All @@ -189,7 +232,7 @@ class EchogardenWrapper {
timeline: Timeline,
options: AlignmentOptions
) => {
logger.debug("echogarden-align-segments:", timeline, options);
logger.info("echogarden-align-segments:", options);
if (typeof input === "string") {
input = enjoyUrlToPath(input);
}
Expand All @@ -211,7 +254,7 @@ class EchogardenWrapper {
transcript: string,
language: string
) => {
logger.debug("echogarden-word-to-sentence-timeline:", transcript);
logger.info("echogarden-word-to-sentence-timeline:", language);

const { segmentTimeline } =
await this.wordTimelineToSegmentSentenceTimeline(
Expand All @@ -237,6 +280,7 @@ class EchogardenWrapper {
ipcMain.handle(
"echogarden-transcode",
async (_event, url: string, sampleRate?: number) => {
logger.info("echogarden-transcode:", url, sampleRate);
try {
return await this.transcode(url, sampleRate);
} catch (err) {
Expand All @@ -247,6 +291,7 @@ class EchogardenWrapper {
);

// IPC handler for the "echogarden-check" channel: logs the options it was
// given and delegates to this.check(options), returning that result to the
// caller.
ipcMain.handle("echogarden-check", async (_event, options: any) => {
// Log the received options before running the check.
logger.info("echogarden-check:", options);
return this.check(options);
});

Expand Down
6 changes: 3 additions & 3 deletions enjoy/src/renderer/components/medias/media-add-button.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ export const MediaAddButton = (props: { type?: "Audio" | "Video" }) => {
if (files.length > 1) {
Promise.allSettled(
files.map((f) =>
EnjoyApp[type.toLowerCase() as "audios" | "videos"].create(f, {
EnjoyApp[`${type.toLowerCase()}s` as "audios" | "videos"].create(f, {
compressing,
})
)
Expand Down Expand Up @@ -97,8 +97,8 @@ export const MediaAddButton = (props: { type?: "Audio" | "Video" }) => {
setOpen(false);
});
} else {
EnjoyApp.audios
.create(uri)
EnjoyApp[`${type.toLowerCase()}s` as "audios" | "videos"]
.create(uri, { compressing })
.then((media) => {
toast.success(t("resourceAdded"));
navigate(`/${type.toLowerCase()}s/${media.id}`);
Expand Down
4 changes: 3 additions & 1 deletion enjoy/src/renderer/context/db-provider.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { createContext, useState, useEffect, useContext } from "react";
import log from "electron-log/renderer";

const logger = log.scope("db-provider.tsx");

type DbStateEnum =
| "connected"
| "connecting"
Expand Down Expand Up @@ -83,7 +85,7 @@ export const DbProvider = ({ children }: { children: React.ReactNode }) => {
useEffect(() => {
if (state === "connected") {
EnjoyApp.db.onTransaction((_event, state) => {
log.debug("db-on-transaction", state);
logger.debug("db-on-transaction", state);

const event = new CustomEvent("db-on-transaction", { detail: state });
document.dispatchEvent(event);
Expand Down
29 changes: 22 additions & 7 deletions enjoy/src/renderer/hooks/use-transcribe.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ import {
TimelineEntry,
type TimelineEntryType,
} from "echogarden/dist/utilities/Timeline";
import { parseText } from "media-captions";
import { type ParsedCaptionsResult, parseText } from "media-captions";
import { SttEngineOptionEnum } from "@/types/enums";
import { RecognitionResult } from "echogarden/dist/api/API.js";
import take from "lodash/take";
import sortedUniqBy from "lodash/sortedUniqBy";
import log from "electron-log/renderer";

const logger = log.scope("use-transcribe.tsx");

// test a text string has any punctuations or not
// some transcribed text may not have any punctuations
Expand Down Expand Up @@ -133,6 +136,7 @@ export const useTranscribe = () => {
};
} else if (transcript) {
setOutput("Aligning the transcript...");
logger.info("Aligning the transcript...");
const alignmentResult = await EnjoyApp.echogarden.align(
new Uint8Array(await blob.arrayBuffer()),
transcript,
Expand Down Expand Up @@ -171,7 +175,13 @@ export const useTranscribe = () => {
transcript: string;
segmentTimeline: TimelineEntry[];
}> => {
const caption = await parseText(originalText, { type: "srt" });
let caption: ParsedCaptionsResult;
try {
caption = await parseText(originalText, { type: "srt" });
} catch (err) {
logger.error("parseTextFailed", { error: err.message });
throw err;
}

if (caption.cues.length > 0) {
// valid srt file
Expand Down Expand Up @@ -210,7 +220,7 @@ export const useTranscribe = () => {
transcript = punctuatedText;
} catch (err) {
toast.error(err.message);
console.warn(err);
logger.error("punctuateTextFailed", { error: err.message });
}
}

Expand All @@ -237,6 +247,7 @@ export const useTranscribe = () => {
let model: string;

let res: RecognitionResult;
logger.info("Start transcribing from Whisper...");
try {
model =
echogardenSttConfig[
Expand Down Expand Up @@ -282,6 +293,7 @@ export const useTranscribe = () => {
});

setOutput("Transcribing from OpenAI...");
logger.info("Start transcribing from OpenAI...");
try {
const res: {
text: string;
Expand Down Expand Up @@ -325,6 +337,7 @@ export const useTranscribe = () => {
segmentTimeline: TimelineEntry[];
}> => {
setOutput("Transcribing from Cloudflare...");
logger.info("Start transcribing from Cloudflare...");
try {
const res: CfWhipserOutputType = (
await axios.postForm(
Expand Down Expand Up @@ -394,6 +407,7 @@ export const useTranscribe = () => {
const reco = new sdk.SpeechRecognizer(config, audioConfig);

setOutput("Transcribing from Azure...");
logger.info("Start transcribing from Azure...");
let results: SpeechRecognitionResultType[] = [];

const { transcript, segmentTimeline }: any = await new Promise(
Expand All @@ -412,18 +426,18 @@ export const useTranscribe = () => {

reco.canceled = (_s, e) => {
if (e.reason === sdk.CancellationReason.Error) {
logger.error("CANCELED: Reason=" + e.reason);
return reject(new Error(e.errorDetails));
}

reco.stopContinuousRecognitionAsync();
console.log("CANCELED: Reason=" + e.reason);
logger.info("CANCELED: Reason=" + e.reason);
};

reco.sessionStopped = async (_s, e) => {
console.log(
logger.info(
"Session stopped. Stop continuous recognition.",
e.sessionId,
results
e.sessionId
);
reco.stopContinuousRecognitionAsync();

Expand Down Expand Up @@ -460,6 +474,7 @@ export const useTranscribe = () => {
segmentTimeline,
});
} catch (err) {
logger.error("azureTranscribeFailed", { error: err.message });
reject(t("azureTranscribeFailed", { error: err.message }));
}
};
Expand Down
Loading

0 comments on commit 3c337c2

Please sign in to comment.