From 540e58d2c30ff2a52c5075140f5fd5b49d55e488 Mon Sep 17 00:00:00 2001
From: Luke Oliff
Date: Fri, 12 Jan 2024 11:24:10 +0000
Subject: [PATCH 1/3] feat: add audio sits features to JS sdk

---
Review notes (commentary only; not part of the commit message):
 * SyncPrerecordedResponse.ts: the interfaces are regrouped, `Result` gains optional
   `sentiments`, `topics` and `intents`, and `WordBase` gains required `sentiment` and
   `sentiment_score` fields.
 * PATCH 3/3 edits the same file again: it removes those two `WordBase` fields and the
   `SentimentSegment` / `TopicsSegment` / `IntentsSegment` interfaces introduced here.
 * TranscriptionSchema.ts: seven optional request options are added.
 * NOTE(review): generic type arguments look stripped in this copy of the patch (see
   `extends Record {` in the TranscriptionSchema hunk header) - TODO confirm against the commit.

 src/lib/types/SyncPrerecordedResponse.ts | 124 ++++++++++++++++-------
 src/lib/types/TranscriptionSchema.ts     |  35 +++++++
 2 files changed, 120 insertions(+), 39 deletions(-)

diff --git a/src/lib/types/SyncPrerecordedResponse.ts b/src/lib/types/SyncPrerecordedResponse.ts
index 81d0a6cc..63b00505 100644
--- a/src/lib/types/SyncPrerecordedResponse.ts
+++ b/src/lib/types/SyncPrerecordedResponse.ts
@@ -3,6 +3,51 @@ export interface SyncPrerecordedResponse {
   results: Result;
 }

+interface Result {
+  channels: Channel[];
+  utterances?: Utterance[];
+  summary?: TranscriptionSummary;
+  sentiments?: Sentiment;
+  topics?: Topics;
+  intents?: Intents;
+}
+
+interface Channel {
+  alternatives: Alternative[];
+  search?: Search[];
+  detected_language?: string;
+  language_confidence?: number;
+}
+
+interface Utterance {
+  start: number;
+  end: number;
+  confidence: number;
+  channel: number;
+  transcript: string;
+  words: WordBase[];
+  speaker?: number;
+  id: string;
+}
+
+interface TranscriptionSummary {
+  result: string;
+  short: string;
+}
+
+interface Sentiment {
+  segments?: SentimentSegment[];
+  average?: Average;
+}
+
+interface Topics {
+  segments?: TopicsSegment[];
+}
+
+interface Intents {
+  segments?: IntentsSegment[];
+}
+
 interface Alternative {
   transcript: string;
   confidence: number;
@@ -14,11 +59,21 @@ interface Alternative {
   topics?: TopicGroup[];
 }

-interface Channel {
-  search?: Search[];
-  alternatives: Alternative[];
-  detected_language?: string;
-  language_confidence?: number;
+interface Search {
+  query: string;
+  hits: Hit[];
+}
+
+interface WordBase {
+  word: string;
+  start: number;
+  end: number;
+  confidence: number;
+  punctuated_word?: string;
+  speaker?: number;
+  speaker_confidence?: number;
+  sentiment: string;
+  sentiment_score: number;
 }

 interface Entity {
@@ -66,17 +121,6 @@ interface ParagraphGroup {
   paragraphs: Paragraph[];
 }

-interface Result {
-  channels: Channel[];
-  utterances?: Utterance[];
-  summary?: TranscriptionSummary;
-}
-
-interface Search {
-  query: string;
-  hits: Hit[];
-}
-
 interface Sentence {
   text: string;
   start: number;
@@ -88,10 +132,6 @@ interface Summary {
   start_word?: number;
   end_word?: number;
 }
-interface TranscriptionSummary {
-  result: string;
-  short: string;
-}

 interface Topic {
   topic: string;
@@ -110,29 +150,35 @@ interface Translation {
   translation: string;
 }

-interface Utterance {
-  start: number;
-  end: number;
-  confidence: number;
-  channel: number;
-  transcript: string;
-  words: WordBase[];
-  speaker?: number;
-  id: string;
-}
-
 interface Warning {
   parameter: string;
   type: string;
   message: string;
 }

-interface WordBase {
-  word: string;
-  start: number;
-  end: number;
-  confidence: number;
-  punctuated_word?: string;
-  speaker?: number;
-  speaker_confidence?: number;
+interface SentimentSegment {
+  text: string;
+  start_word: number;
+  end_word: number;
+  sentiment: "positive" | "neutral" | "negative";
+  sentiment_score: number;
+}
+
+interface Average {
+  sentiment: string;
+  sentiment_score: number;
+}
+
+interface TopicsSegment {
+  text: string;
+  start_word: number;
+  end_word: number;
+  topics: { topic: string; confidence_score: number }[];
+}
+
+interface IntentsSegment {
+  text: string;
+  start_word: number;
+  end_word: number;
+  intents: { intent: string; confidence_score: number }[];
 }
diff --git a/src/lib/types/TranscriptionSchema.ts b/src/lib/types/TranscriptionSchema.ts
index 68dd775b..2dd121b7 100644
--- a/src/lib/types/TranscriptionSchema.ts
+++ b/src/lib/types/TranscriptionSchema.ts
@@ -83,6 +83,41 @@ interface TranscriptionSchema extends Record {
    */
   tag?: string[];

+  /**
+   * NOTE(review): presumably turns on sentiment analysis (see `Result.sentiments`). As yet unreleased.
+   */
+  sentiment?: boolean;
+
+  /**
+   * NOTE(review): presumably turns on intent recognition (see `Result.intents`). As yet unreleased.
+   */
+  intents?: boolean;
+
+  /**
+   * One custom intent or a list of them; semantics not shown here. As yet unreleased.
+   */
+  custom_intent?: string[] | string;
+
+  /**
+   * "strict" or "extended"; presumably governs how `custom_intent` is applied. As yet unreleased.
+   */
+  custom_intent_mode?: "strict" | "extended";
+
+  /**
+   * NOTE(review): presumably turns on topic detection (see `Result.topics`). As yet unreleased.
+   */
+  topics?: boolean;
+
+  /**
+   * One custom topic or a list of them; semantics not shown here. As yet unreleased.
+   */
+  custom_topic?: string[] | string;
+
+  /**
+   * "strict" or "extended"; presumably governs how `custom_topic` is applied. As yet unreleased.
+   */
+  custom_topic_mode?: "strict" | "extended";
+
   [key: string]: unknown;
 }

From aa2acda371534bdfd1efb554d985ef0716845d94 Mon Sep 17 00:00:00 2001
From: Luke Oliff
Date: Thu, 18 Jan 2024 12:47:23 +0000
Subject: [PATCH 2/3] fix: fix typos in sync+callback error

---
Review note (commentary only; not part of the commit message):
 * Only the text of the two "callback passed to a synchronous call" errors changes; the
   method names suggested to the caller become `transcribeUrlCallback` and
   `transcribeFileCallback`.

 src/packages/PrerecordedClient.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/packages/PrerecordedClient.ts b/src/packages/PrerecordedClient.ts
index 9277f129..3575fa99 100644
--- a/src/packages/PrerecordedClient.ts
+++ b/src/packages/PrerecordedClient.ts
@@ -28,7 +28,7 @@ export class PrerecordedClient extends AbstractRestfulClient {

       if (options !== undefined && "callback" in options) {
         throw new DeepgramError(
-          "Callback cannot be provided as an option to a synchronous transcription. Use `asyncPrerecordedUrl` or `asyncPrerecordedFile` instead."
+          "Callback cannot be provided as an option to a synchronous transcription. Use `transcribeUrlCallback` or `transcribeFileCallback` instead."
         );
       }

@@ -65,7 +65,7 @@ export class PrerecordedClient extends AbstractRestfulClient {

       if (options !== undefined && "callback" in options) {
         throw new DeepgramError(
-          "Callback cannot be provided as an option to a synchronous transcription. Use `asyncPrerecordedUrl` or `asyncPrerecordedFile` instead."
+          "Callback cannot be provided as an option to a synchronous transcription. Use `transcribeUrlCallback` or `transcribeFileCallback` instead."
         );
       }

From a9fed0408992a1a790835438001d3286dd6feb87 Mon Sep 17 00:00:00 2001
From: Luke Oliff
Date: Mon, 22 Jan 2024 16:01:34 +0000
Subject: [PATCH 3/3] feat: sits analyze types and schema

---
Review notes (commentary only; not part of the commit message):
 * helpers.ts: `isUrlSource` and the new `isTextSource` narrow by truthiness of `.url` / `.text`,
   so a source whose value is an empty string is not recognised.
 * helpers.ts keeps importing `PrerecordedSource` from "./types"; index.ts re-exports it from
   "./DeepgramSource" after the rename.
 * AnalyzeSchema.ts and the `model_info` member carry explicit `Record<...>` type arguments;
   a bare `Record` does not compile.
 * SyncAnalyzeResponse.ts: `Segment` is shared by `Sentiments`, `Topics` and `Intents`, so
   `sentiment` is optional like `sentiment_score`. The `*_info` metadata members are optional,
   matching the optional `Results` members and the Metadata in SyncPrerecordedResponse.ts.
   NOTE(review): confirm against real `v1/read` responses.
 * SyncPrerecordedResponse.ts returns to its earlier declaration order; the per-feature segment
   interfaces from PATCH 1/3 are replaced by a single `Segment`.

 src/lib/helpers.ts                            |  21 +-
 src/lib/types/AnalyzeSchema.ts                |  28 +++
 src/lib/types/AsyncAnalyzeResponse.ts         |   3 +
 ...PrerecordedSource.ts => DeepgramSource.ts} |   6 +
 src/lib/types/SyncAnalyzeResponse.ts          |  88 ++++++++
 src/lib/types/SyncPrerecordedResponse.ts      | 195 ++++++++++--------
 src/lib/types/index.ts                        |  11 +-
 src/packages/ReadClient.ts                    | 161 +++++++++++++++
 8 files changed, 424 insertions(+), 89 deletions(-)
 create mode 100644 src/lib/types/AnalyzeSchema.ts
 create mode 100644 src/lib/types/AsyncAnalyzeResponse.ts
 rename src/lib/types/{PrerecordedSource.ts => DeepgramSource.ts} (65%)
 create mode 100644 src/lib/types/SyncAnalyzeResponse.ts
 create mode 100644 src/packages/ReadClient.ts

diff --git a/src/lib/helpers.ts b/src/lib/helpers.ts
index de979566..8e18f190 100644
--- a/src/lib/helpers.ts
+++ b/src/lib/helpers.ts
@@ -1,5 +1,12 @@
 import { Headers as CrossFetchHeaders } from "cross-fetch";
-import { DeepgramClientOptions, FileSource, PrerecordedSource, UrlSource } from "./types";
+import {
+  DeepgramClientOptions,
+  FileSource,
+  PrerecordedSource,
+  UrlSource,
+  TextSource,
+  AnalyzeSource,
+} from "./types";
 import { Readable } from "stream";
 import merge from "deepmerge";

@@ -41,12 +48,22 @@ export const resolveHeadersConstructor = () => {
   return Headers;
 };

-export const isUrlSource = (providedSource: PrerecordedSource): providedSource is UrlSource => {
+export const isUrlSource = (
+  providedSource: PrerecordedSource | AnalyzeSource
+): providedSource is UrlSource => {
   if ((providedSource as UrlSource).url) return true;

   return false;
 };

+export const isTextSource = (
+  providedSource: PrerecordedSource | AnalyzeSource
+): providedSource is TextSource => {
+  if ((providedSource as TextSource).text) return true;
+
+  return false;
+};
+
 export const isFileSource = (providedSource: PrerecordedSource): providedSource is FileSource => {
   if (isReadStreamSource(providedSource) || isBufferSource(providedSource)) return true;

diff --git a/src/lib/types/AnalyzeSchema.ts b/src/lib/types/AnalyzeSchema.ts
new file mode 100644
index 00000000..093a41a3
--- /dev/null
+++ b/src/lib/types/AnalyzeSchema.ts
@@ -0,0 +1,28 @@
+/**
+ * Options for read analysis; every member is optional.
+ */
+interface AnalyzeSchema extends Record<string, unknown> {
+  callback?: string;
+
+  callback_method?: string;
+
+  custom_intent?: string | string[];
+
+  custom_intent_mode?: "strict" | "extended";
+
+  custom_topic?: string | string[];
+
+  custom_topic_mode?: "strict" | "extended";
+
+  intents?: boolean;
+
+  language?: string;
+
+  summarize?: boolean;
+
+  sentiment?: boolean;
+
+  topics?: boolean;
+}
+
+export type { AnalyzeSchema };
diff --git a/src/lib/types/AsyncAnalyzeResponse.ts b/src/lib/types/AsyncAnalyzeResponse.ts
new file mode 100644
index 00000000..ba7dd76f
--- /dev/null
+++ b/src/lib/types/AsyncAnalyzeResponse.ts
@@ -0,0 +1,3 @@
+export interface AsyncAnalyzeResponse {
+  request_id: string;
+}
diff --git a/src/lib/types/PrerecordedSource.ts b/src/lib/types/DeepgramSource.ts
similarity index 65%
rename from src/lib/types/PrerecordedSource.ts
rename to src/lib/types/DeepgramSource.ts
index e85d5c02..a22fb4c0 100644
--- a/src/lib/types/PrerecordedSource.ts
+++ b/src/lib/types/DeepgramSource.ts
@@ -7,3 +7,9 @@ export type FileSource = Buffer | Readable;
 export interface UrlSource {
   url: string;
 }
+
+export interface TextSource {
+  text: string;
+}
+
+export type AnalyzeSource = UrlSource | TextSource;
diff --git a/src/lib/types/SyncAnalyzeResponse.ts b/src/lib/types/SyncAnalyzeResponse.ts
new file mode 100644
index 00000000..c725eab6
--- /dev/null
+++ b/src/lib/types/SyncAnalyzeResponse.ts
@@ -0,0 +1,88 @@
+export interface SyncAnalyzeResponse {
+  model_uuid: string;
+  metadata: Metadata;
+  results: Results;
+}
+
+interface IntentsInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface SentimentInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface SummaryInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface TopicsInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface Metadata {
+  request_id: string;
+  created: string;
+  language: string;
+  intents_info?: IntentsInfo;
+  sentiment_info?: SentimentInfo;
+  summary_info?: SummaryInfo;
+  topics_info?: TopicsInfo;
+}
+
+interface Average {
+  sentiment: string;
+  sentiment_score: number;
+}
+
+interface Summary {
+  text: string;
+}
+
+interface Topic {
+  topic: string;
+  confidence_score: number;
+}
+
+interface Intent {
+  intent: string;
+  confidence_score: number;
+}
+
+interface Segment {
+  text: string;
+  start_word: number;
+  end_word: number;
+  sentiment?: "positive" | "neutral" | "negative";
+  sentiment_score?: number;
+  topics?: Topic[];
+  intents?: Intent[];
+}
+
+interface Sentiments {
+  segments: Segment[];
+  average: Average;
+}
+
+interface Topics {
+  segments: Segment[];
+}
+
+interface Intents {
+  segments: Segment[];
+}
+
+interface Results {
+  sentiments?: Sentiments;
+  summary?: Summary;
+  topics?: Topics;
+  intents?: Intents;
+}
diff --git a/src/lib/types/SyncPrerecordedResponse.ts b/src/lib/types/SyncPrerecordedResponse.ts
index 63b00505..5716af78 100644
--- a/src/lib/types/SyncPrerecordedResponse.ts
+++ b/src/lib/types/SyncPrerecordedResponse.ts
@@ -3,51 +3,6 @@ export interface SyncPrerecordedResponse {
   results: Result;
 }

-interface Result {
-  channels: Channel[];
-  utterances?: Utterance[];
-  summary?: TranscriptionSummary;
-  sentiments?: Sentiment;
-  topics?: Topics;
-  intents?: Intents;
-}
-
-interface Channel {
-  alternatives: Alternative[];
-  search?: Search[];
-  detected_language?: string;
-  language_confidence?: number;
-}
-
-interface Utterance {
-  start: number;
-  end: number;
-  confidence: number;
-  channel: number;
-  transcript: string;
-  words: WordBase[];
-  speaker?: number;
-  id: string;
-}
-
-interface TranscriptionSummary {
-  result: string;
-  short: string;
-}
-
-interface Sentiment {
-  segments?: SentimentSegment[];
-  average?: Average;
-}
-
-interface Topics {
-  segments?: TopicsSegment[];
-}
-
-interface Intents {
-  segments?: IntentsSegment[];
-}
-
 interface Alternative {
   transcript: string;
   confidence: number;
@@ -59,21 +14,11 @@ interface Alternative {
   topics?: TopicGroup[];
 }

-interface Search {
-  query: string;
-  hits: Hit[];
-}
-
-interface WordBase {
-  word: string;
-  start: number;
-  end: number;
-  confidence: number;
-  punctuated_word?: string;
-  speaker?: number;
-  speaker_confidence?: number;
-  sentiment: string;
-  sentiment_score: number;
+interface Channel {
+  search?: Search[];
+  alternatives: Alternative[];
+  detected_language?: string;
+  language_confidence?: number;
 }

 interface Entity {
@@ -99,8 +44,12 @@ interface Metadata {
   duration: number;
   channels: number;
   models: string[];
-  model_info: Record<string, ModelInfo>;
   warnings?: Warning[];
+  model_info: Record<string, ModelInfo>;
+  summary_info?: SummaryInfo;
+  intents_info?: IntentsInfo;
+  sentiment_info?: SentimentInfo;
+  topics_info?: TopicsInfo;
 }

 interface ModelInfo {
@@ -109,6 +58,30 @@ interface ModelInfo {
   arch: string;
 }

+interface SummaryInfo {
+  input_tokens: number;
+  output_tokens: number;
+  model_uuid: string;
+}
+
+interface IntentsInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface SentimentInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+interface TopicsInfo {
+  model_uuid: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
 interface Paragraph {
   sentences: Sentence[];
   start: number;
@@ -121,6 +94,58 @@ interface ParagraphGroup {
   paragraphs: Paragraph[];
 }

+interface Result {
+  channels: Channel[];
+  utterances?: Utterance[];
+  summary?: TranscriptionSummary;
+  sentiments?: Sentiments;
+  topics?: Topics;
+  intents?: Intents;
+}
+
+interface Sentiments {
+  segments: Segment[];
+  average: Average;
+}
+
+interface Topics {
+  segments: Segment[];
+}
+
+interface Intents {
+  segments: Segment[];
+}
+
+interface Intent {
+  intent: string;
+  confidence_score: number;
+}
+
+interface Average {
+  sentiment: string;
+  sentiment_score: number;
+}
+
+interface SegmentTopic {
+  topic: string;
+  confidence_score: number;
+}
+
+interface Segment {
+  text: string;
+  start_word: number;
+  end_word: number;
+  sentiment?: string;
+  sentiment_score?: number;
+  topics?: SegmentTopic[];
+  intents?: Intent[];
+}
+
+interface Search {
+  query: string;
+  hits: Hit[];
+}
+
 interface Sentence {
   text: string;
   start: number;
@@ -132,6 +157,10 @@ interface Summary {
   start_word?: number;
   end_word?: number;
 }
+interface TranscriptionSummary {
+  result: string;
+  short: string;
+}

 interface Topic {
   topic: string;
@@ -150,35 +179,29 @@ interface Translation {
   translation: string;
 }

+interface Utterance {
+  start: number;
+  end: number;
+  confidence: number;
+  channel: number;
+  transcript: string;
+  words: WordBase[];
+  speaker?: number;
+  id: string;
+}
+
 interface Warning {
   parameter: string;
   type: string;
   message: string;
 }

-interface SentimentSegment {
-  text: string;
-  start_word: number;
-  end_word: number;
-  sentiment: "positive" | "neutral" | "negative";
-  sentiment_score: number;
-}
-
-interface Average {
-  sentiment: string;
-  sentiment_score: number;
-}
-
-interface TopicsSegment {
-  text: string;
-  start_word: number;
-  end_word: number;
-  topics: { topic: string; confidence_score: number }[];
-}
-
-interface IntentsSegment {
-  text: string;
-  start_word: number;
-  end_word: number;
-  intents: { intent: string; confidence_score: number }[];
+interface WordBase {
+  word: string;
+  start: number;
+  end: number;
+  confidence: number;
+  punctuated_word?: string;
+  speaker?: number;
+  speaker_confidence?: number;
 }
diff --git a/src/lib/types/index.ts b/src/lib/types/index.ts
index 7fc52cb9..7f8a3268 100644
--- a/src/lib/types/index.ts
+++ b/src/lib/types/index.ts
@@ -1,4 +1,6 @@
+export type { AnalyzeSchema } from "./AnalyzeSchema";
 export type { AsyncPrerecordedResponse } from "./AsyncPrerecordedResponse";
+export type { AsyncAnalyzeResponse } from "./AsyncAnalyzeResponse";
 export type { CreateOnPremCredentialsSchema } from "./CreateOnPremCredentialsSchema";
 export type { CreateProjectKeySchema } from "./CreateProjectKeySchema";
 export type { CreateProjectKeyResponse } from "./CreateProjectKeyResponse";
@@ -32,9 +34,16 @@ export type { LiveConfigOptions } from "./LiveConfigOptions";
 export type { LiveMetadataEvent } from "./LiveMetadataEvent";
 export type { LiveTranscriptionEvent } from "./LiveTranscriptionEvent";
 export type { MessageResponse } from "./MessageResponse";
-export type { FileSource, PrerecordedSource, UrlSource } from "./PrerecordedSource";
+export type {
+  FileSource,
+  PrerecordedSource,
+  UrlSource,
+  TextSource,
+  AnalyzeSource,
+} from "./DeepgramSource";
 export type { SendProjectInviteSchema } from "./SendProjectInviteSchema";
 export type { SyncPrerecordedResponse } from "./SyncPrerecordedResponse";
+export type { SyncAnalyzeResponse } from "./SyncAnalyzeResponse";
 export type { TranscriptionSchema, PrerecordedSchema, LiveSchema } from "./TranscriptionSchema";
 export type { UpdateProjectMemberScopeSchema } from "./UpdateProjectMemberScopeSchema";
 export type { UpdateProjectSchema } from "./UpdateProjectSchema";
diff --git a/src/packages/ReadClient.ts b/src/packages/ReadClient.ts
new file mode 100644
index 00000000..17f82dbb
--- /dev/null
+++ b/src/packages/ReadClient.ts
@@ -0,0 +1,182 @@
+import { AbstractRestfulClient } from "./AbstractRestfulClient";
+import { CallbackUrl, appendSearchParams, isTextSource, isUrlSource } from "../lib/helpers";
+import { DeepgramError, isDeepgramError } from "../lib/errors";
+import type {
+  AnalyzeSchema,
+  AsyncAnalyzeResponse,
+  DeepgramResponse,
+  Fetch,
+  SyncAnalyzeResponse,
+  TextSource,
+  UrlSource,
+} from "../lib/types";
+
+/**
+ * Client for the read endpoint (`v1/read` unless another `endpoint` is passed).
+ *
+ * Each method resolves to `{ result, error }`: a `DeepgramError` is returned in `error` with a
+ * null `result`; any other error is rethrown.
+ */
+export class ReadClient extends AbstractRestfulClient {
+  /**
+   * Analyze the content behind a URL; the analysis comes back in the response.
+   *
+   * @param source - serialized with `JSON.stringify` and sent as the request body
+   * @param options - added to the request URL via `appendSearchParams`; must not contain `callback`
+   * @param endpoint - resolved against `this.baseUrl`
+   */
+  async analyzeUrl(
+    source: UrlSource,
+    options?: AnalyzeSchema,
+    endpoint = "v1/read"
+  ): Promise<DeepgramResponse<SyncAnalyzeResponse>> {
+    try {
+      if (!isUrlSource(source)) {
+        throw new DeepgramError("Unknown source type");
+      }
+
+      if (options !== undefined && "callback" in options) {
+        throw new DeepgramError(
+          "Callback cannot be provided as an option to a synchronous request. Use `analyzeUrlCallback` or `analyzeTextCallback` instead."
+        );
+      }
+
+      const analyzeOptions: AnalyzeSchema = { ...options };
+
+      const url = new URL(endpoint, this.baseUrl);
+      appendSearchParams(url.searchParams, analyzeOptions);
+
+      const body = JSON.stringify(source);
+      const result: SyncAnalyzeResponse = await this.post(this.fetch as Fetch, url, body);
+
+      return { result, error: null };
+    } catch (error) {
+      if (isDeepgramError(error)) {
+        return { result: null, error };
+      }
+
+      throw error;
+    }
+  }
+
+  /**
+   * Analyze a piece of text; the analysis comes back in the response.
+   *
+   * @param source - serialized with `JSON.stringify` and sent as the request body
+   * @param options - added to the request URL via `appendSearchParams`; must not contain `callback`
+   * @param endpoint - resolved against `this.baseUrl`
+   */
+  async analyzeText(
+    source: TextSource,
+    options?: AnalyzeSchema,
+    endpoint = "v1/read"
+  ): Promise<DeepgramResponse<SyncAnalyzeResponse>> {
+    try {
+      if (!isTextSource(source)) {
+        throw new DeepgramError("Unknown source type");
+      }
+
+      if (options !== undefined && "callback" in options) {
+        throw new DeepgramError(
+          "Callback cannot be provided as an option to a synchronous request. Use `analyzeUrlCallback` or `analyzeTextCallback` instead."
+        );
+      }
+
+      const analyzeOptions: AnalyzeSchema = { ...options };
+
+      const url = new URL(endpoint, this.baseUrl);
+      appendSearchParams(url.searchParams, analyzeOptions);
+
+      const body = JSON.stringify(source);
+      const result: SyncAnalyzeResponse = await this.post(this.fetch as Fetch, url, body);
+
+      return { result, error: null };
+    } catch (error) {
+      if (isDeepgramError(error)) {
+        return { result: null, error };
+      }
+
+      throw error;
+    }
+  }
+
+  /**
+   * Analyze the content behind a URL, sending `callback` along as a request option.
+   *
+   * @param source - serialized with `JSON.stringify` and sent as the request body
+   * @param callback - its string form replaces any `callback` already present in `options`
+   * @param options - added to the request URL via `appendSearchParams`
+   * @param endpoint - resolved against `this.baseUrl`
+   */
+  async analyzeUrlCallback(
+    source: UrlSource,
+    callback: CallbackUrl,
+    options?: AnalyzeSchema,
+    endpoint = "v1/read"
+  ): Promise<DeepgramResponse<AsyncAnalyzeResponse>> {
+    try {
+      if (!isUrlSource(source)) {
+        throw new DeepgramError("Unknown source type");
+      }
+
+      const analyzeOptions: AnalyzeSchema = {
+        ...options,
+        callback: callback.toString(),
+      };
+
+      const url = new URL(endpoint, this.baseUrl);
+      appendSearchParams(url.searchParams, analyzeOptions);
+
+      const body = JSON.stringify(source);
+      const result: AsyncAnalyzeResponse = await this.post(this.fetch as Fetch, url, body);
+
+      return { result, error: null };
+    } catch (error) {
+      if (isDeepgramError(error)) {
+        return { result: null, error };
+      }
+
+      throw error;
+    }
+  }
+
+  /**
+   * Analyze a piece of text, sending `callback` along as a request option.
+   *
+   * @param source - serialized with `JSON.stringify` and sent as the request body, as in `analyzeText`
+   * @param callback - its string form replaces any `callback` already present in `options`
+   * @param options - added to the request URL via `appendSearchParams`
+   * @param endpoint - resolved against `this.baseUrl`
+   */
+  async analyzeTextCallback(
+    source: TextSource,
+    callback: CallbackUrl,
+    options?: AnalyzeSchema,
+    endpoint = "v1/read"
+  ): Promise<DeepgramResponse<AsyncAnalyzeResponse>> {
+    try {
+      if (!isTextSource(source)) {
+        throw new DeepgramError("Unknown source type");
+      }
+
+      const analyzeOptions: AnalyzeSchema = {
+        ...options,
+        callback: callback.toString(),
+      };
+
+      const url = new URL(endpoint, this.baseUrl);
+      appendSearchParams(url.searchParams, analyzeOptions);
+
+      const body = JSON.stringify(source);
+      const result: AsyncAnalyzeResponse = await this.post(this.fetch as Fetch, url, body);
+
+      return { result, error: null };
+    } catch (error) {
+      if (isDeepgramError(error)) {
+        return { result: null, error };
+      }
+
+      throw error;
+    }
+  }
+}