diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..fdf4667
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,45 @@
+name: Publish to npmjs.com
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  build-and-publish:
+    name: Build and publish to npmjs.com
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Node.js 20
+        uses: actions/setup-node@v3
+        with:
+          node-version: 20
+      - name: Checkout source code
+        uses: actions/checkout@v3
+      - name: Install pnpm
+        run: npm install -g pnpm@8.7.4
+      # doesn't work with Node 20
+      # - name: Install pnpm
+      #   uses: pnpm/action-setup@v2
+      #   with:
+      #     version: 8.7.4
+      #     run_install: false
+      - name: Get pnpm store directory
+        shell: bash
+        run: |
+          echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
+      - uses: actions/cache@v3
+        name: Setup pnpm cache
+        with:
+          path: ${{ env.STORE_PATH }}
+          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
+          restore-keys: |
+            ${{ runner.os }}-pnpm-store-
+      - name: Install dependencies
+        run: pnpm install --no-frozen-lockfile
+      - name: Build code
+        run: npm run build
+      - run: |
+          npm config set //registry.npmjs.org/:_authToken=$NPM_AUTH_TOKEN
+          npm publish
+        env:
+          NPM_AUTH_TOKEN: ${{ secrets.NPM_AUTH_TOKEN }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..a974a01
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,54 @@
+name: Test
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+
+jobs:
+  test:
+    name: Node.js ${{ matrix['node-version'] }} on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        node-version:
+          - "20"
+          # - "18"
+          # - "16"
+        os:
+          - ubuntu-latest
+          # - macos-latest
+          # - windows-latest
+    steps:
+      - name: Setup Node.js ${{ matrix['node-version'] }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix['node-version'] }}
+      - name: Checkout source code
+        uses: actions/checkout@v3
+      - name: Install pnpm
+        run: npm install -g pnpm@8.7.4
+      # doesn't work with Node 20
+      # - name: Install pnpm
+      #   uses: pnpm/action-setup@v2
+      #   with:
+      #     version: 8.7.4
+      #     run_install: false
+      - name: Get pnpm store directory
+        shell: bash
+        run: |
+          echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV
+      - uses: actions/cache@v3
+        name: Setup pnpm cache
+        with:
+          path: ${{ env.STORE_PATH }}
+          key: ${{ matrix.os }}${{ matrix['node-version'] }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
+          restore-keys: |
+            ${{ matrix.os }}${{ matrix['node-version'] }}-pnpm-store-
+      - name: Install dependencies
+        run: pnpm install --no-frozen-lockfile
+      - name: Build code
+        run: npm run build
+      - name: Run tests
+        run: npm test
diff --git a/.gitignore b/.gitignore
index 7837156..5bf8d6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,80 +1,22 @@
-
-# Created by https://www.gitignore.io/api/node
-
-### Node ###
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
+# Coverage reports
 coverage
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-#
Dependency directories -node_modules/ -jspm_packages/ - -# TypeScript v1 declaration files -typings/ - -# Optional npm cache directory -.npm - -# Optional eslint cache -.eslintcache - -# Optional REPL history -.node_repl_history - -# Output of 'npm pack' -*.tgz - -# Yarn Integrity file -.yarn-integrity - -# dotenv environment variables file +# API keys and secrets .env -# parcel-bundler cache (https://parceljs.org/) -.cache - -# next.js build output -.next - -# nuxt.js build output -.nuxt +# Dependency directory +node_modules -# vuepress build output -.vuepress/dist +# Editors +.idea +*.iml -# Serverless directories -.serverless +# OS metadata +.DS_Store +Thumbs.db +# Ignore built ts files +dist/**/* -# End of https://www.gitignore.io/api/node +# Ignore Jest directory +.jest diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..d89eb42 --- /dev/null +++ b/.prettierrc @@ -0,0 +1 @@ +semi: true diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f132aec --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 AssemblyAI + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 109a313..6d21c13 100644 --- a/README.md +++ b/README.md @@ -1,116 +1,225 @@ -## Installing the module: + - `npm i assemblyai` +--- -## Authenticating with the API +# AssemblyAI Node.js SDK -### Using Environment Variables +The AssemblyAI Node.js SDK provides an easy-to-use interface for interacting with the AssemblyAI API, +which supports async and real-time transcription, as well as the latest LeMUR models. -If you have the `ASSEMBLYAI_API_KEY` environment variable set, then the application -will attempt to read it and use this value to authenticate with the API. +## Installation -### Setting the value manually +You can install the AssemblyAI SDK by running: -Here is what the code would look like if you were to set the API token manually. +```bash +npm install assemblyai +``` + +```bash +yarn add assemblyai +``` + +```bash +pnpm add assemblyai +``` + +```bash +bun add assemblyai +``` + +# Usage + +Import the AssemblyAI package and create an AssemblyAI object with your API key: ```javascript -const assemblyai = require('assemblyai') -assemblyai.setAPIKey("ENTER YOUR KEY HERE") +import AssemblyAI from "assemblyai"; + +const client = new AssemblyAI({ + apiKey: process.env.ASSEMBLYAI_API_KEY, +}) ``` -## Usage +You can now use the `client` object to interact with the AssemblyAI API. 
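+If you use CommonJS, you can require the SDK instead. This is a minimal sketch, assuming you consume the CommonJS build in `dist`, where the client class is exposed as the `default` export:
+
+```javascript
+// CommonJS equivalent of the import above (illustrative only, same client API)
+const AssemblyAI = require("assemblyai").default;
+
+const client = new AssemblyAI({
+  apiKey: process.env.ASSEMBLYAI_API_KEY,
+});
+```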
-### Initialization
+## Create a transcript
 
-The initialization of the module of course has to be at the beginning of your project.
+When you create a transcript, you can either pass in a URL to an audio file, or upload a file directly.
 
 ```javascript
-const assemblyai = require('assemblyai')
-assemblyai.setAPIKey("ENTER YOUR KEY HERE")
+// Using a remote URL
+const transcript = await client.transcripts.create({
+  audio_url: 'https://storage.googleapis.com/aai-web-samples/espn-bears.m4a',
+})
 ```
 
-### Upload an audio file for transcription
+```javascript
+// Uploading a file
+const transcript = await client.transcripts.create({
+  audio_url: './news.mp4',
+})
+```
+
+By default, when you create a transcript, it'll be polled until the status is `completed` or `error`.
+You can configure whether to poll, the polling interval, and polling timeout using these options:
 
 ```javascript
-async function upload () {
-  try {
-    const transcript = new assemblyai.Upload('/path/to/audiofile.wav')
-    const response = await transcript.create()
-    const data = response.get()
-
-    // do something with the JSON response
-    console.log(data);
-
-  } catch (e) {
-    // Do some error handling here
-  }
-}
+const transcript = await client.transcripts.create({
+  audio_url: 'https://storage.googleapis.com/aai-web-samples/espn-bears.m4a',
+},
+{
+  // Enable or disable polling. Defaults to true.
+  poll: true,
+  // How frequently the transcript is polled in ms. Defaults to 3000.
+  pollingInterval: 1000,
+  // How long to wait in ms until the "Polling timeout" error is thrown. Defaults to 180000.
+  pollingTimeout: 5000,
+})
 ```
 
-### Transcribe audio from a URL
+## Get a transcript
 
-The only required parameter is the `audio_src_url` parameter. For more information about transcribing audio, please see the full API documentation [here](https://docs.assemblyai.com/api/#posttranscript).
+This will return the transcript object in its current state. If the transcript is still processing, the `status` field will be `queued` or `processing`. Once the transcript is complete, the `status` field will be `completed`.
 
 ```javascript
-async function transcribe () {
-  try {
-    const transcript = new assemblyai.Transcript()
-    const response = await transcript.create({
-      audio_src_url: "https://example.com/example.wav",
-      model_id: 123,
-      options: {
-        format_text: true || false
-      }
-    })
-    const { id } = response.get()
-    const data = await transcript.poll(id)
-
-    // do something with the response data.
-    // `data` is a wrapper of the API's JSON
-    // response. `data.get()` returns the JSON
-    // response of the API
-    var responseJson = data.get();
-    console.log(responseJson);
-
-  } catch (e) {
-    // Do some error handling here
-  }
-}
+const transcript = await client.transcripts.get(transcript.id)
 ```
 
-### Create a custom model
+## List transcripts
 
-Boost accuracy for keywords/phrases, and add custom terms to the vocabulary with a custom model. For more information, please see the full API documentation [here](https://docs.assemblyai.com/guides/custom_models_101/).
+This will return a paged list of transcripts that you have created.
 
 ```javascript
-async function model() {
-  try {
-    const instance = new assemblyai.Model()
-    const response = await instance.create({
-      phrases: ['foo', 'bar']
-    })
-    const { id } = response.get()
-    const data = await instance.poll(id)
-
-    // do something with the response data.
-    // `data` is a wrapper of the API's JSON
-    // response. `data.get()` returns the JSON
-    // response of the API
-    var responseJson = data.get();
-    console.log(responseJson);
-
-  } catch (e) {
-    // Do some error handling
+const page = await client.transcripts.list()
+```
+
+You can also paginate over all pages.
+
+```typescript
+let nextPageUrl: string | null = null;
+do {
+  const page = await client.transcripts.list(nextPageUrl)
+  nextPageUrl = page.page_details.next_url
+} while (nextPageUrl !== null)
+```
+
+## Delete a transcript
+
+```javascript
+const res = await client.transcripts.delete(transcript.id)
+```
+
+## Use LeMUR
+
+Call [LeMUR endpoints](https://www.assemblyai.com/docs/API%20reference/lemur) to summarize, ask questions, generate action items, or run a custom task.
+
+Custom Summary:
+```javascript
+const { response } = await client.lemur.summary({
+  transcript_ids: ['0d295578-8c75-421a-885a-2c487f188927'],
+  answer_format: 'one sentence',
+  context: {
+    speakers: ['Alex', 'Bob'],
+  }
+})
+```
+
+Question & Answer:
+```javascript
+const { response } = await client.lemur.questionAnswer({
+  transcript_ids: ['0d295578-8c75-421a-885a-2c487f188927'],
+  questions: [
+    {
+      question: 'What are they discussing?',
+      answer_format: 'text',
+    }
+  ]
+})
+```
+
+Action Items:
+```javascript
+const { response } = await client.lemur.actionItems({
+  transcript_ids: ['0d295578-8c75-421a-885a-2c487f188927'],
+})
+```
+
+Custom Task:
+```javascript
+const { response } = await client.lemur.task({
+  transcript_ids: ['0d295578-8c75-421a-885a-2c487f188927'],
+  prompt: 'Write a haiku about this conversation.',
+})
+```
+
-### The Response Object
+## Transcribe in real time
 
-When using the `Response` object, you will find a couple of methods:
+Create the real-time service.
+
+```typescript
+const service = client.realtime.createService();
+```
+
+You can also pass in the following options.
+
+```typescript
+const service = client.realtime.createService({
+  realtimeUrl: 'wss://localhost/override',
+  apiKey: process.env.ASSEMBLYAI_API_KEY, // The API key passed to `AssemblyAI` will be used by default
+  sampleRate: 16_000,
+  wordBoost: ['foo', 'bar']
+});
+```
+
+You can also generate a temporary auth token for real-time.
+
+```typescript
+const token = await client.realtime.createTemporaryToken({ expires_in: 60 });
+const rt = client.realtime.createService({
+  token: token
+});
+```
 
-- `get()`
-- `toString()`
-- `stringify()`
+> [!WARNING]
+> Storing your API key in client-facing applications exposes your API key.
+> Generate a temporary auth token on the server and pass it to your client.
 
-The method that you will most likely want to use will be `get()` which returns the full JSON object from the API, one level down.
+You can configure the following events.
+
+```typescript
+rt.on("open", ({ sessionId, expiresAt }) => console.log('Session ID:', sessionId, 'Expires at:', expiresAt));
+rt.on("close", (code: number, reason: string) => console.log('Closed', code, reason));
+rt.on("transcript", (transcript: RealtimeTranscript) => console.log('Transcript:', transcript));
+rt.on("transcript.partial", (transcript: PartialTranscript) => console.log('Partial transcript:', transcript));
+rt.on("transcript.final", (transcript: FinalTranscript) => console.log('Final transcript:', transcript));
+rt.on("error", (error: Error) => console.error('Error', error));
+```
+
+After configuring your events, connect to the server.
+
+```typescript
+await rt.connect();
+```
+
+Send audio data.
+
+```typescript
+// Pseudo code for getting audio
+getAudio((chunk) => {
+  rt.sendAudio(chunk);
+});
+```
+
+Close the connection when you're finished.
+
+```typescript
+rt.close();
+```
+
+# Tests
+
+To run the test suite, first install the dependencies, then run `pnpm test`:
+
+```bash
+pnpm install
+pnpm test
+```
diff --git a/assemblyai.png b/assemblyai.png
new file mode 100644
index 0000000..dca925a
Binary files /dev/null and b/assemblyai.png differ
diff --git a/jest.config.rollup.ts b/jest.config.rollup.ts
new file mode 100644
index 0000000..c5b7901
--- /dev/null
+++ b/jest.config.rollup.ts
@@ -0,0 +1,14 @@
+import type { JestConfigWithTsJest } from "ts-jest";
+
+const jestConfig: JestConfigWithTsJest = {
+  preset: "ts-jest",
+  testEnvironment: "node",
+  collectCoverage: true,
+  moduleNameMapper: {
+    "^@/(.*)$": "<rootDir>/src/$1",
+  },
+};
+
+process.env.TESTDATA_DIR = "tests/static";
+
+export default jestConfig;
diff --git a/package.json b/package.json
index ba521c0..cfb97d6 100644
--- a/package.json
+++ b/package.json
@@ -1,31 +1,68 @@
 {
   "name": "assemblyai",
-  "version": "1.0.1",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
+  "version": "2.0.0-beta",
+  "description": "The AssemblyAI Node.js SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
+  "main": "dist/index.js",
+  "module": "dist/index.esm.js",
+  "types": "dist/index.d.ts",
+  "typings": "dist/index.d.ts",
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/AssemblyAI/assemblyai-node-sdk.git"
+    "url": "git+https://github.com/AssemblyAI/assemblyai-typescript-sdk.git"
   },
-  "keywords": [],
-  "author": "",
-  "license": "ISC",
-  "bugs": {
-    "url": "https://github.com/AssemblyAI/assemblyai-node-sdk/issues"
+  "publishConfig": {
+    "tag": "beta",
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
   },
-  "homepage": "https://github.com/AssemblyAI/assemblyai-node-sdk#readme",
-  "dependencies": {
-    "request": "^2.88.0"
+  "scripts": {
+    "build": "pnpm clean && pnpm rollup -c",
+    "clean": "rimraf dist",
+    "lint": "tslint -p tsconfig.json",
+    "test": "pnpm lint && pnpm test:unit",
+    "test:unit": "jest --config jest.config.rollup.ts",
+    "prettier": "prettier --write 'src/**/*.ts'",
+    "generate-types": "tsx ./scripts/generate-types.ts && pnpm prettier"
   },
+  "keywords": [
+    "AssemblyAI",
+    "Speech-to-text"
+  ],
+  "author": "AssemblyAI (https://www.assemblyai.com)",
+  "license": "MIT",
+  "homepage": "https://www.assemblyai.com/docs",
+  "files": [
+    "dist",
+    "src",
+    "types"
+  ],
   "devDependencies": {
-    "eslint": "^5.4.0",
-    "eslint-config-standard": "^11.0.0",
-    "eslint-plugin-import": "^2.14.0",
-    "eslint-plugin-node": "^7.0.1",
-    "eslint-plugin-promise": "^4.0.0",
-    "eslint-plugin-standard": "^3.1.0"
+    "@types/jest": "^29.5.5",
+    "@types/node": "^20.5.7",
+    "@types/ws": "^8.5.5",
+    "dotenv": "^16.3.1",
+    "eslint": "^8.43.0",
+    "i": "^0.3.7",
+    "jest": "^29.5.0",
+    "jest-cli": "^29.5.0",
+    "jest-junit": "^16.0.0",
+    "jest-mock-extended": "^3.0.4",
+    "jest-websocket-mock": "^2.4.1",
+    "mock-socket": "^9.2.1",
+    "npm": "^9.7.1",
+    "openapi-typescript": "^6.6.1",
+    "prettier": "^2.8.8",
+    "rimraf": "^5.0.1",
+    "rollup": "^3.25.1",
+    "rollup-plugin-typescript2": "^0.34.1",
+    "ts-jest": "^29.1.0",
+    "ts-node": "^10.9.1",
+    "tslib": "^2.5.3",
+    "tslint": "^6.1.3",
+    "typescript": "^5.2.2"
+  },
+  "dependencies": {
+    "axios": "^1.4.0",
+    "ws": "^8.13.0"
   }
 }
diff --git a/rollup.config.js b/rollup.config.js
new file mode 100644
index 0000000..0060484
--- /dev/null
+++ b/rollup.config.js
@@ -0,0 +1,20 @@
+const pkg = require('./package.json')
+const ts = require('rollup-plugin-typescript2')
+
+const plugins = [
+  ts({
+    tsconfigOverride: { exclude: ['**/*.test.ts'] },
+  }),
+]
+
+module.exports = [
+  {
+    input: 'src/index.ts',
+    output: [
+      { file: pkg.main, format: 'cjs' },
+      { file: pkg.module, format: 'es' }
+    ],
+    plugins,
+    external: ['axios', 'fs/promises', 'ws']
+  },
+]
diff --git a/scripts/generate-types.ts b/scripts/generate-types.ts
new file mode 100644
index 0000000..22a2495
--- /dev/null
+++ b/scripts/generate-types.ts
@@ -0,0 +1,59 @@
+import openapiTS from "openapi-typescript";
+import fs from 'fs'
+
+async function generateTypes(apiSpecPath: string, outputPath: string) {
+  const localPath = new URL(apiSpecPath, import.meta.url);
+  let output = await openapiTS(localPath,
+    {
+      alphabetize: true,
+      exportType: true,
+      transform(schemaObject, metadata) {
+        if ('x-fern-type' in schemaObject && schemaObject['x-fern-type'] === "datetime") {
+          // Use Date as type instead of String, even though it will be a string.
+          // The service code manually converts the string into a Date.
+          // E.g. see `TranscriptService#list`.
+          return schemaObject.nullable ? "Date | null" : "Date";
+        }
+      }
+    });
+  const schemasPosition = output.indexOf('schemas: {') + 10;
+  output = output
+    // Remove everything before and after the schemas, as we're only interested in schemas.
+    .substring(schemasPosition, output.indexOf('\n  };\n  responses', schemasPosition))
+    // Turn components["schemas"]["{TYPE_NAME}"] into TYPE_NAME
+    .replace(/components\[\"schemas\"]\[\"(\w*)\"\]/gm, "$1")
+    .split('\n')
+    // De-indent everything by 4
+    .map(l => l.substring(4))
+    .map(l => {
+      if (l.trim() === '' || l.startsWith(' ') || l.startsWith('/')) return l;
+      // Add newlines after each type
+      if (l.endsWith(';')) l += "\n";
+      // Replace `{TYPE_NAME}: ` with `export type {TYPE_NAME} = ` for each type
+      l = l.replace(/(?!\s)(.*): /, "export type $1 = ")
+      return l;
+    })
+    .join('\n');
+
+  // Add file header
+  output = `// this file is generated by typescript/scripts/generate-types.ts
+/* tslint:disable */
+/* eslint-disable */
+
+/** OneOf type helpers */
+type Without<T, U> = { [P in Exclude<keyof T, keyof U>]?: never };
+type XOR<T, U> = (T | U) extends object ? (Without<T, U> & U) | (Without<U, T> & T) : T | U;
+type OneOf<T extends any[]> = T extends [infer Only] ? Only : T extends [infer A, infer B, ...infer Rest] ? OneOf<[XOR<A, B>, ...Rest]> : never;
+` + output;
+
+  fs.writeFileSync(outputPath, output)
+}
+
+generateTypes(
+  '../../spec/openapi.yml',
+  './src/types/openapi.generated.ts'
+)
+generateTypes(
+  '../../spec/asyncapi.yml',
+  './src/types/asyncapi.generated.ts'
+)
diff --git a/scripts/kitchensink.ts b/scripts/kitchensink.ts
new file mode 100644
index 0000000..fab1f45
--- /dev/null
+++ b/scripts/kitchensink.ts
@@ -0,0 +1,340 @@
+import { createReadStream } from 'fs'
+import 'dotenv/config'
+import AssemblyAI, { Transcript, CreateTranscriptParameters } from '../src/index';
+import { FinalTranscript, PartialTranscript, RealtimeTranscript } from '../src/types'
+
+const client = new AssemblyAI({
+  apiKey: process.env.ASSEMBLYAI_API_KEY || '',
+});
+
+(async function transcribeUsingRealtime() {
+  const useToken = false;
+  const serviceParams: any = {
+    sample_rate: 16_000,
+    word_boost: ['gore', 'climate']
+  };
+  if (useToken) {
+    serviceParams.token = await client.realtime.createTemporaryToken({ expires_in: 480 });
+  }
+  const rt = client.realtime.createService(serviceParams);
+
+  rt.on("open", ({ sessionId, expiresAt }) => {
+    console.log('Session ID:', sessionId, 'Expires At:', expiresAt);
+  });
+  rt.on("close", (code: number, reason: string) => console.log('Closed', code, reason))
+  rt.on("transcript", (transcript: RealtimeTranscript) => console.log('Transcript:', transcript));
+  rt.on("transcript.partial", (transcript: PartialTranscript) => console.log('Transcript:', transcript));
+  rt.on("transcript.final", (transcript: FinalTranscript) => console.log('Transcript:', transcript));
+  rt.on("error", (error: Error) => console.error('Error', error));
+
+
+  try {
+    await rt.connect();
+
+    const chunkSize = 8 * 1024;
+    const audio = createReadStream(
+      './tests/static/gore-short.wav',
+      { highWaterMark: chunkSize }
+    );
+    for await (const chunk of audio) {
+      if (chunk.length < chunkSize) continue;
+      rt.sendAudio(chunk);
+      await new Promise((resolve) =>
+        setTimeout(resolve, 300)
+      );
+    }
+    console.log('File end')
+
+    await rt.close();
+  } catch (error) {
+    console.error(error);
+  }
+})();
+
+const audioUrl = 'https://storage.googleapis.com/aai-docs-samples/espn.m4a';
+const createTranscriptParams: CreateTranscriptParameters = {
+  audio_url: audioUrl,
+  boost_param: 'high',
+  word_boost: ['Chicago', 'draft'],
+  disfluencies: true,
+  dual_channel: true,
+  format_text: false,
+  language_code: 'en',
+  punctuate: false,
+  speech_threshold: 0.5,
+};
+
+(async function createStandardTranscript() {
+  const transcript = await client.transcripts.create(createTranscriptParams);
+  console.log(transcript);
+  return transcript;
+})()
+  .then(async (transcript) => {
+    await exportAsSubtitles(transcript);
+    await getParagraphs(transcript);
+    await getSentences(transcript);
+    await searchTranscript(transcript);
+    await deleteTranscript(transcript);
+  });
+
+(async function runLemurModels() {
+  const transcript = await client.transcripts.create(createTranscriptParams);
+  await lemurSummary(transcript);
+  await lemurQuestionAnswer(transcript);
+  await lemurActionPoints(transcript);
+  await lemurCustomTask(transcript);
+  await deleteTranscript(transcript);
+})();
+
+(async function createTranscriptWithBadUrl() {
+  const transcript = await client.transcripts.create({
+    audio_url: 'https://storage.googleapis.com/api-docs-samples/oops.m4a'
+  });
+  console.log(transcript);
+  return transcript;
+})().then(async (transcript) => {
+  try {
+    await getParagraphs(transcript);
+    console.error("Error expected but not thrown.");
+  }
catch (error) { + console.log("Error expected:", error.toString()); + await deleteTranscript(transcript); + } +}); + +(async function createTranscriptWithNullUrl() { + try { + await client.transcripts.create({ + audio_url: null as unknown as string + }); + console.error("Error expected but not thrown."); + } catch (error) { + console.log("Error expected:", error.toString()); + } +})(); + +(async function createTranscriptWithword_boost() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + boost_param: 'high', + word_boost: ['knee', 'hip'], + }); + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithSummarization() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + summarization: true, + summary_model: 'conversational', + summary_type: 'bullets_verbose', + punctuate: true, + format_text: true + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithContentSafety() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + content_safety: true, + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithCustomSpelling() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + custom_spelling: [ + { from: ['quarterback', 'QB'], to: 'nickelback' }, + { from: ['bear'], to: 'cub' }, + ] + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithEntityDetection() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + entity_detection: true, + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithFilterProfanity() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + filter_profanity: true, + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithTopicDetection() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + iab_categories: true + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithLanguageDetection() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + language_code: undefined, + language_detection: true + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithPiiRedaction() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + format_text: true, + redact_pii: true, + redact_pii_audio: true, + redact_pii_audio_quality: 'wav', + redact_pii_policies: [ + 'injury', + 'medical_condition', + 'medical_process' + ], + redact_pii_sub: 'hash', + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithSentimentAnalysis() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + punctuate: true, + sentiment_analysis: true, + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithSpeakerLabels() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + dual_channel: false, + punctuate: true, + speaker_labels: 
true, + speakers_expected: 2, + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function createTranscriptWithWebhook() { + const transcript = await client.transcripts.create({ + ...createTranscriptParams, + webhook_auth_header_name: 'x-foo', + webhook_auth_header_value: 'bar', + webhook_url: 'https://www.assemblyai.com/404' + }) + console.log(transcript); + return transcript; +})().then(deleteTranscript); + +(async function listTranscripts() { + let nextPageUrl: string | null = null; + do { + const page = await client.transcripts.list(nextPageUrl) + console.log(page); + nextPageUrl = page.page_details.next_url; + } while (nextPageUrl !== null) +})(); + +async function searchTranscript(transcript: Transcript) { + console.error('Search is not yet implemented'); + // const result = await client.transcripts.search(transcript.id, { + // words: ['draft', 'football'] + // }); + // console.log(result); +} + +async function exportAsSubtitles(transcript: Transcript) { + const srt = await client.transcripts.subtitles(transcript.id, 'srt') + const vtt = await client.transcripts.subtitles(transcript.id, 'vtt') + console.log('SRT subtitles', srt); + console.log('VTT subtitles', vtt); +} + +async function getParagraphs(transcript: Transcript) { + const paragraphs = await client.transcripts.paragraphs(transcript.id) + console.dir(paragraphs, { depth: null }); +} + +async function getSentences(transcript: Transcript) { + const sentences = await client.transcripts.sentences(transcript.id) + console.dir(sentences, { depth: null }); +} + +async function deleteTranscript(transcript: Transcript) { + await client.transcripts.delete(transcript.id); +} + +const lemurContext = 'This is a podcast on the ESPN channel talking about NFL draft picks.'; + +async function lemurSummary(transcript: Transcript) { + const response = await client.lemur.summary({ + transcript_ids: [transcript.id], + context: lemurContext, + final_model: 'basic', + max_output_size: 3000, + answer_format: 'bullet points' + }) + console.log(response.response); +} + +async function lemurQuestionAnswer(transcript: Transcript) { + const response = await client.lemur.questionAnswer({ + transcript_ids: [transcript.id], + questions: [ + { + question: 'Which players were mentioned?', + context: lemurContext, + answer_format: ' ', + }, + { + question: 'Were they excited', + context: lemurContext, + answer_options: ['yes', 'no'] + } + ], + context: lemurContext, + final_model: 'basic', + max_output_size: 3000 + }) + console.log(response.response); +} + +async function lemurActionPoints(transcript: Transcript) { + const response = await client.lemur.actionItems({ + transcript_ids: [transcript.id], + context: lemurContext, + final_model: 'basic', + max_output_size: 3000 + }) + console.log(response.response); +} + +async function lemurCustomTask(transcript: Transcript) { + const response = await client.lemur.task({ + transcript_ids: [transcript.id], + prompt: 'List all the teams and their players that are mentioned.', + context: lemurContext, + final_model: 'basic', + max_output_size: 3000 + }) + console.log(response.response); +} diff --git a/src/Client.js b/src/Client.js deleted file mode 100644 index cd96dcb..0000000 --- a/src/Client.js +++ /dev/null @@ -1,28 +0,0 @@ -let _apiKey = '' - -class Client { - static set API_KEY (key) { - _apiKey = key - } - - static get API_KEY () { - return _apiKey - } - - static checkKey () { - if (process.env.ASSEMBLYAI_API_KEY) { - _apiKey = process.env.ASSEMBLYAI_API_KEY - } - if 
(!_apiKey) { - throw new Error(` - Unable to find the API Key. - You can set this value by using the setAPIKey method. - Example: """ const assemblyai = require('assemblyai') - assemblyai.setAPIKey('example') """ - `) - } - return true - } -} - -module.exports = Client diff --git a/src/api/Http/Request.js b/src/api/Http/Request.js deleted file mode 100644 index f3c60b7..0000000 --- a/src/api/Http/Request.js +++ /dev/null @@ -1,108 +0,0 @@ -const request = require('request') -const Client = require('../../Client') - -class Request { - /** - * Initializes the class - * @param {Object} options The HTTP Options - * @param {String} options.method The HTTP Method - * @param {String} options.url THE HTTP URL - * @param {Object} options.body optional HTTP Body - */ - constructor (options) { - this.method = options.method || 'GET' - this.url = options.url || '' - this.body = options.body || '' - } - - /** - * Sends an HTTP request using the node HTTP module - * - * @returns {Promise} Parsed JSON of the response - */ - _request (isJSON) { - const options = { - uri: this.url, - method: this.method, - headers: { - authorization: Client.API_KEY - } - } - - if (this.method === 'POST' || this.method === 'PUT') { - if (this.body && this.body.constructor !== String) { - options.body = JSON.stringify(this.body) - } else { - options.body = '' - } - } - - return new Promise((resolve, reject) => { - request(options, (err, response, body) => { - if (err) return reject(err) - try { - if (isJSON) { - resolve(JSON.parse(body)) - } else { - resolve(body) - } - } catch (e) { - console.log('HERE:', e) - reject(new Error(` - Unable to recieve a proper JSON response from the API. - Please contact customer service. - `)) - } - }) - }) - } - - /** - * Sends a request to the API - * - * @throws {Error} the error receieved from the API - */ - async send (isJSON = true) { - Client.checkKey() - let response = null - let retries = 1 - while (!response) { - if (retries === 5) { - throw new Error(` - Retry limit reached. Some things that could cause this to happen - would be your network connection or slow internet (request timeout). - Please look into this before continuing to use this SDK - `) - } - - response = await new Promise(resolve => { - setTimeout(async () => { - try { - const res = await this._request(isJSON) - resolve(res) - } catch (e) { - retries += 1 - /** - * The reason for resolving the null value would be so response is still *falsey* - * allowing the loop to run again. If no value is resolved here, the loop will hang - * until the value is resolved (which would be never) - */ - resolve(null) - } - }, (retries * retries) * 100) // TODO: exponential timeout. Change this value to whatever you want - }) - } - return response - } -} - -module.exports = Request - -// curl --request POST \ -// --url https://api.assemblyai.com/transcript \ -// --header 'authorization: 6f33815060fa4eb29e96356a3ec536c8' \ -// --data ' -// { -// "audio_src_url": "https://s3-us-west-2.amazonaws.com/blog.assemblyai.com/audio/8-7-2018-post/7510.mp3", -// "model_id": 265 -// }' diff --git a/src/api/Http/Response.js b/src/api/Http/Response.js deleted file mode 100644 index 278e8d7..0000000 --- a/src/api/Http/Response.js +++ /dev/null @@ -1,23 +0,0 @@ -class Response { - constructor (responseJSON) { - this.json = responseJSON - if (responseJSON.error) { - throw new Error(responseJSON.error) - } - this.type = responseJSON.transcript ? 
'transcript' : 'model' - } - - toString () { - return JSON.stringify(this.json[this.type]) - } - - stringify () { - return this.toString() - } - - get () { - return this.json[this.type] - } -} - -module.exports = Response diff --git a/src/api/Model.js b/src/api/Model.js deleted file mode 100644 index 667009a..0000000 --- a/src/api/Model.js +++ /dev/null @@ -1,17 +0,0 @@ -const { Poll, Create } = require('./util') - -class Model { - constructor () { - this.url = 'https://api.assemblyai.com/model' - } - - poll (id) { - return Poll(this.url, id) - } - - create (options) { - return Create(this.url, options) - } -} - -module.exports = Model diff --git a/src/api/Transcript.js b/src/api/Transcript.js deleted file mode 100644 index 30a29ff..0000000 --- a/src/api/Transcript.js +++ /dev/null @@ -1,16 +0,0 @@ -const { Poll, Create } = require('./util') -class Transcript { - constructor () { - this.url = 'https://api.assemblyai.com/transcript' - } - - poll (id) { - return Poll(this.url, id) - } - - create (options) { - return Create(this.url, options) - } -} - -module.exports = Transcript diff --git a/src/api/Upload.js b/src/api/Upload.js deleted file mode 100644 index fe232f3..0000000 --- a/src/api/Upload.js +++ /dev/null @@ -1,41 +0,0 @@ -const fs = require('fs') -const Request = require('./Http/Request') -const request = require('request') - -const Transcribe = require('./Transcript') - -class Upload { - constructor (filePath) { - this.url = 'https://api.assemblyai.com/upload' - this.filePath = filePath - } - - async create () { - const req = new Request({ - method: 'POST', - url: this.url - }) - const uploadUrl = await req.send(false) - - await new Promise((resolve, reject) => { - fs.readFile(this.filePath, (err, data) => { - if (err) return reject(err) - request.put(uploadUrl, { body: data }, (err, response, body) => { - if (err) return reject(err) - resolve(body) - }) - }) - }) - - const url = uploadUrl.split('?')[0] - const transcribe = new Transcribe() - const response = await transcribe.create({ - audio_src_url: url - }) - const { id } = response.get() - - return transcribe.poll(id) - } -} - -module.exports = Upload diff --git a/src/api/util.js b/src/api/util.js deleted file mode 100644 index 1f3a06b..0000000 --- a/src/api/util.js +++ /dev/null @@ -1,48 +0,0 @@ -const Request = require('./Http/Request') -const Response = require('./Http/Response') - -module.exports = { - Poll (url, id) { - return new Promise((resolve, reject) => { - const request = new Request({ - method: 'GET', - url: `${url}/${id}` - }) - - const interval = setInterval(async () => { - try { - const apiResponse = await request.send() - const response = new Response(apiResponse) - const json = response.get() - if (json.status === 'completed') { - clearInterval(interval) - resolve(response) - } - if (json.status === 'trained') { - clearInterval(interval) - resolve(response) - } - if (json.status === 'error') { - clearInterval(interval) - reject(json.error) - } - } catch (e) { - clearInterval(interval) - reject(e) - } - }, 3000) - }) - }, - async Create (url, options) { - const request = new Request({ - method: 'POST', - url: url, - body: options.upload ? 
'' : options || {}
-    })
-    const apiResponse = await request.send()
-    if (options.json === false) {
-      return apiResponse
-    }
-    return new Response(apiResponse)
-  }
-}
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..38b7dc1
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,5 @@
+import AssemblyAI from "./services";
+
+export * from "./services";
+export type * from "./types";
+export default AssemblyAI;
diff --git a/src/services/base.ts b/src/services/base.ts
new file mode 100644
index 0000000..484ff93
--- /dev/null
+++ b/src/services/base.ts
@@ -0,0 +1,14 @@
+import { AxiosInstance } from "axios";
+
+/**
+ * Base class for services that communicate with the API.
+ */
+abstract class BaseService {
+  /**
+   * Create a new service.
+   * @param client The AxiosInstance to send HTTP requests to the API.
+   */
+  constructor(protected client: AxiosInstance) {}
+}
+
+export default BaseService;
diff --git a/src/services/files/index.ts b/src/services/files/index.ts
new file mode 100644
index 0000000..b15f137
--- /dev/null
+++ b/src/services/files/index.ts
@@ -0,0 +1,22 @@
+import { readFile } from "fs/promises";
+import BaseService from "@/services/base";
+import { UploadedFile } from "@/types";
+
+export default class FileService extends BaseService {
+  /**
+   * Upload a local file to AssemblyAI.
+   * @param path The local file to upload.
+   * @return A promise that resolves to the uploaded file URL.
+   */
+  async upload(path: string): Promise<string> {
+    const file = await readFile(path);
+
+    const { data } = await this.client.post<UploadedFile>("/v2/upload", file, {
+      headers: {
+        "Content-Type": "application/octet-stream",
+      },
+    });
+
+    return data.upload_url;
+  }
+}
diff --git a/src/services/index.ts b/src/services/index.ts
new file mode 100644
index 0000000..f296e81
--- /dev/null
+++ b/src/services/index.ts
@@ -0,0 +1,49 @@
+import { createAxiosClient } from "@/utils/axios";
+import { BaseServiceParams } from "@/types";
+import LemurService from "./lemur";
+import { RealtimeService, RealtimeServiceFactory } from "./realtime";
+import TranscriptService from "./transcripts";
+import FileService from "./files";
+
+export default class AssemblyAI {
+  /**
+   * The files service.
+   */
+  public files: FileService;
+
+  /**
+   * The transcripts service.
+   */
+  public transcripts: TranscriptService;
+
+  /**
+   * The LeMUR service.
+   */
+  public lemur: LemurService;
+
+  /**
+   * The realtime service.
+   */
+  public realtime: RealtimeServiceFactory;
+
+  /**
+   * Create a new AssemblyAI client.
+   * @param params The parameters for the service, including the API key and base URL, if any.
+   */
+  constructor(params: BaseServiceParams) {
+    params.baseUrl = params.baseUrl || "https://api.assemblyai.com";
+    const client = createAxiosClient(params);
+    this.files = new FileService(client);
+    this.transcripts = new TranscriptService(client, this.files);
+    this.lemur = new LemurService(client);
+    this.realtime = new RealtimeServiceFactory(client, params);
+  }
+}
+
+export {
+  LemurService,
+  RealtimeServiceFactory,
+  RealtimeService,
+  TranscriptService,
+  FileService,
+};
diff --git a/src/services/lemur/index.ts b/src/services/lemur/index.ts
new file mode 100644
index 0000000..3aabea9
--- /dev/null
+++ b/src/services/lemur/index.ts
@@ -0,0 +1,49 @@
+import {
+  LemurSummaryParameters,
+  LemurActionItemsParameters,
+  LemurQuestionAnswerParameters,
+  LemurTaskParameters,
+  LemurSummaryResponse,
+  LemurQuestionAnswerResponse,
+  LemurActionItemsResponse,
+  LemurTaskResponse,
+} from "@/types";
+import BaseService from "@/services/base";
+
+export default class LemurService extends BaseService {
+  async summary(params: LemurSummaryParameters): Promise<LemurSummaryResponse> {
+    const { data } = await this.client.post<LemurSummaryResponse>(
+      "/lemur/v3/generate/summary",
+      params
+    );
+    return data;
+  }
+
+  async questionAnswer(
+    params: LemurQuestionAnswerParameters
+  ): Promise<LemurQuestionAnswerResponse> {
+    const { data } = await this.client.post<LemurQuestionAnswerResponse>(
+      "/lemur/v3/generate/question-answer",
+      params
+    );
+    return data;
+  }
+
+  async actionItems(
+    params: LemurActionItemsParameters
+  ): Promise<LemurActionItemsResponse> {
+    const { data } = await this.client.post<LemurActionItemsResponse>(
+      "/lemur/v3/generate/action-items",
+      params
+    );
+    return data;
+  }
+
+  async task(params: LemurTaskParameters): Promise<LemurTaskResponse> {
+    const { data } = await this.client.post<LemurTaskResponse>(
+      "/lemur/v3/generate/task",
+      params
+    );
+    return data;
+  }
+}
diff --git a/src/services/realtime/factory.ts b/src/services/realtime/factory.ts
new file mode 100644
index 0000000..abc9bf0
--- /dev/null
+++ b/src/services/realtime/factory.ts
@@ -0,0 +1,32 @@
+import {
+  BaseServiceParams,
+  RealtimeTokenParams,
+  CreateRealtimeServiceParams,
+  RealtimeServiceParams,
+} from "@/types";
+import { AxiosInstance } from "axios";
+import { RealtimeService } from "./service";
+
+export class RealtimeServiceFactory {
+  constructor(
+    private client: AxiosInstance,
+    private params: BaseServiceParams
+  ) {}
+
+  createService(params?: CreateRealtimeServiceParams): RealtimeService {
+    if (!params) params = { apiKey: this.params.apiKey };
+    else if (!("token" in params) && !params.apiKey) {
+      params.apiKey = this.params.apiKey;
+    }
+
+    return new RealtimeService(params as RealtimeServiceParams);
+  }
+
+  async createTemporaryToken(params: RealtimeTokenParams) {
+    const response = await this.client.post<{ token: string }>(
+      "/v2/realtime/token",
+      params
+    );
+    return response.data.token;
+  }
+}
diff --git a/src/services/realtime/index.ts b/src/services/realtime/index.ts
new file mode 100644
index 0000000..a167b4d
--- /dev/null
+++ b/src/services/realtime/index.ts
@@ -0,0 +1,2 @@
+export * from "@/services/realtime/factory";
+export * from "@/services/realtime/service";
diff --git a/src/services/realtime/service.ts b/src/services/realtime/service.ts
new file mode 100644
index 0000000..f89d8ab
--- /dev/null
+++ b/src/services/realtime/service.ts
@@ -0,0 +1,183 @@
+import WebSocket from "ws";
+import {
+  RealtimeEvents,
+  RealtimeListeners,
+  RealtimeServiceParams,
+  RealtimeMessage,
+  RealtimeTranscript,
+  PartialTranscript,
+  FinalTranscript,
+  SessionBeginsEventData,
+} from "@/types";
+import {
+  RealtimeError,
+  RealtimeErrorMessages,
+  RealtimeErrorType,
+} from "@/utils/errors";
+
+const defaultRealtimeUrl = "wss://api.assemblyai.com/v2/realtime/ws";
+
+export class RealtimeService {
+  private realtimeUrl: string;
+  private sampleRate: number;
+  private wordBoost?: string[];
+  private apiKey?: string;
+  private token?: string;
+  private socket?: WebSocket;
+  private listeners: RealtimeListeners = {};
+  private sessionTerminatedResolve?: () => void;
+
+  constructor(params: RealtimeServiceParams) {
+    this.realtimeUrl = params.realtimeUrl ?? defaultRealtimeUrl;
+    this.sampleRate = params.sampleRate ?? 16_000;
+    this.wordBoost = params.wordBoost;
+    if ("apiKey" in params) this.apiKey = params.apiKey;
+    if ("token" in params) this.token = params.token;
+
+    if (!(this.apiKey || this.token)) {
+      throw new Error("API key or temporary token is required.");
+    }
+  }
+
+  private connectionUrl(): URL {
+    const url = new URL(this.realtimeUrl);
+
+    if (url.protocol !== "wss:") {
+      throw new Error("Invalid protocol, must be wss");
+    }
+
+    const searchParams = new URLSearchParams();
+    if (this.token) {
+      searchParams.set("token", this.token);
+    }
+    searchParams.set("sample_rate", this.sampleRate.toString());
+    if (this.wordBoost && this.wordBoost.length > 0) {
+      searchParams.set("word_boost", JSON.stringify(this.wordBoost));
+    }
+    url.search = searchParams.toString();
+
+    return url;
+  }
+
+  on(event: "open", listener: (event: SessionBeginsEventData) => void): void;
+  on(
+    event: "transcript",
+    listener: (transcript: RealtimeTranscript) => void
+  ): void;
+  on(
+    event: "transcript.partial",
+    listener: (transcript: PartialTranscript) => void
+  ): void;
+  on(
+    event: "transcript.final",
+    listener: (transcript: FinalTranscript) => void
+  ): void;
+  on(event: "error", listener: (error: Error) => void): void;
+  on(event: "close", listener: (code: number, reason: string) => void): void;
+  on(event: RealtimeEvents, listener: (...args: any[]) => void) {
+    this.listeners[event] = listener;
+  }
+
+  connect() {
+    return new Promise<SessionBeginsEventData>((resolve, _) => {
+      if (this.socket) {
+        throw new Error("Already connected");
+      }
+
+      const url = this.connectionUrl();
+
+      let headers;
+      if (this.token) {
+        headers = undefined;
+      } else if (this.apiKey) {
+        headers = { Authorization: this.apiKey };
+      }
+
+      this.socket = new WebSocket(url.toString(), { headers });
+
+      this.socket.onclose = ({ code, reason }: WebSocket.CloseEvent) => {
+        if (!reason) {
+          if (code in RealtimeErrorType) {
+            reason = RealtimeErrorMessages[code as RealtimeErrorType];
+          }
+        }
+        this.listeners.close?.(code, reason);
+      };
+
+      this.socket.onerror = (errorEvent: WebSocket.ErrorEvent) => {
+        if (errorEvent.error) this.listeners.error?.(errorEvent.error as Error);
+        else this.listeners.error?.(new Error(errorEvent.message));
+      };
+
+      this.socket.onmessage = ({ data }: WebSocket.MessageEvent) => {
+        const message = JSON.parse(data.toString()) as RealtimeMessage;
+        if ("error" in message) {
+          this.listeners.error?.(new RealtimeError(message.error));
+          return;
+        }
+        switch (message.message_type) {
+          case "SessionBegins": {
+            const openObject: SessionBeginsEventData = {
+              sessionId: message.session_id,
+              expiresAt: new Date(message.expires_at),
+            };
+            resolve(openObject);
+            this.listeners.open?.(openObject);
+            break;
+          }
+          case "PartialTranscript": {
+            // message.created is actually a string when coming from the socket
+            message.created = new Date(message.created);
+            this.listeners.transcript?.(message);
+            this.listeners["transcript.partial"]?.(message);
+            break;
+          }
+          case "FinalTranscript": {
+            // message.created is actually a string when coming from the socket
+            message.created = new Date(message.created);
+            this.listeners.transcript?.(message);
+            this.listeners["transcript.final"]?.(message);
+            break;
+          }
+          case "SessionTerminated": {
+            this.sessionTerminatedResolve?.();
+            break;
+          }
+        }
+      };
+    });
+  }
+
+  sendAudio(audio: ArrayBuffer) {
+    if (!this.socket || this.socket.readyState !== WebSocket.OPEN) {
+      throw new Error("Socket is not open for communication");
+    }
+
+    const payload = {
+      audio_data: Buffer.from(audio).toString("base64"),
+    };
+    this.socket.send(JSON.stringify(payload));
+  }
+
+  async close(waitForSessionTermination = true) {
+    if (this.socket) {
+      if (this.socket.readyState === WebSocket.OPEN) {
+        const terminateSessionMessage = `{"terminate_session": true}`;
+        if (waitForSessionTermination) {
+          const sessionTerminatedPromise = new Promise<void>((resolve, _) => {
+            this.sessionTerminatedResolve = resolve;
+          });
+          this.socket.send(terminateSessionMessage);
+          await sessionTerminatedPromise;
+        } else {
+          this.socket.send(terminateSessionMessage);
+        }
+      }
+      this.socket.removeAllListeners();
+      this.socket.close();
+    }
+
+    this.listeners = {};
+    this.socket = undefined;
+  }
+}
diff --git a/src/services/transcripts/index.ts b/src/services/transcripts/index.ts
new file mode 100644
index 0000000..af123a7
--- /dev/null
+++ b/src/services/transcripts/index.ts
@@ -0,0 +1,178 @@
+import BaseService from "@/services/base";
+import {
+  ParagraphsResponse,
+  SentencesResponse,
+  Transcript,
+  TranscriptList,
+  CreateTranscriptParameters,
+  CreateTranscriptOptions,
+  Createable,
+  Deletable,
+  Listable,
+  Retrieveable,
+  SubtitleFormat,
+  RedactedAudioResponse,
+} from "@/types";
+import { AxiosInstance } from "axios";
+import FileService from "../files";
+
+export default class TranscriptService
+  extends BaseService
+  implements
+    Createable<Transcript, CreateTranscriptParameters, CreateTranscriptOptions>,
+    Retrieveable<Transcript>,
+    Deletable<Transcript>,
+    Listable<TranscriptList>
+{
+  constructor(client: AxiosInstance, private files: FileService) {
+    super(client);
+  }
+
+  /**
+   * Create a transcript.
+   * @param params The parameters to create a transcript.
+   * @param options The options used for creating the new transcript.
+   * @returns A promise that resolves to the newly created transcript.
+   */
+  async create(
+    params: CreateTranscriptParameters,
+    options?: CreateTranscriptOptions
+  ): Promise<Transcript> {
+    const path = getPath(params.audio_url);
+    if (path !== null) {
+      const uploadUrl = await this.files.upload(path);
+      params.audio_url = uploadUrl;
+    }
+
+    const res = await this.client.post<Transcript>("/v2/transcript", params);
+
+    if (options?.poll ?? true) {
+      return await this.poll(res.data.id, options);
+    }
+
+    return res.data;
+  }
+
+  private async poll(
+    transcriptId: string,
+    options?: CreateTranscriptOptions
+  ): Promise<Transcript> {
+    const startTime = Date.now();
+    while (true) {
+      const transcript = await this.get(transcriptId);
+      if (transcript.status === "completed" || transcript.status === "error") {
+        return transcript;
+      } else if (
+        Date.now() - startTime <
+        (options?.pollingTimeout ?? 180_000)
+      ) {
+        await new Promise((resolve) =>
+          setTimeout(resolve, options?.pollingInterval ?? 3_000)
+        );
+      } else {
+        throw new Error("Polling timeout");
+      }
+    }
+  }
+
+  /**
+   * Retrieve a transcript.
+   * @param id The identifier of the transcript.
+   * @returns A promise that resolves to the transcript.
+   */
+  async get(id: string): Promise<Transcript> {
+    const res = await this.client.get<Transcript>(`/v2/transcript/${id}`);
+    return res.data;
+  }
+
+  // TODO: add options overload to support list querystring parameters
+  /**
+   * Retrieves a paged list of transcript listings.
+   * @param nextUrl The URL to retrieve the transcript list from. If not provided, the first page will be retrieved.
+   * @returns
+   */
+  async list(nextUrl?: string | null): Promise<TranscriptList> {
+    const { data } = await this.client.get<TranscriptList>(
+      nextUrl ?? "/v2/transcript"
+    );
+    for (const transcriptListItem of data.transcripts) {
+      transcriptListItem.created = new Date(transcriptListItem.created);
+      if (transcriptListItem.completed) {
+        transcriptListItem.completed = new Date(transcriptListItem.completed);
+      }
+    }
+
+    return data as unknown as TranscriptList;
+  }
+
+  /**
+   * Delete a transcript
+   * @param id The identifier of the transcript.
+   * @returns A promise that resolves to the transcript.
+   */
+  async delete(id: string): Promise<Transcript> {
+    const res = await this.client.delete<Transcript>(`/v2/transcript/${id}`);
+    return res.data;
+  }
+
+  /**
+   * Retrieve all sentences of a transcript.
+   * @param id The identifier of the transcript.
+   * @return A promise that resolves to the sentences.
+   */
+  async sentences(id: string): Promise<SentencesResponse> {
+    const { data } = await this.client.get<SentencesResponse>(
+      `/v2/transcript/${id}/sentences`
+    );
+    return data;
+  }
+
+  /**
+   * Retrieve all paragraphs of a transcript.
+   * @param id The identifier of the transcript.
+   * @return A promise that resolves to the paragraphs.
+   */
+  async paragraphs(id: string): Promise<ParagraphsResponse> {
+    const { data } = await this.client.get<ParagraphsResponse>(
+      `/v2/transcript/${id}/paragraphs`
+    );
+    return data;
+  }
+
+  /**
+   * Retrieve subtitles of a transcript.
+   * @param id The identifier of the transcript.
+   * @param format The format of the subtitles.
+   * @return A promise that resolves to the subtitles text.
+   */
+  async subtitles(id: string, format: SubtitleFormat = "srt"): Promise<string> {
+    const { data } = await this.client.get<string>(
+      `/v2/transcript/${id}/${format}`
+    );
+
+    return data;
+  }
+
+  /**
+   * Retrieve the redacted audio of a transcript.
+   * @param id The identifier of the transcript.
+   * @return A promise that resolves to the redacted audio response.
+   */
+  async redactions(id: string): Promise<RedactedAudioResponse> {
+    const { data } = await this.client.get<RedactedAudioResponse>(
+      `/v2/transcript/${id}/redacted-audio`
+    );
+    return data;
+  }
+}
+
+function getPath(path: string) {
+  let url: URL;
+  try {
+    url = new URL(path);
+    if (url.protocol === "file:") return url.pathname;
+    else return null;
+  } catch {
+    return path;
+  }
+}
diff --git a/src/types/asyncapi.generated.ts b/src/types/asyncapi.generated.ts
new file mode 100644
index 0000000..55959d2
--- /dev/null
+++ b/src/types/asyncapi.generated.ts
@@ -0,0 +1,124 @@
+// this file is generated by typescript/scripts/generate-types.ts
+/* tslint:disable */
+/* eslint-disable */
+
+/** OneOf type helpers */
+type Without<T, U> = { [P in Exclude<keyof T, keyof U>]?: never };
+type XOR<T, U> = T | U extends object
+  ? (Without<T, U> & U) | (Without<U, T> & T)
+  : T | U;
+type OneOf<T extends any[]> = T extends [infer Only]
+  ? Only
+  : T extends [infer A, infer B, ...infer Rest]
+  ? OneOf<[XOR<A, B>, ...Rest]>
+  : never;
+
+export type AudioData = {
+  /** @description Raw audio data, base64 encoded. This can be the raw data recorded directly from a microphone or read from an audio file. */
+  audio_data: string;
+};
+
+export type FinalTranscript = RealtimeBaseTranscript & {
+  /**
+   * @description Describes the type of message.
+ * @constant + */ + message_type: "FinalTranscript"; + /** @description Whether the text has been punctuated and cased. */ + punctuated: boolean; + /** @description Whether the text has been formatted (e.g. Dollar -> $) */ + text_formatted: boolean; +}; + +/** @enum {string} */ +export type MessageType = + | "SessionBegins" + | "PartialTranscript" + | "FinalTranscript" + | "SessionTerminated"; + +export type PartialTranscript = RealtimeBaseTranscript & { + /** + * @description Describes the type of message. + * @constant + */ + message_type: "PartialTranscript"; +}; + +export type RealtimeBaseMessage = { + /** @description Describes the type of the message. */ + message_type: MessageType; +}; + +export type RealtimeBaseTranscript = { + /** @description End time of audio sample relative to session start, in milliseconds. */ + audio_end: number; + /** @description Start time of audio sample relative to session start, in milliseconds. */ + audio_start: number; + /** + * Format: double + * @description The confidence score of the entire transcription, between 0 and 1. + */ + confidence: number; + /** @description The timestamp for the partial transcript. */ + created: Date; + /** @description The partial transcript for your audio. */ + text: string; + /** @description An array of objects, with the information for each word in the transcription text. Includes the start/end time (in milliseconds) of the word, the confidence score of the word, and the text (i.e. the word itself). */ + words: Word[]; +}; + +export type RealtimeError = { + error: string; +}; + +export type RealtimeMessage = + | SessionBegins + | PartialTranscript + | FinalTranscript + | SessionTerminated + | RealtimeError; + +export type RealtimeTranscript = PartialTranscript | FinalTranscript; + +/** @enum {string} */ +export type RealtimeTranscriptType = "PartialTranscript" | "FinalTranscript"; + +export type SessionBegins = RealtimeBaseMessage & { + /** @description Timestamp when this session will expire. */ + expires_at: Date; + /** + * @description Describes the type of the message. + * @constant + */ + message_type: "SessionBegins"; + /** @description Unique identifier for the established session. */ + session_id: string; +}; + +export type SessionTerminated = RealtimeBaseMessage & { + /** + * @description Describes the type of the message. + * @constant + */ + message_type: "SessionTerminated"; +}; + +export type TerminateSession = RealtimeBaseMessage & { + /** @description A boolean value to communicate that you wish to end your real-time session forever. 
*/ + terminate_session: boolean; +}; + +export type Word = { + /** + * Format: double + * @description Confidence score of the word + */ + confidence: number; + /** @description End time of the word in milliseconds */ + end: number; + /** @description Start time of the word in milliseconds */ + start: number; + /** @description The word itself */ + text: string; +}; diff --git a/src/types/index.ts b/src/types/index.ts new file mode 100644 index 0000000..7aca1e9 --- /dev/null +++ b/src/types/index.ts @@ -0,0 +1,5 @@ +export type * from "./transcripts"; +export type * from "./realtime"; +export type * from "./services"; +export type * from "./asyncapi.generated"; +export type * from "./openapi.generated"; diff --git a/src/types/openapi.generated.ts b/src/types/openapi.generated.ts new file mode 100644 index 0000000..3814cdb --- /dev/null +++ b/src/types/openapi.generated.ts @@ -0,0 +1,834 @@ +// this file is generated by typescript/scripts/generate-types.ts +/* tslint:disable */ +/* eslint-disable */ + +/** OneOf type helpers */ +type Without = { [P in Exclude]?: never }; +type XOR = T | U extends object + ? (Without & U) | (Without & T) + : T | U; +type OneOf = T extends [infer Only] + ? Only + : T extends [infer A, infer B, ...infer Rest] + ? OneOf<[XOR, ...Rest]> + : never; + +/** + * @description Will be either success, or unavailable in the rare case that the model failed. + * @enum {string} + */ +export type AudioIntelligenceModelStatus = "success" | "unavailable"; + +export type AutoHighlightResult = { + /** @description The total number of times the key phrase appears in the audio file */ + count: number; + /** + * Format: float + * @description The total relevancy to the overall audio file of this key phrase - a greater number means more relevant + */ + rank: number; + /** @description The text itself of the key phrase */ + text: string; + /** @description The timestamp of the of the key phrase */ + timestamps: Timestamp[]; +}; + +/** + * @description An array of results for the Key Phrases model, if it was enabled during the transcription request. + * See [Key phrases](https://www.assemblyai.com/docs/Models/key_phrases) for more information. 
+
+/**
+ * @description Will be either success, or unavailable in the rare case that the model failed.
+ * @enum {string}
+ */
+export type AudioIntelligenceModelStatus = "success" | "unavailable";
+
+export type AutoHighlightResult = {
+  /** @description The total number of times the key phrase appears in the audio file */
+  count: number;
+  /**
+   * Format: float
+   * @description The total relevancy to the overall audio file of this key phrase - a greater number means more relevant
+   */
+  rank: number;
+  /** @description The text itself of the key phrase */
+  text: string;
+  /** @description The timestamps of the key phrase */
+  timestamps: Timestamp[];
+};
+
+/**
+ * @description An array of results for the Key Phrases model, if it was enabled during the transcription request.
+ * See [Key phrases](https://www.assemblyai.com/docs/Models/key_phrases) for more information.
+ */
+export type AutoHighlightsResult = {
+  /** @description A temporally-sequential array of Key Phrases */
+  results: AutoHighlightResult[];
+} | null;
+
+/** @description Chapter of the audio file */
+export type Chapter = {
+  /** @description The ending time, in milliseconds, for the chapter */
+  end: number;
+  /** @description An ultra-short summary (just a few words) of the content spoken in the chapter */
+  gist: string;
+  /** @description A single sentence summary of the content spoken during the chapter */
+  headline: string;
+  /** @description The starting time, in milliseconds, for the chapter */
+  start: number;
+  /** @description A one paragraph summary of the content spoken during the chapter */
+  summary: string;
+};
+
+export type ContentSafetyLabel = {
+  /**
+   * Format: double
+   * @description The confidence score for the topic being discussed, from 0 to 1
+   */
+  confidence: number;
+  /** @description The label of the sensitive topic */
+  label: string;
+  /**
+   * Format: double
+   * @description How severely the topic is discussed in the section, from 0 to 1
+   */
+  severity: number;
+};
+
+export type ContentSafetyLabelResult = {
+  /** @description An array of objects, one per sensitive topic that was detected in the section */
+  labels: ContentSafetyLabel[];
+  /** @description The sentence index at which the section ends */
+  sentences_idx_end: number;
+  /** @description The sentence index at which the section begins */
+  sentences_idx_start: number;
+  /** @description A summary of the Content Moderation severity results for the entire audio file */
+  severity_score_summary: {
+    [key: string]: SeverityScoreSummary;
+  };
+  /** @description A summary of the Content Moderation confidence results for the entire audio file */
+  summary: {
+    [key: string]: number;
+  };
+  /** @description The transcript of the section flagged by the Content Moderation model */
+  text: string;
+  /** @description Timestamp information for the section */
+  timestamp: Timestamp;
+};
+
+export type CreateRealtimeTemporaryTokenParameters = {
+  /** @description The amount of time until the token expires in seconds. */
+  expires_in: number;
+};
+
+/** @description The parameters for creating a transcript */
+export type CreateTranscriptOptionalParameters = {
+  /** @description The point in time, in milliseconds, to stop transcribing in your media file */
+  audio_end_at?: number;
+  /** @description The point in time, in milliseconds, to begin transcription from in your media file */
+  audio_start_from?: number;
+  /** @description Enable [Auto Chapters](https://www.assemblyai.com/docs/Models/auto_chapters), can be true or false */
+  auto_chapters?: boolean;
+  /** @description Whether Key Phrases was enabled in the transcription request, either true or false */
+  auto_highlights?: boolean;
+  /** @description The word boost parameter value, if provided in the transcription request. */
+  boost_param?: TranscriptBoostParam;
+  /** @description Enable [Content Moderation](https://www.assemblyai.com/docs/Models/content_moderation), can be true or false */
+  content_safety?: boolean;
+  /** @description Customize how words are spelled and formatted using to and from values */
+  custom_spelling?: TranscriptCustomSpelling[];
+  /** @description Whether custom topics was enabled in the transcription request, either true or false */
+  custom_topics?: boolean;
+  /** @description Transcribe Filler Words, like "umm", in your media file; can be true or false. 
*/ + disfluencies?: boolean; + /** @description Enable [Dual Channel](https://assemblyai.com/docs/Models/speech_recognition#dual-channel-transcription) transcription, can be true or false. */ + dual_channel?: boolean; + /** @description Enable [Entity Detection](https://www.assemblyai.com/docs/Models/entity_detection), can be true or false */ + entity_detection?: boolean; + /** @description Filter profanity from the transcribed text, can be true or false. */ + filter_profanity?: boolean; + /** @description Enable Text Formatting, can be true or false. */ + format_text?: boolean; + /** @description Enable [Topic Detection](https://www.assemblyai.com/docs/Models/iab_classification), can be true or false */ + iab_categories?: boolean; + /** + * @description The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/Concepts/supported_languages). + * The default value is 'en_us'. + */ + language_code?: TranscriptLanguageCode; + /** @description Whether [Automatic language detection](https://www.assemblyai.com/docs/Models/speech_recognition#automatic-language-detection) was enabled in the transcription request, either true or false. */ + language_detection?: boolean; + /** @description Enable Automatic Punctuation, can be true or false. */ + punctuate?: boolean; + /** @description Redact PII from the transcribed text using the Redact PII model, can be true or false */ + redact_pii?: boolean; + /** @description Generate a copy of the original media file with spoken PII "beeped" out, can be true or false. See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. */ + redact_pii_audio?: boolean; + /** + * @description Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. + * @default mp3 + */ + redact_pii_audio_quality?: string; + /** @description The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. */ + redact_pii_policies?: PiiPolicy[]; + /** @description The replacement logic for detected PII, can be "entity_type" or "hash". See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. */ + redact_pii_sub?: SubstitutionPolicy; + /** @description Enable [Sentiment Analysis](https://www.assemblyai.com/docs/Models/sentiment_analysis), can be true or false */ + sentiment_analysis?: boolean; + /** @description Enable [Speaker diarization](https://www.assemblyai.com/docs/Models/speaker_diarization), can be true or false */ + speaker_labels?: boolean; + /** + * @description Tells the speaker label model how many speakers it should attempt to identify, up to 10. See [Speaker diarization](https://www.assemblyai.com/docs/Models/speaker_diarization) for more details. + * @default null + */ + speakers_expected?: number | null; + /** + * Format: float + * @description Reject audio files that contain less than this fraction of speech. + * Valid values are in the range [0, 1] inclusive. 
+   *
+   * @default null
+   */
+  speech_threshold?: number | null;
+  /** @description Enable [Summarization](https://www.assemblyai.com/docs/Models/summarization), can be true or false */
+  summarization?: boolean;
+  /**
+   * @description The model to summarize the transcript
+   * @default informative
+   */
+  summary_model?: SummaryModel;
+  /**
+   * @description The type of summary
+   * @default bullets
+   */
+  summary_type?: SummaryType;
+  /** @description The list of custom topics provided if custom topics was enabled in the transcription request */
+  topics?: string[];
+  /**
+   * @description The header name which should be sent back with webhook calls, if provided in the transcription request.
+   * @default null
+   */
+  webhook_auth_header_name?: string | null;
+  /**
+   * @description Defaults to null. Optionally allows a user to specify a header name and value to send back with a webhook call for added security.
+   * @default null
+   */
+  webhook_auth_header_value?: string | null;
+  /** @description The URL to which we send webhooks upon transcription completion, if provided in the transcription request. */
+  webhook_url?: string;
+  /** @description The list of custom vocabulary to boost transcription probability for, if provided in the transcription request. */
+  word_boost?: string[];
+};
+
+/** @description The parameters for creating a transcript */
+export type CreateTranscriptParameters = CreateTranscriptOptionalParameters & {
+  /** @description The URL of the audio or video file to transcribe. */
+  audio_url: string;
+};
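+
+// Illustrative sketch (not part of the generated output): a typed request body
+// for creating a transcript. The URL is a placeholder for demonstration.
+//
+//   const params: CreateTranscriptParameters = {
+//     audio_url: "https://example.com/audio.mp3",
+//     speaker_labels: true,
+//     punctuate: true,
+//   };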
+
+/** @description A detected entity */
+export type Entity = {
+  /** @description The ending time, in milliseconds, for the detected entity in the audio file */
+  end: number;
+  /** @description The type of entity for the detected entity */
+  entity_type: EntityType;
+  /** @description The starting time, in milliseconds, at which the detected entity appears in the audio file */
+  start: number;
+  /** @description The text for the detected entity */
+  text: string;
+};
+
+/**
+ * @description The type of entity for the detected entity
+ * @enum {string}
+ */
+export type EntityType =
+  | "banking_information"
+  | "blood_type"
+  | "credit_card_cvv"
+  | "credit_card_expiration"
+  | "credit_card_number"
+  | "date"
+  | "date_of_birth"
+  | "drivers_license"
+  | "drug"
+  | "email_address"
+  | "event"
+  | "injury"
+  | "language"
+  | "location"
+  | "medical_condition"
+  | "medical_process"
+  | "money_amount"
+  | "nationality"
+  | "occupation"
+  | "organization"
+  | "password"
+  | "person_age"
+  | "person_name"
+  | "phone_number"
+  | "political_affiliation"
+  | "religion"
+  | "time"
+  | "url"
+  | "us_social_security_number";
+
+export type Error = {
+  /** @description Error message */
+  error: string;
+  /** @constant */
+  status?: "error";
+};
+
+export type LemurActionItemsParameters = LemurBaseParameters;
+
+export type LemurActionItemsResponse = LemurBaseResponse & {
+  /** @description The response generated by LeMUR. */
+  response: string;
+};
+
+export type LemurBaseParameters = {
+  /** @description Context to provide the model. This can be a string or a free-form JSON value. */
+  context?: OneOf<
+    [
+      string,
+      {
+        [key: string]: unknown;
+      }
+    ]
+  >;
+  final_model?: LemurModel;
+  /** @description Max output size in tokens. Up to 4000 allowed. */
+  max_output_size?: number;
+  /**
+   * Format: float
+   * @description The temperature to use for the model.
+   * Higher values result in answers that are more creative, lower values are more conservative.
+   * Can be any value between 0.0 and 1.0 inclusive.
+   */
+  temperature?: number;
+  /** @description A list of completed transcripts with text. Up to 100 files or 100 hours, whichever is lower. */
+  transcript_ids: string[];
+};
+
+export type LemurBaseResponse = {
+  /** @description The ID of the LeMUR request */
+  request_id: string;
+};
+
+/**
+ * @description The model that is used for the final prompt after compression is performed (options: "basic" and "default").
+ *
+ * @enum {string}
+ */
+export type LemurModel = "default" | "basic";
+
+export type LemurQuestion = {
+  /** @description How you want the answer to be returned. This can be any text. Can't be used with answer_options. Examples: "short sentence", "bullet points" */
+  answer_format?: string;
+  /** @description What discrete options to return. Useful for precise responses. Can't be used with answer_format. Example: ["Yes", "No"] */
+  answer_options?: string[];
+  /** @description Any context about the transcripts you wish to provide. This can be a string, or free-form JSON. */
+  context?: OneOf<
+    [
+      string,
+      {
+        [key: string]: unknown;
+      }
+    ]
+  >;
+  /** @description The question you wish to ask. For more complex questions, use the default model. */
+  question: string;
+};
+
+/** @description An answer generated by LeMUR and its question. */
+export type LemurQuestionAnswer = {
+  /** @description The answer generated by LeMUR. */
+  answer: string;
+  /** @description The question for LeMUR to answer. */
+  question: string;
+};
+
+export type LemurQuestionAnswerParameters = LemurBaseParameters & {
+  /** @description A list of questions to ask. */
+  questions: LemurQuestion[];
+};
+
+export type LemurQuestionAnswerResponse = LemurBaseResponse & {
+  /** @description The answers generated by LeMUR and their questions. */
+  response: LemurQuestionAnswer[];
+};
+
+export type LemurSummaryParameters = LemurBaseParameters & {
+  /** @description How you want the summary to be returned. This can be any text. Examples: "TLDR", "bullet points" */
+  answer_format?: string;
+};
+
+export type LemurSummaryResponse = LemurBaseResponse & {
+  /** @description The response generated by LeMUR. */
+  response: string;
+};
+
+export type LemurTaskParameters = LemurBaseParameters & {
+  /** @description Your text to prompt the model to produce a desired output, including any context you want to pass into the model. */
+  prompt: string;
+};
+
+export type LemurTaskResponse = LemurBaseResponse & {
+  /** @description The response generated by LeMUR. 
*/ + response: string; +}; + +export type PageDetails = { + current_url: string; + limit: number; + next_url?: string | null; + prev_url: string; + result_count: number; +}; + +export type ParagraphsResponse = { + audio_duration: number; + /** Format: double */ + confidence: number; + id: string; + paragraphs: TranscriptParagraph[]; +}; + +/** @enum {string} */ +export type PiiPolicy = + | "medical_process" + | "medical_condition" + | "blood_type" + | "drug" + | "injury" + | "number_sequence" + | "email_address" + | "date_of_birth" + | "phone_number" + | "us_social_security_number" + | "credit_card_number" + | "credit_card_expiration" + | "credit_card_cvv" + | "date" + | "nationality" + | "event" + | "language" + | "location" + | "money_amount" + | "person_name" + | "person_age" + | "organization" + | "political_affiliation" + | "occupation" + | "religion" + | "drivers_license" + | "banking_information"; + +export type PurgeLemurRequestDataResponse = { + /** @description Whether the request data was deleted. */ + deleted: boolean; + /** @description The ID of the LeMUR request */ + request_id: string; + /** @description The ID of the deletion request of the LeMUR request */ + request_id_to_purge: string; +}; + +export type RealtimeTemporaryTokenResponse = { + /** @description The temporary authentication token for real-time transcription */ + token: string; +}; + +export type RedactedAudioResponse = { + /** @description The URL of the redacted audio file */ + redacted_audio_url: string; + /** @description The status of the redacted audio */ + status: RedactedAudioStatus; +}; + +/** + * @description The status of the redacted audio + * @enum {string} + */ +export type RedactedAudioStatus = "redacted_audio_ready"; + +export type SentencesResponse = { + audio_duration: number; + /** Format: double */ + confidence: number; + id: string; + sentences: TranscriptSentence[]; +}; + +/** @enum {unknown} */ +export type Sentiment = "POSITIVE" | "NEUTRAL" | "NEGATIVE"; + +/** @description The result of the sentiment analysis model. */ +export type SentimentAnalysisResult = { + /** + * Format: double + * @description The confidence score for the detected sentiment of the sentence, from 0 to 1 + */ + confidence: number; + /** @description The ending time, in milliseconds, of the sentence */ + end: number; + /** @description The detected sentiment for the sentence, one of POSITIVE, NEUTRAL, NEGATIVE */ + sentiment: Sentiment; + /** @description The speaker of the sentence if Speaker Diarization is enabled, else null */ + speaker?: string | null; + /** @description The starting time, in milliseconds, of the sentence */ + start: number; + /** @description The transcript of the sentence */ + text: string; +}; + +export type SeverityScoreSummary = { + /** Format: double */ + high: number; + /** Format: double */ + low: number; + /** Format: double */ + medium: number; +}; + +/** + * @description The replacement logic for detected PII, can be "entity_type" or "hash". See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. 
+ * @enum {string|null}
+ */
+export type SubstitutionPolicy = "entity_type" | "hash";
+
+/**
+ * @description Format of the subtitles
+ * @enum {string}
+ */
+export type SubtitleFormat = "srt" | "vtt";
+
+/**
+ * @description The model to summarize the transcript
+ * @default informative
+ * @enum {string}
+ */
+export type SummaryModel = "informative" | "conversational" | "catchy";
+
+/**
+ * @description The type of summary
+ * @default bullets
+ * @enum {string}
+ */
+export type SummaryType =
+  | "bullets"
+  | "bullets_verbose"
+  | "gist"
+  | "headline"
+  | "paragraph";
+
+/** @description Timestamp containing a start and end property in milliseconds. */
+export type Timestamp = {
+  /** @description The end time in milliseconds */
+  end: number;
+  /** @description The start time in milliseconds */
+  start: number;
+};
+
+/** @description The result of the Topic Detection model. */
+export type TopicDetectionResult = {
+  labels?: {
+    /** @description The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship */
+    label: string;
+    /**
+     * Format: double
+     * @description How relevant the detected topic is
+     */
+    relevance: number;
+  }[];
+  /** @description The text in the transcript in which a detected topic occurs */
+  text: string;
+  timestamp?: Timestamp;
+};
+
+/** @description A transcript object */
+export type Transcript = {
+  /**
+   * @deprecated
+   * @description The acoustic model that was used for the transcription
+   */
+  acoustic_model: string;
+  /**
+   * Format: float
+   * @description The duration of this transcript object's media file, in seconds
+   */
+  audio_duration?: number | null;
+  /** @description The point in time, in milliseconds, in the file at which the transcription was terminated, if provided in the transcription request */
+  audio_end_at?: number | null;
+  /** @description The point in time, in milliseconds, in the file at which the transcription was started, if provided in the transcription request */
+  audio_start_from?: number | null;
+  /** @description The URL of the media that was transcribed */
+  audio_url: string;
+  /** @description Enable [Auto Chapters](https://www.assemblyai.com/docs/Models/auto_chapters), can be true or false */
+  auto_chapters?: boolean | null;
+  /** @description Whether Key Phrases was enabled in the transcription request, either true or false */
+  auto_highlights: boolean;
+  /**
+   * @description An array of results for the Key Phrases model, if it was enabled during the transcription request.
+   * See [Key phrases](https://www.assemblyai.com/docs/Models/key_phrases) for more information.
+   */
+  auto_highlights_result?: AutoHighlightsResult;
+  /** @description The word boost parameter value, if provided in the transcription request */
+  boost_param?: string | null;
+  /** @description An array of temporally sequential chapters for the audio file */
+  chapters?: Chapter[] | null;
+  /**
+   * Format: double
+   * @description The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)
+   */
+  confidence?: number | null;
+  /** @description Enable [Content Moderation](https://www.assemblyai.com/docs/Models/content_moderation), can be true or false */
+  content_safety?: boolean | null;
+  /**
+   * @description An array of results for the Content Moderation model, if it was enabled during the transcription request.
+   * See [Content moderation](https://www.assemblyai.com/docs/Models/content_moderation) for more information.
+   */
+  content_safety_labels?: {
+    results: ContentSafetyLabelResult[];
+    /** @description Will be either success, or unavailable in the rare case that the Content Safety Labels model failed. */
+    status: AudioIntelligenceModelStatus;
+  } | null;
+  /** @description Customize how words are spelled and formatted using to and from values */
+  custom_spelling?: TranscriptCustomSpelling[] | null;
+  /** @description Whether custom topics was enabled in the transcription request, either true or false */
+  custom_topics?: boolean | null;
+  /** @description Transcribe Filler Words, like "umm", in your media file; can be true or false */
+  disfluencies?: boolean | null;
+  /** @description Whether [Dual channel transcription](https://www.assemblyai.com/docs/Models/speech_recognition#dual-channel-transcription) was enabled in the transcription request, either true or false */
+  dual_channel?: boolean | null;
+  /**
+   * @description An array of results for the Entity Detection model, if it was enabled during the transcription request.
+   * See [Entity detection](https://www.assemblyai.com/docs/Models/entity_detection) for more information.
+   */
+  entities?: Entity[] | null;
+  /** @description Enable [Entity Detection](https://www.assemblyai.com/docs/Models/entity_detection), can be true or false */
+  entity_detection?: boolean | null;
+  /** @description Error message of why the transcript failed */
+  error?: string;
+  /** @description Whether [Profanity Filtering](https://www.assemblyai.com/docs/Models/speech_recognition#profanity-filtering) was enabled in the transcription request, either true or false */
+  filter_profanity?: boolean | null;
+  /** @description Whether Text Formatting was enabled in the transcription request, either true or false */
+  format_text?: boolean | null;
+  /** @description Enable [Topic Detection](https://www.assemblyai.com/docs/Models/iab_classification), can be true or false */
+  iab_categories?: boolean | null;
+  /**
+   * @description An array of results for the Topic Detection model, if it was enabled during the transcription request.
+   * See [Topic Detection](https://www.assemblyai.com/docs/Models/iab_classification) for more information.
+   */
+  iab_categories_result?: {
+    /** @description An array of results for the Topic Detection model. */
+    results: TopicDetectionResult[];
+    /** @description Will be either success, or unavailable in the rare case that the Topic Detection model failed. */
+    status: AudioIntelligenceModelStatus;
+    /** @description The overall relevance of topic to the entire audio file */
+    summary: {
+      [key: string]: number;
+    };
+  } | null;
+  /** @description The unique identifier of your transcription */
+  id: string;
+  /**
+   * @description The language of your audio file.
+   * Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/Concepts/supported_languages).
+   * The default value is 'en_us'.
+   */
+  language_code?: TranscriptLanguageCode;
+  /** @description Whether [Automatic language detection](https://www.assemblyai.com/docs/Models/speech_recognition#automatic-language-detection) was enabled in the transcription request, either true or false */
+  language_detection?: boolean | null;
+  /**
+   * @deprecated
+   * @description The language model that was used for the transcription
+   */
+  language_model: string;
+  /** @description Whether Automatic Punctuation was enabled in the transcription request, either true or false. 
*/ + punctuate?: boolean | null; + /** @description Whether [PII Redaction](https://www.assemblyai.com/docs/Models/pii_redaction) was enabled in the transcription request, either true or false */ + redact_pii: boolean; + /** + * @description Whether a redacted version of the audio file was generated (enabled or disabled in the transcription request), + * either true or false. See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more information. + */ + redact_pii_audio?: boolean | null; + /** + * @description The audio quality of the PII-redacted audio file, if enabled in the transcription request. + * See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more information. + */ + redact_pii_audio_quality?: string | null; + /** + * @description The list of PII Redaction policies that were enabled, if PII Redaction is enabled. + * See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more information. + */ + redact_pii_policies?: PiiPolicy[] | null; + /** @description The replacement logic for detected PII, can be "entity_type" or "hash". See [PII redaction](https://www.assemblyai.com/docs/Models/pii_redaction) for more details. */ + redact_pii_sub?: SubstitutionPolicy; + /** @description Enable [Sentiment Analysis](https://www.assemblyai.com/docs/Models/sentiment_analysis), can be true or false */ + sentiment_analysis?: boolean | null; + /** + * @description An array of results for the Sentiment Analysis model, if it was enabled during the transcription request. + * See [Sentiment analysis](https://www.assemblyai.com/docs/Models/sentiment_analysis) for more information. + */ + sentiment_analysis_results?: SentimentAnalysisResult[] | null; + /** @description Enable [Speaker diarization](https://www.assemblyai.com/docs/Models/speaker_diarization), can be true or false */ + speaker_labels?: boolean | null; + /** @description Defaults to null. Tells the speaker label model how many speakers it should attempt to identify, up to 10. See [Speaker diarization](https://www.assemblyai.com/docs/Models/speaker_diarization) for more details. */ + speakers_expected?: number | null; + /** + * Format: float + * @description Defaults to null. Reject audio files that contain less than this fraction of speech. + * Valid values are in the range [0, 1] inclusive. + */ + speech_threshold?: number | null; + /** + * @deprecated + * @description Whether speed boost was enabled in the transcription request + */ + speed_boost?: boolean | null; + /** @description The status of your transcription. Possible values are queued, processing, completed, or error. 
*/
+  status: TranscriptStatus;
+  /** @description Whether [Summarization](https://www.assemblyai.com/docs/Models/summarization) was enabled in the transcription request, either true or false */
+  summarization: boolean;
+  /** @description The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/Models/summarization) was enabled in the transcription request */
+  summary?: string | null;
+  /**
+   * @description The Summarization model used to generate the summary,
+   * if [Summarization](https://www.assemblyai.com/docs/Models/summarization) was enabled in the transcription request
+   */
+  summary_model?: string | null;
+  /** @description The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/Models/summarization) was enabled in the transcription request */
+  summary_type?: string | null;
+  /** @description The textual transcript of your media file */
+  text?: string | null;
+  /** @description True while a request is throttled and false when a request is no longer throttled */
+  throttled?: boolean | null;
+  /** @description The list of custom topics provided if custom topics was enabled in the transcription request */
+  topics?: string[];
+  /**
+   * @description When dual_channel or speaker_labels is enabled, a list of turn-by-turn utterance objects.
+   * See [Speaker diarization](https://www.assemblyai.com/docs/Models/speaker_diarization) for more information.
+   */
+  utterances?: TranscriptUtterance[] | null;
+  /** @description Whether webhook authentication details were provided in the transcription request */
+  webhook_auth: boolean;
+  /** @description The header name which should be sent back with webhook calls, if provided in the transcription request */
+  webhook_auth_header_name?: string | null;
+  /** @description The status code we received from your server when delivering your webhook, if a webhook URL was provided in the transcription request */
+  webhook_status_code?: number | null;
+  /** @description The URL to which we send webhooks upon transcription completion, if provided in the transcription request */
+  webhook_url?: string | null;
+  /** @description The list of custom vocabulary to boost transcription probability for, if provided in the transcription request */
+  word_boost?: string[];
+  /**
+   * @description An array of temporally-sequential word objects, one for each word in the transcript.
+   * See [Speech recognition](https://www.assemblyai.com/docs/Models/speech_recognition) for more information.
+   */
+  words?: TranscriptWord[] | null;
+};
+
+/**
+ * @description The word boost parameter value, if provided in the transcription request.
+ * @enum {string}
+ */
+export type TranscriptBoostParam = "low" | "default" | "high";
+
+/** @description Object containing words or phrases to replace, and the word or phrase to replace with */
+export type TranscriptCustomSpelling = {
+  /** @description Words or phrases to replace */
+  from: string[];
+  /** @description Word or phrase to replace with */
+  to: string;
+};
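+
+// Illustrative sketch (not part of the generated output): every occurrence of a
+// `from` entry is replaced with the `to` value, e.g.:
+//
+//   const spelling: TranscriptCustomSpelling = {
+//     from: ["assembly ai", "assembly-ai"],
+//     to: "AssemblyAI",
+//   };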
+
+/**
+ * @description The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/Concepts/supported_languages).
+ * The default value is 'en_us'.
+ *
+ * @default en_us
+ * @enum {string|null}
+ */
+export type TranscriptLanguageCode =
+  | "en"
+  | "en_au"
+  | "en_uk"
+  | "en_us"
+  | "es"
+  | "fr"
+  | "de"
+  | "it"
+  | "pt"
+  | "nl"
+  | "hi"
+  | "ja"
+  | "zh"
+  | "fi"
+  | "ko"
+  | "pl"
+  | "ru"
+  | "tr"
+  | "uk"
+  | "vi";
+
+export type TranscriptList = {
+  page_details: PageDetails;
+  transcripts: TranscriptListItem[];
+};
+
+export type TranscriptListItem = {
+  audio_url: string;
+  completed?: Date;
+  created: Date;
+  id: string;
+  resource_url: string;
+  status: TranscriptStatus;
+};
+
+export type TranscriptParagraph = {
+  /** Format: double */
+  confidence: number;
+  end: number;
+  start: number;
+  text: string;
+  words: TranscriptWord[];
+};
+
+export type TranscriptSentence = {
+  /** Format: double */
+  confidence: number;
+  end: number;
+  start: number;
+  text: string;
+  words: TranscriptWord[];
+};
+
+/**
+ * @description The status of your transcription. Possible values are queued, processing, completed, or error.
+ * @enum {string}
+ */
+export type TranscriptStatus = "queued" | "processing" | "completed" | "error";
+
+export type TranscriptUtterance = {
+  channel: string;
+  /** Format: double */
+  confidence: number;
+  end: number;
+  start: number;
+  text: string;
+  words: TranscriptWord[];
+};
+
+export type TranscriptWord = {
+  /** Format: double */
+  confidence: number;
+  end: number;
+  speaker?: string | null;
+  start: number;
+  text: string;
+};
+
+export type UploadedFile = {
+  /** @description A URL that points to your audio file, accessible only by AssemblyAI's servers */
+  upload_url: string;
+};
+
+export type WordSearchMatch = {
+  /** @description The total number of times the word appears in the transcript */
+  count: number;
+  /** @description An array of all index locations for that word within the `words` array of the completed transcript */
+  indexes: number[];
+  /** @description The matched word */
+  text: string;
+  /** @description An array of timestamps */
+  timestamps: WordSearchTimestamp[];
+};
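+
+// Illustrative sketch (not part of the generated output): a match for the query
+// "hello" might look like this (values are made up for demonstration):
+//
+//   const match: WordSearchMatch = {
+//     text: "hello",
+//     count: 2,
+//     indexes: [5, 87],
+//     timestamps: [[1200, 1450], [30000, 30300]],
+//   };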
+
+export type WordSearchResponse = {
+  /** @description The ID of the transcript */
+  id: string;
+  /** @description The matches of the search */
+  matches: WordSearchMatch[];
+  /** @description The total count of all matched instances. For example, if word 1 matched 2 times and word 2 matched 3 times, `total_count` will equal 5. */
+  total_count: number;
+};
+
+/** @description An array of timestamps structured as [`start_time`, `end_time`] in milliseconds */
+export type WordSearchTimestamp = number[];
diff --git a/src/types/realtime/index.ts b/src/types/realtime/index.ts
new file mode 100644
index 0000000..9ac9e5d
--- /dev/null
+++ b/src/types/realtime/index.ts
@@ -0,0 +1,68 @@
+import {
+  FinalTranscript,
+  PartialTranscript,
+  RealtimeTranscript,
+  RealtimeTranscriptType,
+} from "../asyncapi.generated";
+
+type CreateRealtimeServiceParams = {
+  realtimeUrl?: string;
+  sampleRate?: number;
+  wordBoost?: string[];
+} & (
+  | {
+      apiKey?: string;
+    }
+  | {
+      token: string;
+    }
+);
+
+type RealtimeServiceParams = {
+  realtimeUrl?: string;
+  sampleRate?: number;
+  wordBoost?: string[];
+} & (
+  | {
+      apiKey: string;
+    }
+  | {
+      token: string;
+    }
+);
+
+type RealtimeEvents =
+  | "open"
+  | "close"
+  | "transcript"
+  | "transcript.partial"
+  | "transcript.final"
+  | "error";
+
+type SessionBeginsEventData = {
+  sessionId: string;
+  expiresAt: Date;
+};
+
+type RealtimeListeners = {
+  open?: (event: SessionBeginsEventData) => void;
+  close?: (code: number, reason: string) => void;
+  transcript?: (transcript: RealtimeTranscript) => void;
+  "transcript.partial"?: (transcript: PartialTranscript) => void;
+  "transcript.final"?: (transcript: FinalTranscript) => void;
+  error?: (error: Error) => void;
+};
+
+type RealtimeTokenParams = {
+  expires_in: number;
+};
+
+export type {
+  CreateRealtimeServiceParams,
+  RealtimeServiceParams,
+  RealtimeEvents,
+  RealtimeTranscriptType,
+  SessionBeginsEventData,
+  RealtimeListeners,
+  RealtimeTokenParams,
+};
diff --git a/src/types/services/abstractions.ts b/src/types/services/abstractions.ts
new file mode 100644
index 0000000..31dcba8
--- /dev/null
+++ b/src/types/services/abstractions.ts
@@ -0,0 +1,56 @@
+/**
+ * Interface for classes that can create resources.
+ * @template T The type of the resource.
+ * @template Parameters The type of the parameters required to create the resource.
+ * @template Options The type of the options used when creating the resource.
+ */
+interface Createable<T, Parameters, Options = Record<string, unknown>> {
+  /**
+   * Create a new resource.
+   * @param params The parameters of the new resource.
+   * @param options The options used for creating the new resource.
+   * @return A promise that resolves to the newly created resource.
+   */
+  create(params: Parameters, options?: Options): Promise<T>;
+}
+
+/**
+ * Interface for classes that can retrieve resources.
+ * @template T The type of the resource.
+ * @template Id The type of the resource's identifier. Defaults to string.
+ */
+interface Retrieveable<T, Id = string> {
+  /**
+   * Get a resource.
+   * @param id The identifier of the resource to retrieve.
+   * @return A promise that resolves to the retrieved resource.
+   */
+  get(id: Id): Promise<T>;
+}
+
+/**
+ * Interface for classes that can delete resources.
+ * @template T The type of the resource.
+ * @template Id The type of the resource's identifier. Defaults to string.
+ */
+interface Deletable<T, Id = string> {
+  /**
+   * Delete a resource.
+   * @param id The identifier of the resource to delete.
+   * @return A promise that resolves to the deleted resource.
+   */
+  delete(id: Id): Promise<T>;
+}
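+
+// Illustrative sketch (hypothetical class, not part of this commit): a service
+// advertises its capabilities by implementing these interfaces, e.g.:
+//
+//   class TranscriptService
+//     implements
+//       Createable<Transcript, CreateTranscriptParameters>,
+//       Retrieveable<Transcript>,
+//       Deletable<Transcript> {
+//     // ...
+//   }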
+ */ + list(page?: Page): Promise; +} + +export type { Createable, Retrieveable, Deletable, Listable }; diff --git a/src/types/services/index.ts b/src/types/services/index.ts new file mode 100644 index 0000000..274fd99 --- /dev/null +++ b/src/types/services/index.ts @@ -0,0 +1,7 @@ +type BaseServiceParams = { + apiKey: string; + baseUrl?: string; +}; + +export type * from "./abstractions"; +export type { BaseServiceParams }; diff --git a/src/types/transcripts/index.ts b/src/types/transcripts/index.ts new file mode 100644 index 0000000..7ca67e5 --- /dev/null +++ b/src/types/transcripts/index.ts @@ -0,0 +1,5 @@ +export type CreateTranscriptOptions = { + poll?: boolean; + pollingInterval?: number; + pollingTimeout?: number; +}; diff --git a/src/utils/.gitkeep b/src/utils/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/axios.ts b/src/utils/axios.ts new file mode 100644 index 0000000..efb7a70 --- /dev/null +++ b/src/utils/axios.ts @@ -0,0 +1,19 @@ +import axios, { isAxiosError } from "axios"; +import { BaseServiceParams } from "../."; + +export function createAxiosClient(params: BaseServiceParams) { + const client = axios.create({ + baseURL: params.baseUrl, + headers: { Authorization: params.apiKey }, + }); + + client.interceptors.response.use(undefined, throwApiError); + return client; +} + +export function throwApiError(error: unknown) { + if (isAxiosError(error) && error.response?.data?.error) { + return Promise.reject(new Error(error.response.data.error)); + } + return Promise.reject(error); +} diff --git a/src/utils/errors/index.ts b/src/utils/errors/index.ts new file mode 100644 index 0000000..5ebd77c --- /dev/null +++ b/src/utils/errors/index.ts @@ -0,0 +1,5 @@ +export { + default as RealtimeError, + RealtimeErrorType, + RealtimeErrorMessages, +} from "./realtime"; diff --git a/src/utils/errors/realtime.ts b/src/utils/errors/realtime.ts new file mode 100644 index 0000000..e5c2f91 --- /dev/null +++ b/src/utils/errors/realtime.ts @@ -0,0 +1,45 @@ +enum RealtimeErrorType { + BadSampleRate = 4000, + AuthFailed = 4001, + // Both InsufficientFunds and FreeAccount error use 4002 + InsufficientFundsOrFreeAccount = 4002, + NonexistentSessionId = 4004, + SessionExpired = 4008, + ClosedSession = 4010, + RateLimited = 4029, + UniqueSessionViolation = 4030, + SessionTimeout = 4031, + AudioTooShort = 4032, + AudioTooLong = 4033, + BadJson = 4100, + BadSchema = 4101, + TooManyStreams = 4102, + Reconnected = 4103, + ReconnectAttemptsExhausted = 1013, +} + +const RealtimeErrorMessages: Record = { + [RealtimeErrorType.BadSampleRate]: "Sample rate must be a positive integer", + [RealtimeErrorType.AuthFailed]: "Not Authorized", + [RealtimeErrorType.InsufficientFundsOrFreeAccount]: + "Insufficient funds or you are using a free account. This feature is paid-only and requires you to add a credit card. 
+
+const RealtimeErrorMessages: Record<RealtimeErrorType, string> = {
+  [RealtimeErrorType.BadSampleRate]: "Sample rate must be a positive integer",
+  [RealtimeErrorType.AuthFailed]: "Not Authorized",
+  [RealtimeErrorType.InsufficientFundsOrFreeAccount]:
+    "Insufficient funds or you are using a free account. This feature is paid-only and requires you to add a credit card. Please visit https://assemblyai.com/dashboard/ to add a credit card to your account.",
+  [RealtimeErrorType.NonexistentSessionId]: "Session ID does not exist",
+  [RealtimeErrorType.SessionExpired]: "Session has expired",
+  [RealtimeErrorType.ClosedSession]: "Session is closed",
+  [RealtimeErrorType.RateLimited]: "Rate limited",
+  [RealtimeErrorType.UniqueSessionViolation]: "Unique session violation",
+  [RealtimeErrorType.SessionTimeout]: "Session Timeout",
+  [RealtimeErrorType.AudioTooShort]: "Audio too short",
+  [RealtimeErrorType.AudioTooLong]: "Audio too long",
+  [RealtimeErrorType.BadJson]: "Bad JSON",
+  [RealtimeErrorType.BadSchema]: "Bad schema",
+  [RealtimeErrorType.TooManyStreams]: "Too many streams",
+  [RealtimeErrorType.Reconnected]: "Reconnected",
+  [RealtimeErrorType.ReconnectAttemptsExhausted]:
+    "Reconnect attempts exhausted",
+};
+
+class RealtimeError extends Error {}
+
+export { RealtimeErrorType, RealtimeErrorMessages };
+export default RealtimeError;
diff --git a/tests/__mocks__/api.ts b/tests/__mocks__/api.ts
new file mode 100644
index 0000000..25cc273
--- /dev/null
+++ b/tests/__mocks__/api.ts
@@ -0,0 +1,105 @@
+/**
+ * Mock API that reproduces the common behaviors/endpoints of the real API.
+ */
+
+import type { LemurBaseParameters } from '../../src'
+
+export const knownTranscriptIds = ['123']
+const lemurResponse = {
+  response: 'some response',
+  request_id: 'request_id',
+}
+
+const withTranscriptId = (
+  input: LemurBaseParameters,
+  output: unknown | (() => unknown),
+) => {
+  // omitting the optional chaining operator (?.) here causes a weird issue with Jest
+  // TODO: investigate why that happens
+  if (input.transcript_ids?.some((id: string) => !knownTranscriptIds.includes(id))) {
+    throw 'each transcript source id must be valid'
+  }
+
+  return typeof output === 'function' ? output() : output
+}
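+
+// Illustrative usage (for demonstration only): guarding a mocked endpoint.
+//
+//   withTranscriptId({ transcript_ids: ['123'] } as LemurBaseParameters, lemurResponse)
+//   // -> lemurResponse
+//   withTranscriptId({ transcript_ids: ['nope'] } as LemurBaseParameters, lemurResponse)
+//   // -> throws 'each transcript source id must be valid'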
+
+export const post = (input: unknown) => ({
+  // lemur
+  '/lemur/v3/generate/summary': withTranscriptId(
+    input as LemurBaseParameters,
+    lemurResponse,
+  ),
+  '/lemur/v3/generate/action-items': withTranscriptId(
+    input as LemurBaseParameters,
+    lemurResponse,
+  ),
+  '/lemur/v3/generate/task': withTranscriptId(
+    input as LemurBaseParameters,
+    lemurResponse,
+  ),
+  '/lemur/v3/generate/question-answer': withTranscriptId(
+    input as LemurBaseParameters,
+    {
+      ...lemurResponse,
+      response: [
+        {
+          question: 'question',
+          answer: 'answer',
+        },
+      ],
+    },
+  ),
+
+  // files
+  '/v2/upload': { upload_url: 'https://some-url.com' },
+  // transcript
+  '/v2/transcript': { id: knownTranscriptIds[0], status: 'queued' },
+  // realtime
+  '/v2/realtime/token': { token: '123' }
+})
+
+export const get = () => ({
+  // transcript
+  '/v2/transcript': {
+    transcripts: [],
+    page_details: {
+      limit: 20,
+      result_count: 10,
+      next_url: `https://api.assemblyai.com/v2/transcript?after_id=${knownTranscriptIds[0]}`,
+      prev_url: 'https://api.assemblyai.com/v2/transcript',
+    }
+  },
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [`/v2/transcript/${id}`, { id }]),
+  ),
+  // transcript segments
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [
+      `/v2/transcript/${id}/paragraphs`,
+      { id, paragraphs: ['paragraph 1'] },
+    ]),
+  ),
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [
+      `/v2/transcript/${id}/sentences`,
+      { id, sentences: ['sentence 1'] },
+    ]),
+  ),
+  // transcript subtitles
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [`/v2/transcript/${id}/srt`, 'subtitle']),
+  ),
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [`/v2/transcript/${id}/vtt`, 'subtitle']),
+  ),
+  // redactions
+  ...Object.fromEntries(
+    knownTranscriptIds.map((id) => [
+      `/v2/transcript/${id}/redacted-audio`,
+      {
+        status: 'redacted_audio_ready',
+        redacted_audio_url: 'https://some-url.com',
+      },
+    ]),
+  ),
+})
diff --git a/tests/__mocks__/axios.ts b/tests/__mocks__/axios.ts
new file mode 100644
index 0000000..8039a0c
--- /dev/null
+++ b/tests/__mocks__/axios.ts
@@ -0,0 +1,32 @@
+import * as axiosImport from 'axios'
+import type { LemurBaseParameters } from '../../src'
+import { get, post } from './api'
+
+const axios = jest.createMockFromModule('axios') as jest.Mocked<
+  typeof axiosImport.default
+>
+axios.create = jest.fn(() => axios)
+
+type MakeEndpoints = (
+  input: unknown,
+) => Record<string, unknown | ((input: unknown) => unknown)>
+const useKnownEndpoints =
+  (makeEndpoints: MakeEndpoints) => (url: string, input: unknown) => {
+    const endpoints = makeEndpoints(input as LemurBaseParameters)
+    if (!(url in endpoints))
+      return Promise.reject(new axiosImport.AxiosError(`${url}: not found`, '404'))
+
+    const resolver = endpoints[url as keyof typeof endpoints]
+    if (typeof resolver === 'function')
+      return Promise.resolve(withData(resolver(input)))
+
+    return Promise.resolve(withData(resolver))
+  }
+
+axios.post.mockImplementation(useKnownEndpoints(post))
+axios.get.mockImplementation(useKnownEndpoints(get))
+
+export const withData = (data: unknown) => ({ data })
+export const isAxiosError = axiosImport.isAxiosError
+export default axios
diff --git a/tests/__mocks__/ws.ts b/tests/__mocks__/ws.ts
new file mode 100644
index 0000000..54ee7e2
--- /dev/null
+++ b/tests/__mocks__/ws.ts
@@ -0,0 +1,15 @@
+import { WebSocket } from "mock-socket";
+
+export default class MockWebSocket extends WebSocket {
+  constructor(address: string | URL, options?: unknown){
super(address); + } + + removeAllListeners(){ + this.listeners = {}; + this.onclose = () => {}; + this.onopen = () => {}; + this.onerror = () => {}; + this.onmessage = () => {}; + } +} diff --git a/tests/file.test.ts b/tests/file.test.ts new file mode 100644 index 0000000..4417bd4 --- /dev/null +++ b/tests/file.test.ts @@ -0,0 +1,23 @@ +import AssemblyAI from '../src' +import path from "path" + +const testDir = process.env["TESTDATA_DIR"] ?? '.' + +const assembly = new AssemblyAI({ + apiKey: '', +}) + +describe('files', () => { + it('should upload a file', async () => { + const uploadUrl = await assembly.files.upload(path.join(testDir, 'gore.wav')) + + expect(uploadUrl).toBeTruthy() + }, 10_000) + + it('should not find file', async () => { + const promise = assembly.files.upload(path.join(testDir, 'bad-path.wav')) + await expect(promise).rejects.toThrowError( + "ENOENT: no such file or directory, open '" + testDir + "/bad-path.wav'", + ) + }) +}) diff --git a/tests/lemur.test.ts b/tests/lemur.test.ts new file mode 100644 index 0000000..e4e0777 --- /dev/null +++ b/tests/lemur.test.ts @@ -0,0 +1,65 @@ +import { knownTranscriptIds } from './__mocks__/api' +import AssemblyAI from '../src' + +const assembly = new AssemblyAI({ + apiKey: '', +}) + +describe('lemur', () => { + it('should generate a summary', async () => { + const { response } = await assembly.lemur.summary({ + final_model: 'basic', + transcript_ids: knownTranscriptIds, + answer_format: 'one sentence', + }) + + expect(response).toBeTruthy() + }, 15_000) + + it('should generate an answer', async () => { + const { response } = await assembly.lemur.questionAnswer({ + final_model: 'basic', + transcript_ids: knownTranscriptIds, + questions: [ + { + question: 'What are they discussing?', + answer_format: 'text', + }, + ], + }) + + expect(response).toBeTruthy() + expect(response).toHaveLength(1) + }, 15_000) + + it('should generate action items', async () => { + const { response } = await assembly.lemur.actionItems({ + final_model: 'basic', + transcript_ids: knownTranscriptIds, + }) + + expect(response).toBeTruthy() + }, 15_000) + + it('should generate a task', async () => { + const { response } = await assembly.lemur.task({ + final_model: 'basic', + transcript_ids: knownTranscriptIds, + prompt: 'Write a haiku about this conversation.', + }) + + expect(response).toBeTruthy() + }, 15_000) + + it('should fail to generate a summary', async () => { + const promise = assembly.lemur.summary({ + final_model: 'basic', + transcript_ids: ['bad-id'], + answer_format: 'one sentence', + }) + + await expect(promise).rejects.toBe( + 'each transcript source id must be valid', + ) + }) +}) diff --git a/tests/realtime.test.ts b/tests/realtime.test.ts new file mode 100644 index 0000000..81f27f5 --- /dev/null +++ b/tests/realtime.test.ts @@ -0,0 +1,221 @@ +import WS from "jest-websocket-mock"; +import AssemblyAI, { RealtimeService } from '../src' +import RealtimeError, { + RealtimeErrorType, + RealtimeErrorMessages +} from '../src/utils/errors/realtime' + +const apiKey = '123'; +const token = '123'; +const baseUrl = 'https://localhost:1234' +const realtimeUrl = 'wss://localhost:1234' +const sessionBeginsMessage = { + message_type: 'SessionBegins', + session_id: '123', + expires_at: '2023-09-14T03:37:11.516967' +}; +const sessionTerminatedMessage = { + message_type: 'SessionTerminated' +}; +let server: WS; +let aai: AssemblyAI; +let rt: RealtimeService; +let onOpen: jest.Mock; + +async function connect(rt: RealtimeService, server: WS) { + const connectPromise 
= rt.connect()
+  await server.connected
+  server.send(JSON.stringify(sessionBeginsMessage))
+  await connectPromise;
+}
+async function close(rt: RealtimeService, server: WS) {
+  const closePromise = rt.close()
+  server.send(JSON.stringify(sessionTerminatedMessage))
+  await closePromise;
+  await server.closed
+}
+
+describe('realtime', () => {
+  beforeEach(async () => {
+    server = new WS(realtimeUrl)
+    aai = new AssemblyAI({
+      apiKey,
+      baseUrl,
+    })
+    rt = aai.realtime.createService({ realtimeUrl })
+    onOpen = jest.fn()
+    rt.on('open', onOpen)
+    await connect(rt, server)
+  })
+  afterEach(async () => await cleanup())
+
+  async function cleanup() {
+    await close(rt, server);
+    WS.clean()
+  }
+
+  it('fails on redundant connection', async () => {
+    await expect(async () => await rt.connect()).rejects.toThrowError('Already connected')
+  })
+
+  it('fails with invalid realtime URL protocol', async () => {
+    const rt = new RealtimeService({
+      apiKey,
+      realtimeUrl: 'https://api.assemblyai.com',
+    });
+    await expect(async () => await rt.connect()).rejects.toThrowError('Invalid protocol, must be wss')
+  })
+
+  it('fails to send audio with closed websocket', async () => {
+    await close(rt, server)
+    expect(() => rt.sendAudio(new ArrayBuffer(8)))
+      .toThrow('Socket is not open for communication')
+  })
+
+  it('creates service with override URL', async () => {
+    await cleanup();
+    const baseUrlOverride = 'wss://localhost:1235'
+    const server = new WS(baseUrlOverride)
+    const aai = new AssemblyAI({ apiKey })
+    const rt = aai.realtime.createService({ realtimeUrl: baseUrlOverride, token: '123' })
+    await connect(rt, server);
+    await server.connected
+    await close(rt, server)
+  })
+
+  it('creates service with token', async () => {
+    const realtimeUrl = 'wss://localhost:5678'
+    const server = new WS(realtimeUrl)
+    const aai = new AssemblyAI({ apiKey, baseUrl })
+    const rt = aai.realtime.createService({ realtimeUrl, token: '123' })
+    await connect(rt, server)
+    await close(rt, server)
+  })
+
+  it('creates service with API key', async () => {
+    const realtimeUrl = 'wss://localhost:5678'
+    const server = new WS(realtimeUrl)
+    const aai = new AssemblyAI({ apiKey, baseUrl })
+    const rt = aai.realtime.createService({ realtimeUrl, apiKey: '123' })
+    await connect(rt, server)
+    await close(rt, server)
+  })
+
+  it('receives open event', () => {
+    expect(onOpen).toHaveBeenCalled()
+  })
+
+  it('receives closed event', async () => {
+    const onClose = jest.fn()
+    rt.on('close', onClose)
+    server.close()
+    expect(onClose).toHaveBeenCalled()
+  })
+
+  it('closes without SessionTerminated', async () => {
+    const realtimeUrl = 'wss://localhost:5678'
+    const server = new WS(realtimeUrl)
+    const assembly = new AssemblyAI({ apiKey, baseUrl })
+    const rt = assembly.realtime.createService({ realtimeUrl, apiKey: '123' })
+    await connect(rt, server)
+    await rt.close(false)
+    await server.closed
+  })
+
+  it('can send audio', async () => {
+    const data = new ArrayBuffer(8)
+    rt.sendAudio(data)
+    await expect(server)
+      .toReceiveMessage(JSON.stringify({ audio_data: Buffer.from(data).toString('base64') }));
+  })
+
+  it('can receive transcript', () => {
+    const data = {
+      created: '2023-09-14T03:37:11.516967',
+      text: "Hello world",
+      message_type: 'FinalTranscript'
+    };
+    const onTranscript = jest.fn()
+    rt.on('transcript', onTranscript)
+    server.send(JSON.stringify(data))
+    expect(onTranscript).toHaveBeenCalledWith({ ...data, created: new Date(data.created) })
+  })
+
+  it('can receive partial transcript', () => {
+    const data = {
+      created: 
'2023-09-14T03:37:11.516967', + text: "Hello world", + message_type: 'PartialTranscript' + }; + const onTranscript = jest.fn() + rt.on('transcript.partial', onTranscript) + server.send(JSON.stringify(data)) + expect(onTranscript).toHaveBeenCalledWith({ ...data, created: new Date(data.created) }) + }) + + it('can receive final transcript', () => { + const data = { + created: '2023-09-14T03:37:11.516967', + text: "Hello world", + message_type: 'FinalTranscript' + }; + const onTranscript = jest.fn() + rt.on('transcript.final', onTranscript) + server.send(JSON.stringify(data)) + expect(onTranscript).toHaveBeenCalledWith({ ...data, created: new Date(data.created) }) + }) + + it('can receive session begins', async () => { + const realtimeUrl = 'wss://localhost:5678' + const server = new WS(realtimeUrl) + const assembly = new AssemblyAI({ apiKey, baseUrl }) + const rt = assembly.realtime.createService({ realtimeUrl, apiKey: '123' }) + const onOpen = jest.fn() + rt.on('open', onOpen) + await connect(rt, server) + expect(onOpen).toHaveBeenCalledWith({ + sessionId: sessionBeginsMessage.session_id, + expiresAt: new Date(sessionBeginsMessage.expires_at) + }) + }) + + it('receives WebSocket error', () => { + const onError = jest.fn() + rt.on('error', onError) + server.error({ + code: 0, + reason: 'Some WebSocket issue', + wasClean: false + }) + expect(onError).toHaveBeenCalledWith(expect.any(Error)) + }) + + it('receives realtime error message', () => { + const error = { + "error": "my_error" + } + const onError = jest.fn() + rt.on('error', onError) + server.send(JSON.stringify(error)) + expect(onError).toHaveBeenCalledWith(new RealtimeError(error.error)) + }) + + it('receives close event', () => { + const onClose = jest.fn() + rt.on('close', onClose) + server.close({ + code: RealtimeErrorType.AudioTooLong, + reason: null as unknown as string, + wasClean: false + }) + expect(onClose).toHaveBeenCalledWith( + RealtimeErrorType.AudioTooLong, + RealtimeErrorMessages[RealtimeErrorType.AudioTooLong] + ) + }) + + it('can create a token', async () => { + const token = await aai.realtime.createTemporaryToken({ expires_in: 480 }) + expect(token).toEqual('123'); + }) +}) diff --git a/tests/static/gore-short.wav b/tests/static/gore-short.wav new file mode 100644 index 0000000..1b440f9 Binary files /dev/null and b/tests/static/gore-short.wav differ diff --git a/tests/static/gore.wav b/tests/static/gore.wav new file mode 100644 index 0000000..c702498 Binary files /dev/null and b/tests/static/gore.wav differ diff --git a/tests/transcript.test.ts b/tests/transcript.test.ts new file mode 100644 index 0000000..1030823 --- /dev/null +++ b/tests/transcript.test.ts @@ -0,0 +1,176 @@ +import { knownTranscriptIds } from './__mocks__/api' +import axios, { withData } from './__mocks__/axios' +import AssemblyAI from '../src' +import path from "path" + +const testDir = process.env["TESTDATA_DIR"] ?? '.' 
+
+const assembly = new AssemblyAI({
+  apiKey: '',
+})
+
+const transcriptId = knownTranscriptIds[0]
+const remoteAudioURL =
+  'https://storage.googleapis.com/aai-web-samples/espn-bears.m4a'
+const badRemoteAudioURL =
+  'https://storage.googleapis.com/aai-web-samples/does-not-exist.m4a'
+
+describe('core', () => {
+  it('should create the transcript object with a remote url', async () => {
+    const transcript = await assembly.transcripts.create(
+      {
+        audio_url: remoteAudioURL,
+      },
+      {
+        poll: false
+      },
+    )
+
+    expect(transcript.status).toBeTruthy()
+    expect(transcript.status).not.toBe('error')
+    expect(transcript.status).not.toBe('completed')
+  })
+
+  it('should create the transcript object with a local file', async () => {
+    const transcript = await assembly.transcripts.create(
+      {
+        audio_url: path.join(testDir, 'gore.wav'),
+      },
+      {
+        poll: false
+      },
+    )
+
+    expect(['processing', 'queued']).toContain(transcript.status)
+  }, 60_000)
+
+  it('should get the transcript object', async () => {
+    const fetched = await assembly.transcripts.get(transcriptId)
+
+    expect(fetched.id).toBeTruthy()
+  })
+
+  it('should poll the transcript object', async () => {
+    axios.get.mockResolvedValueOnce(withData({ status: 'queued' }))
+    axios.get.mockResolvedValueOnce(withData({ status: 'processing' }))
+    axios.get.mockResolvedValueOnce(withData({ status: 'completed' }))
+    const transcript = await assembly.transcripts.create(
+      {
+        audio_url: remoteAudioURL,
+      },
+      {
+        pollingInterval: 1000,
+        pollingTimeout: 5000,
+      },
+    )
+
+    expect(transcript.status).toBe('completed')
+  }, 6000)
+
+  it('should list the transcript objects', async () => {
+    const page = await assembly.transcripts.list()
+    expect(page.transcripts).toBeInstanceOf(Array)
+    expect(page.page_details).not.toBeNull()
+  })
+
+  it('should delete the transcript object', async () => {
+    axios.delete.mockResolvedValueOnce(withData({ id: transcriptId }))
+    const deleted = await assembly.transcripts.delete(transcriptId)
+
+    expect(axios.delete).toHaveBeenLastCalledWith(
+      `/v2/transcript/${transcriptId}`,
+    )
+    expect(deleted.id).toBe(transcriptId)
+  })
+})
+
+describe('failures', () => {
+  it('should fail to create the transcript object', async () => {
+    const errorResponse = { status: 'error' }
+    axios.post.mockResolvedValueOnce(withData(errorResponse))
+    const created = await assembly.transcripts.create(
+      {
+        audio_url: badRemoteAudioURL,
+      },
+      {
+        poll: false
+      },
+    )
+
+    expect(created).toBe(errorResponse)
+    expect(axios.post).toHaveBeenLastCalledWith('/v2/transcript', {
+      audio_url: badRemoteAudioURL,
+    })
+  })
+
+  it('should fail to poll', async () => {
+    const promise = assembly.transcripts.create(
+      {
+        audio_url: badRemoteAudioURL,
+      },
+      {
+        pollingInterval: 1_000,
+        pollingTimeout: 1_000,
+      },
+    )
+
+    await expect(promise).rejects.toThrow('Polling timeout')
+  })
+})
+
+describe('segments', () => {
+  it('should get paragraphs', async () => {
+    const segment = await assembly.transcripts.paragraphs(transcriptId)
+
+    expect(segment.paragraphs).toBeInstanceOf(Array)
+    expect(segment.paragraphs.length).toBeGreaterThan(0)
+  })
+
+  it('should get sentences', async () => {
+    const segment = await assembly.transcripts.sentences(transcriptId)
+
+    expect(segment.sentences).toBeInstanceOf(Array)
+    expect(segment.sentences.length).toBeGreaterThan(0)
+  })
+})
+
+describe('subtitles', () => {
+  it('should get srt subtitles', async () => {
+    const subtitle = await assembly.transcripts.subtitles(transcriptId, 'srt')
+
+    expect(subtitle).toBeTruthy()
+  })
+
+  
it('should get vtt subtitles', async () => { + const subtitle = await assembly.transcripts.subtitles(transcriptId, 'vtt') + + expect(subtitle).toBeTruthy() + }) +}) + +describe('redactions', () => { + it('should create a redactable transcript object', async () => { + const transcript = await assembly.transcripts.create( + { + audio_url: + 'https://storage.googleapis.com/aai-web-samples/espn-bears.m4a', + redact_pii: true, + redact_pii_audio: true, + redact_pii_policies: ['person_age', 'date_of_birth', 'phone_number'], + redact_pii_audio_quality: 'mp3', + }, + { + poll: false + }, + ) + + expect(['processing', 'queued']).toContain(transcript.status) + }) + + it('should get redactions', async () => { + const res = await assembly.transcripts.redactions(transcriptId) + + expect(res.status).toBe('redacted_audio_ready') + expect(res.redacted_audio_url).toBeTruthy() + }) +}) diff --git a/tests/utils.test.ts b/tests/utils.test.ts new file mode 100644 index 0000000..39638ac --- /dev/null +++ b/tests/utils.test.ts @@ -0,0 +1,34 @@ +import { + createAxiosClient, + throwApiError, +} from '../src/utils/axios'; +const apiKey = '1234'; +const baseUri = 'http://localhost:1234'; + +describe('utils', () => { + it('should create Axios client', () => { + const client = createAxiosClient({ + apiKey: apiKey, + baseUrl: baseUri, + }); + expect(client).toBeTruthy(); + }) + it('should throw AssemblyAI API error', async () => { + const error = { + isAxiosError: true, + response: { + data: { + error: 'Error message' + } + } + } + await expect(throwApiError(error)).rejects.toThrow(error.response.data.error); + }) + it('should throw HTTP error', async () => { + const error = { + isAxiosError: true, + response: null + } + await expect(throwApiError(error)).rejects.toBe(error); + }) +}) diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..485ba67 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src", + + "module": "esnext", + "moduleResolution": "node", + "target": "es6", + "allowJs": false, + "declaration": true, + "removeComments": false, + "strict": true, + "forceConsistentCasingInFileNames": true, + + "esModuleInterop": true, + "paths": { + "@/*": ["./src/*"] + } + }, + "include": ["src"], + "exclude": ["node_modules", "**/*.spec.ts"] +} diff --git a/tsconfig.test.json b/tsconfig.test.json new file mode 100644 index 0000000..ab8d44e --- /dev/null +++ b/tsconfig.test.json @@ -0,0 +1,12 @@ +{ + "extends": "./tsconfig.json", + "include": [ + "src/**/*.ts", + "tests/**/*.ts" + ], + "exclude": [ + "node_modules", + "**/*.spec.ts", + "**/*.ignore.*" + ] +} diff --git a/tslint.json b/tslint.json new file mode 100644 index 0000000..bd99f71 --- /dev/null +++ b/tslint.json @@ -0,0 +1,11 @@ +{ + "defaultSeverity": "error", + "extends": ["tslint:recommended"], + "jsRules": {}, + "rules": { + "no-shadowed-variable": false, + "no-console": false, + "no-var-requires": false + }, + "rulesDirectory": [] +}