From 3d9916dbadfd336ceb13a297267f69faae4f551c Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Tue, 30 Jan 2024 17:10:44 +0300 Subject: [PATCH 01/15] modified: production version implementation start --- client/client.py | 27 +- client/convert.sh | 1 - docker-compose.yml | 3 +- notebooks/daily_audio_records_volume.ipynb | 2141 ----------------- ...ormance_whisper-timestamped_on1080ti.ipynb | 283 --- output/.gitkeep | 0 requirements.txt | 2 - transcribe.py | 137 +- 8 files changed, 57 insertions(+), 2537 deletions(-) delete mode 100644 client/convert.sh delete mode 100644 notebooks/daily_audio_records_volume.ipynb delete mode 100644 notebooks/performance_whisper-timestamped_on1080ti.ipynb delete mode 100644 output/.gitkeep diff --git a/client/client.py b/client/client.py index 2b0d4d9..cf68127 100644 --- a/client/client.py +++ b/client/client.py @@ -2,16 +2,17 @@ import os import requests -def upload_file(file_path: str, use_parallel: bool): + +def upload_file(file_path: str): if not os.path.isfile(file_path): print("The file does not exist.") return - - url = "http://localhost:8000/transcribe/parallel" if use_parallel else "http://localhost:8000/transcribe/single" - files = {'files': (os.path.basename(file_path), open(file_path, 'rb'), 'audio/wav')} - + + url = "http://localhost:8000/transcribe" + file = {"file": (os.path.basename(file_path), open(file_path, "rb"), "audio/wav")} + try: - response = requests.post(url, files=files) + response = requests.post(url, files=file) response.raise_for_status() print("Response:", response.json()) except requests.exceptions.HTTPError as err: @@ -21,10 +22,12 @@ def upload_file(file_path: str, use_parallel: bool): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Upload an audio file to the FastAPI server for transcription.") - parser.add_argument('--file', type=str, required=True, help='File path of the audio file') - parser.add_argument('--parallel', action='store_true', help='Use parallel transcription endpoint') - + parser = argparse.ArgumentParser( + description="Upload an audio file to the FastAPI server for transcription." 
+ ) + parser.add_argument( + "--file", type=str, required=True, help="File path of the audio file" + ) + args = parser.parse_args() - - upload_file(args.file, args.parallel) \ No newline at end of file + upload_file(args.file) diff --git a/client/convert.sh b/client/convert.sh deleted file mode 100644 index 5bcbf87..0000000 --- a/client/convert.sh +++ /dev/null @@ -1 +0,0 @@ -ffmpeg -i input.m4a -acodec pcm_s16le -ar 44100 output.wav diff --git a/docker-compose.yml b/docker-compose.yml index 8f74e65..5b2cf58 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,7 +11,6 @@ services: dockerfile: Dockerfile volumes: - ./input/:/app/input - - ./output:/app/output - ./cache:/app/cache - ./logs:/app/logs deploy: @@ -21,4 +20,4 @@ services: - driver: nvidia device_ids: ['0'] capabilities: [gpu] - command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "240"] \ No newline at end of file + command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "600"] \ No newline at end of file diff --git a/notebooks/daily_audio_records_volume.ipynb b/notebooks/daily_audio_records_volume.ipynb deleted file mode 100644 index c675130..0000000 --- a/notebooks/daily_audio_records_volume.ipynb +++ /dev/null @@ -1,2141 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Текущий дневной объём аудиозаписей для расшифровки" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.express as px\n", - "\n", - "SEED = 5908" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "trs = pd.read_csv(\n", - " 'transcribations_2024.csv',\n", - " dtype={19: object},\n", - " parse_dates= [1, 17, 23],\n", - " index_col=0\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 3357462 entries, 0 to 3357461\n", - "Data columns (total 23 columns):\n", - " # Column Dtype \n", - "--- ------ ----- \n", - " 0 transcribation_date datetime64[ns]\n", - " 1 date_y float64 \n", - " 2 date_m float64 \n", - " 3 date_d float64 \n", - " 4 side bool \n", - " 5 text object \n", - " 6 start float64 \n", - " 7 audio_file_name object \n", - " 8 conf float64 \n", - " 9 end_time float64 \n", - " 10 sentiment float64 \n", - " 11 sentiment_pos float64 \n", - " 12 sentiment_neg float64 \n", - " 13 ID int64 \n", - " 14 linkedid float64 \n", - " 15 dst object \n", - " 16 record_date datetime64[ns]\n", - " 17 source_id int64 \n", - " 18 src object \n", - " 19 cpu_id int64 \n", - " 20 duration float64 \n", - " 21 file_size int64 \n", - " 22 queue_date datetime64[ns]\n", - "dtypes: bool(1), datetime64[ns](3), float64(11), int64(4), object(4)\n", - "memory usage: 592.4+ MB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
 transcribation_datedate_ydate_mdate_dstartconfend_timesentimentsentiment_possentiment_negIDlinkedidrecord_datesource_idcpu_iddurationfile_sizequeue_date
count3357462.000.000.000.003357462.003357462.003357462.000.000.000.003357462.003356882.003357462.003357462.003357462.003357462.003357462.003357462.00
mean.2fnannannan102.380.89105.25nannannan167963000.501705127990.19.2f1.2010.30196.233139722.61.2f
min.2fnannannan0.000.000.00nannannan166284270.001704088914.15.2f1.000.000.000.00.2f
25%.2fnannannan28.350.8731.11nannannan167123635.251704719688.15.2f1.004.0085.161362604.00.2f
50%.2fnannannan68.430.9571.46nannannan167963000.501705129009.16.2f1.009.00165.742651884.00.2f
75%.2fnannannan138.691.00141.66nannannan168802365.751705568808.24.2f1.0016.00251.864029804.00.2f
max.2fnannannan3592.831.003595.14nannannan169641731.001706016461.16.2f2.0027.003599.0857585324.00.2f
stdnannannannan117.480.19117.80nannannan969215.94521451.97nan0.407.71172.362757835.52nan
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
68436725201296691590063356555304008719145186663121541322787414
transcribation_date2024-01-07 13:51:192024-01-04 11:21:532024-01-02 09:52:012024-01-12 14:52:412024-01-04 19:35:412024-01-22 08:57:452024-01-03 16:19:182024-01-08 19:08:452024-01-16 11:43:152024-01-20 07:18:05
date_yNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
date_mNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
date_dNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
sideTrueFalseFalseFalseTrueFalseTrueTrueFalseFalse
textêàê ïîìî÷üàîíî âàñ êàê èç öåëàÿ áëèíû èëè ó íå¸ ñòåêëîêåð...àëëî çäðàâñòâóéòå ìàñòåð ïî òåëåâèçîðóíó òàê äèàãíîñòèêó òî ïîëîìêó òî ïîñìîòðèòåà ÷òî ïîëçóíêîâ ÷åãî çîâ¸ò êòî îñòàâëÿëè ÷òîáû...ñíîâà âûêëþ÷àþòäîáðûé âå÷åðäîáðûé äåíü äåæóðíûé ìàñòåð âëàäèìèð àäëåð âû ...ïðîâåðêà
start26.4326.3740.7111.94398.4312.72100.50864324.03660814.856.39
audio_file_namein_5164_2024-01-07-13-39-19rxtx-out.wavin_9164467778_2024-01-04-11-09-11rxtx-in.wavin_5063_2024-01-02-09-35-40rxtx-in.wava2024-01-12t14:41:23b_c9857207630d_e9252098540...in_8127770011_2024-01-04-19-14-43rxtx-out.wava2024-01-22t08:41:33b_c9169035066d_e9166055729...a2024-01-03t15:59:40b_c79032079978d_e912691150...in_5124_2024-01-08-18-56-35rxtx-out.wava2024-01-16t11:30:31b_c9966816168d_e4997342649...in_5077_2024-01-20-07-07-31rxtx-in.wav
conf0.8205030.8673860.8962421.00.9983560.8470191.00.8416330.8849950.90441
end_time27.026.6748.1513.59401.0116.68101.6724.621.576.96
sentimentNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
sentiment_posNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
sentiment_negNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ID166968637166536282166293939167874333166640825169324357166475721167150901168438402169071684
linkedid1704623959.1533021704355750.1522751704177339.1516281705059683.2386231704384882.1524991705902093.241071704286780.2363381704729394.153731705393831.239571705723650.158147
dstmain5112main9252098540515291660557299126911502main4997342649main
record_date2024-01-07 13:39:192024-01-04 11:09:112024-01-02 09:35:402024-01-12 14:41:232024-01-04 19:14:432024-01-22 08:41:332024-01-03 15:59:402024-01-08 18:56:352024-01-16 11:30:312024-01-20 07:07:31
source_id1112122121
src51649164467778506398572076308127770011916903506679032079978512499668161685077
cpu_id81200114245212
duration45.3297.2246.238.42419.36230.98361.8847.4668.727.0
file_size725164155524439392446147646709804369572457901247594041099564112044
queue_date2024-01-07 13:50:542024-01-04 11:20:502024-01-02 09:50:072024-01-12 14:52:102024-01-04 19:32:442024-01-22 08:55:542024-01-03 16:16:232024-01-08 19:08:192024-01-16 11:42:272024-01-20 07:17:59
\n", - "
" - ], - "text/plain": [ - " 684367 \\\n", - "transcribation_date 2024-01-07 13:51:19 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side True \n", - "text êàê ïîìî÷ü \n", - "start 26.43 \n", - "audio_file_name in_5164_2024-01-07-13-39-19rxtx-out.wav \n", - "conf 0.820503 \n", - "end_time 27.0 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 166968637 \n", - "linkedid 1704623959.153302 \n", - "dst main \n", - "record_date 2024-01-07 13:39:19 \n", - "source_id 1 \n", - "src 5164 \n", - "cpu_id 8 \n", - "duration 45.32 \n", - "file_size 725164 \n", - "queue_date 2024-01-07 13:50:54 \n", - "\n", - " 252012 \\\n", - "transcribation_date 2024-01-04 11:21:53 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text à \n", - "start 26.37 \n", - "audio_file_name in_9164467778_2024-01-04-11-09-11rxtx-in.wav \n", - "conf 0.867386 \n", - "end_time 26.67 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 166536282 \n", - "linkedid 1704355750.152275 \n", - "dst 5112 \n", - "record_date 2024-01-04 11:09:11 \n", - "source_id 1 \n", - "src 9164467778 \n", - "cpu_id 12 \n", - "duration 97.2 \n", - "file_size 1555244 \n", - "queue_date 2024-01-04 11:20:50 \n", - "\n", - " 9669 \\\n", - "transcribation_date 2024-01-02 09:52:01 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text îíî âàñ êàê èç öåëàÿ áëèíû èëè ó íå¸ ñòåêëîêåð... \n", - "start 40.71 \n", - "audio_file_name in_5063_2024-01-02-09-35-40rxtx-in.wav \n", - "conf 0.896242 \n", - "end_time 48.15 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 166293939 \n", - "linkedid 1704177339.151628 \n", - "dst main \n", - "record_date 2024-01-02 09:35:40 \n", - "source_id 1 \n", - "src 5063 \n", - "cpu_id 0 \n", - "duration 246.2 \n", - "file_size 3939244 \n", - "queue_date 2024-01-02 09:50:07 \n", - "\n", - " 1590063 \\\n", - "transcribation_date 2024-01-12 14:52:41 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text àëëî çäðàâñòâóéòå ìàñòåð ïî òåëåâèçîðó \n", - "start 11.94 \n", - "audio_file_name a2024-01-12t14:41:23b_c9857207630d_e9252098540... \n", - "conf 1.0 \n", - "end_time 13.59 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 167874333 \n", - "linkedid 1705059683.238623 \n", - "dst 9252098540 \n", - "record_date 2024-01-12 14:41:23 \n", - "source_id 2 \n", - "src 9857207630 \n", - "cpu_id 0 \n", - "duration 38.42 \n", - "file_size 614764 \n", - "queue_date 2024-01-12 14:52:10 \n", - "\n", - " 356555 \\\n", - "transcribation_date 2024-01-04 19:35:41 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side True \n", - "text íó òàê äèàãíîñòèêó òî ïîëîìêó òî ïîñìîòðèòå \n", - "start 398.43 \n", - "audio_file_name in_8127770011_2024-01-04-19-14-43rxtx-out.wav \n", - "conf 0.998356 \n", - "end_time 401.01 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 166640825 \n", - "linkedid 1704384882.152499 \n", - "dst 5152 \n", - "record_date 2024-01-04 19:14:43 \n", - "source_id 1 \n", - "src 8127770011 \n", - "cpu_id 1 \n", - "duration 419.36 \n", - "file_size 6709804 \n", - "queue_date 2024-01-04 19:32:44 \n", - "\n", - " 3040087 \\\n", - "transcribation_date 2024-01-22 08:57:45 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text à ÷òî ïîëçóíêîâ ÷åãî çîâ¸ò êòî îñòàâëÿëè ÷òîáû... 
\n", - "start 12.72 \n", - "audio_file_name a2024-01-22t08:41:33b_c9169035066d_e9166055729... \n", - "conf 0.847019 \n", - "end_time 16.68 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 169324357 \n", - "linkedid 1705902093.24107 \n", - "dst 9166055729 \n", - "record_date 2024-01-22 08:41:33 \n", - "source_id 2 \n", - "src 9169035066 \n", - "cpu_id 14 \n", - "duration 230.98 \n", - "file_size 3695724 \n", - "queue_date 2024-01-22 08:55:54 \n", - "\n", - " 191451 \\\n", - "transcribation_date 2024-01-03 16:19:18 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side True \n", - "text ñíîâà âûêëþ÷àþò \n", - "start 100.508643 \n", - "audio_file_name a2024-01-03t15:59:40b_c79032079978d_e912691150... \n", - "conf 1.0 \n", - "end_time 101.67 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 166475721 \n", - "linkedid 1704286780.236338 \n", - "dst 9126911502 \n", - "record_date 2024-01-03 15:59:40 \n", - "source_id 2 \n", - "src 79032079978 \n", - "cpu_id 24 \n", - "duration 361.88 \n", - "file_size 5790124 \n", - "queue_date 2024-01-03 16:16:23 \n", - "\n", - " 866631 \\\n", - "transcribation_date 2024-01-08 19:08:45 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side True \n", - "text äîáðûé âå÷åð \n", - "start 24.036608 \n", - "audio_file_name in_5124_2024-01-08-18-56-35rxtx-out.wav \n", - "conf 0.841633 \n", - "end_time 24.6 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 167150901 \n", - "linkedid 1704729394.15373 \n", - "dst main \n", - "record_date 2024-01-08 18:56:35 \n", - "source_id 1 \n", - "src 5124 \n", - "cpu_id 5 \n", - "duration 47.46 \n", - "file_size 759404 \n", - "queue_date 2024-01-08 19:08:19 \n", - "\n", - " 2154132 \\\n", - "transcribation_date 2024-01-16 11:43:15 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text äîáðûé äåíü äåæóðíûé ìàñòåð âëàäèìèð àäëåð âû ... \n", - "start 14.85 \n", - "audio_file_name a2024-01-16t11:30:31b_c9966816168d_e4997342649... \n", - "conf 0.884995 \n", - "end_time 21.57 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 168438402 \n", - "linkedid 1705393831.23957 \n", - "dst 4997342649 \n", - "record_date 2024-01-16 11:30:31 \n", - "source_id 2 \n", - "src 9966816168 \n", - "cpu_id 2 \n", - "duration 68.72 \n", - "file_size 1099564 \n", - "queue_date 2024-01-16 11:42:27 \n", - "\n", - " 2787414 \n", - "transcribation_date 2024-01-20 07:18:05 \n", - "date_y NaN \n", - "date_m NaN \n", - "date_d NaN \n", - "side False \n", - "text ïðîâåðêà \n", - "start 6.39 \n", - "audio_file_name in_5077_2024-01-20-07-07-31rxtx-in.wav \n", - "conf 0.90441 \n", - "end_time 6.96 \n", - "sentiment NaN \n", - "sentiment_pos NaN \n", - "sentiment_neg NaN \n", - "ID 169071684 \n", - "linkedid 1705723650.158147 \n", - "dst main \n", - "record_date 2024-01-20 07:07:31 \n", - "source_id 1 \n", - "src 5077 \n", - "cpu_id 12 \n", - "duration 7.0 \n", - "file_size 112044 \n", - "queue_date 2024-01-20 07:17:59 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display(\n", - " trs.info(),\n", - " trs.describe().style.format('{:.2f}'),\n", - " trs.sample(10, random_state=SEED).T\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Удалим ненужные пустые колонки." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "trs = trs.drop(\n", - " ['date_y', 'date_m', 'sentiment', 'sentiment_pos', 'sentiment'],\n", - " axis=1\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Получим записи с уникальными именами файлов и ID разговора, чтобы учитывать длительность только уникальных незадублированных файлов. Оставим также только записи с распознанным текстом." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "unique_files = trs[trs.text.notna()].drop_duplicates(\n", - " subset=['linkedid', 'audio_file_name']\n", - ")\n", - "unique_files.date_d = unique_files.record_date.dt.date" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Далее посчитаем суммарную длительность аудиозаписей для распознавания по дням в минутах." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "daily_duration = unique_files.groupby('date_d')['duration'].sum().reset_index()\n", - "daily_duration.duration = daily_duration.duration / 60" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
date_dduration
02024-01-0139.969667
12024-01-0213928.201237
22024-01-0317771.537604
32024-01-0419107.857633
42024-01-0520873.734483
52024-01-0618026.749338
62024-01-0714085.537487
72024-01-0819878.013375
82024-01-0930633.502542
92024-01-1031820.098371
102024-01-1128557.912506
112024-01-1224634.361763
122024-01-1319175.180933
132024-01-1416946.708217
142024-01-1527398.953410
152024-01-1624504.804127
162024-01-1725390.787867
172024-01-1826516.760531
182024-01-1921678.013550
192024-01-2019302.831131
202024-01-2115657.104925
212024-01-2227623.006060
222024-01-2319257.227092
\n", - "
" - ], - "text/plain": [ - " date_d duration\n", - "0 2024-01-01 39.969667\n", - "1 2024-01-02 13928.201237\n", - "2 2024-01-03 17771.537604\n", - "3 2024-01-04 19107.857633\n", - "4 2024-01-05 20873.734483\n", - "5 2024-01-06 18026.749338\n", - "6 2024-01-07 14085.537487\n", - "7 2024-01-08 19878.013375\n", - "8 2024-01-09 30633.502542\n", - "9 2024-01-10 31820.098371\n", - "10 2024-01-11 28557.912506\n", - "11 2024-01-12 24634.361763\n", - "12 2024-01-13 19175.180933\n", - "13 2024-01-14 16946.708217\n", - "14 2024-01-15 27398.953410\n", - "15 2024-01-16 24504.804127\n", - "16 2024-01-17 25390.787867\n", - "17 2024-01-18 26516.760531\n", - "18 2024-01-19 21678.013550\n", - "19 2024-01-20 19302.831131\n", - "20 2024-01-21 15657.104925\n", - "21 2024-01-22 27623.006060\n", - "22 2024-01-23 19257.227092" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "daily_duration" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hovertemplate": "date_d=%{x}
"yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Длительность записей в минутах", - "x": 0.5 - }, - "width": 1400, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Дата" - } - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Кол-во минут" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig = px.line(\n", - " daily_duration,\n", - " x='date_d',\n", - " y='duration',\n", - " markers=True,\n", - " width=1400,\n", - " height=600\n", - ").update_layout(\n", - " xaxis_title='Дата',\n", - " yaxis_title='Кол-во минут',\n", - " title=dict(\n", - " text='Длительность записей в минутах',\n", - " x=.5\n", - " )\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "При подсчётете медианного значения не будем учитывать выброс за 1 января." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Медианная длительность файлов для расшифровки — 20376 мин/день\n" - ] - } - ], - "source": [ - "print(\n", - " 'Медианная длительность файлов для расшифровки —',\n", - " f'{round(daily_duration[1:].duration.median())} мин/день'\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "whisper-timestamped-reports-kkjTHbxf-py3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/performance_whisper-timestamped_on1080ti.ipynb b/notebooks/performance_whisper-timestamped_on1080ti.ipynb deleted file mode 100644 index 6c445a6..0000000 --- a/notebooks/performance_whisper-timestamped_on1080ti.ipynb +++ /dev/null @@ -1,283 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Производительность **whisper-timestamped** на текущем сервере с **1080Ti**" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Статистика по **2 584** записям длительностью от 10 сек до минуты." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import plotly.graph_objects as go\n", - "import plotly.express as px\n", - "import plotly.io as pio\n", - "\n", - "pio.renderers.default = \"notebook_connected\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - " \n", - " " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "durations_comparsion = pd.read_csv(\n", - " '/home/vladislav/ds/projects/a-iceberg/whisper-timestamped/output/performance_results.csv'\n", - ")\n", - "\n", - "trace1 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=durations_comparsion['duration_audio'],\n", - " mode='lines',\n", - " name='Длительность аудио',\n", - " line=dict(width=2)\n", - ")\n", - "trace2 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=durations_comparsion['duration_transcript'],\n", - " mode='lines',\n", - " name='Длительность распознавания',\n", - " line=dict(width=2)\n", - ")\n", - "\n", - "median_duration_audio = durations_comparsion['duration_audio'].median()\n", - "median_duration_transcript = durations_comparsion['duration_transcript'].median()\n", - "\n", - "trace3 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=[median_duration_audio] * len(durations_comparsion),\n", - " mode='lines',\n", - " name='Медианная длительность аудио',\n", - " line=dict(width=4)\n", - ")\n", - "trace4 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=[median_duration_transcript] * len(durations_comparsion),\n", - " mode='lines',\n", - " name='Медианная длительность распознавания',\n", - " line=dict(width=4)\n", - ")\n", - "\n", - "fig = go.Figure(data=[trace1, trace2, trace3, trace4])\n", - "fig.update_layout(\n", - " width=1800,\n", - " height=800,\n", - " xaxis_title='Записи',\n", - " yaxis_title='Секунды',\n", - " legend_orientation='h',\n", - " legend=dict(\n", - " font=dict(\n", - " size=15\n", - " )\n", - " ),\n", - " title=dict(\n", - " text='Длительности аудиозаписей и их распознавания',\n", - " x=.5\n", - " )\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "trace1 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=durations_comparsion['performance_ratio'],\n", - " mode='lines',\n", - " name='Производительность распознавания',\n", - " line=dict(width=2)\n", - ")\n", - "\n", - "median_performance_ratio = durations_comparsion['performance_ratio'].median()\n", - "\n", - "trace2 = go.Scatter(\n", - " x=durations_comparsion.index,\n", - " y=[median_performance_ratio] * len(durations_comparsion),\n", - " mode='lines',\n", - " name='Медианная производительность распознавания',\n", - " line=dict(width=4)\n", - ")\n", - "\n", - "fig = go.Figure(data=[trace1, trace2])\n", - "fig.update_layout(\n", - " width=1800,\n", - " height=800,\n", - " xaxis_title='Записи',\n", - " yaxis_title='Производительность, запись/распознавание',\n", - " legend_orientation='h',\n", - " legend=dict(\n", - " font=dict(\n", - " size=15\n", - " )\n", - " ),\n", - " title=dict(\n", - " text='Производительность распознавания whisper-timestamped',\n", - " x=.5\n", - " )\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### В итоге медианная производительность составляет **0.06**, то есть в среднем запись распознается в **16** раз быстрее, чем длится сама.\n", - "### Колеблется производительность довольно сильно, от **0.02** (в **50** раз быстрее) до **0.45** (в **2** раза быстрее)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### По результатам предыдущего исследования текущая медианная длительность файлов для расшифровки в сутки — **20 376** мин (**340** ч) — данные со 02.01.2024 по 23.01.2024.\n", - "### То есть в среднем записи за сутки на текущих GPU-мощностях распознаются за **1 223** мин (**20** ч)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### В худшем случае (**533** часа записей в сутки при производительности **0.45**) аудио за день будут расшифрованы за **238** часов.\n", - "### В лучшем (**233** часа записей в сутки при производительности **0.02**) — за **5** часов." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Также в ближайших планах настроить параллельное распознавание нескольких записей одновременно, в теории, это должно ещё несколько повыстить общую производительность, а также максимально избавиться от галлюцинаций распознвания на отрезках тишины путем дальнейшей настройки параметров." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "whisper-timestamped-reports-kkjTHbxf-py3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/output/.gitkeep b/output/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index 89bf8bb..66ec31b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ setuptools-rust==1.8.1 tiktoken==0.5.2 -pandas==2.2.0 -pyarrow==15.0.0 fastapi==0.109.0 python-multipart==0.0.6 uvicorn==0.27.0 diff --git a/transcribe.py b/transcribe.py index ace2dcf..d35a56b 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,17 +1,11 @@ import os -import time -import wave -import json import logging from uuid import uuid4 -from typing import List -from statistics import median import torch -import pandas as pd import whisper_timestamped as whisper -from fastapi import FastAPI, UploadFile -from fastapi.responses import HTMLResponse +from fastapi import FastAPI, UploadFile, HTTPException +from fastapi.responses import HTMLResponse, JSONResponse app = FastAPI() @@ -23,82 +17,6 @@ ) logger = logging.getLogger() - -def get_audio_duration(wav_filename): - # Calculate the duration of an audio file - with wave.open(wav_filename, "r") as wav_file: - frames = wav_file.getnframes() - rate = wav_file.getframerate() - duration = frames / float(rate) - return duration - - -async def transcribe_audio(files: List[UploadFile], request_type: str): - results = [] - performance_ratios = [] - - for file in files: - # Checking and saving the file - if not file.filename.endswith(".wav"): - continue - - filename = f"{uuid4()}.wav" - file_path = os.path.join(os.path.dirname(__file__), "input", filename) - - with open(file_path, "wb") as buffer: - buffer.write(await file.read()) - - # Processing the audio - try: - start_time = time.time() - audio = whisper.load_audio(file_path) - result = whisper.transcribe( - model, - audio, - vad="auditok", - language="ru", - remove_empty_words=True, - beam_size=5, - best_of=5, - temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), - ) - duration_transcript = time.time() - start_time - - with open(f"/app/output/{filename}.json", "w") as json_file: - json.dump(result, json_file, indent=2, ensure_ascii=False) - except Exception as e: - logger.error(f"Error in processing file {file.filename}: {e}") - continue - - # Statistic generation - duration_audio = get_audio_duration(file_path) - performance_ratio = duration_transcript / duration_audio - performance_ratios.append(performance_ratio) - - results.append( - { - "filename": file.filename, - "duration_audio": duration_audio, - "duration_transcript": duration_transcript, - "performance_ratio": performance_ratio, - "request_type": request_type, - } - ) - - # Deleting a file to save space on the server - if os.path.exists(file_path): - os.remove(file_path) - - median_performance_ratio = median(performance_ratios) - logger.info(f"Median Performance Ratio: {median_performance_ratio}") - - # Saving the results in CSV - df = pd.DataFrame(results) - df.to_csv("/app/output/performance_results.csv", index=False) - - return median_performance_ratio - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logger.info(f"Using 
device: {device}") @@ -119,8 +37,8 @@ async def main(): # HTML-form for testing in a web browser html_content = """ -
- + +
@@ -128,15 +46,42 @@ async def main(): return HTMLResponse(content=html_content) -@app.post("/transcribe/single") -async def transcribe_audio_single(files: List[UploadFile]): - # Endpoint for single processing - median_performance_ratio = await transcribe_audio(files, "single") - return {"median_performance_ratio": median_performance_ratio} +@app.post("/transcribe") +async def transcribe_audio(file: UploadFile): + if not file.file: + raise HTTPException(status_code=400, detail="No file provided") + + if "." not in file.filename: + raise HTTPException( + status_code=400, detail="No file extension found. Check file name" + ) + + file_ext = file.filename.rsplit(".", maxsplit=1)[1] + filename = f"{uuid4()}.{file_ext}" + file_path = os.path.join(os.path.dirname(__file__), "input", filename) + + with open(file_path, "wb") as file_object: + file_object.write(await file.read()) + + # Processing the audio + try: + audio = whisper.load_audio(file_path) + result = whisper.transcribe( + model, + audio, + vad="auditok", + language="ru", + remove_empty_words=True, + beam_size=5, + best_of=5, + temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), + ) + except Exception as e: + logger.error(f"Error in processing file {file.filename}: {e}") + return JSONResponse(status_code=500, content={"error": str(e)}) + # Deleting a file to save space on the server + if os.path.exists(file_path): + os.remove(file_path) -@app.post("/transcribe/parallel") -async def transcribe_audio_parallel(files: List[UploadFile]): - # Endpoint for parallel processing - median_performance_ratio = await transcribe_audio(files, "parallel") - return {"median_performance_ratio": median_performance_ratio} + return JSONResponse(content=result) From c2ef1223c65572b6f1ec1b054c3e46b85af2c346 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Tue, 30 Jan 2024 18:32:41 +0300 Subject: [PATCH 02/15] modified: change port to avaliable --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 5b2cf58..7fe757a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ services: whisper-timestamped: container_name: whisper-timestamped ports: - - "8000:8000" + - "8080:8080" restart: unless-stopped build: context: . 
@@ -20,4 +20,4 @@ services: - driver: nvidia device_ids: ['0'] capabilities: [gpu] - command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "600"] \ No newline at end of file + command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8080", "--timeout", "600"] \ No newline at end of file From 610e4fa8fbc6dcda62aa5e3478c70352c5ee72f2 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Tue, 30 Jan 2024 20:57:17 +0300 Subject: [PATCH 03/15] new file: init_server.py --- client/init_server.py | 114 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 client/init_server.py diff --git a/client/init_server.py b/client/init_server.py new file mode 100644 index 0000000..41f77c2 --- /dev/null +++ b/client/init_server.py @@ -0,0 +1,114 @@ +import os +import requests +import logging + + +class Server: + def __init__(self): + self.gpu_url = os.environ.get( + "WHISPER_SERVER_DEFAULT", "http://localhost:8000/transcribe" + ) + self.temp_file_path = "" + self.temp_file_name = "" + + logging.basicConfig(level=logging.INFO) + + def accept_feature_extractor(self, sentences, accept): + if len(accept) > 1 and accept["text"] != "": + accept_text = str(accept["text"]) + conf_score = [] + i = 0 + accept_start = 0 + accept_end = 0 + for result_rec in accept["segments"]: + if i == 0: + accept_start = result_rec["start"] + conf_score.append(float(result_rec["confidence"])) + i += 1 + if i > 0: + accept_end = result_rec["end"] + sentences.append( + { + "text": accept_text, + "start": accept_start, + "end": accept_end, + "confidence": sum(conf_score) / len(conf_score), + } + ) + + def transcribation_process( + self, + duration, + side, + original_file_name, + rec_date, + src, + dst, + linkedid, + file_size, + queue_date, + transcribation_date, + ): + logger_text = " size: " + str(file_size) + logger_text += " file: " + self.temp_file_path + self.temp_file_name + + logging.info(logger_text) + + sentences = [] + + file_path = self.temp_file_path + self.temp_file_name + with open(file_path, "rb") as audio_file: + response = requests.post( + self.gpu_url, + files={"file": (os.path.basename(file_path), audio_file, "audio/wav")}, + ) + + if response.status_code == 200: + accept = response.json() + self.accept_feature_extractor(sentences, accept) + else: + logging.error(f"Error in file processing: {response.text}") + return 0, [], [] + + for i in range(0, len(sentences)): + self.save_result( + duration, + sentences[i]["text"], + sentences[i]["start"], + sentences[i]["end"], + side, + transcribation_date, + str(sentences[i]["conf"]), + original_file_name, + rec_date, + src, + dst, + linkedid, + file_size, + queue_date, + ) + + phrases = [sentences[i]["text"] for i in range(len(sentences))] + confidences = [sentences[i]["conf"] for i in range(len(sentences))] + + return len(sentences), phrases, confidences + + def save_result( + self, + duration, + accept_text, + accept_start, + accept_end, + side, + transcribation_date, + conf_mid, + original_file_name, + rec_date, + src, + dst, + linkedid, + file_size, + queue_date, + ): + logging.info("save result start") + print("=== save_result", accept_text) From cca6f4fa72ef0e012b2fcbe6afdaf16544a27502 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Tue, 30 Jan 2024 21:01:19 +0300 Subject: [PATCH 04/15] modified: init_server.py --- client/init_server.py | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/client/init_server.py b/client/init_server.py index 41f77c2..b8d25e8 100644 --- a/client/init_server.py +++ b/client/init_server.py @@ -60,7 +60,7 @@ def transcribation_process( with open(file_path, "rb") as audio_file: response = requests.post( self.gpu_url, - files={"file": (os.path.basename(file_path), audio_file, "audio/wav")}, + files={"file": (original_file_name, audio_file, "audio/wav")}, ) if response.status_code == 200: @@ -78,7 +78,7 @@ def transcribation_process( sentences[i]["end"], side, transcribation_date, - str(sentences[i]["conf"]), + str(sentences[i]["confidence"]), original_file_name, rec_date, src, @@ -89,7 +89,7 @@ def transcribation_process( ) phrases = [sentences[i]["text"] for i in range(len(sentences))] - confidences = [sentences[i]["conf"] for i in range(len(sentences))] + confidences = [sentences[i]["confidence"] for i in range(len(sentences))] return len(sentences), phrases, confidences From 9cd9a0392bd105f0b914dab4466de485b33996b1 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Wed, 31 Jan 2024 18:47:26 +0300 Subject: [PATCH 05/15] modified: init_server.py --- client/client.py | 2 +- client/init_server.py | 94 +++++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/client/client.py b/client/client.py index cf68127..b1bd3a8 100644 --- a/client/client.py +++ b/client/client.py @@ -8,7 +8,7 @@ def upload_file(file_path: str): print("The file does not exist.") return - url = "http://localhost:8000/transcribe" + url = "http://localhost:8080/transcribe" file = {"file": (os.path.basename(file_path), open(file_path, "rb"), "audio/wav")} try: diff --git a/client/init_server.py b/client/init_server.py index b8d25e8..44f8b95 100644 --- a/client/init_server.py +++ b/client/init_server.py @@ -1,4 +1,5 @@ import os +import argparse import requests import logging @@ -6,61 +7,56 @@ class Server: def __init__(self): self.gpu_url = os.environ.get( - "WHISPER_SERVER_DEFAULT", "http://localhost:8000/transcribe" + "WHISPER_SERVER_DEFAULT", "http://localhost:8080/transcribe" ) - self.temp_file_path = "" - self.temp_file_name = "" + # self.temp_file_path = "" + # self.temp_file_name = "" logging.basicConfig(level=logging.INFO) def accept_feature_extractor(self, sentences, accept): if len(accept) > 1 and accept["text"] != "": - accept_text = str(accept["text"]) - conf_score = [] - i = 0 - accept_start = 0 - accept_end = 0 - for result_rec in accept["segments"]: - if i == 0: - accept_start = result_rec["start"] - conf_score.append(float(result_rec["confidence"])) - i += 1 - if i > 0: - accept_end = result_rec["end"] - sentences.append( - { - "text": accept_text, - "start": accept_start, - "end": accept_end, - "confidence": sum(conf_score) / len(conf_score), - } - ) + for segments_rec in accept["segments"]: + segment_text = str(segments_rec["text"]) + segment_start = segments_rec["start"] + segment_end = segments_rec["end"] + conf_score = float(segments_rec["confidence"]) + sentences.append( + { + "text": segment_text, + "start": segment_start, + "end": segment_end, + "confidence": conf_score, + } + ) def transcribation_process( self, - duration, - side, original_file_name, - rec_date, - src, - dst, - linkedid, - file_size, - queue_date, - transcribation_date, + duration=0, + side=True, + rec_date="31.01.2024", + src=1, + dst=2, + linkedid=3, + file_size=0, + queue_date="31.01.2024", + transcribation_date="31.01.2024", ): - logger_text = " size: " + str(file_size) - 
logger_text += " file: " + self.temp_file_path + self.temp_file_name + # logger_text = " size: " + str(file_size) + # logger_text += " file: " + self.temp_file_path + self.temp_file_name - logging.info(logger_text) + # logging.info(logger_text) sentences = [] - file_path = self.temp_file_path + self.temp_file_name + # file_path = self.temp_file_path + self.temp_file_name + + file_path = original_file_name with open(file_path, "rb") as audio_file: response = requests.post( self.gpu_url, - files={"file": (original_file_name, audio_file, "audio/wav")}, + files={"file": (os.path.basename(file_path), audio_file, "audio/wav")}, ) if response.status_code == 200: @@ -72,6 +68,7 @@ def transcribation_process( for i in range(0, len(sentences)): self.save_result( + original_file_name, duration, sentences[i]["text"], sentences[i]["start"], @@ -79,7 +76,6 @@ def transcribation_process( side, transcribation_date, str(sentences[i]["confidence"]), - original_file_name, rec_date, src, dst, @@ -95,6 +91,7 @@ def transcribation_process( def save_result( self, + original_file_name, duration, accept_text, accept_start, @@ -102,7 +99,6 @@ def save_result( side, transcribation_date, conf_mid, - original_file_name, rec_date, src, dst, @@ -112,3 +108,23 @@ def save_result( ): logging.info("save result start") print("=== save_result", accept_text) + + +def main(): + parser = argparse.ArgumentParser( + description="Send an audio file to the FastAPI server for processing." + ) + parser.add_argument( + "--file", type=str, required=True, help="File path of the audio file" + ) + args = parser.parse_args() + + server = Server() + num_sentences, phrases, confidences = server.transcribation_process( + original_file_name=args.file + ) + print(f"Processed {num_sentences} sentences.") + + +if __name__ == "__main__": + main() From d1de0342cc213d93074c0bc5b9b701a9ef813327 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Fri, 2 Feb 2024 14:08:28 +0300 Subject: [PATCH 06/15] modified: change port --- client/client.py | 33 --------------------------------- client/init_server.py | 2 +- docker-compose.yml | 4 ++-- 3 files changed, 3 insertions(+), 36 deletions(-) delete mode 100644 client/client.py diff --git a/client/client.py b/client/client.py deleted file mode 100644 index b1bd3a8..0000000 --- a/client/client.py +++ /dev/null @@ -1,33 +0,0 @@ -import argparse -import os -import requests - - -def upload_file(file_path: str): - if not os.path.isfile(file_path): - print("The file does not exist.") - return - - url = "http://localhost:8080/transcribe" - file = {"file": (os.path.basename(file_path), open(file_path, "rb"), "audio/wav")} - - try: - response = requests.post(url, files=file) - response.raise_for_status() - print("Response:", response.json()) - except requests.exceptions.HTTPError as err: - print(f"HTTP Error: {err}") - except Exception as err: - print(f"Error: {err}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Upload an audio file to the FastAPI server for transcription." 
- ) - parser.add_argument( - "--file", type=str, required=True, help="File path of the audio file" - ) - - args = parser.parse_args() - upload_file(args.file) diff --git a/client/init_server.py b/client/init_server.py index 44f8b95..c3820e0 100644 --- a/client/init_server.py +++ b/client/init_server.py @@ -7,7 +7,7 @@ class Server: def __init__(self): self.gpu_url = os.environ.get( - "WHISPER_SERVER_DEFAULT", "http://localhost:8080/transcribe" + "WHISPER_SERVER_DEFAULT", "http://localhost:8888/transcribe" ) # self.temp_file_path = "" # self.temp_file_name = "" diff --git a/docker-compose.yml b/docker-compose.yml index 7fe757a..8e220b1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,7 +4,7 @@ services: whisper-timestamped: container_name: whisper-timestamped ports: - - "8080:8080" + - "8888:8888" restart: unless-stopped build: context: . @@ -20,4 +20,4 @@ services: - driver: nvidia device_ids: ['0'] capabilities: [gpu] - command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8080", "--timeout", "600"] \ No newline at end of file + command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8888", "--timeout", "600"] \ No newline at end of file From 9789501a8a78f72e8ee808180b635003e32dea60 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 5 Feb 2024 12:43:11 +0400 Subject: [PATCH 07/15] modified: init_server.py --- client/init_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/init_server.py b/client/init_server.py index c3820e0..9807440 100644 --- a/client/init_server.py +++ b/client/init_server.py @@ -7,7 +7,7 @@ class Server: def __init__(self): self.gpu_url = os.environ.get( - "WHISPER_SERVER_DEFAULT", "http://localhost:8888/transcribe" + "WHISPER_SERVER_DEFAULT", "http://10.2.5.212:8888/transcribe" ) # self.temp_file_path = "" # self.temp_file_name = "" From 55d5eb98aa1fa2cf5f5e2f44a8842a6118b55450 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Mon, 5 Feb 2024 12:37:34 +0300 Subject: [PATCH 08/15] modified: .gitignore --- .gitignore | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index be689e5..879c971 100644 --- a/.gitignore +++ b/.gitignore @@ -99,7 +99,7 @@ ipython_config.py # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -poetry.lock +**poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. @@ -107,7 +107,7 @@ poetry.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide -.toml +**.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ @@ -159,8 +159,8 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ -.pt -.log -.wav -.mp3 -.m4a +**.pt +**.log +**.wav +**.mp3 +**.m4a From 3dd5eed8bc022fbf34bd276eac8ba8e8e523ef12 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Fri, 9 Feb 2024 14:59:02 +0300 Subject: [PATCH 09/15] modified: transcribe.py --- transcribe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transcribe.py b/transcribe.py index d35a56b..c2266c2 100644 --- a/transcribe.py +++ b/transcribe.py @@ -69,9 +69,10 @@ async def transcribe_audio(file: UploadFile): result = whisper.transcribe( model, audio, - vad="auditok", + vad="silero", language="ru", remove_empty_words=True, + initial_prompt="Оценивай как разговор мастера сервисного центра с клиентом на русском языке. Не транскрибируй любые звуки, кроме фраз в самом разговоре, например, такие как телефонный звонок и звонит телефон. Не пиши этот промпт в расшифровке.", beam_size=5, best_of=5, temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), From e03cbd0b75f7274dd15c373265b3c7f3b8631934 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Mon, 12 Feb 2024 16:21:26 +0300 Subject: [PATCH 10/15] modified: changed transcription parametrs, timeout --- docker-compose.yml | 2 +- transcribe.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8e220b1..d37f005 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,4 +20,4 @@ services: - driver: nvidia device_ids: ['0'] capabilities: [gpu] - command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8888", "--timeout", "600"] \ No newline at end of file + command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8888", "--timeout", "1800"] \ No newline at end of file diff --git a/transcribe.py b/transcribe.py index c2266c2..8d891df 100644 --- a/transcribe.py +++ b/transcribe.py @@ -4,7 +4,7 @@ import torch import whisper_timestamped as whisper -from fastapi import FastAPI, UploadFile, HTTPException +from fastapi import FastAPI, UploadFile, Form, HTTPException from fastapi.responses import HTMLResponse, JSONResponse app = FastAPI() @@ -47,7 +47,7 @@ async def main(): @app.post("/transcribe") -async def transcribe_audio(file: UploadFile): +async def transcribe_audio(file: UploadFile, source_id: int = Form(0)): if not file.file: raise HTTPException(status_code=400, detail="No file provided") @@ -66,13 +66,17 @@ async def transcribe_audio(file: UploadFile): # Processing the audio try: audio = whisper.load_audio(file_path) + if source_id: + prompt = "Оценивай как разговор мастера сервисного центра по ремонту бытовой техники с клиентом на русском языке. Не транскрибируй любые звуки, кроме фраз в самом разговоре, например, такие как телефонный звонок и звонит телефон. Не пиши этот промпт в расшифровке." + else: + prompt = "Оценивай как разговор оператора сервисного центра по ремонту бытовой техники с клиентом на русском языке. Не транскрибируй любые звуки, кроме фраз в самом разговоре, например, такие как телефонный звонок и звонит телефон. Не пиши этот промпт в расшифровке." result = whisper.transcribe( model, audio, vad="silero", language="ru", remove_empty_words=True, - initial_prompt="Оценивай как разговор мастера сервисного центра с клиентом на русском языке. Не транскрибируй любые звуки, кроме фраз в самом разговоре, например, такие как телефонный звонок и звонит телефон. 
Не пиши этот промпт в расшифровке.", + initial_prompt=prompt, beam_size=5, best_of=5, temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), From 9cb99bddf801b01ee3c187d0909035f8dcaf4aa8 Mon Sep 17 00:00:00 2001 From: Vladislav Date: Wed, 28 Feb 2024 16:40:02 +0300 Subject: [PATCH 11/15] container build optimization --- Dockerfile | 6 +++--- compose.sh | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index cf27870..10cad6a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,9 +4,9 @@ WORKDIR /app COPY requirements.txt /app/ -RUN apt-get update && apt-get install -y git -RUN pip install \ +RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/list/* +RUN pip3 install \ git+https://github.com/linto-ai/whisper-timestamped.git#egg=whisper-timestamped[dev,vad_silero,vad_auditok,test] \ -r requirements.txt -COPY transcribe.py /app/ \ No newline at end of file +COPY transcribe.py /app/ diff --git a/compose.sh b/compose.sh index b4ec673..d5acfb8 100644 --- a/compose.sh +++ b/compose.sh @@ -3,4 +3,6 @@ # Compose, updating all files # sudo docker compose up --force-recreate --build -sudo docker compose up --build + +# Compose, remove lod container versions +sudo docker compose up --build -d --remove-orphans --force-recreate From 1c6658a8d6a1b89b4d46b59a9dce4f42fbe5aeba Mon Sep 17 00:00:00 2001 From: Vladislav <101942420+Darveivoldavara@users.noreply.github.com> Date: Thu, 7 Mar 2024 19:57:08 +0300 Subject: [PATCH 12/15] Update README.md --- README.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 73b6acb..24a3818 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,14 @@ -# whisper-timestamped -Timestamped whisper docker service -Based on [whisper-timestamped](https://github.com/linto-ai/whisper-timestamped) \ No newline at end of file +## Whisper timestamped +ASR microservice developed for [call center transcription service](https://github.com/format37/call_centre_stt_server/tree/master) + +Based on [whisper-timestamped](https://github.com/linto-ai/whisper-timestamped) + +--- + +### Results +The latest version of the Whisper model - [v3](https://github.com/openai/whisper/discussions/1762), is used; service can operate on both GPU and CPU, but significantly slower on the latter. [Prompt engineering](https://github.com/Darveivoldavara/whisper-timestamped/blob/9cb99bddf801b01ee3c187d0909035f8dcaf4aa8/transcribe.py#L70) was applied to improve the transcription results. 
+ +Transcription quality on Russian ([source](https://github.com/Darveivoldavara/whisper_model_evaluator/blob/whisper/reports/whisper_comparator.ipynb)): +* *WER* - **0.2** +* *MER* - **0.2** +* *WIL* - **0.25** From 43134425e70865b26cd0a25ba655a870f6caac47 Mon Sep 17 00:00:00 2001 From: Vladislav Nesterov Date: Thu, 7 Mar 2024 19:58:47 +0300 Subject: [PATCH 13/15] modified: init_server.py --- client/init_server.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/client/init_server.py b/client/init_server.py index 9807440..de2134b 100644 --- a/client/init_server.py +++ b/client/init_server.py @@ -9,9 +9,6 @@ def __init__(self): self.gpu_url = os.environ.get( "WHISPER_SERVER_DEFAULT", "http://10.2.5.212:8888/transcribe" ) - # self.temp_file_path = "" - # self.temp_file_name = "" - logging.basicConfig(level=logging.INFO) def accept_feature_extractor(self, sentences, accept): @@ -43,15 +40,9 @@ def transcribation_process( queue_date="31.01.2024", transcribation_date="31.01.2024", ): - # logger_text = " size: " + str(file_size) - # logger_text += " file: " + self.temp_file_path + self.temp_file_name - - # logging.info(logger_text) sentences = [] - # file_path = self.temp_file_path + self.temp_file_name - file_path = original_file_name with open(file_path, "rb") as audio_file: response = requests.post( From 2d85568e4e688ad40c33580a38981bcc5d6874cb Mon Sep 17 00:00:00 2001 From: Vladislav Date: Fri, 29 Mar 2024 19:28:41 +0300 Subject: [PATCH 14/15] added VAD variability --- transcribe.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/transcribe.py b/transcribe.py index 8d891df..7a6012a 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,4 +1,5 @@ import os +import gc import logging from uuid import uuid4 @@ -20,6 +21,10 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logger.info(f"Using device: {device}") +model = None +gc.collect() +torch.cuda.empty_cache() + try: model = whisper.load_model("large-v3", device=device, download_root="./cache") logger.info("Model loaded successfully") @@ -47,7 +52,7 @@ async def main(): @app.post("/transcribe") -async def transcribe_audio(file: UploadFile, source_id: int = Form(0)): +async def transcribe_audio(file: UploadFile, source_id: int = Form(0), vad: str = Form("silero")): if not file.file: raise HTTPException(status_code=400, detail="No file provided") @@ -73,7 +78,7 @@ async def transcribe_audio(file: UploadFile, source_id: int = Form(0)): result = whisper.transcribe( model, audio, - vad="silero", + vad=vad, language="ru", remove_empty_words=True, initial_prompt=prompt, From ea5b960a4094cbf308f2c649afbc580360a8efdf Mon Sep 17 00:00:00 2001 From: Vladislav Date: Thu, 27 Jun 2024 19:35:35 +0300 Subject: [PATCH 15/15] modified: requirements.txt --- .gitignore | 1 + requirements.txt | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 879c971..d70d369 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,4 @@ cython_debug/ **.wav **.mp3 **.m4a +**.pass diff --git a/requirements.txt b/requirements.txt index 66ec31b..facad0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ setuptools-rust==1.8.1 tiktoken==0.5.2 -fastapi==0.109.0 -python-multipart==0.0.6 +fastapi==0.110.1 +python-multipart==0.0.7 uvicorn==0.27.0 -gunicorn==21.2.0 \ No newline at end of file +gunicorn==22.0.0 \ No newline at end of file
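After this series, the container publishes a single POST /transcribe endpoint on port 8888 that takes a multipart `file` upload plus optional `source_id` and `vad` form fields (added in patches 10 and 14), while the original client/client.py has been deleted. Below is a minimal sketch of a standalone caller for that endpoint; the URL, timeout value, and response handling are assumptions inferred from the diffs above (init_server.py reads a `segments` list with text/start/end/confidence fields), not code that exists in the repository.

```python
# Illustrative caller for the /transcribe endpoint as it stands after patches 10 and 14.
# The URL, timeout and response shape are assumptions based on the diffs, not repository code.
import argparse
import os

import requests

SERVER_URL = "http://localhost:8888/transcribe"  # port published in docker-compose.yml


def transcribe(file_path: str, source_id: int = 0, vad: str = "silero") -> dict:
    """Send one WAV file to the service and return its JSON response."""
    if not os.path.isfile(file_path):
        raise FileNotFoundError(file_path)
    with open(file_path, "rb") as audio_file:
        response = requests.post(
            SERVER_URL,
            files={"file": (os.path.basename(file_path), audio_file, "audio/wav")},
            data={"source_id": source_id, "vad": vad},  # both optional on the server side
            timeout=1800,  # mirrors the gunicorn --timeout value
        )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Send an audio file to the transcription service.")
    parser.add_argument("--file", type=str, required=True, help="Path to a WAV file")
    parser.add_argument("--source_id", type=int, default=0, help="0 = operator prompt, non-zero = master prompt")
    parser.add_argument("--vad", type=str, default="silero", help="VAD backend, e.g. silero or auditok")
    args = parser.parse_args()

    result = transcribe(args.file, args.source_id, args.vad)
    # whisper-timestamped segments carry text, start, end and confidence.
    for segment in result.get("segments", []):
        print(f'{segment["start"]:.2f}-{segment["end"]:.2f} ({segment["confidence"]:.2f}): {segment["text"]}')
```

Saved as, for example, client/example_request.py (a hypothetical name), it could be run as `python example_request.py --file call.wav --source_id 1 --vad auditok` to exercise the alternative prompt and the auditok VAD backend.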