diff --git a/openadapt/alembic/versions/98c8851a5321_add_audio_info.py b/openadapt/alembic/versions/98c8851a5321_add_audio_info.py new file mode 100644 index 000000000..a3db85869 --- /dev/null +++ b/openadapt/alembic/versions/98c8851a5321_add_audio_info.py @@ -0,0 +1,53 @@ +"""add_audio_info + +Revision ID: 98c8851a5321 +Revises: d714cc86fce8 +Create Date: 2024-05-29 16:56:25.832333 + +""" +from alembic import op +import sqlalchemy as sa + +import openadapt + +# revision identifiers, used by Alembic. +revision = "98c8851a5321" +down_revision = "d714cc86fce8" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "audio_info", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "timestamp", + openadapt.models.ForceFloat(precision=10, scale=2, asdecimal=False), + nullable=True, + ), + sa.Column("flac_data", sa.LargeBinary(), nullable=True), + sa.Column("transcribed_text", sa.String(), nullable=True), + sa.Column( + "recording_timestamp", + openadapt.models.ForceFloat(precision=10, scale=2, asdecimal=False), + nullable=True, + ), + sa.Column("recording_id", sa.Integer(), nullable=True), + sa.Column("sample_rate", sa.Integer(), nullable=True), + sa.Column("words_with_timestamps", sa.Text(), nullable=True), + sa.ForeignKeyConstraint( + ["recording_id"], + ["recording.id"], + name=op.f("fk_audio_info_recording_id_recording"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_audio_info")), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table("audio_info") + # ### end Alembic commands ### diff --git a/openadapt/app/dashboard/api/recordings.py b/openadapt/app/dashboard/api/recordings.py index 79ee5b9e0..2f349650e 100644 --- a/openadapt/app/dashboard/api/recordings.py +++ b/openadapt/app/dashboard/api/recordings.py @@ -1,5 +1,7 @@ """API endpoints for recordings.""" +import json + from fastapi import APIRouter, WebSocket from loguru import logger @@ -80,6 +82,22 @@ async def get_recording_detail(websocket: WebSocket, recording_id: int) -> None: {"type": "num_events", "value": len(action_events)} ) + try: + # TODO: change to use recording_id once scrubbing PR is merged + audio_info = crud.get_audio_info(session, recording.timestamp)[0] + words_with_timestamps = json.loads(audio_info.words_with_timestamps) + words_with_timestamps = [ + { + "word": word["word"], + "start": word["start"] + action_events[0].timestamp, + "end": word["end"] + action_events[0].timestamp, + } + for word in words_with_timestamps + ] + except IndexError: + words_with_timestamps = [] + word_index = 0 + def convert_to_str(event_dict: dict) -> dict: """Convert the keys to strings.""" if "key" in event_dict: @@ -104,7 +122,18 @@ def convert_to_str(event_dict: dict) -> dict: width, height = 0, 0 event_dict["screenshot"] = image event_dict["dimensions"] = {"width": width, "height": height} - + words = [] + # each word in words_with_timestamp is a dict of word, start, end + # we want to add the word to the event_dict if the start is + # before the event timestamp + while ( + word_index < len(words_with_timestamps) + and words_with_timestamps[word_index]["start"] + < event_dict["timestamp"] + ): + words.append(words_with_timestamps[word_index]["word"]) + word_index += 1 + event_dict["words"] = words convert_to_str(event_dict) await websocket.send_json({"type": "action_event", "value": event_dict}) diff --git a/openadapt/app/dashboard/components/ActionEvent/ActionEvent.tsx b/openadapt/app/dashboard/components/ActionEvent/ActionEvent.tsx index 1b54a5273..dd05f6be2 100644 --- a/openadapt/app/dashboard/components/ActionEvent/ActionEvent.tsx +++ b/openadapt/app/dashboard/components/ActionEvent/ActionEvent.tsx @@ -122,6 +122,12 @@ export const ActionEvent = ({ {event.parent_id} )} + {event.words && event.words.length > 0 && ( + + transcription + {event.words.join(' ')} + + )} children {event.children?.length || 0} diff --git a/openadapt/app/dashboard/types/action-event.ts b/openadapt/app/dashboard/types/action-event.ts index 86b358189..c7a8faef6 100644 --- a/openadapt/app/dashboard/types/action-event.ts +++ b/openadapt/app/dashboard/types/action-event.ts @@ -26,4 +26,5 @@ export type ActionEvent = { mask: string | null; dimensions?: { width: number, height: number }; children?: ActionEvent[]; + words?: string[]; } diff --git a/openadapt/app/tray.py b/openadapt/app/tray.py index 3b235a2c1..7e803a446 100644 --- a/openadapt/app/tray.py +++ b/openadapt/app/tray.py @@ -76,10 +76,6 @@ def __init__(self) -> None: self.app.setQuitOnLastWindowClosed(False) - # since the lock is a file, delete it when starting the app so that - # new instances can start even if the previous one crashed - crud.release_db_lock(raise_exception=False) - # currently required for pyqttoast # TODO: remove once https://github.com/niklashenning/pyqt-toast/issues/9 # is addressed diff --git a/openadapt/config.defaults.json b/openadapt/config.defaults.json index 1e06afee0..742b02fec 100644 --- a/openadapt/config.defaults.json +++ b/openadapt/config.defaults.json @@ -19,6 +19,7 @@ "RECORD_READ_ACTIVE_ELEMENT_STATE": false, "REPLAY_STRIP_ELEMENT_STATE": true, "RECORD_VIDEO": true, + "RECORD_AUDIO": true, "RECORD_FULL_VIDEO": false, "RECORD_IMAGES": false, "LOG_MEMORY": false, diff --git a/openadapt/config.py b/openadapt/config.py index a0cdc3813..c739f1f7c 100644 --- a/openadapt/config.py +++ b/openadapt/config.py @@ -29,6 +29,7 @@ PERFORMANCE_PLOTS_DIR_PATH = (DATA_DIR_PATH / "performance").absolute() CAPTURE_DIR_PATH = (DATA_DIR_PATH / "captures").absolute() VIDEO_DIR_PATH = DATA_DIR_PATH / "videos" +DATABASE_LOCK_FILE_PATH = DATA_DIR_PATH / "openadapt.db.lock" STOP_STRS = [ "oa.stop", @@ -136,6 +137,7 @@ class SegmentationAdapter(str, Enum): RECORD_WINDOW_DATA: bool = False RECORD_READ_ACTIVE_ELEMENT_STATE: bool = False RECORD_VIDEO: bool + RECORD_AUDIO: bool # if false, only write video events corresponding to screenshots RECORD_FULL_VIDEO: bool RECORD_IMAGES: bool diff --git a/openadapt/db/crud.py b/openadapt/db/crud.py index 6af5a8ef5..39b97f7bb 100644 --- a/openadapt/db/crud.py +++ b/openadapt/db/crud.py @@ -11,13 +11,15 @@ from loguru import logger from sqlalchemy.orm import Session as SaSession +import psutil import sqlalchemy as sa from openadapt import utils -from openadapt.config import DATA_DIR_PATH, config +from openadapt.config import DATABASE_LOCK_FILE_PATH, config from openadapt.db.db import Session, get_read_only_session_maker from openadapt.models import ( ActionEvent, + AudioInfo, MemoryStat, PerformanceStat, Recording, @@ -618,6 +620,56 @@ def update_video_start_time( ) +def insert_audio_info( + session: SaSession, + audio_data: bytes, + transcribed_text: str, + recording: Recording, + timestamp: float, + sample_rate: int, + word_list: list, +) -> None: + """Create an AudioInfo entry in the database. + + Args: + session (sa.orm.Session): The database session. + audio_data (bytes): The audio data. + transcribed_text (str): The transcribed text. + recording (Recording): The recording object. + timestamp (float): The timestamp of the audio. + sample_rate (int): The sample rate of the audio. + word_list (list): A list of words with timestamps. + """ + audio_info = AudioInfo( + flac_data=audio_data, + transcribed_text=transcribed_text, + recording_timestamp=recording.timestamp, + recording_id=recording.id, + timestamp=timestamp, + sample_rate=sample_rate, + words_with_timestamps=json.dumps(word_list), + ) + session.add(audio_info) + session.commit() + + +# TODO: change to use recording_id once scrubbing PR is merged +def get_audio_info( + session: SaSession, + recording_timestamp: float, +) -> list[AudioInfo]: + """Get the audio info for a given recording. + + Args: + session (sa.orm.Session): The database session. + recording_timestamp (float): The timestamp of the recording. + + Returns: + list[AudioInfo]: A list of audio info for the recording. + """ + return _get(session, AudioInfo, recording_timestamp) + + def post_process_events(session: SaSession, recording: Recording) -> None: """Post-process events. @@ -764,11 +816,17 @@ def acquire_db_lock(timeout: int = 60) -> bool: if timeout > 0 and time.time() - start > timeout: logger.error("Failed to acquire database lock.") return False - if os.path.exists(DATA_DIR_PATH / "database.lock"): - logger.info("Database is locked. Waiting...") - time.sleep(1) + if os.path.exists(DATABASE_LOCK_FILE_PATH): + with open(DATABASE_LOCK_FILE_PATH, "r") as lock_file: + lock_info = json.load(lock_file) + # check if the process is still running + if psutil.pid_exists(lock_info["pid"]): + logger.info("Database is locked. Waiting...") + time.sleep(1) + else: + release_db_lock(raise_exception=False) else: - with open(DATA_DIR_PATH / "database.lock", "w") as lock_file: + with open(DATABASE_LOCK_FILE_PATH, "w") as lock_file: lock_file.write(json.dumps({"pid": os.getpid(), "time": time.time()})) logger.info("Database lock acquired.") break @@ -778,7 +836,7 @@ def acquire_db_lock(timeout: int = 60) -> bool: def release_db_lock(raise_exception: bool = True) -> None: """Release the database lock.""" try: - os.remove(DATA_DIR_PATH / "database.lock") + os.remove(DATABASE_LOCK_FILE_PATH) except Exception as e: if raise_exception: logger.error("Failed to release database lock.") diff --git a/openadapt/models.py b/openadapt/models.py index 9133b041a..ac299ff56 100644 --- a/openadapt/models.py +++ b/openadapt/models.py @@ -81,6 +81,7 @@ class Recording(db.Base): "ScrubbedRecording", back_populates="recording", ) + audio_info = sa.orm.relationship("AudioInfo", back_populates="recording") _processed_action_events = None @@ -723,6 +724,23 @@ def convert_png_to_binary(self, image: Image.Image) -> bytes: return buffer.getvalue() +class AudioInfo(db.Base): + """Class representing the audio from a recording in the database.""" + + __tablename__ = "audio_info" + + id = sa.Column(sa.Integer, primary_key=True) + timestamp = sa.Column(ForceFloat) + flac_data = sa.Column(sa.LargeBinary) + transcribed_text = sa.Column(sa.String) + recording_timestamp = sa.Column(ForceFloat) + recording_id = sa.Column(sa.ForeignKey("recording.id")) + sample_rate = sa.Column(sa.Integer) + words_with_timestamps = sa.Column(sa.Text) + + recording = sa.orm.relationship("Recording", back_populates="audio_info") + + class PerformanceStat(db.Base): """Class representing a performance statistic in the database.""" diff --git a/openadapt/record.py b/openadapt/record.py index 485b39ee9..093ec0346 100644 --- a/openadapt/record.py +++ b/openadapt/record.py @@ -31,7 +31,11 @@ from tqdm import tqdm import fire +import numpy as np import psutil +import sounddevice +import soundfile +import whisper from openadapt import utils, video, window from openadapt.config import config @@ -988,6 +992,107 @@ def read_mouse_events( mouse_listener.stop() +def record_audio( + recording: Recording, + terminate_processing: multiprocessing.Event, + started_counter: multiprocessing.Value, +) -> None: + """Record audio narration during the recording and store data in database. + + Args: + recording: The recording object. + terminate_processing: An event to signal the termination of the process. + started_counter: Value to increment once started. + """ + utils.configure_logging(logger, LOG_LEVEL) + utils.set_start_time(recording.timestamp) + + signal.signal(signal.SIGINT, signal.SIG_IGN) + + audio_frames = [] # to store audio frames + + def audio_callback( + indata: np.ndarray, frames: int, time: Any, status: sounddevice.CallbackFlags + ) -> None: + """Callback function used when new audio frames are recorded. + + Note: time is of type cffi.FFI.CData, but since we don't use this argument + and we also don't use the cffi library, the Any type annotation is used. + """ + # called whenever there is new audio frames + audio_frames.append(indata.copy()) + + # open InputStream and start recording while ActionEvents are recorded + audio_stream = sounddevice.InputStream( + callback=audio_callback, samplerate=16000, channels=1 + ) + logger.info("Audio recording started.") + start_timestamp = utils.get_timestamp() + audio_stream.start() + + # NOTE: listener may not have actually started by now + # TODO: handle race condition, e.g. by sending synthetic events from main thread + with started_counter.get_lock(): + started_counter.value += 1 + + terminate_processing.wait() + audio_stream.stop() + audio_stream.close() + + # Concatenate into one Numpy array + concatenated_audio = np.concatenate(audio_frames, axis=0) + # convert concatenated_audio to format expected by whisper + converted_audio = concatenated_audio.flatten().astype(np.float32) + + # Convert audio to text using OpenAI's Whisper + logger.info("Transcribing audio...") + model = whisper.load_model("base") + result_info = model.transcribe(converted_audio, word_timestamps=True, fp16=False) + logger.info(f"The narrated text is: {result_info['text']}") + # empty word_list if the user didn't say anything + word_list = [] + # segments could be empty + if len(result_info["segments"]) > 0: + # there won't be a 'words' list if the user didn't say anything + if "words" in result_info["segments"][0]: + word_list = result_info["segments"][0]["words"] + + # compress and convert to bytes to save to database + logger.info( + "Size of uncompressed audio data: {} bytes".format(converted_audio.nbytes) + ) + # Create an in-memory file-like object + file_obj = io.BytesIO() + # Write the audio data using lossless compression + soundfile.write( + file_obj, converted_audio, int(audio_stream.samplerate), format="FLAC" + ) + # Get the compressed audio data as bytes + compressed_audio_bytes = file_obj.getvalue() + + logger.info( + "Size of compressed audio data: {} bytes".format(len(compressed_audio_bytes)) + ) + + file_obj.close() + + # To decompress the audio and restore it to its original form: + # restored_audio, restored_samplerate = sf.read( + # io.BytesIO(compressed_audio_bytes)) + + with crud.get_new_session(read_and_write=True) as session: + # Create AudioInfo entry + crud.insert_audio_info( + session, + compressed_audio_bytes, + result_info["text"], + recording, + start_timestamp, + int(audio_stream.samplerate), + word_list, + ) + + @logger.catch @utils.trace(logger) def record( @@ -1159,6 +1264,18 @@ def record( ) video_writer.start() + if config.RECORD_AUDIO: + expected_starts += 1 + audio_recorder = multiprocessing.Process( + target=record_audio, + args=( + recording, + terminate_processing, + started_counter, + ), + ) + audio_recorder.start() + terminate_perf_event = multiprocessing.Event() perf_stat_writer = multiprocessing.Process( target=performance_stats_writer, @@ -1232,6 +1349,8 @@ def record( window_event_writer.join() if config.RECORD_VIDEO: video_writer.join() + if config.RECORD_AUDIO: + audio_recorder.join() terminate_perf_event.set() if PLOT_PERFORMANCE: diff --git a/openadapt/strategies/mixins/openai.py b/openadapt/strategies/mixins/openai.py index 80fd8a13a..a2abd3e5b 100644 --- a/openadapt/strategies/mixins/openai.py +++ b/openadapt/strategies/mixins/openai.py @@ -188,50 +188,3 @@ def _get_completion(prompt: str) -> str: logger.debug(f"appending assistant_message=\n{pformat(assistant_message)}") messages.append(assistant_message) return messages - - -# XXX TODO not currently in use -# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb -def num_tokens_from_messages(messages: list, model: str = "gpt-3.5-turbo-0301") -> int: - """Returns the number of tokens used by a list of messages.""" - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logger.info("Warning: model not found. Using cl100k_base encoding.") - encoding = tiktoken.get_encoding("cl100k_base") - if model == "gpt-3.5-turbo": - logger.info( - "Warning: gpt-3.5-turbo may change over time. Returning num tokens " - "assuming gpt-3.5-turbo-0301." - ) - return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301") - elif model == "gpt-4": - logger.info( - "Warning: gpt-4 may change over time. Returning num tokens " - "assuming gpt-4-0314." - ) - return num_tokens_from_messages(messages, model="gpt-4-0314") - elif model == "gpt-3.5-turbo-0301": - tokens_per_message = ( - 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n - ) - tokens_per_name = -1 # if there's a name, the role is omitted - elif model == "gpt-4-0314": - tokens_per_message = 3 - tokens_per_name = 1 - else: - raise NotImplementedError( - f"""num_tokens_from_messages() is not implemented for model " - "{model}. See " - "https://github.com/openai/openai-python/blob/main/chatml.md for " - information on how messages are converted to tokens.""" - ) - num_tokens = 0 - for message in messages: - num_tokens += tokens_per_message - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": - num_tokens += tokens_per_name - num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> - return num_tokens diff --git a/openadapt/visualize.py b/openadapt/visualize.py index 06b8bb439..2859ee6b9 100644 --- a/openadapt/visualize.py +++ b/openadapt/visualize.py @@ -186,6 +186,10 @@ def main( logger.info(f"{recording=}") logger.info(f"{diff_video=}") + audio_info = row2dict(crud.get_audio_info(recording)) + # don't display the FLAC data + del audio_info["flac_data"] + if diff_video: assert recording.config[ "RECORD_VIDEO" diff --git a/poetry.lock b/poetry.lock index 051b8f7d5..cdbed4072 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2757,6 +2757,39 @@ files = [ [package.dependencies] rapidfuzz = ">=2.3.0,<4.0.0" +[[package]] +name = "llvmlite" +version = "0.40.1rc1" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.8" +files = [ + {file = "llvmlite-0.40.1rc1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:31b606ae4923a897fe7122fe9a75fa39713279e796b335b83cb929c5d9e8661b"}, + {file = "llvmlite-0.40.1rc1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d08de4135dd8652f46de42e795b744dcad8cc11de3b6044a7326a61636887655"}, + {file = "llvmlite-0.40.1rc1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94043bb283395963b48fa964c776670889084be5117cbc4f831ab357005365d1"}, + {file = "llvmlite-0.40.1rc1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:659b400cf61e567c5c30159f60eac8562133bf5e497f481388d22e6b5dd00044"}, + {file = "llvmlite-0.40.1rc1-cp310-cp310-win32.whl", hash = "sha256:84f5c569fdcc503a7ce5018d2115ebac3a385743774ed22c6cc8dade673eae33"}, + {file = "llvmlite-0.40.1rc1-cp310-cp310-win_amd64.whl", hash = "sha256:a775e87d6ee6f6fcdae5ead0dec171243719002fc39c500c4813babb3609f6d9"}, + {file = "llvmlite-0.40.1rc1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:239eaeef72267566538b9f4cba8a41fb3e39ac99881c2a9a8100aff60c645edb"}, + {file = "llvmlite-0.40.1rc1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b8ceb9c436acdc87c3f5ab2dd6e3d003cf938abf55d3470d059abd99dee63d3"}, + {file = "llvmlite-0.40.1rc1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0475c5107334cf528c607275e0e1cd7836c31fe07c6e45994cd02dd45a95e3b1"}, + {file = "llvmlite-0.40.1rc1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bc15e54522695ef16b5225cb40e89ef7f80d2d37cb0a8ddf3ffe3200fa238ff"}, + {file = "llvmlite-0.40.1rc1-cp311-cp311-win_amd64.whl", hash = "sha256:957c5f18726362fd2426f39997b9090c88a6a1cb11d4330b50b4946fa0c857a7"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:38bcca23eb8919279619bebb4db6946f0d3dfedd879dfe9f741041789c83e36b"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:753359a969f0606c30d3ef38988ae46c65ef2d3bcc7afb4ada0c37a2f4416a68"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fae6c6b04ec4d83b5bd3437dd4ef7a9e6d4461437e615fa0895ac355709b6f10"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f095b6c6e94fcb26705614d9da2267c739118f1e97ba6bb3ea5c9fbc77764171"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-win32.whl", hash = "sha256:92918a7c60bacebf72297b4caeca2bcf2a6cffb50362e915cc1dc202ac556586"}, + {file = "llvmlite-0.40.1rc1-cp38-cp38-win_amd64.whl", hash = "sha256:2585ea726f6cd012279ea5a0d84d999e436061dc7df67bdaea1cbae998a16f9f"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d5ad3dd8a0c600533650e14cc908874c2dbeca5ea749acfc262564f15586dc94"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2daa1de68d1bc0fd78757bb01a96c434373dca83d28460ff16b1accb1f171aff"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e9930a222cd98487dd4e8f916c3c92c0311ea294136fc4f3cd0bab6265e28b0"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86e9060e28796c0b38a73f59802d0f6af31a1bb7c6e3b766cb96237d862fe26c"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-win32.whl", hash = "sha256:b047d0e35b61dcbeaa1a86afac696c2dd9ca48430cb63638417e837cc1f0e60a"}, + {file = "llvmlite-0.40.1rc1-cp39-cp39-win_amd64.whl", hash = "sha256:da0b97219fa1053ab9a964e4703fcfca4ef6077614e7dce21de71bbbe6e4a4e9"}, + {file = "llvmlite-0.40.1rc1.tar.gz", hash = "sha256:8a6465075a0449fd802c9274130abb4f4ccf926972e84e8eac365769b7ec48fc"}, +] + [[package]] name = "loguru" version = "0.6.0" @@ -3180,6 +3213,17 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "more-itertools" +version = "10.2.0" +description = "More routines for operating on iterables, beyond itertools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "more-itertools-10.2.0.tar.gz", hash = "sha256:8fccb480c43d3e99a00087634c06dd02b0d50fbf088b380de5a41a015ec239e1"}, + {file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"}, +] + [[package]] name = "moviepy" version = "1.0.3" @@ -3503,38 +3547,78 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "numba" +version = "0.57.0" +description = "compiling Python code using LLVM" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numba-0.57.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2e2c14c411545e80bf0f1a33232fb0bd6aa3368f86e56eeffc7f6d3ac16ea3fd"}, + {file = "numba-0.57.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b3382c56d805ffcdc7b46eb69a906be733dd35b84be14abba8e5fd27d7916b2"}, + {file = "numba-0.57.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:133cba9b5002bf67f6f73d9b3050d919c1be91326bbdcccfdf3259bcfb1cec0e"}, + {file = "numba-0.57.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d92a17ee849574665c5d94e9c9b862e469e1231d3dbb9e58e58b30b4bb0cbce9"}, + {file = "numba-0.57.0-cp310-cp310-win32.whl", hash = "sha256:abc90c3d303a67ae5194770a6f0d0a83edf076683b8a426349a27b91d98e00d1"}, + {file = "numba-0.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:430f43c96f866ca4fe6008d8aa28bb608911d908ff94f879e0dbad7768ef9869"}, + {file = "numba-0.57.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:069f7d8fddad4c0eb1d7534c2a18098fe50473dc77832b409176002e9011b96f"}, + {file = "numba-0.57.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:79daa130fc9e4ebd1eea0a594d1de86d8a4366989f5fab93c482246b502520db"}, + {file = "numba-0.57.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:274f4db4814ebd5ec81697acfc36df04a865b86610d7714905185b753f3f9baf"}, + {file = "numba-0.57.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0106ee441e3f69cc6f17cb470c4fcccd592e0606567d43245635d72b071ab88e"}, + {file = "numba-0.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5d31b4d95000d86ffa9652ab5bcfa0ea30e6c3fc40e610147d4f2f00116703d"}, + {file = "numba-0.57.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3e0b8de39bf17519435937b53276dfb02e2eb8bc27cd211c8eeb01ffed1cab6b"}, + {file = "numba-0.57.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:18d90fa6fcd5b796999392a8ea67f2fbccecf8dabcea726e2e721c79f40566a6"}, + {file = "numba-0.57.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4f62528c7c8c5f97e9689fd788e420b68c67ee0a1a9a7715a57fd584b7aef1e"}, + {file = "numba-0.57.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fd12cf0b431676c08057685e229ea5daaa1ec8efba2506c38671734ace49c2d7"}, + {file = "numba-0.57.0-cp38-cp38-win32.whl", hash = "sha256:e5f11b1d435fb4d1d1b68fa68ff456d632dc4bfd40b18825ff80d6081d1afb26"}, + {file = "numba-0.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:5810ed2d6d22eb3c48bedfac2187fc44bb90e05f02d47fd31059e69207ae4106"}, + {file = "numba-0.57.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eddba74493d4003a42cd61ff7feca4928a94a45553d1fddac77a5cc339f6f4f9"}, + {file = "numba-0.57.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:110be5e1213d0a3d5fc691e921a000779500620196d99cee9908fce83d1e48df"}, + {file = "numba-0.57.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f949018ab9c467d38f14fe17db4df0d4a1c664be802189e2d6c5a434d9ffd4f6"}, + {file = "numba-0.57.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9fc0cd4ec93a1e3877985e10ed5837ed2991c83aa4b7ca574caae5c8b448cc4b"}, + {file = "numba-0.57.0-cp39-cp39-win32.whl", hash = "sha256:83d4f21c98eed3001e9896a43a1ce9c825999c03f7eb39ddd1c2d07a76708392"}, + {file = "numba-0.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:9173d00c6753212b68e4fd319cfa96c21b2263949452c97b034e78ce09539dee"}, + {file = "numba-0.57.0.tar.gz", hash = "sha256:2af6d81067a5bdc13960c6d2519dbabbf4d5d597cf75d640c5aeaefd48c6420a"}, +] + +[package.dependencies] +llvmlite = "==0.40.*" +numpy = ">=1.21,<1.25" + [[package]] name = "numpy" -version = "1.25.2" +version = "1.24.4" description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, - {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, - {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, - {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, - {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, - {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, - {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, - {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, - {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, - {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, - {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, - {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, - {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, - {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, - {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, - {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, - {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, - {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, - {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, - {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, - {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, - {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, - {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, - {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, - {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, ] [[package]] @@ -3646,6 +3730,32 @@ dev = ["black (>=21.6b0,<22.0)", "pytest (==6.*)", "pytest-asyncio", "pytest-moc embeddings = ["matplotlib", "numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "plotly", "scikit-learn (>=1.0.2)", "scipy", "tenacity (>=8.0.1)"] wandb = ["numpy", "openpyxl (>=3.0.7)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "wandb"] +[[package]] +name = "openai-whisper" +version = "20230314" +description = "Robust Speech Recognition via Large-Scale Weak Supervision" +optional = false +python-versions = ">=3.8" +files = [] +develop = false + +[package.dependencies] +more-itertools = "*" +numba = "*" +numpy = "*" +tiktoken = "0.3.3" +torch = "*" +tqdm = "*" + +[package.extras] +dev = ["black", "flake8", "isort", "pytest", "scipy"] + +[package.source] +type = "git" +url = "https://github.com/openai/whisper.git" +reference = "HEAD" +resolved_reference = "ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab" + [[package]] name = "opencv-python" version = "4.9.0.80" @@ -6300,6 +6410,49 @@ files = [ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, ] +[[package]] +name = "sounddevice" +version = "0.4.6" +description = "Play and Record Sound with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sounddevice-0.4.6-py3-none-any.whl", hash = "sha256:5de768ba6fe56ad2b5aaa2eea794b76b73e427961c95acad2ee2ed7f866a4b20"}, + {file = "sounddevice-0.4.6-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:8b0b806c205dd3e3cd5a97262b2482624fd21db7d47083b887090148a08051c8"}, + {file = "sounddevice-0.4.6-py3-none-win32.whl", hash = "sha256:e3ba6e674ffa8f79a591d744a1d4ab922fe5bdfd4faf8b25069a08e051010b7b"}, + {file = "sounddevice-0.4.6-py3-none-win_amd64.whl", hash = "sha256:7830d4f8f8570f2e5552942f81d96999c5fcd9a0b682d6fc5d5c5529df23be2c"}, + {file = "sounddevice-0.4.6.tar.gz", hash = "sha256:3236b78f15f0415bdf006a620cef073d0c0522851d66f4a961ed6d8eb1482fe9"}, +] + +[package.dependencies] +CFFI = ">=1.0" + +[package.extras] +numpy = ["NumPy"] + +[[package]] +name = "soundfile" +version = "0.12.1" +description = "An audio library based on libsndfile, CFFI and NumPy" +optional = false +python-versions = "*" +files = [ + {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"}, + {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"}, + {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"}, + {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"}, +] + +[package.dependencies] +cffi = ">=1.0" + +[package.extras] +numpy = ["numpy"] + [[package]] name = "spacy" version = "3.7.4" @@ -7000,40 +7153,40 @@ all = ["defusedxml", "fsspec", "imagecodecs (>=2023.8.12)", "lxml", "matplotlib" [[package]] name = "tiktoken" -version = "0.4.0" +version = "0.3.3" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" optional = false python-versions = ">=3.8" files = [ - {file = "tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:176cad7f053d2cc82ce7e2a7c883ccc6971840a4b5276740d0b732a2b2011f8a"}, - {file = "tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:450d504892b3ac80207700266ee87c932df8efea54e05cefe8613edc963c1285"}, - {file = "tiktoken-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d662de1e7986d129139faf15e6a6ee7665ee103440769b8dedf3e7ba6ac37f"}, - {file = "tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5727d852ead18b7927b8adf558a6f913a15c7766725b23dbe21d22e243041b28"}, - {file = "tiktoken-0.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c06cd92b09eb0404cedce3702fa866bf0d00e399439dad3f10288ddc31045422"}, - {file = "tiktoken-0.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9ec161e40ed44e4210d3b31e2ff426b4a55e8254f1023e5d2595cb60044f8ea6"}, - {file = "tiktoken-0.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:1e8fa13cf9889d2c928b9e258e9dbbbf88ab02016e4236aae76e3b4f82dd8288"}, - {file = "tiktoken-0.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bb2341836b725c60d0ab3c84970b9b5f68d4b733a7bcb80fb25967e5addb9920"}, - {file = "tiktoken-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ca30367ad750ee7d42fe80079d3092bd35bb266be7882b79c3bd159b39a17b0"}, - {file = "tiktoken-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dc3df19ddec79435bb2a94ee46f4b9560d0299c23520803d851008445671197"}, - {file = "tiktoken-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d980fa066e962ef0f4dad0222e63a484c0c993c7a47c7dafda844ca5aded1f3"}, - {file = "tiktoken-0.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:329f548a821a2f339adc9fbcfd9fc12602e4b3f8598df5593cfc09839e9ae5e4"}, - {file = "tiktoken-0.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b1a038cee487931a5caaef0a2e8520e645508cde21717eacc9af3fbda097d8bb"}, - {file = "tiktoken-0.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:08efa59468dbe23ed038c28893e2a7158d8c211c3dd07f2bbc9a30e012512f1d"}, - {file = "tiktoken-0.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3020350685e009053829c1168703c346fb32c70c57d828ca3742558e94827a9"}, - {file = "tiktoken-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba16698c42aad8190e746cd82f6a06769ac7edd415d62ba027ea1d99d958ed93"}, - {file = "tiktoken-0.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c15d9955cc18d0d7ffcc9c03dc51167aedae98542238b54a2e659bd25fe77ed"}, - {file = "tiktoken-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64e1091c7103100d5e2c6ea706f0ec9cd6dc313e6fe7775ef777f40d8c20811e"}, - {file = "tiktoken-0.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e87751b54eb7bca580126353a9cf17a8a8eaadd44edaac0e01123e1513a33281"}, - {file = "tiktoken-0.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e063b988b8ba8b66d6cc2026d937557437e79258095f52eaecfafb18a0a10c03"}, - {file = "tiktoken-0.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9c6dd439e878172dc163fced3bc7b19b9ab549c271b257599f55afc3a6a5edef"}, - {file = "tiktoken-0.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8d1d97f83697ff44466c6bef5d35b6bcdb51e0125829a9c0ed1e6e39fb9a08fb"}, - {file = "tiktoken-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1b6bce7c68aa765f666474c7c11a7aebda3816b58ecafb209afa59c799b0dd2d"}, - {file = "tiktoken-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a73286c35899ca51d8d764bc0b4d60838627ce193acb60cc88aea60bddec4fd"}, - {file = "tiktoken-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0394967d2236a60fd0aacef26646b53636423cc9c70c32f7c5124ebe86f3093"}, - {file = "tiktoken-0.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:dae2af6f03ecba5f679449fa66ed96585b2fa6accb7fd57d9649e9e398a94f44"}, - {file = "tiktoken-0.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55e251b1da3c293432179cf7c452cfa35562da286786be5a8b1ee3405c2b0dd2"}, - {file = "tiktoken-0.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:c835d0ee1f84a5aa04921717754eadbc0f0a56cf613f78dfc1cf9ad35f6c3fea"}, - {file = "tiktoken-0.4.0.tar.gz", hash = "sha256:59b20a819969735b48161ced9b92f05dc4519c17be4015cfb73b65270a243620"}, + {file = "tiktoken-0.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1f37fa75ba70c1bc7806641e8ccea1fba667d23e6341a1591ea333914c226a9"}, + {file = "tiktoken-0.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3d7296c38392a943c2ccc0b61323086b8550cef08dcf6855de9949890dbc1fd3"}, + {file = "tiktoken-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c84491965e139a905280ac28b74baaa13445b3678e07f96767089ad1ef5ee7b"}, + {file = "tiktoken-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65970d77ea85ce6c7fce45131da9258cd58a802ffb29ead8f5552e331c025b2b"}, + {file = "tiktoken-0.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bd3f72d0ba7312c25c1652292121a24c8f1711207b63c6d8dab21afe4be0bf04"}, + {file = "tiktoken-0.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:719c9e13432602dc496b24f13e3c3ad3ec0d2fbdb9aace84abfb95e9c3a425a4"}, + {file = "tiktoken-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:dc00772284c94e65045b984ed7e9f95d000034f6b2411df252011b069bd36217"}, + {file = "tiktoken-0.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db2c40f79f8f7a21a9fdbf1c6dee32dea77b0d7402355dc584a3083251d2e15"}, + {file = "tiktoken-0.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3c0f2231aa3829a1a431a882201dc27858634fd9989898e0f7d991dbc6bcc9d"}, + {file = "tiktoken-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48c13186a479de16cfa2c72bb0631fa9c518350a5b7569e4d77590f7fee96be9"}, + {file = "tiktoken-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6674e4e37ab225020135cd66a392589623d5164c6456ba28cc27505abed10d9e"}, + {file = "tiktoken-0.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4a0c1357f6191211c544f935d5aa3cb9d7abd118c8f3c7124196d5ecd029b4af"}, + {file = "tiktoken-0.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2e948d167fc3b04483cbc33426766fd742e7cefe5346cd62b0cbd7279ef59539"}, + {file = "tiktoken-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:5dca434c8680b987eacde2dbc449e9ea4526574dbf9f3d8938665f638095be82"}, + {file = "tiktoken-0.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:984758ebc07cd8c557345697c234f1f221bd730b388f4340dd08dffa50213a01"}, + {file = "tiktoken-0.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:891012f29e159a989541ae47259234fb29ff88c22e1097567316e27ad33a3734"}, + {file = "tiktoken-0.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:210f8602228e4c5d706deeb389da5a152b214966a5aa558eec87b57a1969ced5"}, + {file = "tiktoken-0.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd783564f80d4dc44ff0a64b13756ded8390ed2548549aefadbe156af9188307"}, + {file = "tiktoken-0.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:03f64bde9b4eb8338bf49c8532bfb4c3578f6a9a6979fc176d939f9e6f68b408"}, + {file = "tiktoken-0.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1ac369367b6f5e5bd80e8f9a7766ac2a9c65eda2aa856d5f3c556d924ff82986"}, + {file = "tiktoken-0.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:94600798891f78db780e5aa9321456cf355e54a4719fbd554147a628de1f163f"}, + {file = "tiktoken-0.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e59db6fca8d5ccea302fe2888917364446d6f4201a25272a1a1c44975c65406a"}, + {file = "tiktoken-0.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:19340d8ba4d6fd729b2e3a096a547ded85f71012843008f97475f9db484869ee"}, + {file = "tiktoken-0.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:542686cbc9225540e3a10f472f82fa2e1bebafce2233a211dee8459e95821cfd"}, + {file = "tiktoken-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a43612b2a09f4787c050163a216bf51123851859e9ab128ad03d2729826cde9"}, + {file = "tiktoken-0.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a11674f0275fa75fb59941b703650998bd4acb295adbd16fc8af17051aaed19d"}, + {file = "tiktoken-0.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:65fc0a449630bab28c30b4adec257442a4706d79cffc2337c1d9df3e91825cdd"}, + {file = "tiktoken-0.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:0b9a7a9a8b781a50ee9289e85e28771d7e113cc0c656eadfb6fc6d3a106ff9bb"}, + {file = "tiktoken-0.3.3.tar.gz", hash = "sha256:97b58b7bfda945791ec855e53d166e8ec20c6378942b93851a6c919ddf9d0496"}, ] [package.dependencies] @@ -8092,4 +8245,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "3.10.x" -content-hash = "e96d0e3b1ea3d3882bd117ea1aca380e47ce9658099e11602da584fe0eea46da" +content-hash = "03253442817b8bd8e787d5dbaaa99894c86a60c3ab865d78f9f6e03d55d4f5e6" diff --git a/pyproject.toml b/pyproject.toml index 0542d0795..10fa77ed9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,6 @@ rapidocr-onnxruntime = "1.2.3" scikit-learn = "1.2.2" scipy = "1.9.3" sqlalchemy = "1.4.43" -tiktoken = "0.4.0" torch = "^2.0.0" tqdm = "4.64.0" transformers = "4.29.2" @@ -100,6 +99,11 @@ imagehash = "^4.3.1" pydantic-settings = "^2.2.1" pyqt-toast-notification = "^1.1.0" pudb = "^2024.1" +llvmlite = "0.40.1rc1" +numba = "0.57.0" +openai-whisper = {git = "https://github.com/openai/whisper.git"} +sounddevice = "^0.4.6" +soundfile = "^0.12.1" [tool.pytest.ini_options] filterwarnings = [