Skip to content

Commit

Permalink
Merge branch 'prod' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Darveivoldavara committed Jun 27, 2024
2 parents b4e8e3a + ea5b960 commit 1abf851
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 146 deletions.
14 changes: 7 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ ipython_config.py
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
poetry.lock
**poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.toml
**.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
Expand Down Expand Up @@ -159,9 +159,9 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

**.pt
**.log
**.wav
**.mp3
**.m4a
**.pass
**..pt
**..log
**..wav
**..mp3
**..m4a
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ WORKDIR /app

COPY requirements.txt /app/

RUN apt-get update && apt-get install -y git
RUN pip install \
# Install git (needed for the pip "git+https" requirement) and drop the apt
# package index afterwards so it does not end up in the image layer.
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
RUN pip3 install \
git+https://github.com/linto-ai/whisper-timestamped.git#egg=whisper-timestamped[dev,vad_silero,vad_auditok,test] \
-r requirements.txt

COPY transcribe.py /app/
COPY transcribe.py /app/
30 changes: 0 additions & 30 deletions client/client.py

This file was deleted.

1 change: 0 additions & 1 deletion client/convert.sh

This file was deleted.

121 changes: 121 additions & 0 deletions client/init_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import os
import argparse
import requests
import logging


class Server:
    """Client that sends audio files to the transcription HTTP endpoint.

    The endpoint URL is read from the ``WHISPER_SERVER_DEFAULT`` environment
    variable and falls back to a hard-coded address when it is not set.
    """

    def __init__(self):
        """Resolve the endpoint URL and configure root logging at INFO level."""
        self.gpu_url = os.environ.get(
            "WHISPER_SERVER_DEFAULT", "http://10.2.5.212:8888/transcribe"
        )
        logging.basicConfig(level=logging.INFO)

    def accept_feature_extractor(self, sentences, accept):
        """Append one record per segment of ``accept`` to ``sentences``.

        Args:
            sentences: List that is extended in place with dicts holding the
                keys ``text``, ``start``, ``end`` and ``confidence``.
            accept: Decoded JSON payload returned by the server. It is
                expected to be a dict with a non-empty ``text`` entry and a
                ``segments`` list.

        Nothing is appended when ``accept`` is not a dict, when its ``text``
        is missing or empty, or when it carries no segments.
        """
        # Guard against payloads without usable text instead of indexing
        # blindly: a missing key must not abort the whole transcription.
        if not isinstance(accept, dict) or not accept.get("text"):
            return

        for segment in accept.get("segments") or []:
            sentences.append(
                {
                    "text": str(segment["text"]),
                    "start": segment["start"],
                    "end": segment["end"],
                    "confidence": float(segment["confidence"]),
                }
            )

    def transcribation_process(
        self,
        original_file_name,
        duration=0,
        side=True,
        rec_date="31.01.2024",
        src=1,
        dst=2,
        linkedid=3,
        file_size=0,
        queue_date="31.01.2024",
        transcribation_date="31.01.2024",
    ):
        """Upload an audio file, collect its segments and save each of them.

        Args:
            original_file_name: Path of the audio file to upload.
            duration, side, rec_date, src, dst, linkedid, file_size,
            queue_date, transcribation_date: Metadata that is forwarded
                unchanged to ``save_result`` for every segment.

        Returns:
            A tuple ``(count, phrases, confidences)`` where ``phrases`` and
            ``confidences`` are parallel lists with one entry per segment.
            ``(0, [], [])`` is returned when the server does not answer with
            status 200 or when its body is not valid JSON.
        """
        sentences = []

        file_path = original_file_name
        with open(file_path, "rb") as audio_file:
            # NOTE(review): no timeout is passed, so this call blocks until
            # the server answers.
            response = requests.post(
                self.gpu_url,
                files={"file": (os.path.basename(file_path), audio_file, "audio/wav")},
            )

        if response.status_code != 200:
            logging.error("Error in file processing: %s", response.text)
            return 0, [], []

        try:
            accept = response.json()
        except ValueError:
            # A 200 response with an undecodable body is treated like any
            # other processing error rather than crashing the caller.
            logging.error("Error in file processing: %s", response.text)
            return 0, [], []

        self.accept_feature_extractor(sentences, accept)

        for sentence in sentences:
            self.save_result(
                original_file_name,
                duration,
                sentence["text"],
                sentence["start"],
                sentence["end"],
                side,
                transcribation_date,
                str(sentence["confidence"]),
                rec_date,
                src,
                dst,
                linkedid,
                file_size,
                queue_date,
            )

        phrases = [sentence["text"] for sentence in sentences]
        confidences = [sentence["confidence"] for sentence in sentences]

        return len(sentences), phrases, confidences

    def save_result(
        self,
        original_file_name,
        duration,
        accept_text,
        accept_start,
        accept_end,
        side,
        transcribation_date,
        conf_mid,
        rec_date,
        src,
        dst,
        linkedid,
        file_size,
        queue_date,
    ):
        """Report one transcribed segment.

        Only ``accept_text`` is used: the method logs a start message and
        prints the text. The remaining parameters are accepted but unused.
        """
        logging.info("save result start")
        print("=== save_result", accept_text)


def main():
    """Command-line entry point: transcribe the file given via ``--file``."""
    cli = argparse.ArgumentParser(
        description="Send an audio file to the FastAPI server for processing."
    )
    cli.add_argument(
        "--file", type=str, required=True, help="File path of the audio file"
    )
    options = cli.parse_args()

    # Only the segment count is reported; the phrases and confidences are
    # unpacked but not used here.
    client = Server()
    outcome = client.transcribation_process(original_file_name=options.file)
    sentence_count, _phrases, _confidences = outcome
    print(f"Processed {sentence_count} sentences.")


if __name__ == "__main__":
    main()
4 changes: 3 additions & 1 deletion compose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@

# Compose, updating all files
# sudo docker compose up --force-recreate --build
sudo docker compose up --build

# Compose, remove old container versions
sudo docker compose up --build -d --remove-orphans --force-recreate
5 changes: 2 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ services:
whisper-timestamped:
container_name: whisper-timestamped
ports:
- "8000:8000"
- "8888:8888"
restart: unless-stopped
build:
context: .
dockerfile: Dockerfile
volumes:
- ./input/:/app/input
- ./output:/app/output
- ./cache:/app/cache
- ./logs:/app/logs
deploy:
Expand All @@ -21,4 +20,4 @@ services:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000", "--timeout", "240"]
command: ["gunicorn", "transcribe:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8888", "--timeout", "1800"]
8 changes: 3 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
setuptools-rust==1.8.1
tiktoken==0.5.2
pandas==2.2.0
pyarrow==15.0.0
fastapi==0.109.0
python-multipart==0.0.6
fastapi==0.110.1
python-multipart==0.0.7
uvicorn==0.27.0
gunicorn==21.2.0
gunicorn==22.0.0
Loading

0 comments on commit 1abf851

Please sign in to comment.