Skip to content

Commit 169f48a

Browse files
committed
Merge branch 'debug-parallel'
2 parents d0888e0 + 863c237 commit 169f48a

File tree

6 files changed

+286
-142
lines changed

6 files changed

+286
-142
lines changed

python/scripts/batch_decode.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"""
1717

1818
import json
19+
import random
1920
import traceback
2021

2122
from typing import List
@@ -67,7 +68,7 @@ def transcribe_audio(audio_stream, model: str, language_code: str, sample_rate=8
6768
data_bytes=len(audio_stream)
6869
)
6970

70-
response = client.recognize(config, audio, uuid="", timeout=1000)
71+
response = client.recognize(config, audio, uuid=str(random.randint(1000, 100000)), timeout=1000)
7172
except Exception as e:
7273
print(f"error: {str(e)}")
7374
return []

python/scripts/example_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def transcribe_chunks_streaming(client, audio_chunks, model: str, language_code:
8282
data_bytes=chunk_len
8383
)
8484
audio_params = [(config(len(chunk)), RecognitionAudio(content=chunk)) for chunk in audio_chunks]
85-
response = client.streaming_recognize_raw(audio_params, uuid="")
85+
response = client.streaming_recognize_raw(audio_params, uuid=str(random.randint(1000, 100000)))
8686
else:
8787
audio = (RecognitionAudio(content=chunk) for chunk in audio_chunks)
8888
config = RecognitionConfig(
@@ -93,7 +93,7 @@ def transcribe_chunks_streaming(client, audio_chunks, model: str, language_code:
9393
model=model,
9494
word_level=word_level
9595
)
96-
response = client.streaming_recognize(config, audio, uuid="")
96+
response = client.streaming_recognize(config, audio, uuid=str(random.randint(1000, 100000)))
9797
except Exception as e:
9898
traceback.print_exc()
9999
print(f'error: {str(e)}')
@@ -125,7 +125,7 @@ def audio_params_gen(audio_chunks):
125125
for chunk in audio_chunks:
126126
yield config(len(chunk)), RecognitionAudio(content=chunk)
127127

128-
response_gen = client.bidi_streaming_recognize_raw(audio_params_gen(audio_chunks), uuid="")
128+
response_gen = client.bidi_streaming_recognize_raw(audio_params_gen(audio_chunks), uuid=str(random.randint(1000, 100000)))
129129
else:
130130
config = RecognitionConfig(
131131
sample_rate_hertz=sample_rate,
@@ -140,7 +140,7 @@ def audio_chunks_gen(audio_chunks):
140140
for chunk in audio_chunks:
141141
yield RecognitionAudio(content=chunk)
142142

143-
response_gen = client.bidi_streaming_recognize(config, audio_chunks_gen(audio_chunks), uuid="")
143+
response_gen = client.bidi_streaming_recognize(config, audio_chunks_gen(audio_chunks), uuid=str(random.randint(1000, 100000)))
144144
except Exception as e:
145145
traceback.print_exc()
146146
print(f'error: {str(e)}')

python/scripts/parse_logs.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Script for parsing server debug logs.
3+
4+
Usage:
5+
parse_logs.py <logs-file> <metrics-csv>
6+
"""
7+
8+
from docopt import docopt
9+
10+
import pandas as pd
11+
12+
13+
def main(args):
    """Convert a server debug log into a CSV of per-chunk metrics.

    ``args`` is the mapping of command-line arguments; the log path is read
    from ``"<logs-file>"`` and the CSV destination from ``"<metrics-csv>"``.
    """
    metrics = parse_logs(args["<logs-file>"])
    destination = args["<metrics-csv>"]
    # index=False keeps the DataFrame's row index out of the CSV.
    metrics.to_csv(destination, index=False)
16+
17+
18+
def parse_logs(logs_file):
    """Extract per-chunk timing metrics from a server debug log.

    The parser relies on the following line layout:

    * A line containing ``"decoder acquired"`` has a ``uuid:`` token that is
      immediately followed by the request id.
    * A line containing both ``"chunk"`` and ``"received"`` has a ``chunk``
      token that is immediately followed by the chunk identifier.
    * Among the lines that mention a given request id, the three lines right
      after a chunk's first mention end with the ``read``, ``accepted`` and
      ``decoded`` timings, and the second line that mentions the chunk ends
      with the ``computed`` timing. Timings are the last whitespace-separated
      token of the line, with an optional ``ms`` suffix.

    Args:
        logs_file: Path to the UTF-8 encoded log file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``uuid``, ``chunk#``, ``read``,
        ``accepted``, ``decoded`` and ``computed``; one row per (uuid, chunk).
        Rows are ordered by first appearance of the uuid in the log and then
        by chunk number.

    Raises:
        IndexError: If the lines expected after a chunk's first mention, or
            its second mention, are missing (e.g. a truncated log).
        ValueError: If an expected ``uuid:``/``chunk`` token is absent or a
            timing token is not a number.
    """
    with open(logs_file, "r", encoding="utf-8") as f:
        logs = f.read().split("\n")

    # dict keys de-duplicate while preserving first-seen order, so the output
    # row order is reproducible (a set would reorder ids from run to run).
    seen_uuids = {}
    for log in logs:
        if "decoder acquired" in log:
            parts = log.split()
            seen_uuids[parts[parts.index("uuid:") + 1]] = None

    columns = ["uuid", "chunk#", "read", "accepted", "decoded", "computed"]
    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    rows = []

    for uuid in seen_uuids:
        # Whole-token match: a plain substring test would let id "1000" also
        # pick up the lines that belong to id "10000".
        uuid_pattern = _token_pattern(uuid)
        l_logs = [log for log in logs if uuid_pattern.search(log)]

        chunks = set()
        for log in l_logs:
            if "chunk" in log and "received" in log:
                parts = log.split()
                chunks.add("chunk " + parts[parts.index("chunk") + 1])

        for chunk in sorted(chunks, key=_chunk_sort_key):
            # Same whole-token rule so "chunk 1" does not match "chunk 10".
            chunk_pattern = _token_pattern(chunk)
            hits = [i for i, log in enumerate(l_logs) if chunk_pattern.search(log)]
            idx = hits[0]

            rows.append({
                "uuid": uuid,
                "chunk#": chunk,
                "read": _trailing_ms(l_logs[idx + 1]),
                "accepted": _trailing_ms(l_logs[idx + 2]),
                "decoded": _trailing_ms(l_logs[idx + 3]),
                "computed": _trailing_ms(l_logs[hits[1]]),
            })

    return pd.DataFrame(rows, columns=columns)


def _token_pattern(token):
    """Compile a regex matching *token* only when not glued to other alphanumerics."""
    import re  # local import: this module's top-level imports do not include re

    # Only guard an edge that is itself alphanumeric, so tokens carrying
    # punctuation (e.g. "1:") still match exactly as written.
    prefix = r"(?<![0-9A-Za-z])" if token[:1].isalnum() else ""
    suffix = r"(?![0-9A-Za-z])" if token[-1:].isalnum() else ""
    return re.compile(prefix + re.escape(token) + suffix)


def _chunk_sort_key(chunk):
    """Order "chunk N" labels numerically, with non-numeric labels last."""
    label = chunk[len("chunk "):]
    try:
        return (0, int(label), label)
    except ValueError:
        return (1, 0, label)


def _trailing_ms(line):
    """Return the last whitespace-separated token of *line* as a float, dropping "ms"."""
    return float(line.split()[-1].replace("ms", ""))
61+
62+
63+
if __name__ == "__main__":
    # docopt derives the accepted arguments from the usage text in the
    # module docstring and returns them as a mapping.
    args = docopt(__doc__)
    main(args)

0 commit comments

Comments
 (0)