Skip to content

Commit 4f99f00

Browse files
committed
feat: Add transcribed text to dashboard visualisation
1 parent 61d01c6 commit 4f99f00

File tree

7 files changed

+53
-2
lines changed

7 files changed

+53
-2
lines changed

openadapt/alembic/versions/98c8851a5321_add_audio_info.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ def upgrade() -> None:
2929
openadapt.models.ForceFloat(precision=10, scale=2, asdecimal=False),
3030
nullable=True,
3131
),
32+
sa.Column(
33+
"timestamp",
34+
openadapt.models.ForceFloat(precision=10, scale=2, asdecimal=False),
35+
nullable=True,
36+
),
3237
sa.Column("sample_rate", sa.Integer(), nullable=True),
3338
sa.Column("words_with_timestamps", sa.Text(), nullable=True),
3439
sa.ForeignKeyConstraint(

openadapt/app/dashboard/api/recordings.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""API endpoints for recordings."""
22

3+
import json
4+
35
from fastapi import FastAPI, WebSocket
46
from loguru import logger
57

@@ -84,6 +86,22 @@ def convert_to_str(event_dict: dict) -> dict:
8486
for child in event_dict["children"]:
8587
convert_to_str(child)
8688

89+
try:
90+
# TODO: change to use recording_id once scrubbing PR is merged
91+
audio_info = crud.get_audio_info(recording.timestamp, session)[0]
92+
words_with_timestamps = json.loads(audio_info.words_with_timestamps)
93+
words_with_timestamps = [
94+
{
95+
"word": word["word"],
96+
"start": word["start"] + action_events[0].timestamp,
97+
"end": word["end"] + action_events[0].timestamp,
98+
}
99+
for word in words_with_timestamps
100+
]
101+
except IndexError:
102+
words_with_timestamps = []
103+
word_index = 0
104+
87105
for action_event in action_events:
88106
event_dict = row2dict(action_event)
89107
try:
@@ -96,6 +114,18 @@ def convert_to_str(event_dict: dict) -> dict:
96114
width, height = 0, 0
97115
event_dict["screenshot"] = image
98116
event_dict["dimensions"] = {"width": width, "height": height}
117+
words = []
118+
# each word in words_with_timestamps is a dict of word, start, end
119+
# we want to add the word to the event_dict if the start is
120+
# before the event timestamp
121+
while (
122+
word_index < len(words_with_timestamps)
123+
and words_with_timestamps[word_index]["start"]
124+
< event_dict["timestamp"]
125+
):
126+
words.append(words_with_timestamps[word_index]["word"])
127+
word_index += 1
128+
event_dict["words"] = words
99129
convert_to_str(event_dict)
100130
await websocket.send_json(
101131
{"type": "action_event", "value": event_dict}

openadapt/app/dashboard/components/ActionEvent/ActionEvent.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ export const ActionEvent = ({
122122
<TableCellWithBorder>{event.parent_id}</TableCellWithBorder>
123123
</TableRowWithBorder>
124124
)}
125+
{event.words && event.words.length > 0 && (
126+
<TableRowWithBorder>
127+
<TableCellWithBorder>transcription</TableCellWithBorder>
128+
<TableCellWithBorder>{event.words.join(' ')}</TableCellWithBorder>
129+
</TableRowWithBorder>
130+
)}
125131
<TableRowWithBorder>
126132
<TableCellWithBorder>children</TableCellWithBorder>
127133
<TableCellWithBorder>{event.children?.length || 0}</TableCellWithBorder>

openadapt/app/dashboard/types/action-event.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,5 @@ export type ActionEvent = {
2626
mask: string | null;
2727
dimensions?: { width: number, height: number };
2828
children?: ActionEvent[];
29+
words?: string[];
2930
}

openadapt/db/crud.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ def insert_audio_info(
515515
audio_data: bytes,
516516
transcribed_text: str,
517517
recording_timestamp: float,
518+
timestamp: float,
518519
sample_rate: int,
519520
word_list: list,
520521
) -> None:
@@ -523,16 +524,21 @@ def insert_audio_info(
523524
flac_data=audio_data,
524525
transcribed_text=transcribed_text,
525526
recording_timestamp=recording_timestamp,
527+
timestamp=timestamp,
526528
sample_rate=sample_rate,
527529
words_with_timestamps=json.dumps(word_list),
528530
)
529531
db.add(audio_info)
530532
db.commit()
531533

532534

533-
def get_audio_info(recording_timestamp: float) -> list[AudioInfo]:
535+
# TODO: change to use recording_id once scrubbing PR is merged
536+
def get_audio_info(
537+
recording_timestamp: float, session: sa.orm.Session = None
538+
) -> list[AudioInfo]:
534539
"""Get the audio info for a given recording."""
535-
return _get(AudioInfo, recording_timestamp)
540+
_db = session or db
541+
return _get(AudioInfo, recording_timestamp, _db)
536542

537543

538544
async def acquire_db_lock() -> bool:

openadapt/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,7 @@ class AudioInfo(db.Base):
662662
__tablename__ = "audio_info"
663663

664664
id = sa.Column(sa.Integer, primary_key=True)
665+
timestamp = sa.Column(ForceFloat)
665666
flac_data = sa.Column(sa.LargeBinary)
666667
transcribed_text = sa.Column(sa.String)
667668
recording_timestamp = sa.Column(sa.ForeignKey("recording.timestamp"))

openadapt/record.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,7 @@ def audio_callback(
10671067
callback=audio_callback, samplerate=16000, channels=1
10681068
)
10691069
logger.info("Audio recording started.")
1070+
start_timestamp = utils.get_timestamp()
10701071
audio_stream.start()
10711072

10721073
# NOTE: listener may not have actually started by now
@@ -1124,6 +1125,7 @@ def audio_callback(
11241125
compressed_audio_bytes,
11251126
result_info["text"],
11261127
recording_timestamp,
1128+
start_timestamp,
11271129
int(audio_stream.samplerate),
11281130
word_list,
11291131
)

0 commit comments

Comments
 (0)