Skip to content

Commit 0238ee4

Browse files
Segment boundaries (#61)
* handle repeat words at segment boundaries * correct overlapping logic * increment version * cover case of empty list
1 parent 4e00064 commit 0238ee4

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "tonic-textual"
3-
version = "3.10.5"
3+
version = "3.10.6"
44
description = "Wrappers around the Tonic Textual API"
55
authors = ["Adam Kamor <[email protected]>", "Joe Ferrara <[email protected]>", "Ander Steele <[email protected]>", "Ethan Philpott <[email protected]>", "Lyon Van Voorhis <[email protected]>", "Kirill Medvedev <[email protected]>", "Travis Matthews <[email protected]>"]
66
license = "MIT"

tonic_textual/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "3.10.5"
1+
__version__ = "3.10.6"

tonic_textual/helpers/redact_audio_file_helper.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,12 @@ def get_intervals_to_redact(
113113
"""
114114
transcript_words = []
115115
for segment in transcript_segments:
116-
transcript_words.extend(segment.words)
116+
# sometimes the last word of the previous segment matches the first word of the new segment
117+
# this causes issues later on if repeats are not removed
118+
if len(transcript_words) > 0 and len(segment.words[0]) > 0 and segment.words[0] == transcript_words[-1]:
119+
transcript_words.extend(segment.words[1:])
120+
else:
121+
transcript_words.extend(segment.words)
117122
enriched_transcript_words = add_character_indices_to_words(
118123
transcript_text, transcript_words
119124
)
@@ -129,7 +134,7 @@ def get_intervals_to_redact(
129134
# this beeps entire word when span is part of a word
130135
if word_start < span_end and word_start >= span_start:
131136
intersecting_words.append(word_obj)
132-
elif word_end >= span_start and word_end < span_end:
137+
elif word_end > span_start and word_end <= span_end:
133138
intersecting_words.append(word_obj)
134139
elif word_start > span_end: # done
135140
break

0 commit comments

Comments
 (0)