Skip to content

Commit 4e00064

Browse files
fix intersecting words logic (#60)
* fix intersecting words logic * update version
1 parent e680b20 commit 4e00064

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "tonic-textual"
3-
version = "3.10.4"
3+
version = "3.10.5"
44
description = "Wrappers around the Tonic Textual API"
55
authors = ["Adam Kamor <[email protected]>", "Joe Ferrara <[email protected]>", "Ander Steele <[email protected]>", "Ethan Philpott <[email protected]>", "Lyon Van Voorhis <[email protected]>", "Kirill Medvedev <[email protected]>", "Travis Matthews <[email protected]>"]
66
license = "MIT"

tonic_textual/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "3.10.4"
1+
__version__ = "3.10.5"

tonic_textual/helpers/redact_audio_file_helper.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,16 +119,23 @@ def get_intervals_to_redact(
119119
)
120120
output_intervals = []
121121
for span in de_identify_results:
122-
start = span.start
123-
end = span.end
122+
span_start = span.start
123+
span_end = span.end
124124
intersecting_words: List[TranscriptionWord] = []
125125
for word_obj in enriched_transcript_words:
126126
word_start = word_obj.char_start
127127
word_end = word_obj.char_end
128-
if start < word_end and word_start < end:
128+
# beep a word if it overlaps with the found span
129+
# this beeps the entire word when the span contains the word's start or end offset
130+
if word_start < span_end and word_start >= span_start:
129131
intersecting_words.append(word_obj)
130-
if word_start > end: # done
132+
elif word_end >= span_start and word_end < span_end:
133+
intersecting_words.append(word_obj)
134+
elif word_start > span_end: # done
131135
break
136+
# if no intersecting words were found, skip this span
137+
if len(intersecting_words) == 0:
138+
continue
132139
# unnecessary if transcript_words is sorted but cheap
133140
span_time_start = min([word_obj.start for word_obj in intersecting_words])
134141
span_time_end = max([word_obj.end for word_obj in intersecting_words])

0 commit comments

Comments
 (0)