Skip to content

Commit 98bf9b5

Browse files
committed
Fix code formatting issues for CI
🔧 Fixed Black formatting in subtitle.py: - Convert single quotes to double quotes for consistency - Fix trailing whitespace in docstrings - Add proper trailing commas - Normalize line spacing ✅ Resolves CI formatting failures 🎯 Black, isort, and flake8 syntax checks now pass
1 parent 64abd1c commit 98bf9b5

File tree

1 file changed

+83
-79
lines changed

1 file changed

+83
-79
lines changed

src/srt_processor/models/subtitle.py

Lines changed: 83 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -104,184 +104,184 @@ def is_sdh_marker(self) -> bool:
104104

105105
def is_sdh_only_block(self) -> bool:
106106
"""Check if this block contains ONLY SDH markers without dialogue content.
107-
107+
108108
Returns True for blocks that contain only:
109109
- Music markers (♪♪, ♪♪♪)
110110
- Pure audio descriptions [Music plays], [Chuckles]
111111
- Sound effects [Mobile vibrates], [Knock on door]
112-
112+
113113
Returns False for blocks that contain dialogue mixed with SDH:
114114
- "-[ Sobbing ] It's Cal." (dialogue with SDH)
115115
- "Hello? [Mobile vibrates]" (mixed content)
116116
- Regular dialogue without SDH markers
117117
"""
118118
if not self.lines:
119119
return False
120-
120+
121121
# Join all lines to analyze the complete text
122122
full_text = self.text.strip()
123-
123+
124124
if not full_text:
125125
return False
126-
126+
127127
# Enhanced SDH patterns
128-
music_patterns = [r'^♪+$', r'^🎵+$', r'^🎶+$']
128+
music_patterns = [r"^♪+$", r"^🎵+$", r"^🎶+$"]
129129
audio_description_patterns = [
130-
r'^\[\s*.*?\s*\]$', # Pure audio descriptions like [Music plays]
131-
r'^\(\s*.*?\s*\)$', # Sound effects in ASCII parentheses
132-
r'^(\s*.*?\s*)$', # Sound effects in full-width parentheses
133-
r'^【\s*.*?\s*】$', # Chinese-style audio descriptions
134-
r'^《\s*.*?\s*》$', # Chinese-style audio descriptions
135-
r'^[\s*.*?\s*]$', # Full-width square brackets
136-
r'^〔\s*.*?\s*〕$', # Japanese/Chinese square brackets
137-
r'^〈\s*.*?\s*〉$', # Angle brackets
130+
r"^\[\s*.*?\s*\]$", # Pure audio descriptions like [Music plays]
131+
r"^\(\s*.*?\s*\)$", # Sound effects in ASCII parentheses
132+
r"^(\s*.*?\s*)$", # Sound effects in full-width parentheses
133+
r"^【\s*.*?\s*】$", # Chinese-style audio descriptions
134+
r"^《\s*.*?\s*》$", # Chinese-style audio descriptions
135+
r"^[\s*.*?\s*]$", # Full-width square brackets
136+
r"^〔\s*.*?\s*〕$", # Japanese/Chinese square brackets
137+
r"^〈\s*.*?\s*〉$", # Angle brackets
138138
]
139-
139+
140140
# Check if entire block is just music markers
141141
for pattern in music_patterns:
142142
if re.match(pattern, full_text):
143143
return True
144-
144+
145145
# Check if entire block is just audio descriptions
146146
for pattern in audio_description_patterns:
147147
if re.match(pattern, full_text):
148148
return True
149-
149+
150150
# Check each line individually for pure SDH content
151151
for line in self.lines:
152152
line = line.strip()
153153
if not line:
154154
continue
155-
155+
156156
# Skip empty or whitespace-only lines
157157
if not line:
158158
continue
159-
159+
160160
# Check if line contains any actual dialogue content
161161
# Remove SDH markers and see if meaningful content remains
162162
temp_line = line
163-
163+
164164
# Remove music markers
165-
temp_line = re.sub(r'♪+|🎵+|🎶+', '', temp_line)
166-
165+
temp_line = re.sub(r"♪+|🎵+|🎶+", "", temp_line)
166+
167167
# Remove audio descriptions
168-
temp_line = re.sub(r'\[.*?\]|\(.*?\)|【.*?】|《.*?》', '', temp_line)
169-
168+
temp_line = re.sub(r"\[.*?\]|\(.*?\)|【.*?】|《.*?》", "", temp_line)
169+
170170
# Remove dialogue markers and whitespace
171-
temp_line = re.sub(r'^-\s*', '', temp_line).strip()
172-
171+
temp_line = re.sub(r"^-\s*", "", temp_line).strip()
172+
173173
# If anything meaningful remains after removing SDH markers,
174174
# this is not an SDH-only block
175175
if temp_line and len(temp_line) > 0:
176176
return False
177-
177+
178178
# If we get here, all lines were pure SDH content
179179
return True
180180

181181
def clean_sdh_markers(self) -> "SubtitleBlock":
182182
"""Create a new SubtitleBlock with SDH markers removed from dialogue lines.
183-
183+
184184
This method removes SDH markers like [Chuckles], [Sighs], etc. from lines
185185
while preserving the actual dialogue content.
186-
186+
187187
Examples:
188188
- "[ Sighs ] Hold on." → "Hold on."
189189
- "-[ Sobbing ] Ruth?" → "- Ruth?"
190190
- "Whoo! Whoo!\n-[ Chuckles ]" → "Whoo! Whoo!"
191-
191+
192192
Returns:
193193
New SubtitleBlock with cleaned lines
194194
"""
195195
cleaned_lines = []
196-
196+
197197
for line in self.lines:
198198
original_line = line.strip()
199199
if not original_line:
200200
continue
201-
201+
202202
# Clean the line by removing SDH markers
203203
cleaned_line = self._remove_sdh_from_line(original_line)
204-
204+
205205
# Only add non-empty lines
206206
if cleaned_line.strip():
207207
cleaned_lines.append(cleaned_line)
208-
208+
209209
# Create new block with cleaned lines
210210
return SubtitleBlock(
211211
index=self.index,
212212
time_code=self.time_code,
213213
lines=cleaned_lines,
214214
language=self.language,
215-
is_sdh=self.is_sdh
215+
is_sdh=self.is_sdh,
216216
)
217217

218218
def _remove_sdh_from_line(self, line: str) -> str:
219219
"""Remove SDH markers from a single line while preserving dialogue.
220-
220+
221221
Args:
222222
line: Original line text
223-
223+
224224
Returns:
225225
Cleaned line with SDH markers removed
226226
"""
227227
# Enhanced SDH marker patterns with Unicode support
228228
sdh_patterns = [
229229
# Audio descriptions in square brackets (ASCII)
230-
r'\[\s*[^\]]*\s*\]',
230+
r"\[\s*[^\]]*\s*\]",
231231
# Audio descriptions in parentheses (ASCII)
232-
r'\(\s*[^)]*\s*\)',
232+
r"\(\s*[^)]*\s*\)",
233233
# Audio descriptions in full-width parentheses (Unicode/Chinese)
234-
r'(\s*[^)]*\s*)',
234+
r"(\s*[^)]*\s*)",
235235
# Chinese-style audio descriptions
236-
r'【\s*[^】]*\s*】',
237-
r'《\s*[^》]*\s*》',
236+
r"【\s*[^】]*\s*】",
237+
r"《\s*[^》]*\s*》",
238238
# Music markers (Unicode and ASCII)
239-
r'♪+',
240-
r'🎵+',
241-
r'🎶+',
239+
r"♪+",
240+
r"🎵+",
241+
r"🎶+",
242242
# Additional Unicode brackets/parentheses variants
243-
r'[\s*[^]]*\s*]', # Full-width square brackets
244-
r'〔\s*[^〕]*\s*〕', # Japanese/Chinese square brackets
245-
r'〈\s*[^〉]*\s*〉', # Angle brackets
246-
r'「\s*[^」]*\s*」', # Japanese quotation marks (sometimes used for SDH)
243+
r"[\s*[^]]*\s*]", # Full-width square brackets
244+
r"〔\s*[^〕]*\s*〕", # Japanese/Chinese square brackets
245+
r"〈\s*[^〉]*\s*〉", # Angle brackets
246+
r"「\s*[^」]*\s*」", # Japanese quotation marks (sometimes used for SDH)
247247
]
248-
248+
249249
cleaned = line
250-
250+
251251
# Remove all SDH patterns iteratively
252252
for pattern in sdh_patterns:
253-
cleaned = re.sub(pattern, '', cleaned)
254-
253+
cleaned = re.sub(pattern, "", cleaned)
254+
255255
# Clean up whitespace and formatting
256256
cleaned = self._clean_whitespace(cleaned)
257-
257+
258258
return cleaned
259259

260260
def _clean_whitespace(self, text: str) -> str:
261261
"""Clean up whitespace after SDH removal.
262-
262+
263263
Args:
264264
text: Text to clean
265-
265+
266266
Returns:
267267
Text with normalized whitespace
268268
"""
269269
# Remove extra spaces
270-
cleaned = re.sub(r'\s+', ' ', text)
271-
270+
cleaned = re.sub(r"\s+", " ", text)
271+
272272
# Fix dialogue marker spacing: "- text" or "-text" → "- text"
273-
cleaned = re.sub(r'^-\s*', '- ', cleaned)
274-
273+
cleaned = re.sub(r"^-\s*", "- ", cleaned)
274+
275275
# Fix multiple dashes that can occur after SDH removal: "- -text" → "- text"
276-
cleaned = re.sub(r'^-\s*-\s*', '- ', cleaned)
277-
276+
cleaned = re.sub(r"^-\s*-\s*", "- ", cleaned)
277+
278278
# Remove leading/trailing whitespace
279279
cleaned = cleaned.strip()
280-
280+
281281
# Handle case where only dialogue marker remains
282-
if cleaned == '-':
283-
return ''
284-
282+
if cleaned == "-":
283+
return ""
284+
285285
return cleaned
286286

287287
def get_reading_speed(self) -> float:
@@ -324,63 +324,67 @@ def get_sdh_only_blocks(self) -> List[SubtitleBlock]:
324324

325325
def remove_sdh_only_blocks(self) -> "SRTDocument":
326326
"""Create a new document with SDH-only blocks removed and indices resorted.
327-
327+
328328
This removes blocks that contain ONLY SDH markers (music, sound effects, etc.)
329329
while preserving dialogue blocks that may contain embedded SDH markers.
330-
330+
331331
Returns:
332332
New SRTDocument with filtered blocks and resorted indices
333333
"""
334334
# Filter out SDH-only blocks
335335
filtered_blocks = [
336336
block for block in self.blocks if not block.is_sdh_only_block()
337337
]
338-
338+
339339
# Resort indices sequentially
340340
for i, block in enumerate(filtered_blocks):
341341
block.index = i + 1
342-
342+
343343
# Create new document with filtered blocks
344344
return SRTDocument(
345345
blocks=filtered_blocks,
346346
source_file=self.source_file,
347347
detected_language=self.detected_language,
348-
encoding=self.encoding
348+
encoding=self.encoding,
349349
)
350350

351351
def remove_sdh_blocks_and_clean_content(self) -> "SRTDocument":
352352
"""Create a new document with SDH-only blocks removed and SDH markers cleaned from remaining blocks.
353-
353+
354354
This performs comprehensive SDH removal:
355355
1. Removes blocks that contain ONLY SDH markers
356356
2. Removes SDH markers from mixed content blocks (dialogue + SDH)
357357
3. Resorts indices sequentially
358-
358+
359359
Returns:
360360
New SRTDocument with filtered and cleaned blocks
361361
"""
362362
processed_blocks = []
363-
363+
364364
for block in self.blocks:
365365
# Skip SDH-only blocks entirely
366366
if block.is_sdh_only_block():
367367
continue
368-
368+
369369
# For mixed content blocks, clean SDH markers but preserve dialogue
370370
cleaned_block = block.clean_sdh_markers()
371-
if cleaned_block and cleaned_block.lines and any(line.strip() for line in cleaned_block.lines):
371+
if (
372+
cleaned_block
373+
and cleaned_block.lines
374+
and any(line.strip() for line in cleaned_block.lines)
375+
):
372376
processed_blocks.append(cleaned_block)
373-
377+
374378
# Resort indices sequentially
375379
for i, block in enumerate(processed_blocks):
376380
block.index = i + 1
377-
381+
378382
# Create new document with processed blocks
379383
return SRTDocument(
380384
blocks=processed_blocks,
381385
source_file=self.source_file,
382386
detected_language=self.detected_language,
383-
encoding=self.encoding
387+
encoding=self.encoding,
384388
)
385389

386390
def to_srt_format(self) -> str:

0 commit comments

Comments
 (0)