@@ -104,184 +104,184 @@ def is_sdh_marker(self) -> bool:
104104
def is_sdh_only_block(self) -> bool:
    """Check whether this block holds ONLY SDH markers and no dialogue.

    Returns True for blocks made up exclusively of:
    - Music markers (♪♪, ♪♪♪, 🎵, 🎶)
    - Bracketed audio descriptions such as [Music plays] or [Chuckles]
    - Bracketed sound effects such as [Mobile vibrates] or (Knock on door)

    Returns False for blocks where dialogue is present:
    - "-[ Sobbing ] It's Cal." (dialogue with an SDH marker)
    - "Hello? [Mobile vibrates]" (mixed content)
    - "[ Sighs ] Hold on. [ Door closes ]" (dialogue between two markers)
    - Regular dialogue without SDH markers
    - Blocks with no lines, or whose text is blank

    Returns:
        True when every non-blank line is empty once markers and leading
        dialogue dashes are removed, False otherwise.
    """
    if not self.lines:
        return False

    # A block whose combined text is blank has nothing to classify.
    if not self.text.strip():
        return False

    # Every bracket style treated as an SDH container. One shared table is
    # used for all lines so single-line and multi-line blocks are judged by
    # the same rules.
    bracket_pairs = (
        ("[", "]"),
        ("(", ")"),
        ("（", "）"),  # full-width parentheses
        ("【", "】"),
        ("《", "》"),
        ("［", "］"),  # full-width square brackets
        ("〔", "〕"),
        ("〈", "〉"),
    )
    alternatives = ["[♪🎵🎶]+"]
    for opener, closer in bracket_pairs:
        open_re = re.escape(opener)
        close_re = re.escape(closer)
        # The body excludes both delimiters, so a match can never stretch
        # across dialogue sitting between two separate markers (the flaw of
        # an anchored lazy ".*?"), and always targets the innermost pair.
        alternatives.append(open_re + "[^" + open_re + close_re + "]*" + close_re)
    marker_re = re.compile("|".join(alternatives))

    for raw_line in self.lines:
        remainder = raw_line.strip()
        if not remainder:
            continue

        # Strip innermost markers repeatedly so nested brackets such as
        # "[a [b] c]" collapse completely.
        previous = None
        while previous != remainder:
            previous = remainder
            remainder = marker_re.sub("", remainder)

        # Drop leading dialogue dashes; strip first because removing a
        # marker can leave whitespace in front of the dash.
        remainder = re.sub(r"^(?:-\s*)+", "", remainder.strip()).strip()

        # Anything left over is real dialogue.
        if remainder:
            return False

    # Every non-blank line was pure SDH content.
    return True
180180
def clean_sdh_markers(self) -> "SubtitleBlock":
    """Build a copy of this block whose lines have SDH markers stripped out.

    Each non-blank line is trimmed and passed through
    ``_remove_sdh_from_line``; lines that end up blank are dropped.

    Examples:
        - "[ Sighs ] Hold on." → "Hold on."
        - "-[ Sobbing ] Ruth?" → "- Ruth?"
        - a "Whoo! Whoo!" line followed by a "-[ Chuckles ]" line
          → only "Whoo! Whoo!" is kept

    Returns:
        New SubtitleBlock carrying the same index, time code, language and
        SDH flag as this one, with the cleaned lines.
    """
    # Lazy pipeline: trim -> skip blanks -> strip markers -> skip blanks.
    trimmed = (raw.strip() for raw in self.lines)
    scrubbed = (self._remove_sdh_from_line(text) for text in trimmed if text)
    surviving_lines = [text for text in scrubbed if text.strip()]

    return SubtitleBlock(
        index=self.index,
        time_code=self.time_code,
        lines=surviving_lines,
        language=self.language,
        is_sdh=self.is_sdh,
    )
217217
def _remove_sdh_from_line(self, line: str) -> str:
    """Remove SDH markers from a single line while preserving dialogue.

    The patterns are applied one after another, in the order listed, and
    the result is then normalised by ``_clean_whitespace``.

    Args:
        line: Original line text

    Returns:
        Line text with SDH markers removed and whitespace cleaned up
    """
    # Each bracket pattern is "opener, anything but the closer, closer".
    # No "\s*" padding is needed inside the brackets: whitespace is already
    # covered by the negated class, and overlapping quantifiers only add
    # backtracking on lines with an unclosed bracket.
    sdh_patterns = (
        # Audio descriptions in square brackets (ASCII)
        r"\[[^\]]*\]",
        # Audio descriptions in parentheses (ASCII)
        r"\([^)]*\)",
        # Audio descriptions in full-width parentheses (Unicode/Chinese)
        r"（[^）]*）",
        # Chinese-style audio descriptions
        r"【[^】]*】",
        r"《[^》]*》",
        # Music markers
        r"♪+",
        r"🎵+",
        r"🎶+",
        # Additional Unicode bracket variants
        r"［[^］]*］",  # Full-width square brackets
        r"〔[^〕]*〕",  # Japanese/Chinese square brackets
        r"〈[^〉]*〉",  # Angle brackets
        # NOTE(review): 「」 are also ordinary Japanese quotation marks, so
        # this pattern can delete quoted dialogue — confirm it is wanted.
        r"「[^」]*」",
    )

    stripped = line
    for marker_pattern in sdh_patterns:
        stripped = re.sub(marker_pattern, "", stripped)

    # Tidy the gaps and dangling dashes left behind by the removals.
    return self._clean_whitespace(stripped)
259259
260260 def _clean_whitespace (self , text : str ) -> str :
261261 """Clean up whitespace after SDH removal.
262-
262+
263263 Args:
264264 text: Text to clean
265-
265+
266266 Returns:
267267 Text with normalized whitespace
268268 """
269269 # Remove extra spaces
270- cleaned = re .sub (r' \s+' , ' ' , text )
271-
270+ cleaned = re .sub (r" \s+" , " " , text )
271+
272272 # Fix dialogue marker spacing: "- text" or "-text" → "- text"
273- cleaned = re .sub (r' ^-\s*' , '- ' , cleaned )
274-
273+ cleaned = re .sub (r" ^-\s*" , "- " , cleaned )
274+
275275 # Fix multiple dashes that can occur after SDH removal: "- -text" → "- text"
276- cleaned = re .sub (r' ^-\s*-\s*' , '- ' , cleaned )
277-
276+ cleaned = re .sub (r" ^-\s*-\s*" , "- " , cleaned )
277+
278278 # Remove leading/trailing whitespace
279279 cleaned = cleaned .strip ()
280-
280+
281281 # Handle case where only dialogue marker remains
282- if cleaned == '-' :
283- return ''
284-
282+ if cleaned == "-" :
283+ return ""
284+
285285 return cleaned
286286
287287 def get_reading_speed (self ) -> float :
@@ -324,63 +324,67 @@ def get_sdh_only_blocks(self) -> List[SubtitleBlock]:
324324
def remove_sdh_only_blocks(self) -> "SRTDocument":
    """Create a new document with SDH-only blocks removed and indices resorted.

    Blocks for which ``is_sdh_only_block()`` is true (music, sound effects,
    etc.) are dropped; every other block is kept, including dialogue blocks
    that still contain embedded SDH markers.

    The kept blocks are shallow-copied before being renumbered, so the
    blocks of this document keep their original indices. The copies share
    their attribute values (for example the ``lines`` list) with the
    originals.

    Returns:
        New SRTDocument with the remaining blocks numbered from 1, and the
        same source file, detected language and encoding as this document.
    """
    import copy  # local import so the fix does not depend on file-level imports

    renumbered_blocks = []
    for block in self.blocks:
        if block.is_sdh_only_block():
            continue

        # Renumber a copy: assigning to block.index directly would also
        # change the block still referenced by self.blocks.
        block_copy = copy.copy(block)
        block_copy.index = len(renumbered_blocks) + 1
        renumbered_blocks.append(block_copy)

    return SRTDocument(
        blocks=renumbered_blocks,
        source_file=self.source_file,
        detected_language=self.detected_language,
        encoding=self.encoding,
    )
350350
def remove_sdh_blocks_and_clean_content(self) -> "SRTDocument":
    """Create a new document with SDH-only blocks dropped and SDH markers cleaned.

    Processing steps:
    1. Blocks for which ``is_sdh_only_block()`` is true are skipped
    2. Every other block is replaced by its ``clean_sdh_markers()`` result
    3. Cleaned blocks left without any non-blank line are discarded
    4. The surviving blocks are numbered sequentially from 1

    Returns:
        New SRTDocument holding the cleaned blocks, with the same source
        file, detected language and encoding as this document.
    """
    survivors = []
    for candidate in self.blocks:
        # Pure SDH blocks are dropped outright.
        if candidate.is_sdh_only_block():
            continue

        # Mixed blocks: strip the markers, keep the dialogue.
        scrubbed = candidate.clean_sdh_markers()
        if not scrubbed or not scrubbed.lines:
            continue
        if not any(text.strip() for text in scrubbed.lines):
            continue
        survivors.append(scrubbed)

    # Number the surviving blocks 1..N.
    position = 0
    for kept in survivors:
        position += 1
        kept.index = position

    return SRTDocument(
        blocks=survivors,
        source_file=self.source_file,
        detected_language=self.detected_language,
        encoding=self.encoding,
    )
385389
386390 def to_srt_format (self ) -> str :
0 commit comments