16
16
17
17
# Any changes to the path and your own modules
18
18
from autosub import constants
19
- from autosub import metadata
20
- from autosub import ffmpeg_utils
21
19
from autosub import core
20
+ from autosub import ffmpeg_utils
21
+ from autosub import metadata
22
+ from autosub import sub_utils
22
23
23
24
24
25
def get_cmd_args ():
@@ -27,7 +28,7 @@ def get_cmd_args():
27
28
"""
28
29
parser = argparse .ArgumentParser (
29
30
prog = metadata .NAME ,
30
- usage = '\n %(prog)s source_path [options]' ,
31
+ usage = '\n %(prog)s < source_path> [options]' ,
31
32
description = metadata .DESCRIPTION ,
32
33
epilog = """Make sure the argument with space is in quotes.
33
34
The default value is used
@@ -94,7 +95,7 @@ def get_cmd_args():
94
95
95
96
output_group .add_argument (
96
97
'-fps' , '--sub-fps' ,
97
- metavar = 'number ' ,
98
+ metavar = 'float ' ,
98
99
type = float ,
99
100
help = "Valid when your output format is sub. "
100
101
"If input, it will override the fps check "
@@ -105,7 +106,7 @@ def get_cmd_args():
105
106
)
106
107
107
108
output_group .add_argument (
108
- '-aty ' , '--ass-styles' ,
109
+ '-sty ' , '--ass-styles' ,
109
110
nargs = '?' , metavar = 'path' ,
110
111
default = ' ' ,
111
112
help = """Valid when your output format is ass/ssa.
@@ -135,6 +136,25 @@ def get_cmd_args():
135
136
"(arg_num = 1) (default: %(default)s)"
136
137
)
137
138
139
+ speech_group .add_argument (
140
+ '-mnc' , '--min-confidence' ,
141
+ metavar = 'float' ,
142
+ type = float ,
143
+ help = "GoogleSpeechV2 API response for text confidence. "
144
+ "A float value between 0 and 1. "
145
+ "Confidence bigger means result better. "
146
+ "Input this argument will drop any result below it. "
147
+ "Ref: https://github.com/BingLingGroup/google-speech-v2#response "
148
+ "(arg_num = 1)"
149
+ )
150
+
151
+ speech_group .add_argument (
152
+ '-der' , '--drop-empty-regions' ,
153
+ action = 'store_true' ,
154
+ help = "Drop any regions without speech-to-text result. "
155
+ "(arg_num = 0)"
156
+ )
157
+
138
158
trans_group .add_argument (
139
159
'-D' , '--dst-language' ,
140
160
metavar = 'locale' ,
@@ -152,7 +172,7 @@ def get_cmd_args():
152
172
153
173
options_group .add_argument (
154
174
'-C' , '--concurrency' ,
155
- metavar = 'number ' ,
175
+ metavar = 'integer ' ,
156
176
type = int ,
157
177
default = constants .DEFAULT_CONCURRENCY ,
158
178
help = "Number of concurrent API requests to make. "
@@ -219,7 +239,7 @@ def get_cmd_args():
219
239
'-mxcs' , '--max-continuous-silence' ,
220
240
metavar = 'second' ,
221
241
type = float ,
222
- default = constants .MAX_CONTINUOUS_SILENCE ,
242
+ default = constants .DEFAULT_CONTINUOUS_SILENCE ,
223
243
help = "Maximum length of a tolerated silence within a valid audio activity. "
224
244
"Same docs above. "
225
245
"(arg_num = 1) (default: %(default)s)"
@@ -280,7 +300,7 @@ def get_cmd_args():
280
300
return parser .parse_args ()
281
301
282
302
283
- def validate (args ): # pylint: disable=too-many-branches,too-many-return-statements
303
+ def validate (args ): # pylint: disable=too-many-branches,too-many-return-statements, too-many-statements
284
304
"""
285
305
Check that the CLI arguments passed to autosub are valid.
286
306
"""
@@ -328,21 +348,35 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme
328
348
"Destination language not provided. "
329
349
"Only performing speech recognition."
330
350
)
331
- args .dst_language = args .src_language
332
351
333
- elif args .dst_language == args .src_language :
352
+ else :
353
+ if args .min_confidence < 0.0 or args .min_confidence > 1.0 :
354
+ print (
355
+ "Error: min_confidence's value isn't legal."
356
+ )
357
+ return False
358
+
359
+ if not args .api_key :
360
+ print (
361
+ "Error: Subtitle translation requires specified Google Translate API key. "
362
+ )
363
+ return False
364
+
365
+ if args .dst_language and \
366
+ args .dst_language not in constants .TRANSLATION_LANGUAGE_CODES .keys ():
367
+ print (
368
+ "Error: Destination language not supported. "
369
+ "Run with \" -ltc\" or \" --list-translation-codes\" "
370
+ "to see all supported languages."
371
+ )
372
+ return False
373
+
374
+ if args .dst_language == args .src_language :
334
375
print (
335
376
"Source language is the same as the Destination language. "
336
377
"Only performing speech recognition."
337
378
)
338
-
339
- elif args .dst_language not in constants .TRANSLATION_LANGUAGE_CODES .keys ():
340
- print (
341
- "Error: Destination language not supported. "
342
- "Run with \" -ltc\" or \" --list-translation-codes\" "
343
- "to see all supported languages."
344
- )
345
- return False
379
+ args .dst_language = None
346
380
347
381
else :
348
382
if args .format == 'txt' :
@@ -367,6 +401,7 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme
367
401
368
402
if not args .ass_styles :
369
403
# when args.ass_styles is used but without option
404
+ # its value is ' '
370
405
if not args .external_speech_regions :
371
406
print (
372
407
"Error: External speech regions file not provided."
@@ -375,6 +410,7 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme
375
410
else :
376
411
args .ass_styles = args .external_speech_regions
377
412
else :
413
+ # then set it to None
378
414
args .ass_styles = None
379
415
380
416
if not args .external_speech_regions :
@@ -398,9 +434,9 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme
398
434
print (
399
435
"Your maximum continuous silence {mxcs} is smaller than 0.\n "
400
436
"Now reset to {dmxcs}" .format (mxcs = args .max_continuous_silence ,
401
- dmxcs = constants .MAX_CONTINUOUS_SILENCE )
437
+ dmxcs = constants .DEFAULT_CONTINUOUS_SILENCE )
402
438
)
403
- args .max_continuous_silence = constants .MAX_CONTINUOUS_SILENCE
439
+ args .max_continuous_silence = constants .DEFAULT_CONTINUOUS_SILENCE
404
440
405
441
return True
406
442
@@ -443,7 +479,9 @@ def main(): # pylint: disable=too-many-branches, too-many-statements
443
479
else :
444
480
fps = 0.0
445
481
446
- if not args .dst_language :
482
+ if not args .src_language and not args .dst_language :
483
+ # only applies when generating times (neither src nor dst language is given)
484
+ # in this case, the program only uses args.dst_language as a name tail
447
485
args .dst_language = 'times'
448
486
449
487
if not args .output :
@@ -460,13 +498,15 @@ def main(): # pylint: disable=too-many-branches, too-many-statements
460
498
"Now file path set to {new}" .format (new = args .output ))
461
499
462
500
if args .external_speech_regions :
501
+ # use external speech regions
463
502
print ("Using external speech regions." )
464
- regions = core . sub_gen_speech_regions (
503
+ regions = sub_utils . sub_to_speech_regions (
465
504
source_file = args .source_path ,
466
505
sub_file = args .external_speech_regions
467
506
)
468
507
469
508
else :
509
+ # use auditok_gen_speech_regions
470
510
mode = 0
471
511
if args .strict_min_length :
472
512
mode = auditok .StreamTokenizer .STRICT_MIN_LENGTH
@@ -480,27 +520,45 @@ def main(): # pylint: disable=too-many-branches, too-many-statements
480
520
energy_threshold = args .energy_threshold ,
481
521
min_region_size = constants .MIN_REGION_SIZE ,
482
522
max_region_size = constants .MAX_REGION_SIZE ,
483
- max_continuous_silence = constants .MAX_CONTINUOUS_SILENCE ,
523
+ max_continuous_silence = constants .DEFAULT_CONTINUOUS_SILENCE ,
484
524
mode = mode
485
525
)
486
526
487
527
if args .src_language :
488
- timed_subtitles = core .api_gen_text (
528
+ # speech to text
529
+ text_list = core .speech_to_text (
489
530
source_file = args .source_path ,
490
531
api_url = api_url ,
491
532
regions = regions ,
492
- api_key = args .api_key ,
493
533
concurrency = args .concurrency ,
494
534
src_language = args .src_language ,
495
- dst_language = args .dst_language
535
+ min_confidence = args .min_confidence
496
536
)
497
537
538
+ if args .dst_language :
539
+ # text translation
540
+ translated_text = core .text_translation (
541
+ text_list = text_list ,
542
+ api_key = args .api_key ,
543
+ concurrency = args .concurrency ,
544
+ src_language = args .src_language ,
545
+ dst_language = args .dst_language
546
+ )
547
+ text_list = translated_text
548
+ # drop src_language text_list
549
+
550
+ if not args .drop_empty_regions :
551
+ timed_text = [(region , text ) for region , text in zip (regions , text_list )]
552
+ else :
553
+ timed_text = [(region , text ) for region , text in zip (regions , text_list ) if text ]
554
+
498
555
subtitles_string , extension = core .list_to_sub_str (
499
- timed_subtitles = timed_subtitles ,
556
+ timed_subtitles = timed_text ,
500
557
fps = fps ,
501
558
subtitles_file_format = args .format ,
502
559
ass_styles_file = args .ass_styles
503
560
)
561
+ # formatting timed_text to subtitles string
504
562
505
563
else :
506
564
subtitles_string , extension = core .times_to_sub_str (
@@ -509,13 +567,16 @@ def main(): # pylint: disable=too-many-branches, too-many-statements
509
567
subtitles_file_format = args .format ,
510
568
ass_styles_file = args .ass_styles
511
569
)
570
+ # times to subtitles string
512
571
513
572
subtitles_file_path = core .str_to_file (
514
573
str_ = subtitles_string ,
515
574
output = args .output ,
516
575
extension = extension ,
517
576
input_m = input_m
518
577
)
578
+ # subtitles string to file
579
+
519
580
print ("\n Subtitles file created at \" {}\" " .format (subtitles_file_path ))
520
581
521
582
except KeyboardInterrupt :
0 commit comments