diff --git a/CHANGELOG.md b/CHANGELOG.md index ff8b0e8b..69f1c8cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Click up arrow to go back to TOC. - Add arguments for [auditok.StreamTokenizer](https://auditok.readthedocs.io/en/latest/core.html#class-summary) and [energy_threshold](https://auditok.readthedocs.io/en/latest/apitutorial.html#examples-using-real-audio-data). [issue #30](https://github.com/BingLingGroup/autosub/issues/30) - Add overwrite option `-y` for output overwrite and no input pause. [issue #29](https://github.com/BingLingGroup/autosub/issues/29) - Add specific .ass style when output format is .ass. [issue #21](https://github.com/BingLingGroup/autosub/issues/21) +- Add timings generating function instead of using speech-to-text api. [issue #14](https://github.com/BingLingGroup/autosub/issues/14) #### Changed(Unreleased) diff --git a/autosub/__init__.py b/autosub/__init__.py index 9b2bea56..e27cc286 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -65,6 +65,7 @@ def get_cmd_args(): 'source_path', nargs='?', metavar='path', help="The path to the video or audio file needs to generate subtitle. " + "If Speech Options not given, it will only generate the times." "(arg_num = 1)" ) @@ -130,7 +131,6 @@ def get_cmd_args(): speech_group.add_argument( '-S', '--src-language', metavar='locale', - default=constants.DEFAULT_SRC_LANGUAGE, help="Locale of language spoken in source file. " "(arg_num = 1) (default: %(default)s)" ) @@ -314,34 +314,56 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme ) return False - if args.src_language not in constants.SPEECH_TO_TEXT_LANGUAGE_CODES.keys(): - print( - "Error: Source language not supported. " - "Run with \"-lsc\" or \"--list-speech-to-text-codes\" " - "to see all supported languages." - ) - return False + if args.src_language: + if args.src_language not in constants.SPEECH_TO_TEXT_LANGUAGE_CODES.keys(): + print( + "Error: Source language not supported. 
" + "Run with \"-lsc\" or \"--list-speech-to-text-codes\" " + "to see all supported languages." + ) + return False - if args.dst_language is None: - print( - "Destination language not provided. " - "Only performing speech recognition." - ) - args.dst_language = args.src_language + if args.dst_language is None: + print( + "Destination language not provided. " + "Only performing speech recognition." + ) + args.dst_language = args.src_language - elif args.dst_language == args.src_language: - print( - "Source language is the same as the Destination language. " - "Only performing speech recognition." - ) + elif args.dst_language == args.src_language: + print( + "Source language is the same as the Destination language. " + "Only performing speech recognition." + ) - elif args.dst_language not in constants.TRANSLATION_LANGUAGE_CODES.keys(): - print( - "Error: Destination language not supported. " - "Run with \"-ltc\" or \"--list-translation-codes\" " - "to see all supported languages." - ) - return False + elif args.dst_language not in constants.TRANSLATION_LANGUAGE_CODES.keys(): + print( + "Error: Destination language not supported. " + "Run with \"-ltc\" or \"--list-translation-codes\" " + "to see all supported languages." + ) + return False + + else: + if args.format == 'txt': + print( + "Plain text don't include times. " + "No works done." + ) + return False + + if args.external_speech_regions: + print( + "You've already input times. " + "No works done." + ) + return False + + else: + print( + "Source language not provided. " + "Only performing speech regions detection." 
+ ) if not args.ass_styles: # when args.ass_styles is used but without option @@ -355,29 +377,30 @@ def validate(args): # pylint: disable=too-many-branches,too-many-return-stateme else: args.ass_styles = None - if args.min_region_size < constants.MIN_REGION_SIZE: - print( - "Your minimum region size {mrs0} is smaller than {mrs}.\n" - "Now reset to {mrs}".format(mrs0=args.min_region_size, - mrs=constants.MIN_REGION_SIZE) - ) - args.min_region_size = constants.MIN_REGION_SIZE + if not args.external_speech_regions: + if args.min_region_size < constants.MIN_REGION_SIZE: + print( + "Your minimum region size {mrs0} is smaller than {mrs}.\n" + "Now reset to {mrs}".format(mrs0=args.min_region_size, + mrs=constants.MIN_REGION_SIZE) + ) + args.min_region_size = constants.MIN_REGION_SIZE - if args.max_region_size > constants.MAX_EXT_REGION_SIZE: - print( - "Your maximum region size {mrs0} is larger than {mrs}.\n" - "Now reset to {mrs}".format(mrs0=args.max_region_size, - mrs=constants.MAX_EXT_REGION_SIZE) - ) - args.max_region_size = constants.MAX_EXT_REGION_SIZE + if args.max_region_size > constants.MAX_EXT_REGION_SIZE: + print( + "Your maximum region size {mrs0} is larger than {mrs}.\n" + "Now reset to {mrs}".format(mrs0=args.max_region_size, + mrs=constants.MAX_EXT_REGION_SIZE) + ) + args.max_region_size = constants.MAX_EXT_REGION_SIZE - if args.max_continuous_silence < 0: - print( - "Your maximum continuous silence {mxcs} is smaller than 0.\n" - "Now reset to {dmxcs}".format(mxcs=args.max_continuous_silence, - dmxcs=constants.MAX_CONTINUOUS_SILENCE) - ) - args.max_continuous_silence = constants.MAX_CONTINUOUS_SILENCE + if args.max_continuous_silence < 0: + print( + "Your maximum continuous silence {mxcs} is smaller than 0.\n" + "Now reset to {dmxcs}".format(mxcs=args.max_continuous_silence, + dmxcs=constants.MAX_CONTINUOUS_SILENCE) + ) + args.max_continuous_silence = constants.MAX_CONTINUOUS_SILENCE return True @@ -420,6 +443,9 @@ def main(): # pylint: 
disable=too-many-branches, too-many-statements else: fps = 0.0 + if not args.dst_language: + args.dst_language = 'times' + if not args.output: base = os.path.splitext(args.source_path)[0] args.output = "{base}.{langcode}.{extension}".format(base=base, @@ -458,23 +484,37 @@ def main(): # pylint: disable=too-many-branches, too-many-statements mode=mode ) - timed_subtitles = core.api_gen_text( - source_file=args.source_path, - api_url=api_url, - regions=regions, - api_key=args.api_key, - concurrency=args.concurrency, - src_language=args.src_language, - dst_language=args.dst_language - ) + if args.src_language: + timed_subtitles = core.api_gen_text( + source_file=args.source_path, + api_url=api_url, + regions=regions, + api_key=args.api_key, + concurrency=args.concurrency, + src_language=args.src_language, + dst_language=args.dst_language + ) + + subtitles_string, extension = core.list_to_sub_str( + timed_subtitles=timed_subtitles, + fps=fps, + subtitles_file_format=args.format, + ass_styles_file=args.ass_styles + ) + + else: + subtitles_string, extension = core.times_to_sub_str( + times=regions, + fps=fps, + subtitles_file_format=args.format, + ass_styles_file=args.ass_styles + ) - subtitles_file_path = core.list_to_sub_file( - timed_subtitles=timed_subtitles, + subtitles_file_path = core.str_to_file( + str_=subtitles_string, output=args.output, - fps=fps, - subtitles_file_format=args.format, - input_m=input_m, - ass_styles_file=args.ass_styles + extension=extension, + input_m=input_m ) print("\nSubtitles file created at \"{}\"".format(subtitles_file_path)) diff --git a/autosub/core.py b/autosub/core.py index d5d5952a..b1a59464 100644 --- a/autosub/core.py +++ b/autosub/core.py @@ -191,16 +191,14 @@ def api_gen_text( # pylint: disable=too-many-locals,too-many-arguments,too-many return timed_subtitles -def list_to_sub_file( # pylint: disable=too-many-arguments +def list_to_sub_str( # pylint: disable=too-many-arguments timed_subtitles, - output, fps=30.0, 
def times_to_sub_str(  # pylint: disable=too-many-arguments
        times,
        fps=30.0,
        subtitles_file_format=constants.DEFAULT_SUBTITLES_FORMAT,
        ass_styles_file=None
):
    """
    Format a list of speech regions as a text-less subtitles string.

    Args:
        times: Iterable of ``(start, end)`` pairs. It is handed unchanged
            to the ``formatters.*_times_formatter`` helpers
            (presumably milliseconds -- confirm against the region detector).
        fps: Frame rate. Only forwarded for the "sub" (MicroDVD) format.
        subtitles_file_format: Name of the requested output format.
        ass_styles_file: Optional path of a subtitles file whose styles are
            loaded with pysubs2 and applied when the format is "ass"/"ssa".

    Returns:
        A ``(formatted_subtitles, extension)`` tuple: the serialized
        subtitles and the file extension that matches their real content.
    """
    # By default the extension is the format name; branches below override
    # it when the two differ.
    extension = subtitles_file_format

    if subtitles_file_format in ('srt', 'tmp'):
        formatted_subtitles = formatters.pysubs2_times_formatter(
            times=times,
            sub_format=subtitles_file_format)

    elif subtitles_file_format in ('ass', 'ssa'):
        ass_styles = None
        if ass_styles_file:
            ass_styles = pysubs2.SSAFile.load(ass_styles_file).styles
        formatted_subtitles = formatters.pysubs2_times_formatter(
            times=times,
            sub_format=subtitles_file_format,
            ass_styles=ass_styles)

    elif subtitles_file_format == 'vtt':
        formatted_subtitles = formatters.vtt_times_formatter(
            times=times)

    elif subtitles_file_format == 'json':
        formatted_subtitles = formatters.json_times_formatter(
            times=times)

    elif subtitles_file_format == 'sub':
        # pysubs2 names the ".sub" format "microdvd" and it needs a fps.
        # ref https://pysubs2.readthedocs.io/en/latest
        # /api-reference.html#supported-input-output-formats
        formatted_subtitles = formatters.pysubs2_times_formatter(
            times=times,
            sub_format='microdvd',
            fps=fps)

    elif subtitles_file_format == 'mpl2':
        formatted_subtitles = formatters.pysubs2_times_formatter(
            times=times,
            sub_format=subtitles_file_format)
        extension = 'mpl2.txt'

    else:
        # Fallback: serialize with the default format. Implicit string
        # concatenation keeps source indentation out of the message.
        print("Format \"{fmt}\" not supported. "
              "Using \"{default_fmt}\" instead.".format(
                  fmt=subtitles_file_format,
                  default_fmt=constants.DEFAULT_SUBTITLES_FORMAT))
        formatted_subtitles = formatters.pysubs2_times_formatter(
            times=times,
            sub_format=constants.DEFAULT_SUBTITLES_FORMAT)
        # The content is in the default format, so the extension must
        # follow it rather than the unsupported name that was requested.
        extension = constants.DEFAULT_SUBTITLES_FORMAT

    return formatted_subtitles, extension
def pysubs2_times_formatter(times,
                            sub_format='srt',
                            fps=0.0,
                            ass_styles=None):
    """
    Serialize a list of times as text-less events using pysubs2.

    Args:
        times: Iterable of ``(start, end)`` pairs assigned directly to
            ``SSAEvent.start`` / ``SSAEvent.end``.
        sub_format: pysubs2 format name passed to ``SSAFile.to_string``.
        fps: Frame rate; only set on the file when it is not ``0.0``.
        ass_styles: Optional mapping of style name to style. When given,
            it becomes the file's styles and every event uses its last
            style name.

    Returns:
        The subtitles serialized as a string in ``sub_format``.
    """
    pysubs2_obj = pysubs2.SSAFile()
    if fps != 0.0:
        pysubs2_obj.fps = fps

    style_name = None
    if ass_styles:
        pysubs2_obj.styles = ass_styles
        # Read the last style name without popitem(): popping would delete
        # the style from the very dict just attached to the file (and from
        # the caller's object), leaving events with an undefined style.
        style_name = list(ass_styles)[-1]

    for (start, end) in times:
        event = pysubs2.SSAEvent()
        event.start = start
        event.end = end
        if style_name is not None:
            event.style = style_name
        pysubs2_obj.events.append(event)

    return pysubs2_obj.to_string(format_=sub_format, fps=pysubs2_obj.fps)


def vtt_times_formatter(times):
    """
    Serialize a list of times according to the VTT format.

    The times are first rendered with ``pysubs2_times_formatter`` using its
    default format ('srt'). Every comma is then replaced by a dot and the
    "WEBVTT" header is prepended.
    """
    text = pysubs2_times_formatter(times)
    text = 'WEBVTT\n\n' + text.replace(',', '.')
    return text


def json_times_formatter(times):
    """
    Serialize a list of times as a JSON blob.

    Each ``(start, end)`` pair becomes an object with "start" and "end"
    keys whose values are the inputs divided by 1000.0
    (presumably milliseconds converted to seconds -- confirm with caller).
    """
    subtitle_dicts = [
        {
            'start': start / 1000.0,
            'end': end / 1000.0
        }
        for (start, end)
        in times
    ]
    return json.dumps(subtitle_dicts, indent=4, ensure_ascii=False)