diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0d20026..ec6b250 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ef739a3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "editor.formatOnType": true, + "editor.formatOnSave": true, +} \ No newline at end of file diff --git a/.whitesource b/.whitesource deleted file mode 100644 index e0aaa3e..0000000 --- a/.whitesource +++ /dev/null @@ -1,8 +0,0 @@ -{ - "checkRunSettings": { - "vulnerableCheckRunConclusionLevel": "failure" - }, - "issueSettings": { - "minSeverityLevel": "LOW" - } -} \ No newline at end of file diff --git a/README.md b/README.md index 1222a33..5d3f5ec 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,18 @@ python -m pip install vtt_to_srt3 ## Usage from terminal ```shell -python -m vtt_to_srt pathname [-r] +usage: vtt_to_srt.py [-h] [-r] [-e ENCODING] pathname -pathname - a file or directory with files to be converted +Convert vtt files to srt files --r - walk path recursively +positional arguments: + pathname a file or directory with files to be converted + +options: + -h, --help show this help message and exit + -r, --recursive walk path recursively + -e ENCODING, --encoding ENCODING + encoding format for input and output files ``` ## Usage as a lib @@ -30,8 +37,8 @@ Convert vtt file from vtt_to_srt.vtt_to_srt import vtt_to_srt path = '/path/to/file.vtt' vtt_to_srt(path) -``` - +``` + Recursively convert all vtt files in directory ```shell from vtt_to_srt.vtt_to_srt import vtt_to_srt diff --git a/setup.py b/setup.py index 1183bc0..f7ce397 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,27 @@ import setuptools -with open("README.md", "r") as fh: +with open(file="README.md", mode="r", encoding="utf-8") as fh: long_description = fh.read() -setuptools.setup( - name='vtt_to_srt3', - version='0.1.9.1', - author="Jeison Cardoso", - author_email="j@jsonzilla.com", - description="vtt to srt subtitles converter package", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/jsonzilla/vtt-to-srt.py", - packages=setuptools.find_packages(), - classifiers=[ - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Operating System :: OS Independent", - ], - entry_points={ - "console_scripts": [ - "vtt_to_srt=vtt_to_srt.vtt_to_srt:main", - ] - } - - ) +setuptools.setup(name='vtt_to_srt3', + version='0.1.9.2', + author="Jeison Cardoso", + author_email="j@jsonzilla.com", + description="vtt to srt subtitles converter package", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/jsonzilla/vtt-to-srt.py", + packages=setuptools.find_packages( + exclude=["test_*.py", "valid*.*", "input*.*"]), + classifiers=["Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Operating System :: OS Independent"], + entry_points={ + "console_scripts": + ["vtt_to_srt=vtt_to_srt.vtt_to_srt:main"] + }, + ) diff --git a/vtt_to_srt/input_alternative_iso-8859-2.vtt b/vtt_to_srt/input_alternative_iso-8859-2.vtt new file mode 100644 index 0000000..a198b0e --- /dev/null +++ b/vtt_to_srt/input_alternative_iso-8859-2.vtt @@ -0,0 +1,8 @@ +WEBVTT + +00:01.000 --> 00:04.000 +- Trinken Sie niemals flüssigen Stickstoff. + +00:05.000 --> 00:09.000 +- Es wird Ihren Magen perforieren. +- Du könntest sterben. diff --git a/vtt_to_srt/input_alternative_utf8.vtt b/vtt_to_srt/input_alternative_utf8.vtt new file mode 100644 index 0000000..9c86dfd --- /dev/null +++ b/vtt_to_srt/input_alternative_utf8.vtt @@ -0,0 +1,8 @@ +WEBVTT + +00:01.000 --> 00:04.000 +- Never drink liquid nitrogen. + +00:05.000 --> 00:09.000 +- It will perforate your stomach. +- You could die. diff --git a/vtt_to_srt/input_iso-8859-2.vtt b/vtt_to_srt/input_iso-8859-2.vtt new file mode 100644 index 0000000..a198b0e --- /dev/null +++ b/vtt_to_srt/input_iso-8859-2.vtt @@ -0,0 +1,8 @@ +WEBVTT + +00:01.000 --> 00:04.000 +- Trinken Sie niemals flüssigen Stickstoff. + +00:05.000 --> 00:09.000 +- Es wird Ihren Magen perforieren. +- Du könntest sterben. diff --git a/vtt_to_srt/input_utf8.vtt b/vtt_to_srt/input_utf8.vtt new file mode 100644 index 0000000..9c86dfd --- /dev/null +++ b/vtt_to_srt/input_utf8.vtt @@ -0,0 +1,8 @@ +WEBVTT + +00:01.000 --> 00:04.000 +- Never drink liquid nitrogen. + +00:05.000 --> 00:09.000 +- It will perforate your stomach. +- You could die. diff --git a/vtt_to_srt/test_base.py b/vtt_to_srt/test_base.py new file mode 100644 index 0000000..279dd1d --- /dev/null +++ b/vtt_to_srt/test_base.py @@ -0,0 +1,32 @@ +import os +import pytest + + +def _clean(): + """Remove all files with .srt extension without valid_output in name recursively""" + for root, _, files in os.walk(os.path.dirname(__file__)): + for file in files: + if file.endswith(".srt") and "valid_output" not in file: + os.remove(os.path.join(root, file)) + + +@pytest.fixture(autouse=True, scope="module") +def clean_files(): + """Clean files""" + _clean() + yield + _clean() + + +def concat_path(pathname): + """Concat path to file for unix and windows""" + return os.path.join(os.path.dirname(__file__), pathname) + + +def equals_files(file_a, file_b, encoding): + """Compare two text files independently of line endings""" + with open(concat_path(file_a), "r", encoding=encoding) as file_a: + with open(concat_path(file_b), "r", encoding=encoding) as file_b: + a = file_a.read() + b = file_b.read() + return a == b diff --git a/vtt_to_srt/test_convert_directory.py b/vtt_to_srt/test_convert_directory.py new file mode 100644 index 0000000..f5545da --- /dev/null +++ b/vtt_to_srt/test_convert_directory.py @@ -0,0 +1,27 @@ +import os +import pytest + +from vtt_to_srt.test_base import concat_path, equals_files, clean_files +from vtt_to_srt.vtt_to_srt import ConvertDirectories + + +class TestConvertDirectories: + """Test ConvertFile class""" + + def test_convert_directory(self, clean_files): + """Test convert file""" + convert_file = ConvertDirectories( + concat_path("."), False, "utf-8") + convert_file.convert() + + assert equals_files("input_alternative_utf8.srt", + "valid_output_utf8.srt", "utf-8") + + def test_convert_directory_recursive(self, clean_files): + """Test convert file""" + convert_file = ConvertDirectories( + concat_path("."), True, "utf-8") + convert_file.convert() + + assert equals_files("input_alternative_utf8.srt", + "valid_output_utf8.srt", "utf-8") diff --git a/vtt_to_srt/test_convert_file.py b/vtt_to_srt/test_convert_file.py new file mode 100644 index 0000000..d40e6f9 --- /dev/null +++ b/vtt_to_srt/test_convert_file.py @@ -0,0 +1,27 @@ +import os +import pytest + +from vtt_to_srt.test_base import concat_path, equals_files, clean_files +from vtt_to_srt.vtt_to_srt import ConvertFile + + +class TestConvertFile: + """Test ConvertFile class""" + + def test_convert_file(self, clean_files): + """Test convert file""" + convert_file = ConvertFile( + concat_path("input_utf8.vtt"), "utf-8") + convert_file.convert() + + assert equals_files("input_utf8.srt", + "valid_output_utf8.srt", "utf-8") + + def test_convert_file_not_utf8(self, clean_files): + """Test convert file with not utf-8 encoding""" + convert_file = ConvertFile( + concat_path("input_iso-8859-2.vtt"), "ISO-8859-2") + convert_file.convert() + + assert equals_files("input_iso-8859-2.srt", + "valid_output_iso-8859-2.srt", "ISO-8859-2") diff --git a/vtt_to_srt/test_vtt_to_str.py b/vtt_to_srt/test_vtt_to_str.py index f1f141e..4db2f54 100644 --- a/vtt_to_srt/test_vtt_to_str.py +++ b/vtt_to_srt/test_vtt_to_str.py @@ -1,33 +1,50 @@ -import vtt_to_srt -import pytest -import os +import pytest + +from vtt_to_srt.vtt_to_srt import VttToStr -from vtt_to_srt.vtt_to_srt import * class TestVttToStr: def test_convert_header(self): - assert repr(convert_header("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW") + assert repr(VttToStr().convert_header( + "WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW") def test_convert_timestamp(self): - assert repr(convert_timestamp("00:03:08.500 --> 00:03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n") - assert repr(convert_timestamp("03:08.500 --> 03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n") - assert repr(convert_timestamp("08.500 --> 15.300\n")) == repr("00:00:08,500 --> 00:00:15,300\n") + vtt_to_str = VttToStr() + assert repr(vtt_to_str.convert_timestamp("00:03:08.500 --> 00:03:15.300\n") + ) == repr("00:03:08,500 --> 00:03:15,300\n") + assert repr(vtt_to_str.convert_timestamp("03:08.500 --> 03:15.300\n") + ) == repr("00:03:08,500 --> 00:03:15,300\n") + assert repr(vtt_to_str.convert_timestamp("08.500 --> 15.300\n") + ) == repr("00:00:08,500 --> 00:00:15,300\n") def test_not_add_sequence_before(self): - assert repr(add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep) - assert repr(add_sequence_numbers("")) == repr(""+os.linesep) - assert repr(add_sequence_numbers("告訴你,今晚我想帶你出去。")) == repr("告訴你,今晚我想帶你出去。"+os.linesep) - assert repr(add_sequence_numbers("Hi --> MAX")) == repr("Hi --> MAX"+os.linesep) - + vtt_to_str = VttToStr() + assert repr(vtt_to_str.add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr( + "What you got, a billion could've never bought (oooh)\n") + assert repr(vtt_to_str.add_sequence_numbers("") + ) == repr("\n") + assert repr(vtt_to_str.add_sequence_numbers("告訴你,今晚我想帶你出去。")) == repr( + "告訴你,今晚我想帶你出去。\n") + assert repr(vtt_to_str.add_sequence_numbers("Hi --> MAX") + ) == repr("Hi --> MAX\n") + def test_add_sequence_before_timestamp(self): - assert repr(add_sequence_numbers("00:03:08,500 --> 00:03:15,300")) == repr("1"+os.linesep+"00:03:08,500 --> 00:03:15,300"+os.linesep) + vtt_to_str = VttToStr() + assert repr(vtt_to_str.add_sequence_numbers("00:03:08,500 --> 00:03:15,300") + ) == repr("1\n00:03:08,500 --> 00:03:15,300\n") def test_convert_empty_return_newline(self): - assert repr(convert_content("")) == repr(os.linesep) - + vtt_to_str = VttToStr() + assert repr(vtt_to_str.convert_content("")) == repr("\n") + def test_convert_header_language(self): - assert repr(convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW"+os.linesep) + vtt_to_str = VttToStr() + assert repr(vtt_to_str.convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW") + ) == repr("Language: zh-TW\n") def test_text(self): - assert repr(convert_content("告訴你,今晚我想帶你出去。")) == repr("告訴你,今晚我想帶你出去。"+os.linesep) - assert repr(convert_content("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep) \ No newline at end of file + vtt_to_str = VttToStr() + assert repr(vtt_to_str.convert_content("告訴你,今晚我想帶你出去。")) == repr( + "告訴你,今晚我想帶你出去。\n") + assert repr(vtt_to_str.convert_content("What you got, a billion could've never bought (oooh)")) == repr( + "What you got, a billion could've never bought (oooh)\n") diff --git a/vtt_to_srt/valid_output_iso-8859-2.srt b/vtt_to_srt/valid_output_iso-8859-2.srt new file mode 100644 index 0000000..ee92aa7 --- /dev/null +++ b/vtt_to_srt/valid_output_iso-8859-2.srt @@ -0,0 +1,10 @@ + +1 +00:00:01,000 --> 00:00:04,000 +- Trinken Sie niemals flüssigen Stickstoff. + +2 +00:00:05,000 --> 00:00:09,000 +- Es wird Ihren Magen perforieren. +- Du könntest sterben. + diff --git a/vtt_to_srt/valid_output_utf8.srt b/vtt_to_srt/valid_output_utf8.srt new file mode 100644 index 0000000..92a1e70 --- /dev/null +++ b/vtt_to_srt/valid_output_utf8.srt @@ -0,0 +1,10 @@ + +1 +00:00:01,000 --> 00:00:04,000 +- Never drink liquid nitrogen. + +2 +00:00:05,000 --> 00:00:09,000 +- It will perforate your stomach. +- You could die. + diff --git a/vtt_to_srt/vtt_to_srt.py b/vtt_to_srt/vtt_to_srt.py index 690ac81..aaf0ad1 100644 --- a/vtt_to_srt/vtt_to_srt.py +++ b/vtt_to_srt/vtt_to_srt.py @@ -5,204 +5,264 @@ import os import re -import sys +import argparse from string import Template from stat import S_ISDIR, ST_MODE, S_ISREG -def convert_header(contents): - """Convert of vtt header to srt format - - Keyword arguments: - contents - """ - replacement = re.sub(r"WEBVTT\n", "", contents) - replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement) - replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement) - return replacement - - -def padding_timestamp(contents): - """Add 00 to padding timestamp of to srt format - - Keyword arguments: - contents - """ - find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') - minute = r"((?:\d\d:){1}\d\d)" - second = r"((?:\d\d:){0}\d\d)" - padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})") - padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})") - replacement = re.sub(padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents) - return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement) - - -def convert_timestamp(contents): - """Convert timestamp of vtt file to srt format - - Keyword arguments: - contents - """ - find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') - all_timestamp = find_vtt.substitute(a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})") - return padding_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents)) - - -def convert_content(contents): - """Convert content of vtt file to srt format - - Keyword arguments: - contents - """ - replacement = convert_timestamp(contents) - replacement = convert_header(replacement) - replacement = re.sub(r"", "", replacement) - replacement = re.sub(r"", "", replacement) - replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement) - replacement = re.sub(r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement) - replacement = re.sub(r"Style:\n##\n", "", replacement) - replacement = add_sequence_numbers(replacement) - return replacement - - -def timestamp_line(content): - """Check if line is a timestamp srt format - - Keyword arguments: - contents - """ - return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None - - -def add_sequence_numbers(contents): - """Adds sequence numbers to subtitle contents and returns new subtitle contents - - Keyword arguments: - contents - """ - output = '' - lines = contents.split(os.linesep) - - i = 1 - for line in lines: - if timestamp_line(line): - output += str(i) + os.linesep - i += 1 - output += line + os.linesep - return output - - -def file_create(str_name_file: str, str_data): - """Create a file with some data - - Keyword arguments: - str_name_file -- filename pat - str_data -- dat to write - """ - try: - with open(str_name_file, "w", encoding='utf-8') as file: - file.writelines(str(str_data)) - except IOError: - str_name_file = str_name_file.split(os.sep)[-1] - with open(str_name_file, "w") as file: - file.writelines(str(str_data)) - print("file created: " + str_name_file + "\n") - - -def read_text_file(str_name_file: str): - """Read a file text - - Keyword arguments: - str_name_file -- filename pat - """ - content: str = '' - with open(str_name_file, mode="r", encoding='utf-8') as file: - print("file being read: " + str_name_file + "\n") - content = file.read() - return content - - -def vtt_to_srt(str_name_file: str): - """Convert vtt file to a srt file - - Keyword arguments: - str_name_file -- filename path - """ - file_contents: str = read_text_file(str_name_file) - str_data: str = "" - str_data = str_data + convert_content(file_contents) - str_name_file: str = str_name_file.replace(".vtt", ".srt") - print(str_name_file) - file_create(str_name_file, str_data) - - -def walk_tree(top_most_path, callback): - """Recursively descend the directory tree rooted at top_most_path, - calling the callback function for each regular file - - Keyword arguments: - top_most_path -- parent directory - callback -- function to call - """ - for file in os.listdir(top_most_path): - pathname = os.path.join(top_most_path, file) - mode = os.stat(pathname)[ST_MODE] - if S_ISDIR(mode): - # It's a directory, recurse into it - walk_tree(pathname, callback) - elif S_ISREG(mode): - # It's a file, call the callback function - callback(pathname) +class VttToStr: + """Convert vtt to srt""" + + def __init__(self) -> None: + pass + + def convert_header(self, contents): + """Convert of vtt header to srt format + + Keyword arguments: + contents + """ + replacement = re.sub(r"WEBVTT\n", "", contents) + replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement) + replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement) + return replacement + + def add_padding_to_timestamp(self, contents): + """Add 00 to padding timestamp of to srt format + + Keyword arguments: + contents + """ + find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') + minute = r"((?:\d\d:){1}\d\d)" + second = r"((?:\d\d:){0}\d\d)" + padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})") + padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})") + replacement = re.sub( + padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents) + return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement) + + def convert_timestamp(self, contents): + """Convert timestamp of vtt file to srt format + + Keyword arguments: + contents + """ + find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n') + all_timestamp = find_vtt.substitute( + a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})") + return self.add_padding_to_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents)) + + def convert_content(self, contents): + """Convert content of vtt file to srt format + + Keyword arguments: + contents + """ + replacement = self.convert_timestamp(contents) + replacement = self.convert_header(replacement) + replacement = re.sub(r"", "", replacement) + replacement = re.sub(r"", "", replacement) + replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement) + replacement = re.sub( + r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement) + replacement = re.sub(r"Style:\n##\n", "", replacement) + replacement = self.add_sequence_numbers(replacement) + + return replacement + + def has_timestamp(self, content): + """Check if line is a timestamp srt format + + Keyword arguments: + contents + """ + return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None + + def add_sequence_numbers(self, contents): + """Adds sequence numbers to subtitle contents and returns new subtitle contents + + Keyword arguments: + contents + """ + output = '' + lines = contents.split('\n') + i = 1 + for line in lines: + if self.has_timestamp(line): + output += str(i) + '\n' + i += 1 + output += line + '\n' + return output + + def write_file(self, filename: str, data, encoding_format: str = "utf-8"): + """Create a file with some data + + Keyword arguments: + filename -- filename pat + data -- data to write + encoding_format -- encoding format + """ + try: + with open(filename, "w", encoding=encoding_format) as file: + file.writelines(str(data)) + except IOError: + filename = filename.split(os.sep)[-1] + with open(filename, "w", encoding=encoding_format) as file: + file.writelines(str(data)) + print(f"file created {filename}\n") + + def read_file(self, filename: str, encoding_format: str = "utf-8"): + """Read a file text + + Keyword arguments: + filename -- filename path + encoding_format -- encoding format + """ + content: str = '' + with open(filename, mode="r", encoding=encoding_format) as file: + print(f"file being read: {filename}\n") + content = file.read() + + return content + + def process(self, filename: str, encoding_format: str = "utf-8"): + """Convert vtt file to a srt file + + Keyword arguments: + str_name_file -- filename path + encoding_format -- encoding format + """ + file_contents: str = self.read_file(filename, encoding_format) + str_data: str = "" + str_data = str_data + self.convert_content(file_contents) + filename = filename.replace(".vtt", ".srt") + self.write_file(filename, str_data, encoding_format) + + +class ConvertFile: + """Convert vtt file to srt file""" + + def __init__(self, pathname: str, encoding_format: str): + """Constructor + + Keyword arguments: + pathname -- path to file or directory + encoding_format -- encoding format + """ + self.pathname = pathname + self.encoding_format = encoding_format + self.vtt_to_str = VttToStr() + + def convert(self): + """Convert vtt file to srt file""" + if ".vtt" in self.pathname: + self.vtt_to_str.process(self.pathname, self.encoding_format) + + +class ConvertDirectories: + """Convert vtt files to srt files""" + + def __init__(self, pathname: str, enable_recursive: bool, encoding_format: str): + """Constructor + + Keyword arguments: + pathname -- path to file or directory + enable_recursive -- enable recursive + encoding_format -- encoding format + """ + self.pathname = pathname + self.enable_recursive = enable_recursive + self.encoding_format = encoding_format + self.vtt_to_str = VttToStr() + + def _walk_dir(self, top_most_path: str, callback): + """Walk a directory + + Keyword arguments: + top_most_path -- parent directory + callback -- function to call + """ + for file in os.listdir(top_most_path): + pathname = os.path.join(top_most_path, file) + if not os.path.isdir(pathname): + # It"s a file, call the callback function + callback(pathname) + + def _walk_tree(self, top_most_path, callback): + """Recursively descend the directory tree rooted at top_most_path, + calling the callback function for each regular file + + Keyword arguments: + top_most_path -- parent directory + callback -- function to call + """ + for file in os.listdir(top_most_path): + pathname = os.path.join(top_most_path, file) + mode = os.stat(pathname)[ST_MODE] + if S_ISDIR(mode): + # It's a directory, recurse into it + self._walk_tree(pathname, callback) + elif S_ISREG(mode): + # It's a file, call the callback function + callback(pathname) + else: + # Unknown file type, print a message + print(f"Skipping {pathname}") + + def convert_vtt_to_str(self, file: str): + """Convert vtt file to string + + Keyword arguments: + f -- file to convert + encoding_format -- encoding format + """ + if ".vtt" in file: + try: + self.vtt_to_str.process(file, self.encoding_format) + except UnicodeDecodeError: + print(f"UnicodeDecodeError: {file}") + + def _vtt_to_srt_batch(self, directory: str): + """Walk down directory searching for vtt files + + Keyword arguments: + directory -- path to search + enable_recursive_search -- enable recursive + """ + top_most_path = directory + if self.enable_recursive: + self._walk_tree(top_most_path, self.convert_vtt_to_str) else: - # Unknown file type, print a message - print("Skipping %s" % pathname) - - -def walk_dir(top_most_path, callback): - """Walk a directory - - Keyword arguments: - top_most_path -- parent directory - callback -- function to call - """ - for file in os.listdir(top_most_path): - pathname = os.path.join(top_most_path, file) - if not os.path.isdir(pathname): - # It"s a file, call the callback function - callback(pathname) + self._walk_dir(top_most_path, self.convert_vtt_to_str) + def convert(self): + """Convert vtt files to srt files""" + self._vtt_to_srt_batch(self.pathname) -def convert_vtt_to_str(file): - """Convert vtt file to string - Keyword arguments: - f -- file to convert - """ - if ".vtt" in file: - vtt_to_srt(file) - - -def vtts_to_srt(directory, rec = False): - """Walk down directory seaching for vtt files - - Keyword arguments: - directory -- path to search - rec -- enable recursive - """ - top_most_path = directory - if rec: - walk_tree(top_most_path, convert_vtt_to_str) - else: - walk_dir(top_most_path, convert_vtt_to_str) - - -def print_usage(): +def _show_usage(): """Show a info message about the usage""" print("\nUsage:\tvtt_to_srt pathname [-r]\n") print("\tpathname\t- a file or directory with files to be converted") print("\t-r\t\t- walk path recursively\n") +def _parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser( + description='Convert vtt files to srt files') + parser.add_argument( + "pathname", help="a file or directory with files to be converted") + parser.add_argument("-r", "--recursive", + help="walk path recursively", action="store_true") + parser.add_argument("-e", "--encoding", + help="encoding format for input and output files") + + args = parser.parse_args() + return args + + def main(): """main @@ -210,15 +270,26 @@ def main(): pathname - a file or directory with files to be converted -r walk path recursively """ - if len(sys.argv) < 2 or sys.argv[1] == "--help" or not os.path.exists(sys.argv[1]): - print_usage() - sys.exit() - path = sys.argv[1] - rec = bool(len(sys.argv) > 2 and sys.argv[2] == "-r") - if os.path.isdir(path): - vtts_to_srt(path, rec) - else: - vtt_to_srt(path) + + args = _parse_args() + pathname = args.pathname + recursive = args.recursive + encoding = args.encoding + + if not encoding: + encoding = "utf-8" + + if os.path.isfile(pathname): + print(f"file being converted: {pathname}\n") + ConvertFile(pathname, encoding).convert() + + if os.path.isdir(pathname): + print(f"directory being converted: {pathname}\n") + ConvertDirectories(pathname, recursive, encoding).convert() + + if not os.path.isfile(pathname) and not os.path.isdir(pathname): + print(f"pathname is not a file or directory: {pathname}\n") + _show_usage() if __name__ == "__main__":