diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 0d20026..ec6b250 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.8", "3.9", "3.10"]
+ python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v3
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..ef739a3
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+ "editor.formatOnType": true,
+ "editor.formatOnSave": true,
+}
\ No newline at end of file
diff --git a/.whitesource b/.whitesource
deleted file mode 100644
index e0aaa3e..0000000
--- a/.whitesource
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "checkRunSettings": {
- "vulnerableCheckRunConclusionLevel": "failure"
- },
- "issueSettings": {
- "minSeverityLevel": "LOW"
- }
-}
\ No newline at end of file
diff --git a/README.md b/README.md
index 1222a33..5d3f5ec 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,18 @@ python -m pip install vtt_to_srt3
## Usage from terminal
```shell
-python -m vtt_to_srt pathname [-r]
+usage: vtt_to_srt.py [-h] [-r] [-e ENCODING] pathname
-pathname - a file or directory with files to be converted
+Convert vtt files to srt files
--r - walk path recursively
+positional arguments:
+ pathname a file or directory with files to be converted
+
+options:
+ -h, --help show this help message and exit
+ -r, --recursive walk path recursively
+ -e ENCODING, --encoding ENCODING
+ encoding format for input and output files
```
## Usage as a lib
@@ -30,8 +37,8 @@ Convert vtt file
from vtt_to_srt.vtt_to_srt import vtt_to_srt
path = '/path/to/file.vtt'
vtt_to_srt(path)
-```
-
+```
+
Recursively convert all vtt files in directory
```shell
from vtt_to_srt.vtt_to_srt import vtt_to_srt
diff --git a/setup.py b/setup.py
index 1183bc0..f7ce397 100644
--- a/setup.py
+++ b/setup.py
@@ -1,30 +1,27 @@
import setuptools
-with open("README.md", "r") as fh:
+with open(file="README.md", mode="r", encoding="utf-8") as fh:
long_description = fh.read()
-setuptools.setup(
- name='vtt_to_srt3',
- version='0.1.9.1',
- author="Jeison Cardoso",
- author_email="j@jsonzilla.com",
- description="vtt to srt subtitles converter package",
- long_description=long_description,
- long_description_content_type="text/markdown",
- url="https://github.com/jsonzilla/vtt-to-srt.py",
- packages=setuptools.find_packages(),
- classifiers=[
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Operating System :: OS Independent",
- ],
- entry_points={
- "console_scripts": [
- "vtt_to_srt=vtt_to_srt.vtt_to_srt:main",
- ]
- }
-
- )
+setuptools.setup(name='vtt_to_srt3',
+ version='0.1.9.2',
+ author="Jeison Cardoso",
+ author_email="j@jsonzilla.com",
+ description="vtt to srt subtitles converter package",
+ long_description=long_description,
+ long_description_content_type="text/markdown",
+ url="https://github.com/jsonzilla/vtt-to-srt.py",
+ packages=setuptools.find_packages(
+ exclude=["test_*.py", "valid*.*", "input*.*"]),
+ classifiers=["Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Operating System :: OS Independent"],
+ entry_points={
+ "console_scripts":
+ ["vtt_to_srt=vtt_to_srt.vtt_to_srt:main"]
+ },
+ )
diff --git a/vtt_to_srt/input_alternative_iso-8859-2.vtt b/vtt_to_srt/input_alternative_iso-8859-2.vtt
new file mode 100644
index 0000000..a198b0e
--- /dev/null
+++ b/vtt_to_srt/input_alternative_iso-8859-2.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+00:05.000 --> 00:09.000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
diff --git a/vtt_to_srt/input_alternative_utf8.vtt b/vtt_to_srt/input_alternative_utf8.vtt
new file mode 100644
index 0000000..9c86dfd
--- /dev/null
+++ b/vtt_to_srt/input_alternative_utf8.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Never drink liquid nitrogen.
+
+00:05.000 --> 00:09.000
+- It will perforate your stomach.
+- You could die.
diff --git a/vtt_to_srt/input_iso-8859-2.vtt b/vtt_to_srt/input_iso-8859-2.vtt
new file mode 100644
index 0000000..a198b0e
--- /dev/null
+++ b/vtt_to_srt/input_iso-8859-2.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+00:05.000 --> 00:09.000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
diff --git a/vtt_to_srt/input_utf8.vtt b/vtt_to_srt/input_utf8.vtt
new file mode 100644
index 0000000..9c86dfd
--- /dev/null
+++ b/vtt_to_srt/input_utf8.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Never drink liquid nitrogen.
+
+00:05.000 --> 00:09.000
+- It will perforate your stomach.
+- You could die.
diff --git a/vtt_to_srt/test_base.py b/vtt_to_srt/test_base.py
new file mode 100644
index 0000000..279dd1d
--- /dev/null
+++ b/vtt_to_srt/test_base.py
@@ -0,0 +1,32 @@
+import os
+import pytest
+
+
+def _clean():
+ """Remove all files with .srt extension without valid_output in name recursively"""
+ for root, _, files in os.walk(os.path.dirname(__file__)):
+ for file in files:
+ if file.endswith(".srt") and "valid_output" not in file:
+ os.remove(os.path.join(root, file))
+
+
+@pytest.fixture(autouse=True, scope="module")
+def clean_files():
+ """Clean files"""
+ _clean()
+ yield
+ _clean()
+
+
+def concat_path(pathname):
+ """Concat path to file for unix and windows"""
+ return os.path.join(os.path.dirname(__file__), pathname)
+
+
+def equals_files(file_a, file_b, encoding):
+ """Compare two text files independently of line endings"""
+ with open(concat_path(file_a), "r", encoding=encoding) as file_a:
+ with open(concat_path(file_b), "r", encoding=encoding) as file_b:
+ a = file_a.read()
+ b = file_b.read()
+ return a == b
diff --git a/vtt_to_srt/test_convert_directory.py b/vtt_to_srt/test_convert_directory.py
new file mode 100644
index 0000000..f5545da
--- /dev/null
+++ b/vtt_to_srt/test_convert_directory.py
@@ -0,0 +1,27 @@
+import os
+import pytest
+
+from vtt_to_srt.test_base import concat_path, equals_files, clean_files
+from vtt_to_srt.vtt_to_srt import ConvertDirectories
+
+
+class TestConvertDirectories:
+ """Test ConvertDirectories class"""
+
+ def test_convert_directory(self, clean_files):
+ """Test converting a directory without recursion"""
+ convert_file = ConvertDirectories(
+ concat_path("."), False, "utf-8")
+ convert_file.convert()
+
+ assert equals_files("input_alternative_utf8.srt",
+ "valid_output_utf8.srt", "utf-8")
+
+ def test_convert_directory_recursive(self, clean_files):
+ """Test converting a directory recursively"""
+ convert_file = ConvertDirectories(
+ concat_path("."), True, "utf-8")
+ convert_file.convert()
+
+ assert equals_files("input_alternative_utf8.srt",
+ "valid_output_utf8.srt", "utf-8")
diff --git a/vtt_to_srt/test_convert_file.py b/vtt_to_srt/test_convert_file.py
new file mode 100644
index 0000000..d40e6f9
--- /dev/null
+++ b/vtt_to_srt/test_convert_file.py
@@ -0,0 +1,27 @@
+import os
+import pytest
+
+from vtt_to_srt.test_base import concat_path, equals_files, clean_files
+from vtt_to_srt.vtt_to_srt import ConvertFile
+
+
+class TestConvertFile:
+ """Test ConvertFile class"""
+
+ def test_convert_file(self, clean_files):
+ """Test convert file"""
+ convert_file = ConvertFile(
+ concat_path("input_utf8.vtt"), "utf-8")
+ convert_file.convert()
+
+ assert equals_files("input_utf8.srt",
+ "valid_output_utf8.srt", "utf-8")
+
+ def test_convert_file_not_utf8(self, clean_files):
+ """Test convert file with not utf-8 encoding"""
+ convert_file = ConvertFile(
+ concat_path("input_iso-8859-2.vtt"), "ISO-8859-2")
+ convert_file.convert()
+
+ assert equals_files("input_iso-8859-2.srt",
+ "valid_output_iso-8859-2.srt", "ISO-8859-2")
diff --git a/vtt_to_srt/test_vtt_to_str.py b/vtt_to_srt/test_vtt_to_str.py
index f1f141e..4db2f54 100644
--- a/vtt_to_srt/test_vtt_to_str.py
+++ b/vtt_to_srt/test_vtt_to_str.py
@@ -1,33 +1,50 @@
-import vtt_to_srt
-import pytest
-import os
+import pytest
+
+from vtt_to_srt.vtt_to_srt import VttToStr
-from vtt_to_srt.vtt_to_srt import *
class TestVttToStr:
def test_convert_header(self):
- assert repr(convert_header("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW")
+ assert repr(VttToStr().convert_header(
+ "WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW")
def test_convert_timestamp(self):
- assert repr(convert_timestamp("00:03:08.500 --> 00:03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n")
- assert repr(convert_timestamp("03:08.500 --> 03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n")
- assert repr(convert_timestamp("08.500 --> 15.300\n")) == repr("00:00:08,500 --> 00:00:15,300\n")
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.convert_timestamp("00:03:08.500 --> 00:03:15.300\n")
+ ) == repr("00:03:08,500 --> 00:03:15,300\n")
+ assert repr(vtt_to_str.convert_timestamp("03:08.500 --> 03:15.300\n")
+ ) == repr("00:03:08,500 --> 00:03:15,300\n")
+ assert repr(vtt_to_str.convert_timestamp("08.500 --> 15.300\n")
+ ) == repr("00:00:08,500 --> 00:00:15,300\n")
def test_not_add_sequence_before(self):
- assert repr(add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep)
- assert repr(add_sequence_numbers("")) == repr(""+os.linesep)
- assert repr(add_sequence_numbers("å‘Šè¨´ä½ ï¼Œä»Šæ™šæˆ‘æƒ³å¸¶ä½ å‡ºåŽ»ã€‚")) == repr("å‘Šè¨´ä½ ï¼Œä»Šæ™šæˆ‘æƒ³å¸¶ä½ å‡ºåŽ»ã€‚"+os.linesep)
- assert repr(add_sequence_numbers("Hi --> MAX")) == repr("Hi --> MAX"+os.linesep)
-
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr(
+ "What you got, a billion could've never bought (oooh)\n")
+ assert repr(vtt_to_str.add_sequence_numbers("")
+ ) == repr("\n")
+ assert repr(vtt_to_str.add_sequence_numbers("告訴你,今晚我想帶你出去。")) == repr(
+ "告訴你,今晚我想帶你出去。\n")
+ assert repr(vtt_to_str.add_sequence_numbers("Hi --> MAX")
+ ) == repr("Hi --> MAX\n")
+
def test_add_sequence_before_timestamp(self):
- assert repr(add_sequence_numbers("00:03:08,500 --> 00:03:15,300")) == repr("1"+os.linesep+"00:03:08,500 --> 00:03:15,300"+os.linesep)
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.add_sequence_numbers("00:03:08,500 --> 00:03:15,300")
+ ) == repr("1\n00:03:08,500 --> 00:03:15,300\n")
def test_convert_empty_return_newline(self):
- assert repr(convert_content("")) == repr(os.linesep)
-
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.convert_content("")) == repr("\n")
+
def test_convert_header_language(self):
- assert repr(convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW"+os.linesep)
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW")
+ ) == repr("Language: zh-TW\n")
def test_text(self):
- assert repr(convert_content("å‘Šè¨´ä½ ï¼Œä»Šæ™šæˆ‘æƒ³å¸¶ä½ å‡ºåŽ»ã€‚")) == repr("å‘Šè¨´ä½ ï¼Œä»Šæ™šæˆ‘æƒ³å¸¶ä½ å‡ºåŽ»ã€‚"+os.linesep)
- assert repr(convert_content("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep)
\ No newline at end of file
+ vtt_to_str = VttToStr()
+ assert repr(vtt_to_str.convert_content("告訴你,今晚我想帶你出去。")) == repr(
+ "告訴你,今晚我想帶你出去。\n")
+ assert repr(vtt_to_str.convert_content("What you got, a billion could've never bought (oooh)")) == repr(
+ "What you got, a billion could've never bought (oooh)\n")
diff --git a/vtt_to_srt/valid_output_iso-8859-2.srt b/vtt_to_srt/valid_output_iso-8859-2.srt
new file mode 100644
index 0000000..ee92aa7
--- /dev/null
+++ b/vtt_to_srt/valid_output_iso-8859-2.srt
@@ -0,0 +1,10 @@
+
+1
+00:00:01,000 --> 00:00:04,000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+2
+00:00:05,000 --> 00:00:09,000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
+
diff --git a/vtt_to_srt/valid_output_utf8.srt b/vtt_to_srt/valid_output_utf8.srt
new file mode 100644
index 0000000..92a1e70
--- /dev/null
+++ b/vtt_to_srt/valid_output_utf8.srt
@@ -0,0 +1,10 @@
+
+1
+00:00:01,000 --> 00:00:04,000
+- Never drink liquid nitrogen.
+
+2
+00:00:05,000 --> 00:00:09,000
+- It will perforate your stomach.
+- You could die.
+
diff --git a/vtt_to_srt/vtt_to_srt.py b/vtt_to_srt/vtt_to_srt.py
index 690ac81..aaf0ad1 100644
--- a/vtt_to_srt/vtt_to_srt.py
+++ b/vtt_to_srt/vtt_to_srt.py
@@ -5,204 +5,264 @@
import os
import re
-import sys
+import argparse
from string import Template
from stat import S_ISDIR, ST_MODE, S_ISREG
-def convert_header(contents):
- """Convert of vtt header to srt format
-
- Keyword arguments:
- contents
- """
- replacement = re.sub(r"WEBVTT\n", "", contents)
- replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement)
- replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement)
- return replacement
-
-
-def padding_timestamp(contents):
- """Add 00 to padding timestamp of to srt format
-
- Keyword arguments:
- contents
- """
- find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
- minute = r"((?:\d\d:){1}\d\d)"
- second = r"((?:\d\d:){0}\d\d)"
- padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})")
- padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})")
- replacement = re.sub(padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents)
- return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement)
-
-
-def convert_timestamp(contents):
- """Convert timestamp of vtt file to srt format
-
- Keyword arguments:
- contents
- """
- find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
- all_timestamp = find_vtt.substitute(a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})")
- return padding_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents))
-
-
-def convert_content(contents):
- """Convert content of vtt file to srt format
-
- Keyword arguments:
- contents
- """
- replacement = convert_timestamp(contents)
- replacement = convert_header(replacement)
- replacement = re.sub(r"", "", replacement)
- replacement = re.sub(r"", "", replacement)
- replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement)
- replacement = re.sub(r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement)
- replacement = re.sub(r"Style:\n##\n", "", replacement)
- replacement = add_sequence_numbers(replacement)
- return replacement
-
-
-def timestamp_line(content):
- """Check if line is a timestamp srt format
-
- Keyword arguments:
- contents
- """
- return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None
-
-
-def add_sequence_numbers(contents):
- """Adds sequence numbers to subtitle contents and returns new subtitle contents
-
- Keyword arguments:
- contents
- """
- output = ''
- lines = contents.split(os.linesep)
-
- i = 1
- for line in lines:
- if timestamp_line(line):
- output += str(i) + os.linesep
- i += 1
- output += line + os.linesep
- return output
-
-
-def file_create(str_name_file: str, str_data):
- """Create a file with some data
-
- Keyword arguments:
- str_name_file -- filename pat
- str_data -- dat to write
- """
- try:
- with open(str_name_file, "w", encoding='utf-8') as file:
- file.writelines(str(str_data))
- except IOError:
- str_name_file = str_name_file.split(os.sep)[-1]
- with open(str_name_file, "w") as file:
- file.writelines(str(str_data))
- print("file created: " + str_name_file + "\n")
-
-
-def read_text_file(str_name_file: str):
- """Read a file text
-
- Keyword arguments:
- str_name_file -- filename pat
- """
- content: str = ''
- with open(str_name_file, mode="r", encoding='utf-8') as file:
- print("file being read: " + str_name_file + "\n")
- content = file.read()
- return content
-
-
-def vtt_to_srt(str_name_file: str):
- """Convert vtt file to a srt file
-
- Keyword arguments:
- str_name_file -- filename path
- """
- file_contents: str = read_text_file(str_name_file)
- str_data: str = ""
- str_data = str_data + convert_content(file_contents)
- str_name_file: str = str_name_file.replace(".vtt", ".srt")
- print(str_name_file)
- file_create(str_name_file, str_data)
-
-
-def walk_tree(top_most_path, callback):
- """Recursively descend the directory tree rooted at top_most_path,
- calling the callback function for each regular file
-
- Keyword arguments:
- top_most_path -- parent directory
- callback -- function to call
- """
- for file in os.listdir(top_most_path):
- pathname = os.path.join(top_most_path, file)
- mode = os.stat(pathname)[ST_MODE]
- if S_ISDIR(mode):
- # It's a directory, recurse into it
- walk_tree(pathname, callback)
- elif S_ISREG(mode):
- # It's a file, call the callback function
- callback(pathname)
+class VttToStr:
+ """Convert vtt to srt"""
+
+ def __init__(self) -> None:
+ pass
+
+ def convert_header(self, contents):
+ """Convert vtt header to srt format
+
+ Keyword arguments:
+ contents
+ """
+ replacement = re.sub(r"WEBVTT\n", "", contents)
+ replacement = re.sub(r"Kind:[ \-\w]+\n", "", replacement)
+ replacement = re.sub(r"Language:[ \-\w]+\n", "", replacement)
+ return replacement
+
+ def add_padding_to_timestamp(self, contents):
+ """Pad short timestamps with leading 00 fields to match srt format
+
+ Keyword arguments:
+ contents
+ """
+ find_srt = Template(r'$a,$b --> $a,$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
+ minute = r"((?:\d\d:){1}\d\d)"
+ second = r"((?:\d\d:){0}\d\d)"
+ padding_minute = find_srt.substitute(a=minute, b=r"(\d{0,3})")
+ padding_second = find_srt.substitute(a=second, b=r"(\d{0,3})")
+ replacement = re.sub(
+ padding_minute, r"00:\1,\2 --> 00:\3,\4\n", contents)
+ return re.sub(padding_second, r"00:00:\1,\2 --> 00:00:\3,\4\n", replacement)
+
+ def convert_timestamp(self, contents):
+ """Convert timestamp of vtt file to srt format
+
+ Keyword arguments:
+ contents
+ """
+ find_vtt = Template(r'$a.$b --> $a.$b(?:[ \-\w]+:[\w\%\d:,.]+)*\n')
+ all_timestamp = find_vtt.substitute(
+ a=r"((?:\d\d:){0,2}\d\d)", b=r"(\d{0,3})")
+ return self.add_padding_to_timestamp(re.sub(all_timestamp, r"\1,\2 --> \3,\4\n", contents))
+
+ def convert_content(self, contents):
+ """Convert content of vtt file to srt format
+
+ Keyword arguments:
+ contents
+ """
+ replacement = self.convert_timestamp(contents)
+ replacement = self.convert_header(replacement)
+ replacement = re.sub(r"", "", replacement)
+ replacement = re.sub(r"", "", replacement)
+ replacement = re.sub(r"<\d\d:\d\d:\d\d.\d\d\d>", "", replacement)
+ replacement = re.sub(
+ r"::[\-\w]+\([\-.\w\d]+\)[ ]*{[.,:;\(\) \-\w\d]+\n }\n", "", replacement)
+ replacement = re.sub(r"Style:\n##\n", "", replacement)
+ replacement = self.add_sequence_numbers(replacement)
+
+ return replacement
+
+ def has_timestamp(self, content):
+ """Check if line is a timestamp srt format
+
+ Keyword arguments:
+ content -- text to test against the srt timestamp pattern
+ """
+ return re.match(r"((\d\d:){2}\d\d),(\d{3}) --> ((\d\d:){2}\d\d),(\d{3})", content) is not None
+
+ def add_sequence_numbers(self, contents):
+ """Adds sequence numbers to subtitle contents and returns new subtitle contents
+
+ Keyword arguments:
+ contents
+ """
+ output = ''
+ lines = contents.split('\n')
+ i = 1
+ for line in lines:
+ if self.has_timestamp(line):
+ output += str(i) + '\n'
+ i += 1
+ output += line + '\n'
+ return output
+
+ def write_file(self, filename: str, data, encoding_format: str = "utf-8"):
+ """Create a file with some data
+
+ Keyword arguments:
+ filename -- filename path
+ data -- data to write
+ encoding_format -- encoding format
+ """
+ try:
+ with open(filename, "w", encoding=encoding_format) as file:
+ file.writelines(str(data))
+ except IOError:
+ filename = filename.split(os.sep)[-1]
+ with open(filename, "w", encoding=encoding_format) as file:
+ file.writelines(str(data))
+ print(f"file created {filename}\n")
+
+ def read_file(self, filename: str, encoding_format: str = "utf-8"):
+ """Read a file text
+
+ Keyword arguments:
+ filename -- filename path
+ encoding_format -- encoding format
+ """
+ content: str = ''
+ with open(filename, mode="r", encoding=encoding_format) as file:
+ print(f"file being read: {filename}\n")
+ content = file.read()
+
+ return content
+
+ def process(self, filename: str, encoding_format: str = "utf-8"):
+ """Convert vtt file to a srt file
+
+ Keyword arguments:
+ filename -- filename path
+ encoding_format -- encoding format
+ """
+ file_contents: str = self.read_file(filename, encoding_format)
+ str_data: str = ""
+ str_data = str_data + self.convert_content(file_contents)
+ filename = filename.replace(".vtt", ".srt")
+ self.write_file(filename, str_data, encoding_format)
+
+
+class ConvertFile:
+ """Convert vtt file to srt file"""
+
+ def __init__(self, pathname: str, encoding_format: str):
+ """Constructor
+
+ Keyword arguments:
+ pathname -- path to the vtt file
+ encoding_format -- encoding format
+ """
+ self.pathname = pathname
+ self.encoding_format = encoding_format
+ self.vtt_to_str = VttToStr()
+
+ def convert(self):
+ """Convert vtt file to srt file"""
+ if ".vtt" in self.pathname:
+ self.vtt_to_str.process(self.pathname, self.encoding_format)
+
+
+class ConvertDirectories:
+ """Convert vtt files to srt files"""
+
+ def __init__(self, pathname: str, enable_recursive: bool, encoding_format: str):
+ """Constructor
+
+ Keyword arguments:
+ pathname -- path to the directory to search
+ enable_recursive -- enable recursive
+ encoding_format -- encoding format
+ """
+ self.pathname = pathname
+ self.enable_recursive = enable_recursive
+ self.encoding_format = encoding_format
+ self.vtt_to_str = VttToStr()
+
+ def _walk_dir(self, top_most_path: str, callback):
+ """Walk a directory
+
+ Keyword arguments:
+ top_most_path -- parent directory
+ callback -- function to call
+ """
+ for file in os.listdir(top_most_path):
+ pathname = os.path.join(top_most_path, file)
+ if not os.path.isdir(pathname):
+ # It's a file, call the callback function
+ callback(pathname)
+
+ def _walk_tree(self, top_most_path, callback):
+ """Recursively descend the directory tree rooted at top_most_path,
+ calling the callback function for each regular file
+
+ Keyword arguments:
+ top_most_path -- parent directory
+ callback -- function to call
+ """
+ for file in os.listdir(top_most_path):
+ pathname = os.path.join(top_most_path, file)
+ mode = os.stat(pathname)[ST_MODE]
+ if S_ISDIR(mode):
+ # It's a directory, recurse into it
+ self._walk_tree(pathname, callback)
+ elif S_ISREG(mode):
+ # It's a file, call the callback function
+ callback(pathname)
+ else:
+ # Unknown file type, print a message
+ print(f"Skipping {pathname}")
+
+ def convert_vtt_to_str(self, file: str):
+ """Convert a vtt file to an srt file, reporting decode errors
+
+ Keyword arguments:
+ file -- path of the file to convert (skipped unless it contains ".vtt")
+ The encoding is taken from self.encoding_format
+ """
+ if ".vtt" in file:
+ try:
+ self.vtt_to_str.process(file, self.encoding_format)
+ except UnicodeDecodeError:
+ print(f"UnicodeDecodeError: {file}")
+
+ def _vtt_to_srt_batch(self, directory: str):
+ """Walk down directory searching for vtt files
+
+ Keyword arguments:
+ directory -- path to search
+ Recursion is controlled by self.enable_recursive
+ """
+ top_most_path = directory
+ if self.enable_recursive:
+ self._walk_tree(top_most_path, self.convert_vtt_to_str)
else:
- # Unknown file type, print a message
- print("Skipping %s" % pathname)
-
-
-def walk_dir(top_most_path, callback):
- """Walk a directory
-
- Keyword arguments:
- top_most_path -- parent directory
- callback -- function to call
- """
- for file in os.listdir(top_most_path):
- pathname = os.path.join(top_most_path, file)
- if not os.path.isdir(pathname):
- # It"s a file, call the callback function
- callback(pathname)
+ self._walk_dir(top_most_path, self.convert_vtt_to_str)
+ def convert(self):
+ """Convert vtt files to srt files"""
+ self._vtt_to_srt_batch(self.pathname)
-def convert_vtt_to_str(file):
- """Convert vtt file to string
- Keyword arguments:
- f -- file to convert
- """
- if ".vtt" in file:
- vtt_to_srt(file)
-
-
-def vtts_to_srt(directory, rec = False):
- """Walk down directory seaching for vtt files
-
- Keyword arguments:
- directory -- path to search
- rec -- enable recursive
- """
- top_most_path = directory
- if rec:
- walk_tree(top_most_path, convert_vtt_to_str)
- else:
- walk_dir(top_most_path, convert_vtt_to_str)
-
-
-def print_usage():
+def _show_usage():
"""Show a info message about the usage"""
print("\nUsage:\tvtt_to_srt pathname [-r]\n")
print("\tpathname\t- a file or directory with files to be converted")
print("\t-r\t\t- walk path recursively\n")
+def _parse_args():
+ """Parse command line arguments"""
+ parser = argparse.ArgumentParser(
+ description='Convert vtt files to srt files')
+ parser.add_argument(
+ "pathname", help="a file or directory with files to be converted")
+ parser.add_argument("-r", "--recursive",
+ help="walk path recursively", action="store_true")
+ parser.add_argument("-e", "--encoding",
+ help="encoding format for input and output files")
+
+ args = parser.parse_args()
+ return args
+
+
def main():
"""main
@@ -210,15 +270,26 @@ def main():
pathname - a file or directory with files to be converted
-r walk path recursively
"""
- if len(sys.argv) < 2 or sys.argv[1] == "--help" or not os.path.exists(sys.argv[1]):
- print_usage()
- sys.exit()
- path = sys.argv[1]
- rec = bool(len(sys.argv) > 2 and sys.argv[2] == "-r")
- if os.path.isdir(path):
- vtts_to_srt(path, rec)
- else:
- vtt_to_srt(path)
+
+ args = _parse_args()
+ pathname = args.pathname
+ recursive = args.recursive
+ encoding = args.encoding
+
+ if not encoding:
+ encoding = "utf-8"
+
+ if os.path.isfile(pathname):
+ print(f"file being converted: {pathname}\n")
+ ConvertFile(pathname, encoding).convert()
+
+ if os.path.isdir(pathname):
+ print(f"directory being converted: {pathname}\n")
+ ConvertDirectories(pathname, recursive, encoding).convert()
+
+ if not os.path.isfile(pathname) and not os.path.isdir(pathname):
+ print(f"pathname is not a file or directory: {pathname}\n")
+ _show_usage()
if __name__ == "__main__":