Change to 3.11 and try to convert to classes (#20)

* add explicit python 3.11 support
* add option to choose encoding for process files
* fixed subtitle counter bug (windows platform)
* better test coverage
jsonzilla · Nov 6, 2022 · caf9ad7 · caf9ad7
1 parent 29f71eb
commit caf9ad7
Show file tree

Hide file tree

Showing 16 changed files with 477 additions and 251 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+  "editor.formatOnType": true,
+  "editor.formatOnSave": true,
+}
diff --git a/.whitesource b/.whitesource
diff --git a/README.md b/README.md
@@ -16,11 +16,18 @@ python -m pip install vtt_to_srt3
 ## Usage from terminal
 
 ```shell
-python -m vtt_to_srt pathname [-r]
+usage: vtt_to_srt.py [-h] [-r] [-e ENCODING] pathname
 
-pathname - a file or directory with files to be converted 
+Convert vtt files to srt files
 
--r       - walk path recursively                          
+positional arguments:
+  pathname              a file or directory with files to be converted
+
+options:
+  -h, --help            show this help message and exit
+  -r, --recursive       walk path recursively
+  -e ENCODING, --encoding ENCODING
+                        encoding format for input and output files
 ```
 
 ## Usage as a lib
@@ -30,8 +37,8 @@ Convert vtt file
 from vtt_to_srt.vtt_to_srt import vtt_to_srt
 path = '/path/to/file.vtt'
 vtt_to_srt(path)
-```		
-		
+```
+
 Recursively convert all vtt files in directory
 ```shell
 from vtt_to_srt.vtt_to_srt import vtt_to_srt

diff --git a/setup.py b/setup.py
@@ -1,30 +1,27 @@
 import setuptools
 
-with open("README.md", "r") as fh:
+with open(file="README.md", mode="r", encoding="utf-8") as fh:
 
     long_description = fh.read()
 
-setuptools.setup(
-     name='vtt_to_srt3',
-     version='0.1.9.1',
-     author="Jeison Cardoso",
-     author_email="[email protected]",
-     description="vtt to srt subtitles converter package",
-     long_description=long_description,
-     long_description_content_type="text/markdown",
-     url="https://github.com/jsonzilla/vtt-to-srt.py",
-     packages=setuptools.find_packages(),
-     classifiers=[
-         "Programming Language :: Python :: 3.7",
-         "Programming Language :: Python :: 3.8",
-         "Programming Language :: Python :: 3.9",
-         "Programming Language :: Python :: 3.10",
-         "Operating System :: OS Independent",
-     ],
-     entry_points={
-        "console_scripts": [
-            "vtt_to_srt=vtt_to_srt.vtt_to_srt:main",
-        ]
-    }
-
- )
+setuptools.setup(name='vtt_to_srt3',
+                 version='0.1.9.2',
+                 author="Jeison Cardoso",
+                 author_email="[email protected]",
+                 description="vtt to srt subtitles converter package",
+                 long_description=long_description,
+                 long_description_content_type="text/markdown",
+                 url="https://github.com/jsonzilla/vtt-to-srt.py",
+                 packages=setuptools.find_packages(
+                     exclude=["test_*.py", "valid*.*", "input*.*"]),
+                 classifiers=["Programming Language :: Python :: 3.7",
+                              "Programming Language :: Python :: 3.8",
+                              "Programming Language :: Python :: 3.9",
+                              "Programming Language :: Python :: 3.10",
+                              "Programming Language :: Python :: 3.11",
+                              "Operating System :: OS Independent"],
+                 entry_points={
+                     "console_scripts":
+                     ["vtt_to_srt=vtt_to_srt.vtt_to_srt:main"]
+                 },
+                 )
diff --git a/vtt_to_srt/input_alternative_iso-8859-2.vtt b/vtt_to_srt/input_alternative_iso-8859-2.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+00:05.000 --> 00:09.000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
diff --git a/vtt_to_srt/input_alternative_utf8.vtt b/vtt_to_srt/input_alternative_utf8.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Never drink liquid nitrogen.
+
+00:05.000 --> 00:09.000
+- It will perforate your stomach.
+- You could die.
diff --git a/vtt_to_srt/input_iso-8859-2.vtt b/vtt_to_srt/input_iso-8859-2.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+00:05.000 --> 00:09.000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
diff --git a/vtt_to_srt/input_utf8.vtt b/vtt_to_srt/input_utf8.vtt
@@ -0,0 +1,8 @@
+WEBVTT
+
+00:01.000 --> 00:04.000
+- Never drink liquid nitrogen.
+
+00:05.000 --> 00:09.000
+- It will perforate your stomach.
+- You could die.
diff --git a/vtt_to_srt/test_base.py b/vtt_to_srt/test_base.py
@@ -0,0 +1,32 @@
+import os
+import pytest
+
+
+def _clean():
+    """Remove all files with .srt extension without valid_output in name recursively"""
+    for root, _, files in os.walk(os.path.dirname(__file__)):
+        for file in files:
+            if file.endswith(".srt") and "valid_output" not in file:
+                os.remove(os.path.join(root, file))
+
+
+@pytest.fixture(autouse=True, scope="module")
+def clean_files():
+    """Clean files"""
+    _clean()
+    yield
+    _clean()
+
+
+def concat_path(pathname):
+    """Concat path to file for unix and windows"""
+    return os.path.join(os.path.dirname(__file__), pathname)
+
+
+def equals_files(file_a, file_b, encoding):
+    """Compare two text files independently of line endings"""
+    with open(concat_path(file_a), "r", encoding=encoding) as file_a:
+        with open(concat_path(file_b), "r", encoding=encoding) as file_b:
+            a = file_a.read()
+            b = file_b.read()
+            return a == b
diff --git a/vtt_to_srt/test_convert_directory.py b/vtt_to_srt/test_convert_directory.py
@@ -0,0 +1,27 @@
+import os
+import pytest
+
+from vtt_to_srt.test_base import concat_path, equals_files, clean_files
+from vtt_to_srt.vtt_to_srt import ConvertDirectories
+
+
+class TestConvertDirectories:
+    """Test ConvertFile class"""
+
+    def test_convert_directory(self, clean_files):
+        """Test convert file"""
+        convert_file = ConvertDirectories(
+            concat_path("."), False, "utf-8")
+        convert_file.convert()
+
+        assert equals_files("input_alternative_utf8.srt",
+                            "valid_output_utf8.srt", "utf-8")
+
+    def test_convert_directory_recursive(self, clean_files):
+        """Test convert file"""
+        convert_file = ConvertDirectories(
+            concat_path("."), True, "utf-8")
+        convert_file.convert()
+
+        assert equals_files("input_alternative_utf8.srt",
+                            "valid_output_utf8.srt", "utf-8")
diff --git a/vtt_to_srt/test_convert_file.py b/vtt_to_srt/test_convert_file.py
@@ -0,0 +1,27 @@
+import os
+import pytest
+
+from vtt_to_srt.test_base import concat_path, equals_files, clean_files
+from vtt_to_srt.vtt_to_srt import ConvertFile
+
+
+class TestConvertFile:
+    """Test ConvertFile class"""
+
+    def test_convert_file(self, clean_files):
+        """Test convert file"""
+        convert_file = ConvertFile(
+            concat_path("input_utf8.vtt"), "utf-8")
+        convert_file.convert()
+
+        assert equals_files("input_utf8.srt",
+                            "valid_output_utf8.srt", "utf-8")
+
+    def test_convert_file_not_utf8(self, clean_files):
+        """Test convert file with not utf-8 encoding"""
+        convert_file = ConvertFile(
+            concat_path("input_iso-8859-2.vtt"), "ISO-8859-2")
+        convert_file.convert()
+
+        assert equals_files("input_iso-8859-2.srt",
+                            "valid_output_iso-8859-2.srt", "ISO-8859-2")
diff --git a/vtt_to_srt/test_vtt_to_str.py b/vtt_to_srt/test_vtt_to_str.py
@@ -1,33 +1,50 @@
-import vtt_to_srt
-import pytest
-import os
+import pytest
+
+from vtt_to_srt.vtt_to_srt import VttToStr
 
-from vtt_to_srt.vtt_to_srt import *
 
 class TestVttToStr:
     def test_convert_header(self):
-        assert repr(convert_header("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW")
+        assert repr(VttToStr().convert_header(
+            "WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW")
 
     def test_convert_timestamp(self):
-        assert repr(convert_timestamp("00:03:08.500 --> 00:03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n")
-        assert repr(convert_timestamp("03:08.500 --> 03:15.300\n")) == repr("00:03:08,500 --> 00:03:15,300\n")
-        assert repr(convert_timestamp("08.500 --> 15.300\n")) == repr("00:00:08,500 --> 00:00:15,300\n")        
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.convert_timestamp("00:03:08.500 --> 00:03:15.300\n")
+                    ) == repr("00:03:08,500 --> 00:03:15,300\n")
+        assert repr(vtt_to_str.convert_timestamp("03:08.500 --> 03:15.300\n")
+                    ) == repr("00:03:08,500 --> 00:03:15,300\n")
+        assert repr(vtt_to_str.convert_timestamp("08.500 --> 15.300\n")
+                    ) == repr("00:00:08,500 --> 00:00:15,300\n")
 
     def test_not_add_sequence_before(self):
-        assert repr(add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep)
-        assert repr(add_sequence_numbers("")) == repr(""+os.linesep)
-        assert repr(add_sequence_numbers("告訴你，今晚我想帶你出去。")) == repr("告訴你，今晚我想帶你出去。"+os.linesep)
-        assert repr(add_sequence_numbers("Hi --> MAX")) == repr("Hi --> MAX"+os.linesep)
-
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.add_sequence_numbers("What you got, a billion could've never bought (oooh)")) == repr(
+            "What you got, a billion could've never bought (oooh)\n")
+        assert repr(vtt_to_str.add_sequence_numbers("")
+                    ) == repr("\n")
+        assert repr(vtt_to_str.add_sequence_numbers("告訴你，今晚我想帶你出去。")) == repr(
+            "告訴你，今晚我想帶你出去。\n")
+        assert repr(vtt_to_str.add_sequence_numbers("Hi --> MAX")
+                    ) == repr("Hi --> MAX\n")
+
     def test_add_sequence_before_timestamp(self):
-        assert repr(add_sequence_numbers("00:03:08,500 --> 00:03:15,300")) == repr("1"+os.linesep+"00:03:08,500 --> 00:03:15,300"+os.linesep)
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.add_sequence_numbers("00:03:08,500 --> 00:03:15,300")
+                    ) == repr("1\n00:03:08,500 --> 00:03:15,300\n")
 
     def test_convert_empty_return_newline(self):
-        assert repr(convert_content("")) == repr(os.linesep)
-
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.convert_content("")) == repr("\n")
+
     def test_convert_header_language(self):
-        assert repr(convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW")) == repr("Language: zh-TW"+os.linesep)
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.convert_content("WEBVTT\nKind: captions\nLanguage: zh-TW")
+                    ) == repr("Language: zh-TW\n")
 
     def test_text(self):
-        assert repr(convert_content("告訴你，今晚我想帶你出去。")) == repr("告訴你，今晚我想帶你出去。"+os.linesep)
-        assert repr(convert_content("What you got, a billion could've never bought (oooh)")) == repr("What you got, a billion could've never bought (oooh)"+os.linesep)
+        vtt_to_str = VttToStr()
+        assert repr(vtt_to_str.convert_content("告訴你，今晚我想帶你出去。")) == repr(
+            "告訴你，今晚我想帶你出去。\n")
+        assert repr(vtt_to_str.convert_content("What you got, a billion could've never bought (oooh)")) == repr(
+            "What you got, a billion could've never bought (oooh)\n")
diff --git a/vtt_to_srt/valid_output_iso-8859-2.srt b/vtt_to_srt/valid_output_iso-8859-2.srt
@@ -0,0 +1,10 @@
+
+1
+00:00:01,000 --> 00:00:04,000
+- Trinken Sie niemals flüssigen Stickstoff.
+
+2
+00:00:05,000 --> 00:00:09,000
+- Es wird Ihren Magen perforieren.
+- Du könntest sterben.
+
diff --git a/vtt_to_srt/valid_output_utf8.srt b/vtt_to_srt/valid_output_utf8.srt
@@ -0,0 +1,10 @@
+
+1
+00:00:01,000 --> 00:00:04,000
+- Never drink liquid nitrogen.
+
+2
+00:00:05,000 --> 00:00:09,000
+- It will perforate your stomach.
+- You could die.
+