DmytroNorth · CptAwe · Dec 14, 2022 · Dec 14, 2022
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.venv
+.vscode
+__pycache__
diff --git a/README.md b/README.md
@@ -1,5 +1,19 @@
 # Text to Subtitles - Python
 
+This is a fork of the original project from [DmytroNorth](https://github.com/DmytroNorth/Text_To_Subtitles-Python).
+
+I tried to make it more pythonic and more easily importable as a module.
+
+This project can easily be integrated with, and enhanced by, the [srt](https://github.com/cdown/srt) library.
+
+Example usage:
+
+```
+python main.py 'path/to/text.txt' 'name_of_subtitle_file.srt'
+```
+
+---
+
 ![main2](png/main2.png)
 
 This Python file **creates subtitles of a given length** from **text paragraphs** that can be easily imported into any **Video Editing software** such as FinalCut Pro for further adjustments.

diff --git a/Text2SRT.py b/Text2SRT.py
@@ -0,0 +1,103 @@
+import os
+import re
+from datetime import datetime, timedelta
+
+def timedelta_2_str(timedlt: timedelta, timedlt_format: str = "%H:%M:%S") -> str:
+    '''
+    Render a datetime.timedelta as text using a strftime-style format.
+
+    The delta is added to datetime.min and the resulting datetime is
+    formatted with `timedlt_format`, so the usual strftime directives
+    (%H, %M, %S, %f, ...) can be used.
+    '''
+    anchored = datetime.min + timedlt
+    return anchored.strftime(timedlt_format)
+
+
+def timed_sub_list_2_srt(subtitles: list[dict]) -> str:
+    '''
+    Accepts a list of the format:
+    [
+        {
+            "timings" : ( <start timedelta> , <end timedelta> ),
+            "text" : " <some text> "
+        },
+        ...
+    ]
+
+    And converts it into an srt file formatted string.
+
+    Entries are numbered from 1 in list order. An entry without "timings"
+    gets a generated one (almost certainly not the correct one): the first
+    entry gets 0s --> 1s, any later entry gets the previous entry's start and
+    end shifted by one second. Generated timings are written back into the
+    input dictionaries so the list stays consistent for the caller.
+    An entry without "text" (or whose text is None) is emitted with an empty
+    text line instead of the literal string "None".
+    '''
+
+    timing_str_format = "%H:%M:%S,%f"
+    default_timing = (timedelta(seconds=0), timedelta(seconds=1))
+    one_second = timedelta(seconds=1)
+
+    srt_entries = []
+    for sub_index, sub in enumerate(subtitles):
+        if "timings" not in sub:
+            # There is no timing for this subtitle
+            if sub_index > 0:
+                # Every earlier entry already has timings at this point
+                # (either supplied or generated on a previous iteration).
+                previous_timings = subtitles[sub_index-1]["timings"]
+                sub["timings"] = (
+                    previous_timings[0] + one_second,
+                    previous_timings[1] + one_second
+                )
+            else:
+                # This is the first subtitle. Just add the default timing
+                sub["timings"] = default_timing
+        timings: tuple[timedelta, timedelta] = sub["timings"]
+
+        text = sub.get("text")
+        if text is None:
+            text = ""
+
+        # Convert the timedeltas to strings.
+        # %f yields 6 digits (microseconds); srt wants 3 (milliseconds), so
+        # chop off the last 3 digits.
+        # According to this https://stackoverflow.com/a/11040248 it is better than rounding
+        start_time_text = timedelta_2_str(timings[0], timing_str_format)[:-3]
+        end_time_text = timedelta_2_str(timings[1], timing_str_format)[:-3]
+
+        srt_entries.append(
+            f"{sub_index+1}\n{start_time_text} --> {end_time_text}\n{text}\n\n"
+        )
+
+    # Join once instead of growing a string inside the loop
+    return "".join(srt_entries)
+
+
+def read_transcript_file(input_file_dir: str, encoding: str = "utf-8") -> list[str]:
+    '''
+    Reads a .txt file and returns its paragraphs as a list.
+
+    Paragraphs are the chunks of text separated by one or more empty lines.
+    Leading/trailing whitespace of the whole file is dropped first, so a
+    trailing newline does not leak into the last paragraph and trailing empty
+    lines do not produce an empty paragraph. An empty file gives an empty list.
+
+    input_file_dir: path of the text file to read
+    encoding: text encoding used to decode the file (defaults to UTF-8 so the
+              result does not depend on the platform's locale)
+
+    Raises ValueError if `input_file_dir` is not an existing file.
+    '''
+
+    if not os.path.isfile(input_file_dir):
+        raise ValueError(f"Couldn't find file '{input_file_dir}'")
+
+    with open(input_file_dir, 'r', encoding=encoding) as input_file:
+        contents = input_file.read()
+
+    paragraphs = re.split(r'\n{2,}', contents.strip())
+
+    # Splitting an empty string yields [''], filter that out
+    return [paragraph for paragraph in paragraphs if paragraph]
+
+
+def save_srt_string_to_srt_file(srt_text: str, output_file_dir: str, encoding: str = "utf-8") -> None:
+    '''
+    It saves the srt formatted text to an .srt file
+
+    srt_text: the text to write
+    output_file_dir: path of the file to create (an existing file is overwritten)
+    encoding: text encoding used for the file (defaults to UTF-8 so that
+              non-ASCII subtitles are written the same way on every platform)
+    '''
+
+    with open(output_file_dir, 'w', encoding=encoding) as output_file:
+        output_file.write(srt_text)
diff --git a/main.py b/main.py
@@ -0,0 +1,26 @@
+import sys
+
+from Text2SRT import *
+
+def main(input_file_dir, output_file_name):
+    '''
+    Turn a plain-text transcript into an .srt subtitle file.
+
+    `input_file_dir` is the path of the transcript to read and
+    `output_file_name` is the path handed to the srt writer as-is.
+    '''
+
+    # One dictionary per item of the transcript; no timings are supplied here
+    entries = []
+    for piece in read_transcript_file(input_file_dir):
+        entries.append({"text" : piece})
+
+    # Build the srt text and write it out
+    srt_text = timed_sub_list_2_srt(entries)
+    save_srt_string_to_srt_file(srt_text, output_file_name)
+
+
+if __name__ == "__main__":
+    # Expect: main.py <input text file> <output srt file>
+    # Exit with a usage hint instead of an IndexError traceback when
+    # arguments are missing (extra arguments are ignored).
+    if len(sys.argv) < 3:
+        sys.exit(f"Usage: python {sys.argv[0]} <input_text_file> <output_srt_file>")
+
+    main(sys.argv[1], sys.argv[2])
diff --git a/subtitles.srt b/subtitles.srt
@@ -0,0 +1,23 @@
+1
+00:00:00,000 --> 00:00:01,000
+Call me Ishmael.
+
+2
+00:00:01,000 --> 00:00:02,000
+Some years ago,
+never mind how long precisely,
+
+3
+00:00:02,000 --> 00:00:03,000
+having little or no money in my purse,
+and nothing particular
+
+4
+00:00:03,000 --> 00:00:04,000
+to interest me on shore,
+I thought I would sail about a little
+
+5
+00:00:04,000 --> 00:00:05,000
+and see the watery part of the world.
+
diff --git a/text_to_subtitles.py b/text_to_subtitles.py
diff --git a/subtitles.txt → transcript.txt b/subtitles.txt → transcript.txt