From 9fad0730865177b528c1ece658c099f26b912077 Mon Sep 17 00:00:00 2001
From: JackieDo <anhvudo@gmail.com>
Date: Sun, 30 Dec 2018 15:06:19 +0700
Subject: [PATCH 1/4] Fix issue [agermanidis#120]

Fix issue [agermanidis#120](https://github.com/agermanidis/autosub/issues/120) for original branch
---
 autosub/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosub/__init__.py b/autosub/__init__.py
index 61019464..f4f36abc 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -99,7 +99,7 @@ def __call__(self, data):
                         line = json.loads(line)
                         line = line['result'][0]['alternative'][0]['transcript']
                         return line[:1].upper() + line[1:]
-                    except IndexError:
+                    except:
                         # no result
                         continue
 

From 63ae1da850f9de24645a21827469807dd92c333f Mon Sep 17 00:00:00 2001
From: JackieDo <anhvudo@gmail.com>
Date: Sun, 30 Dec 2018 15:19:10 +0700
Subject: [PATCH 2/4] Refactoring code and add features

- [Update]
   - Refactoring source code
   - Beautifying source code
- [Add]
   - Add the -V, --version arguments to the command-line options
---
 autosub/__init__.py   | 360 ++++++++++++++++++++++--------------------
 autosub/constants.py  |  12 +-
 autosub/formatters.py |  28 +---
 autosub/metadata.py   |  17 ++
 setup.py              |  29 ++--
 5 files changed, 228 insertions(+), 218 deletions(-)
 create mode 100644 autosub/metadata.py

diff --git a/autosub/__init__.py b/autosub/__init__.py
index f4f36abc..c0cdc1c6 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -1,9 +1,4 @@
-"""
-Defines autosub's main functionality.
-"""
-
 #!/usr/bin/env python
-
 from __future__ import absolute_import, print_function, unicode_literals
 
 import argparse
@@ -12,81 +7,83 @@
 import math
 import multiprocessing
 import os
+import requests
 import subprocess
 import sys
 import tempfile
 import wave
 
-import requests
 from googleapiclient.discovery import build
 from progressbar import ProgressBar, Percentage, Bar, ETA
-
+from autosub.metadata import *
 from autosub.constants import (
     LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+    DEFAULT_CONCURRENCY, DEFAULT_SRC_LANGUAGE, DEFAULT_DST_LANGUAGE, DEFAULT_SUBTITLE_FORMAT
 )
 from autosub.formatters import FORMATTERS
 
-DEFAULT_SUBTITLE_FORMAT = 'srt'
-DEFAULT_CONCURRENCY = 10
-DEFAULT_SRC_LANGUAGE = 'en'
-DEFAULT_DST_LANGUAGE = 'en'
-
 
 def percentile(arr, percent):
-    """
-    Calculate the given percentile of arr.
-    """
-    arr = sorted(arr)
+    arr   = sorted(arr)
     index = (len(arr) - 1) * percent
     floor = math.floor(index)
-    ceil = math.ceil(index)
+    ceil  = math.ceil(index)
+
     if floor == ceil:
         return arr[int(index)]
-    low_value = arr[int(floor)] * (ceil - index)
+
+    low_value  = arr[int(floor)] * (ceil - index)
     high_value = arr[int(ceil)] * (index - floor)
+
     return low_value + high_value
 
 
-class FLACConverter(object): # pylint: disable=too-few-public-methods
-    """
-    Class for converting a region of an input audio or video file into a FLAC audio file
-    """
+def is_same_language(lang1, lang2):
+    return lang1.split("-")[0] == lang2.split("-")[0]
+
+
+class ConsoleHelpFormatter(argparse.HelpFormatter):
+    def _split_lines(self, text, width):
+        return argparse.HelpFormatter._split_lines(self, text, width=70)
+
+
+class FLACConverter(object):
     def __init__(self, source_path, include_before=0.25, include_after=0.25):
-        self.source_path = source_path
+        self.source_path    = source_path
         self.include_before = include_before
-        self.include_after = include_after
+        self.include_after  = include_after
 
     def __call__(self, region):
         try:
             start, end = region
-            start = max(0, start - self.include_before)
-            end += self.include_after
-            temp = tempfile.NamedTemporaryFile(suffix='.flac')
+            start      = max(0, start - self.include_before)
+            end       += self.include_after
+
+            temp    = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
             command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
                        "-y", "-i", self.source_path,
                        "-loglevel", "error", temp.name]
             use_shell = True if os.name == "nt" else False
+
             subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+
             return temp.read()
 
         except KeyboardInterrupt:
             return None
 
 
-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
-    """
-    Class for performing speech-to-text for an input FLAC file.
-    """
-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
+class SpeechRecognizer(object):
+    def __init__(self, language=DEFAULT_SRC_LANGUAGE, rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
         self.language = language
-        self.rate = rate
-        self.api_key = api_key
-        self.retries = retries
+        self.rate     = rate
+        self.api_key  = api_key
+        self.retries  = retries
 
     def __call__(self, data):
         try:
-            for _ in range(self.retries):
-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
+            for i in range(self.retries):
+                url     = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
                 headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
 
                 try:
@@ -102,29 +99,24 @@ def __call__(self, data):
                     except:
                         # no result
                         continue
-
         except KeyboardInterrupt:
             return None
 
 
-class Translator(object): # pylint: disable=too-few-public-methods
-    """
-    Class for translating a sentence from a one language to another.
-    """
+class Translator(object):
     def __init__(self, language, api_key, src, dst):
         self.language = language
-        self.api_key = api_key
-        self.service = build('translate', 'v2',
-                             developerKey=self.api_key)
-        self.src = src
-        self.dst = dst
+        self.api_key  = api_key
+        self.service  = build('translate', 'v2', developerKey=self.api_key)
+        self.src      = src
+        self.dst      = dst
 
     def __call__(self, sentence):
         try:
             if not sentence:
                 return None
 
-            result = self.service.translations().list( # pylint: disable=no-member
+            result = self.service.translations().list(
                 source=self.src,
                 target=self.dst,
                 q=[sentence]
@@ -135,171 +127,150 @@ def __call__(self, sentence):
                 return result['translations'][0]['translatedText']
 
             return None
-
         except KeyboardInterrupt:
             return None
 
 
 def which(program):
-    """
-    Return the path for a given executable.
-    """
-    def is_exe(file_path):
-        """
-        Checks whether a file is executable.
-        """
-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
-
-    fpath, _ = os.path.split(program)
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    if os.name == "nt":
+        if ".exe" != program[-4:]:
+            program = program + ".exe"
+
+    fpath, fname = os.path.split(program)
     if fpath:
         if is_exe(program):
             return program
     else:
         for path in os.environ["PATH"].split(os.pathsep):
-            path = path.strip('"')
+            path     = path.strip('"')
             exe_file = os.path.join(path, program)
+
             if is_exe(exe_file):
                 return exe_file
     return None
 
 
 def extract_audio(filename, channels=1, rate=16000):
-    """
-    Extract audio from an input file to a temporary WAV file.
-    """
     temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+
     if not os.path.isfile(filename):
-        print("The given file does not exist: {}".format(filename))
-        raise Exception("Invalid filepath: {}".format(filename))
+        print("The given file does not exist: {}.".format(filename))
+        raise Exception("Invalid filepath: {}.".format(filename))
+
     if not which("ffmpeg"):
         print("ffmpeg: Executable not found on machine.")
-        raise Exception("Dependency not found: ffmpeg")
+        raise Exception("Dependency not found: ffmpeg.")
+
     command = ["ffmpeg", "-y", "-i", filename,
                "-ac", str(channels), "-ar", str(rate),
                "-loglevel", "error", temp.name]
     use_shell = True if os.name == "nt" else False
+
     subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+
     return temp.name, rate
 
 
-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
-    """
-    Perform voice activity detection on a given audio file.
-    """
-    reader = wave.open(filename)
-    sample_width = reader.getsampwidth()
-    rate = reader.getframerate()
-    n_channels = reader.getnchannels()
+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6):
+    reader         = wave.open(filename)
+    sample_width   = reader.getsampwidth()
+    rate           = reader.getframerate()
+    n_channels     = reader.getnchannels()
     chunk_duration = float(frame_width) / rate
+    n_chunks       = int(math.ceil(reader.getnframes()*1.0 / frame_width))
+    energies       = []
 
-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
-    energies = []
-
-    for _ in range(n_chunks):
+    for i in range(n_chunks):
         chunk = reader.readframes(frame_width)
         energies.append(audioop.rms(chunk, sample_width * n_channels))
 
-    threshold = percentile(energies, 0.2)
-
+    threshold    = percentile(energies, 0.2)
     elapsed_time = 0
-
-    regions = []
+    regions      = []
     region_start = None
 
     for energy in energies:
-        is_silence = energy <= threshold
+        is_silence   = energy <= threshold
         max_exceeded = region_start and elapsed_time - region_start >= max_region_size
 
         if (max_exceeded or is_silence) and region_start:
             if elapsed_time - region_start >= min_region_size:
                 regions.append((region_start, elapsed_time))
                 region_start = None
-
         elif (not region_start) and (not is_silence):
             region_start = elapsed_time
+
         elapsed_time += chunk_duration
+
     return regions
 
 
-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
-        source_path,
-        output=None,
-        concurrency=DEFAULT_CONCURRENCY,
-        src_language=DEFAULT_SRC_LANGUAGE,
-        dst_language=DEFAULT_DST_LANGUAGE,
-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
-        api_key=None,
-    ):
-    """
-    Given an input audio/video file, generate subtitles in the specified language and format.
-    """
+def generate_subtitles(source_path, output=None, concurrency=DEFAULT_CONCURRENCY, src_language=DEFAULT_SRC_LANGUAGE, dst_language=DEFAULT_DST_LANGUAGE, subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, api_key=None):
     audio_filename, audio_rate = extract_audio(source_path)
 
-    regions = find_speech_regions(audio_filename)
-
-    pool = multiprocessing.Pool(concurrency)
-    converter = FLACConverter(source_path=audio_filename)
-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
-                                  api_key=GOOGLE_SPEECH_API_KEY)
-
+    regions     = find_speech_regions(audio_filename)
+    pool        = multiprocessing.Pool(concurrency)
+    converter   = FLACConverter(source_path=audio_filename)
+    recognizer  = SpeechRecognizer(language=src_language, rate=audio_rate, api_key=GOOGLE_SPEECH_API_KEY)
     transcripts = []
+
     if regions:
         try:
-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
-                       ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            widgets           = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', ETA()]
+            pbar              = ProgressBar(widgets=widgets, maxval=len(regions)).start()
             extracted_regions = []
+
             for i, extracted_region in enumerate(pool.imap(converter, regions)):
                 extracted_regions.append(extracted_region)
                 pbar.update(i)
             pbar.finish()
 
             widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            pbar    = ProgressBar(widgets=widgets, maxval=len(regions)).start()
 
             for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                 transcripts.append(transcript)
                 pbar.update(i)
             pbar.finish()
 
-            if src_language.split("-")[0] != dst_language.split("-")[0]:
+            if not is_same_language(src_language, dst_language):
                 if api_key:
                     google_translate_api_key = api_key
-                    translator = Translator(dst_language, google_translate_api_key,
-                                            dst=dst_language,
-                                            src=src_language)
-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-                    translated_transcripts = []
+                    translator               = Translator(dst_language, google_translate_api_key, dst=dst_language, src=src_language)
+                    prompt                   = "Translating from {0} to {1}: ".format(src_language, dst_language)
+                    widgets                  = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
+                    pbar                     = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                    translated_transcripts   = []
+
                     for i, transcript in enumerate(pool.imap(translator, transcripts)):
                         translated_transcripts.append(transcript)
                         pbar.update(i)
                     pbar.finish()
+
                     transcripts = translated_transcripts
                 else:
-                    print(
-                        "Error: Subtitle translation requires specified Google Translate API key. "
-                        "See --help for further information."
-                    )
+                    print("Error: Subtitle translation requires specified Google Translate API key. See --help for further information.")
                     return 1
 
         except KeyboardInterrupt:
             pbar.finish()
             pool.terminate()
             pool.join()
-            print("Cancelling transcription")
-            raise
+            print("Cancelling transcription.")
+            return 1
 
-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
-    formatter = FORMATTERS.get(subtitle_file_format)
+    timed_subtitles     = [(r, t) for r, t in zip(regions, transcripts) if t]
+    formatter           = FORMATTERS.get(subtitle_file_format)
     formatted_subtitles = formatter(timed_subtitles)
 
     dest = output
 
     if not dest:
-        base = os.path.splitext(source_path)[0]
-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
+        base, ext = os.path.splitext(source_path)
+        dest      = "{base}.{locale}.{format}".format(base=base, locale=dst_language, format=subtitle_file_format)
 
     with open(dest, 'wb') as output_file:
         output_file.write(formatted_subtitles.encode("utf-8"))
@@ -310,28 +281,16 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
 
 
 def validate(args):
-    """
-    Check that the CLI arguments passed to autosub are valid.
-    """
-    if args.format not in FORMATTERS:
-        print(
-            "Subtitle format not supported. "
-            "Run with --list-formats to see all supported formats."
-        )
+    if args.format not in FORMATTERS.keys():
+        print("Subtitle format not supported. Run with --list-formats to see all supported formats.")
         return False
 
     if args.src_language not in LANGUAGE_CODES.keys():
-        print(
-            "Source language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
+        print("Source language not supported. Run with --list-languages to see all supported languages.")
         return False
 
     if args.dst_language not in LANGUAGE_CODES.keys():
-        print(
-            "Destination language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
+        print("Destination language not supported. Run with --list-languages to see all supported languages.")
         return False
 
     if not args.source_path:
@@ -342,43 +301,105 @@ def validate(args):
 
 
 def main():
-    """
-    Run autosub as a command-line program.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
-                        nargs='?')
-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
-                        type=int, default=DEFAULT_CONCURRENCY)
-    parser.add_argument('-o', '--output',
-                        help="Output path for subtitles (by default, subtitles are saved in \
-                        the same directory and name as the source path)")
-    parser.add_argument('-F', '--format', help="Destination subtitle format",
-                        default=DEFAULT_SUBTITLE_FORMAT)
-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
-                        default=DEFAULT_SRC_LANGUAGE)
-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
-                        default=DEFAULT_DST_LANGUAGE)
-    parser.add_argument('-K', '--api-key',
-                        help="The Google Translate API key to be used. \
-                        (Required for subtitle translation)")
-    parser.add_argument('--list-formats', help="List all available subtitle formats",
-                        action='store_true')
-    parser.add_argument('--list-languages', help="List all available source/destination languages",
-                        action='store_true')
+    parser = argparse.ArgumentParser(
+        prog=metadata.name,
+        usage='\n  %(prog)s [options] <source_path>',
+        description=metadata.description,
+        formatter_class=ConsoleHelpFormatter,
+        add_help=False
+    )
+
+    pgroup = parser.add_argument_group('Required')
+    ogroup = parser.add_argument_group('Options')
+
+    pgroup.add_argument(
+        'source_path',
+        nargs='?',
+        help="The path to the video or audio file needs to generate subtitle."
+    )
+
+    ogroup.add_argument(
+        '-C', '--concurrency',
+        metavar='<number>',
+        type=int,
+        default=DEFAULT_CONCURRENCY,
+        help="Number of concurrent API requests to make (default: %(default)s)."
+    )
+
+    ogroup.add_argument(
+        '-o', '--output',
+        metavar='<path>',
+        help="The output path for subtitle file. The default is in the same directory and the name is same as the source path."
+    )
+
+    ogroup.add_argument(
+        '-F', '--format',
+        metavar='<format>',
+        default=DEFAULT_SUBTITLE_FORMAT,
+        help="Destination subtitle format (default: %(default)s)."
+    )
+
+    ogroup.add_argument(
+        '-S', '--src-language',
+        metavar='<locale>',
+        default=DEFAULT_SRC_LANGUAGE,
+        help="Locale of language spoken in source file (default: %(default)s)."
+    )
+
+    ogroup.add_argument(
+        '-D', '--dst-language',
+        metavar='<locale>',
+        default=DEFAULT_DST_LANGUAGE,
+        help="Locale of desired language for the subtitles (default: %(default)s)."
+    )
+
+    ogroup.add_argument(
+        '-K', '--api-key',
+        metavar='<key>',
+        help="The Google Translate API key to be used. Required for subtitle translation."
+    )
+
+    ogroup.add_argument(
+        '-h', '--help',
+        action='help',
+        help="Show %(prog)s help message and exit."
+    )
+
+    ogroup.add_argument(
+        '-V', '--version',
+        action='version',
+        version='%(prog)s ' + metadata.version + ' by ' + metadata.author + ' <' + metadata.author_email + '>',
+        help="Show %(prog)s version and exit."
+    )
+
+    ogroup.add_argument(
+        '--list-formats',
+        action='store_true',
+        help="List all available subtitle formats."
+    )
+
+    ogroup.add_argument(
+        '--list-languages',
+        action='store_true',
+        help="List all available source/destination languages."
+    )
 
     args = parser.parse_args()
 
     if args.list_formats:
         print("List of formats:")
-        for subtitle_format in FORMATTERS:
+
+        for subtitle_format in FORMATTERS.keys():
             print("{format}".format(format=subtitle_format))
+
         return 0
 
     if args.list_languages:
         print("List of all languages:")
+
         for code, language in sorted(LANGUAGE_CODES.items()):
             print("{code}\t{language}".format(code=code, language=language))
+
         return 0
 
     if not validate(args):
@@ -387,14 +408,15 @@ def main():
     try:
         subtitle_file_path = generate_subtitles(
             source_path=args.source_path,
+            output=args.output,
             concurrency=args.concurrency,
             src_language=args.src_language,
             dst_language=args.dst_language,
-            api_key=args.api_key,
             subtitle_file_format=args.format,
-            output=args.output,
+            api_key=args.api_key
         )
-        print("Subtitles file created at {}".format(subtitle_file_path))
+
+        print("Subtitles file created at \"{}\"".format(subtitle_file_path))
     except KeyboardInterrupt:
         return 1
 
diff --git a/autosub/constants.py b/autosub/constants.py
index c776c112..33686971 100644
--- a/autosub/constants.py
+++ b/autosub/constants.py
@@ -1,11 +1,8 @@
-"""
-Defines constants used by autosub.
-"""
-
+#!/usr/bin/env python
 from __future__ import unicode_literals
 
 GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
-GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
+GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}"
 
 LANGUAGE_CODES = {
     'af': 'Afrikaans',
@@ -100,3 +97,8 @@
     'zh-TW': 'Chinese (Traditional)',
     'zu': 'Zulu',
 }
+
+DEFAULT_CONCURRENCY     = 10
+DEFAULT_SRC_LANGUAGE    = "en"
+DEFAULT_DST_LANGUAGE    = "en"
+DEFAULT_SUBTITLE_FORMAT = "srt"
diff --git a/autosub/formatters.py b/autosub/formatters.py
index b0d581c0..3b229534 100644
--- a/autosub/formatters.py
+++ b/autosub/formatters.py
@@ -1,20 +1,10 @@
-"""
-Defines subtitle formatters used by autosub.
-"""
-
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python
 from __future__ import unicode_literals
-
 import json
-
 import pysrt
 import six
 
-
 def srt_formatter(subtitles, padding_before=0, padding_after=0):
-    """
-    Serialize a list of subtitles according to the SRT format, with optional time padding.
-    """
     sub_rip_file = pysrt.SubRipFile()
     for i, ((start, end), text) in enumerate(subtitles, start=1):
         item = pysrt.SubRipItem()
@@ -25,39 +15,25 @@ def srt_formatter(subtitles, padding_before=0, padding_after=0):
         sub_rip_file.append(item)
     return '\n'.join(six.text_type(item) for item in sub_rip_file)
 
-
 def vtt_formatter(subtitles, padding_before=0, padding_after=0):
-    """
-    Serialize a list of subtitles according to the VTT format, with optional time padding.
-    """
     text = srt_formatter(subtitles, padding_before, padding_after)
     text = 'WEBVTT\n\n' + text.replace(',', '.')
     return text
 
-
 def json_formatter(subtitles):
-    """
-    Serialize a list of subtitles as a JSON blob.
-    """
     subtitle_dicts = [
         {
             'start': start,
             'end': end,
             'content': text,
         }
-        for ((start, end), text)
-        in subtitles
+        for ((start, end), text) in subtitles
     ]
     return json.dumps(subtitle_dicts)
 
-
 def raw_formatter(subtitles):
-    """
-    Serialize a list of subtitles as a newline-delimited string.
-    """
     return ' '.join(text for (_rng, text) in subtitles)
 
-
 FORMATTERS = {
     'srt': srt_formatter,
     'vtt': vtt_formatter,
diff --git a/autosub/metadata.py b/autosub/metadata.py
new file mode 100644
index 00000000..dd6b6af6
--- /dev/null
+++ b/autosub/metadata.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+name             = 'autosub'
+version          = '0.4.0'
+description      = 'Auto-generates subtitles for any video or audio file.'
+long_description = (
+    'Autosub is a utility for automatic speech recognition and subtitle generation. '
+    'It takes a video or an audio file as input, performs voice activity detection '
+    'to find speech regions, makes parallel requests to Google Web Speech API to '
+    'generate transcriptions for those regions, (optionally) translates them to a '
+    'different language, and finally saves the resulting subtitles to disk. It '
+    'supports a variety of input and output languages (to see which, run the '
+    'utility with the argument --list-languages) and can currently produce '
+    'subtitles in either the SRT format or simple JSON.'
+)
+author       = 'Anastasis Germanidis'
+author_email = 'agermanidis@gmail.com'
+homepage     = 'https://github.com/agermanidis/autosub'
\ No newline at end of file
diff --git a/setup.py b/setup.py
index c9ac20c0..64297154 100644
--- a/setup.py
+++ b/setup.py
@@ -6,26 +6,19 @@
 except ImportError:
     from distutils.core import setup
 
-long_description = (
-    'Autosub is a utility for automatic speech recognition and subtitle generation. '
-    'It takes a video or an audio file as input, performs voice activity detection '
-    'to find speech regions, makes parallel requests to Google Web Speech API to '
-    'generate transcriptions for those regions, (optionally) translates them to a '
-    'different language, and finally saves the resulting subtitles to disk. '
-    'It supports a variety of input and output languages (to see which, run the '
-    'utility with --list-src-languages and --list-dst-languages as arguments '
-    'respectively) and can currently produce subtitles in either the SRT format or '
-    'simple JSON.'
-)
+metadata = {}
+
+with open("autosub/metadata.py") as metafile:
+    exec(metafile.read(), metadata)
 
 setup(
-    name='autosub',
-    version='0.4.0',
-    description='Auto-generates subtitles for any video or audio file',
-    long_description=long_description,
-    author='Anastasis Germanidis',
-    author_email='agermanidis@gmail.com',
-    url='https://github.com/agermanidis/autosub',
+    name=metadata['name'],
+    version=metadata['version'],
+    description=metadata['description'],
+    long_description=metadata['long_description'],
+    author=metadata['author'],
+    author_email=metadata['author_email'],
+    url=metadata['homepage'],
     packages=['autosub'],
     entry_points={
         'console_scripts': [

From 355972ac91183854d1d5e78f4ee252f8b98cad56 Mon Sep 17 00:00:00 2001
From: JackieDo <anhvudo@gmail.com>
Date: Sun, 30 Dec 2018 18:24:21 +0700
Subject: [PATCH 3/4] Refactor with pylint

- Add pylint configuration file (.pylintrc).
- Update constant names for metadata.
- Resolve issue [agermanidis#120] with a different solution that keeps the specific `except IndexError` handler instead of a bare `except`.
- Refactoring source code again with pylint.
---
 .pylintrc             |  9 +++++++++
 autosub/__init__.py   | 39 ++++++++++++++++++++++-----------------
 autosub/formatters.py | 10 +++++-----
 autosub/metadata.py   | 15 ++++++++-------
 setup.py              | 14 +++++++-------
 5 files changed, 51 insertions(+), 36 deletions(-)
 create mode 100644 .pylintrc

diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 00000000..38787cee
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,9 @@
+[MESSAGES CONTROL]
+disable=missing-docstring,
+        wildcard-import,
+        line-too-long,
+        bad-whitespace,
+        too-few-public-methods,
+        too-many-arguments,
+        too-many-locals,
+        no-member
diff --git a/autosub/__init__.py b/autosub/__init__.py
index c0cdc1c6..a7530755 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -7,15 +7,15 @@
 import math
 import multiprocessing
 import os
-import requests
 import subprocess
 import sys
 import tempfile
 import wave
+import requests
 
-from googleapiclient.discovery import build
 from progressbar import ProgressBar, Percentage, Bar, ETA
-from autosub.metadata import *
+from googleapiclient.discovery import build
+from autosub import metadata
 from autosub.constants import (
     LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
     DEFAULT_CONCURRENCY, DEFAULT_SRC_LANGUAGE, DEFAULT_DST_LANGUAGE, DEFAULT_SUBTITLE_FORMAT
@@ -82,7 +82,7 @@ def __init__(self, language=DEFAULT_SRC_LANGUAGE, rate=44100, retries=3, api_key
 
     def __call__(self, data):
         try:
-            for i in range(self.retries):
+            for _ in range(self.retries):
                 url     = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
                 headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
 
@@ -93,10 +93,15 @@ def __call__(self, data):
 
                 for line in resp.content.decode('utf-8').split("\n"):
                     try:
-                        line = json.loads(line)
-                        line = line['result'][0]['alternative'][0]['transcript']
-                        return line[:1].upper() + line[1:]
-                    except:
+                        if line:
+                            line = json.loads(line)
+                            line = line['result'][0]['alternative'][0]['transcript']
+                            line = line[:1].upper() + line[1:]
+                        else:
+                            line = None
+
+                        return line
+                    except IndexError:
                         # no result
                         continue
         except KeyboardInterrupt:
@@ -136,10 +141,10 @@ def is_exe(fpath):
         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
 
     if os.name == "nt":
-        if ".exe" != program[-4:]:
+        if program[-4:] != ".exe":
             program = program + ".exe"
 
-    fpath, fname = os.path.split(program)
+    fpath, _ = os.path.split(program)
     if fpath:
         if is_exe(program):
             return program
@@ -183,7 +188,7 @@ def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_reg
     n_chunks       = int(math.ceil(reader.getnframes()*1.0 / frame_width))
     energies       = []
 
-    for i in range(n_chunks):
+    for _ in range(n_chunks):
         chunk = reader.readframes(frame_width)
         energies.append(audioop.rms(chunk, sample_width * n_channels))
 
@@ -269,8 +274,8 @@ def generate_subtitles(source_path, output=None, concurrency=DEFAULT_CONCURRENCY
     dest = output
 
     if not dest:
-        base, ext = os.path.splitext(source_path)
-        dest      = "{base}.{locale}.{format}".format(base=base, locale=dst_language, format=subtitle_file_format)
+        base = os.path.splitext(source_path)[0]
+        dest = "{base}.{locale}.{format}".format(base=base, locale=dst_language, format=subtitle_file_format)
 
     with open(dest, 'wb') as output_file:
         output_file.write(formatted_subtitles.encode("utf-8"))
@@ -302,9 +307,9 @@ def validate(args):
 
 def main():
     parser = argparse.ArgumentParser(
-        prog=metadata.name,
+        prog=metadata.NAME,
         usage='\n  %(prog)s [options] <source_path>',
-        description=metadata.description,
+        description=metadata.DESCRIPTION,
         formatter_class=ConsoleHelpFormatter,
         add_help=False
     )
@@ -368,7 +373,7 @@ def main():
     ogroup.add_argument(
         '-V', '--version',
         action='version',
-        version='%(prog)s ' + metadata.version + ' by ' + metadata.author + ' <' + metadata.author_email + '>',
+        version='%(prog)s ' + metadata.VERSION + ' by ' + metadata.AUTHOR + ' <' + metadata.AUTHOR_EMAIL + '>',
         help="Show %(prog)s version and exit."
     )
 
@@ -389,7 +394,7 @@ def main():
     if args.list_formats:
         print("List of formats:")
 
-        for subtitle_format in FORMATTERS.keys():
+        for subtitle_format in FORMATTERS:
             print("{format}".format(format=subtitle_format))
 
         return 0
diff --git a/autosub/formatters.py b/autosub/formatters.py
index 3b229534..11792d1c 100644
--- a/autosub/formatters.py
+++ b/autosub/formatters.py
@@ -1,17 +1,17 @@
 #!/usr/bin/env python
 from __future__ import unicode_literals
 import json
-import pysrt
 import six
+import pysrt
 
 def srt_formatter(subtitles, padding_before=0, padding_after=0):
     sub_rip_file = pysrt.SubRipFile()
     for i, ((start, end), text) in enumerate(subtitles, start=1):
-        item = pysrt.SubRipItem()
-        item.index = i
-        item.text = six.text_type(text)
+        item               = pysrt.SubRipItem()
+        item.index         = i
+        item.text          = six.text_type(text)
         item.start.seconds = max(0, start - padding_before)
-        item.end.seconds = end + padding_after
+        item.end.seconds   = end + padding_after
         sub_rip_file.append(item)
     return '\n'.join(six.text_type(item) for item in sub_rip_file)
 
diff --git a/autosub/metadata.py b/autosub/metadata.py
index dd6b6af6..09a9a7c7 100644
--- a/autosub/metadata.py
+++ b/autosub/metadata.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
-name             = 'autosub'
-version          = '0.4.0'
-description      = 'Auto-generates subtitles for any video or audio file.'
-long_description = (
+
+NAME             = 'autosub'
+VERSION          = '0.4.0'
+DESCRIPTION      = 'Auto-generates subtitles for any video or audio file.'
+LONG_DESCRIPTION = (
     'Autosub is a utility for automatic speech recognition and subtitle generation. '
     'It takes a video or an audio file as input, performs voice activity detection '
     'to find speech regions, makes parallel requests to Google Web Speech API to '
@@ -12,6 +13,6 @@
     'utility with the argument --list-languages) and can currently produce '
     'subtitles in either the SRT format or simple JSON.'
 )
-author       = 'Anastasis Germanidis'
-author_email = 'agermanidis@gmail.com'
-homepage     = 'https://github.com/agermanidis/autosub'
\ No newline at end of file
+AUTHOR       = 'Anastasis Germanidis'
+AUTHOR_EMAIL = 'agermanidis@gmail.com'
+HOMEPAGE     = 'https://github.com/agermanidis/autosub'
diff --git a/setup.py b/setup.py
index 64297154..c033a509 100644
--- a/setup.py
+++ b/setup.py
@@ -12,13 +12,13 @@
     exec(metafile.read(), metadata)
 
 setup(
-    name=metadata['name'],
-    version=metadata['version'],
-    description=metadata['description'],
-    long_description=metadata['long_description'],
-    author=metadata['author'],
-    author_email=metadata['author_email'],
-    url=metadata['homepage'],
+    name=metadata['NAME'],
+    version=metadata['VERSION'],
+    description=metadata['DESCRIPTION'],
+    long_description=metadata['LONG_DESCRIPTION'],
+    author=metadata['AUTHOR'],
+    author_email=metadata['AUTHOR_EMAIL'],
+    url=metadata['HOMEPAGE'],
     packages=['autosub'],
     entry_points={
         'console_scripts': [

From cff85beebf34b560685c4939bb4f2ddb2fc259b2 Mon Sep 17 00:00:00 2001
From: JackieDo <anhvudo@gmail.com>
Date: Mon, 31 Dec 2018 01:26:51 +0700
Subject: [PATCH 4/4] Fix issue [agermanidis#120] with another solution

Resolved issue [agermanidis#120] with the same solution as pull request [agermanidis#110]
---
 autosub/__init__.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/autosub/__init__.py b/autosub/__init__.py
index a7530755..83f437ab 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -93,15 +93,10 @@ def __call__(self, data):
 
                 for line in resp.content.decode('utf-8').split("\n"):
                     try:
-                        if line:
-                            line = json.loads(line)
-                            line = line['result'][0]['alternative'][0]['transcript']
-                            line = line[:1].upper() + line[1:]
-                        else:
-                            line = None
-
-                        return line
-                    except IndexError:
+                        line = json.loads(line)
+                        line = line['result'][0]['alternative'][0]['transcript']
+                        return line[:1].upper() + line[1:]
+                    except (IndexError, ValueError):
                         # no result
                         continue
         except KeyboardInterrupt: