From 0d1e05e3f7010c2aa9c626e16f597d187797fbfd Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Fri, 27 Oct 2023 22:27:49 +0200 Subject: [PATCH 01/21] Update python-publish.yml --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 5e7b819..554a5e9 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -11,7 +11,7 @@ name: Upload Python Package on: push: branches: - - main + - master permissions: contents: read From d326bb7388a46f37e4c7f5eec904e229289fbc3a Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Fri, 27 Oct 2023 22:28:59 +0200 Subject: [PATCH 02/21] Delete .github/workflows directory --- .github/workflows/python-publish.yml | 43 ---------------------------- 1 file changed, 43 deletions(-) delete mode 100644 .github/workflows/python-publish.yml diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml deleted file mode 100644 index 554a5e9..0000000 --- a/.github/workflows/python-publish.yml +++ /dev/null @@ -1,43 +0,0 @@ -# This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
- -name: Upload Python Package - -on: - push: - branches: - - master - -permissions: - contents: read - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["2.7", "3.5"] - - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} From 60ee833a84d80292afe663d432db5365708030ca Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Mon, 30 Oct 2023 12:15:37 +0100 Subject: [PATCH 03/21] Fix only-title torrent names --- PTN/__init__.py | 3 +-- PTN/parse.py | 10 +++++++--- tests/files/input.json | 6 ++++-- tests/files/output_raw.json | 8 +++++++- tests/files/output_standard.json | 8 +++++++- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/PTN/__init__.py b/PTN/__init__.py index 2ebbc6b..62b224b 100644 --- a/PTN/__init__.py +++ b/PTN/__init__.py @@ -14,10 +14,9 @@ __author__ = "Giorgio Momigliano" __email__ = "gmomigliano@protonmail.com" -__version__ = "2.6" +__version__ = "2.7" __license__ = "MIT" -# TODO change coherent_types default to True in 3.0 def parse(name, standardise=True, coherent_types=False): return PTN().parse(name, standardise, coherent_types) diff --git a/PTN/parse.py b/PTN/parse.py index 5273457..dff46ca 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -328,16 +328,16 @@ def process_title(self): self._part("title", None, "") raw = self.torrent_name[title_start:title_end] - # Something in square brackets with 3 chars or less is too weird to be right. + # Something in square brackets with 3 chars or fewer is too weird to be right. 
# If this seems too arbitrary, make it any square bracket, and Mother test # case will lose its translated title (which is mostly fine I think). - m = re.search("\(|(?:\[(?:.{,3}\]|[^\]]*\d[^\]]*\]?))", raw, flags=re.I) + m = re.search(r"\(|(?:\[(?:.{,3}\]|[^\]]*\d[^\]]*\]?))", raw, flags=re.I) if m: relative_title_end = m.start() raw = raw[:relative_title_end] title_end = relative_title_end + title_start # Similar logic as above, but looking at beginning of string unmatched brackets. - m = re.search("^(?:\)|\[.*\])", raw) + m = re.search(r"^(?:\)|\[.*\])", raw) if m: relative_title_start = m.end() raw = raw[relative_title_start:] @@ -368,6 +368,10 @@ def unmatched_list(self, keep_punctuation=True): ): unmatched.append((end, len(self.torrent_name))) + # If nothing matched, assume the whole thing is the title + if not self.match_slices: + unmatched.append((0, len(self.torrent_name))) + return unmatched def fix_known_exceptions(self): diff --git a/tests/files/input.json b/tests/files/input.json index a34f1ad..eade845 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -392,5 +392,7 @@ "Bolt.2008.MULTI.BluRay.3D.1080p.AVC.DTS-HD.MA.DD.EX.5.1-SnOoP-UPR.iso", "Casino.1995.MULTi.REMUX.2160p.UHD.Blu-ray.HDR.HEVC.DTS-X7.1-DENDA.mkv", "Seinfeld.S04E23E24.The.Pilot.FiNAL.MULTi.1080p.NF.WEB-DL.HE-AAC2.0.H264-Ralf.mkv", - "Escape.Room.Tournament.of.Champions.2021.PL.EXTENDED.1080p.BRRip.HE-AACv2.AV1.mkv" -] + "Escape.Room.Tournament.of.Champions.2021.PL.EXTENDED.1080p.BRRip.HE-AACv2.AV1.mkv", + "Steven Universe", + "The Amazing World of Gumball" +] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 122b79e..70148a4 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3621,5 +3621,11 @@ "resolution": "1080p", "title": "Escape Room Tournament of Champions", "year": 2021 + }, + { + "title": "Steven Universe" + }, + { + "title": "The Amazing World of Gumball" } -] +] \ No newline at end 
of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 38dbe1f..9dcd3d6 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2119,5 +2119,11 @@ "filetype": "MKV", "language": "Polish", "title": "Escape Room Tournament of Champions" + }, + { + "title": "Steven Universe" + }, + { + "title": "The Amazing World of Gumball" } -] +] \ No newline at end of file From 6fe2362075e80d77a519522bdf88ea215d4dd08b Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Sun, 31 Dec 2023 00:49:55 +0100 Subject: [PATCH 04/21] Remove complete series words --- PTN/extras.py | 6 ++++++ PTN/patterns.py | 3 +++ PTN/post.py | 16 +++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/PTN/extras.py b/PTN/extras.py index 104a763..79b5517 100644 --- a/PTN/extras.py +++ b/PTN/extras.py @@ -46,6 +46,12 @@ ("Thriller", "Thriller"), ] +# Match strings like "complete series" for tv seasons/series, matching within the final title string. +complete_series = [ + r"(?:the\s)?complete\s(?:series|season|collection)$", + r"(?:the)\scomplete\s?(?:series|season|collection)?$", +] + # Some titles just can't be parsed without breaking everything else, so here # are known those known exceptions. They are executed when the parsed_title and # incorrect_parse match within a .parse() dict, removing the latter, and replacing diff --git a/PTN/patterns.py b/PTN/patterns.py index 95e08fa..23a0676 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -110,6 +110,9 @@ r"\b(?:Complete" + delimiters + r")?Season[\. -][0-9]{1,2}\b", # Describes Season.15 type descriptions + # r"\b(?:Seasons?)" + # + delimiters + # + r"(\d{1,2})" + "(?:(?:" + delimiters + r"|&|and|to)(\d{1,2}))+\b" ] # The first 4 season regexes won't have 'Part' in them. 
patterns["episode"] += [ diff --git a/PTN/post.py b/PTN/post.py index f50cfd7..47e1b92 100644 --- a/PTN/post.py +++ b/PTN/post.py @@ -3,7 +3,7 @@ # Post-processing functions that run after the main parsing. from . import re -from .extras import link_patterns +from .extras import link_patterns, complete_series from .patterns import episode_name_pattern, langs, patterns, pre_website_encoder_pattern # Before excess functions (before we split what was unmatched in the title into a list). @@ -83,7 +83,20 @@ def try_encoder_before_site(self, unmatched): return unmatched +def remove_complete_series_string(self, unmatched): + if "title" in self.parts: + complete_series_regex = link_patterns(complete_series) + complete_match = re.search(complete_series_regex, self.parts["title"], flags=re.I) + if complete_match: + title = self.parts["title"] + title = title[: complete_match.start()] + title[complete_match.end() :] + self._part("title", (complete_match.start(), complete_match.end()), self._clean_string(title), overwrite=True) + + return unmatched + + post_processing_before_excess = [ + remove_complete_series_string, try_episode_name, try_encoder_before_site, ] @@ -199,6 +212,7 @@ def try_vague_season_episode(self): ) +# Probably for movies like 1917, where the title is just the year (would need the release year to also be absent) def use_year_as_title_if_absent(self): if "year" in self.parts and not self.parts.get("title"): self._part("title", None, str(self.parts["year"]), overwrite=True) From 8ece81065f4db7a7f66bc935b8a202bf24536373 Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Sun, 31 Dec 2023 01:06:00 +0100 Subject: [PATCH 05/21] Fix overlapping stuff for seasons --- PTN/parse.py | 16 ++++++++++++++-- PTN/patterns.py | 11 +++++++++-- tests/files/input.json | 4 +++- tests/files/output_raw.json | 10 ++++++++++ tests/files/output_standard.json | 7 +++++++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/PTN/parse.py b/PTN/parse.py index 
dff46ca..69ddc1a 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -1,7 +1,7 @@ #!/usr/bin/env python from . import re from .extras import exceptions, genres, langs, link_patterns, patterns_ignore_title -from .patterns import delimiters, patterns, patterns_ordered, types +from .patterns import delimiters, patterns, patterns_ordered, types, patterns_no_overlap from .post import post_processing_after_excess, post_processing_before_excess @@ -111,7 +111,19 @@ def parse(self, name, standardise, coherent_types): if self.standardise: clean = self.standardise_clean(clean, key, replace, transforms) - self._part(key, (match_start, match_end), clean) + part_overlaps = False + for part, part_slices in self.part_slices.items(): + if part in patterns_no_overlap: + # Strict smaller/larger than since punctuation can overlap. + if ( + (part_slices[0] < match_start < part_slices[1]) + or (part_slices[0] < match_end < part_slices[1]) + ): + part_overlaps = True + break + + if not part_overlaps: + self._part(key, (match_start, match_end), clean) self.process_title() self.fix_known_exceptions() diff --git a/PTN/patterns.py b/PTN/patterns.py index 23a0676..63722b0 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -45,13 +45,13 @@ # Forces an order to go by the regexes, as we want this to be deterministic (different # orders can generate different matchings). e.g. "doctor_who_2005..." in input.json patterns_ordered = [ + "resolution", + "quality", "season", "episode", "year", "month", "day", - "resolution", - "quality", "codec", "audio", "region", @@ -85,6 +85,13 @@ "genre", ] + +# Added recently. Instead of annoying punctuation/overlap stuff, since most patterns should have no overlap, +# add ones that are causing issues here (if it makes sense). +patterns_no_overlap = [ + "resolution" +] + patterns = {} patterns["episode"] = [ r"(? 
Date: Sun, 31 Dec 2023 01:28:39 +0100 Subject: [PATCH 06/21] Improve season range support when many are listed --- PTN/parse.py | 4 ++++ PTN/patterns.py | 8 ++++---- tests/files/input.json | 3 ++- tests/files/output_raw.json | 6 +++++- tests/files/output_standard.json | 3 +++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/PTN/parse.py b/PTN/parse.py index 69ddc1a..c69fc74 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -232,8 +232,12 @@ def get_season_episode(match): m = re.findall(r"[0-9]+", match[0]) if m and len(m) > 1: clean = list(range(int(m[0]), int(m[-1]) + 1)) + # This elif exists entirely for the Seasons 1, 2, 3, 4, etc. case. No other regex gives a number in match[1]. + elif len(match) > 1 and match[1] and m: + clean = list(range(int(m[0]), int(match[1]) + 1)) elif m: clean = int(m[0]) + return clean @staticmethod diff --git a/PTN/patterns.py b/PTN/patterns.py index 63722b0..efe3072 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -102,6 +102,9 @@ ] # If adding season patterns, remember to look at episode, as it uses the last few! patterns["season"] = [ + r"\b(?:Seasons?)" + + delimiters + + r"(\d{1,2})" + "(?:(?:" + delimiters + r"|&|and|to){1,3}(\d{1,2})){2,}\b", r"\ss?(\d{1,2})\s\-\s\d{1,2}\s", # Avoids matching some anime releases season and episode as a season range r"\b" + season_range_pattern + r"\b", # Describes season ranges r"(?:s\d{1,2}[.+\s]*){2,}\b", # for S01.S02.etc. patterns @@ -117,13 +120,10 @@ r"\b(?:Complete" + delimiters + r")?Season[\. -][0-9]{1,2}\b", # Describes Season.15 type descriptions - # r"\b(?:Seasons?)" - # + delimiters - # + r"(\d{1,2})" + "(?:(?:" + delimiters + r"|&|and|to)(\d{1,2}))+\b" ] # The first 4 season regexes won't have 'Part' in them. 
patterns["episode"] += [ - link_patterns(patterns["season"][5:]) + link_patterns(patterns["season"][6:]) + delimiters + "*P(?:ar)?t" + delimiters diff --git a/tests/files/input.json b/tests/files/input.json index 9f4a3cb..3592fb2 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -396,5 +396,6 @@ "Steven Universe", "The Amazing World of Gumball", "Avatar The Last Airbender - The Complete Series 1080p [HEVC AAC] - SEPH1", - "The Inbetweeners Complete Collection" + "The Inbetweeners Complete Collection", + "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 456b5b0..cf58f48 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3637,5 +3637,9 @@ }, { "title": "The Inbetweeners" + }, + { + "season": [1,2,3,4,5,6], + "title": "The Sopranos" } -] \ No newline at end of file +] diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 524d29f..99e2779 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2132,5 +2132,8 @@ }, { "title": "The Inbetweeners" + }, + { + "title": "The Sopranos" } ] \ No newline at end of file From ca89c7ba64257099385abf26c6c9cc07317dffb6 Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Sun, 31 Dec 2023 01:45:51 +0100 Subject: [PATCH 07/21] Improve French subtitle support --- PTN/parse.py | 2 +- PTN/patterns.py | 6 ++++-- tests/files/input.json | 4 +++- tests/files/output_raw.json | 22 +++++++++++++++++++++- tests/files/output_standard.json | 14 ++++++++++++++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/PTN/parse.py b/PTN/parse.py index c69fc74..fce523c 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -287,7 +287,7 @@ def standardise_languages(clean): if re.match( lang_regex, re.sub( - link_patterns(patterns["subtitles"][2:]), "", lang, flags=re.I + link_patterns(patterns["subtitles"][-2:]), 
"", lang, flags=re.I ), re.IGNORECASE, ): diff --git a/PTN/patterns.py b/PTN/patterns.py index efe3072..827e9d8 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -293,13 +293,15 @@ + delimiters + r"+|\b))" ) -subs_list_pattern = r"\b(?:" + link_patterns(langs) + delimiters + "*)" +subs_list_pattern = r"(?:" + link_patterns(langs) + delimiters + "*)" patterns["subtitles"] = [ "sub(?:title|bed)?s?{d}*{langs}+".format(d=delimiters, langs=subs_list_pattern), "(?:soft{d}*)?{langs}+(?:(?:m(?:ulti(?:ple)?)?{d}*)?sub(?:title|bed)?s?)".format( d=delimiters, langs=subs_list_pattern ), + ("VOSTFR", ["French"]), + # The following are patterns just for the 'subs' strings. Add normal sub stuff above. # Need a pattern just for subs, and can't just make above regexes * over + as we want # just 'subs' to match last. "(?:m(?:ulti(?:ple)?)?{d}*)sub(?:title|bed)?s?".format(d=delimiters), @@ -331,7 +333,7 @@ + "+)(?:" + delimiters + "*" - + patterns["subtitles"][2] + + patterns["subtitles"][-2] + ")", ] patterns["sbs"] = [("Half-SBS", "Half SBS"), ("SBS", None, "upper")] diff --git a/tests/files/input.json b/tests/files/input.json index 3592fb2..bbe46a5 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -397,5 +397,7 @@ "The Amazing World of Gumball", "Avatar The Last Airbender - The Complete Series 1080p [HEVC AAC] - SEPH1", "The Inbetweeners Complete Collection", - "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras" + "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras", + "The.Walking.Dead.S06E07.SUBFRENCH.HDTV.x264-AMB3R.mkv", + "The Good German (2006).VOSTFR.720p.WEBDL.h264.aac.mkv" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index cf58f48..80cb523 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3641,5 +3641,25 @@ { "season": [1,2,3,4,5,6], "title": "The Sopranos" + }, + { + "codec": "x264", + "encoder": "AMB3R", + "episode": 7, + 
"filetype": "mkv", + "quality": "HDTV", + "season": 6, + "subtitles": "SUBFRENCH", + "title": "The Walking Dead" + }, + { + "audio": "aac", + "codec": "h264", + "filetype": "mkv", + "quality": "WEBDL", + "resolution": "720p", + "subtitles": "VOSTFR", + "title": "The Good German", + "year": 2006 } -] +] \ No newline at end of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 99e2779..3057ce8 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2135,5 +2135,19 @@ }, { "title": "The Sopranos" + }, + { + "codec": "H.264", + "filetype": "MKV", + "subtitles": "French", + "title": "The Walking Dead" + }, + { + "audio": "AAC", + "codec": "H.264", + "filetype": "MKV", + "quality": "WEB-DL", + "subtitles": "French", + "title": "The Good German" } ] \ No newline at end of file From ee7a3b67bb42d4234d532f4d7a88fa20479aba7e Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Sun, 31 Dec 2023 02:01:21 +0100 Subject: [PATCH 08/21] Improve site matching at beginning of title --- PTN/patterns.py | 4 ++-- tests/files/input.json | 3 ++- tests/files/output_raw.json | 5 +++++ tests/files/output_standard.json | 3 +++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/PTN/patterns.py b/PTN/patterns.py index 827e9d8..4a27eab 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -40,7 +40,7 @@ + delimiters + "|$))+)" ) -pre_website_encoder_pattern = r"[^\s\.\[\]\-\(\)]+\)\s{0,2}\[[^\s\-]+\]|[^\s\.\[\]\-\(\)]+\s{0,2}(?:-\s)?[^\s\.\[\]\-]+$" +pre_website_encoder_pattern = r"[^\s\.\[\]\-\(\)]+\)\s*\[[^\s\-]+\]|[^\s\.\[\]\-\(\)]+\s*(?:-\s)?[^\s\.\[\]\-]+$" # Forces an order to go by the regexes, as we want this to be deterministic (different # orders can generate different matchings). e.g. "doctor_who_2005..." 
in input.json @@ -279,7 +279,7 @@ (r"\.?(iso)$", "ISO"), ] patterns["widescreen"] = "WS" -patterns["site"] = [r"^(\[ ?([^\]]+?)\s?\])", r"^((?:www\.)?[\w-]+\.[\w]{2,4})\s-\s?"] +patterns["site"] = [r"^(\[ ?([^\]]+?)\s?\])", r"^((?:www\.)?[\w-]+\.[\w]{2,4})\s+-\s*"] lang_list_pattern = ( r"\b(?:" diff --git a/tests/files/input.json b/tests/files/input.json index bbe46a5..dd96722 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -399,5 +399,6 @@ "The Inbetweeners Complete Collection", "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras", "The.Walking.Dead.S06E07.SUBFRENCH.HDTV.x264-AMB3R.mkv", - "The Good German (2006).VOSTFR.720p.WEBDL.h264.aac.mkv" + "The Good German (2006).VOSTFR.720p.WEBDL.h264.aac.mkv", + "www.Torrenting.com - Anatomy Of A Fall (2023)" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 80cb523..6c953f7 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3661,5 +3661,10 @@ "subtitles": "VOSTFR", "title": "The Good German", "year": 2006 + }, + { + "site": "www.Torrenting.com", + "title": "Anatomy Of A Fall", + "year": 2023 } ] \ No newline at end of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 3057ce8..fd37acb 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2149,5 +2149,8 @@ "quality": "WEB-DL", "subtitles": "French", "title": "The Good German" + }, + { + "title": "Anatomy Of A Fall" } ] \ No newline at end of file From 16835f03fc6f1216e2aee7c413e0766872247197 Mon Sep 17 00:00:00 2001 From: Giorgio Momigliano Date: Sun, 31 Dec 2023 02:04:45 +0100 Subject: [PATCH 09/21] Bump version --- PTN/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PTN/__init__.py b/PTN/__init__.py index 62b224b..015ee60 100644 --- a/PTN/__init__.py +++ b/PTN/__init__.py @@ -14,7 +14,7 @@ __author__ = "Giorgio Momigliano" __email__ = 
"gmomigliano@protonmail.com" -__version__ = "2.7" +__version__ = "2.8" __license__ = "MIT" From 20ae328c3399f81806d5a1dd8734f1f9a3d2110e Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Thu, 4 Jan 2024 10:34:43 +0530 Subject: [PATCH 10/21] added standard resolution types --- PTN/patterns.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/PTN/patterns.py b/PTN/patterns.py index 4a27eab..3d33aeb 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -136,14 +136,19 @@ patterns["day"] = "(?:{year}){d}(?:{month}){d}({day})".format( d=delimiters, year=year_pattern, month=month_pattern, day=day_pattern ) +# resolution pattern according to https://ihax.io/display-resolution-explained/ and GPT4 patterns["resolution"] = [ - ("([0-9]{3,4}(?:p|i))", None, "lower"), - ("(1280{d}?x{d}?720p?)".format(d=delimiters), "720p"), - ("FHD|1920{d}?x{d}?1080p?".format(d=delimiters), "1080p"), - ("3840x2160p?", "2160p"), - ("UHD", "UHD"), - ("HD", "HD"), - ("4K", "4K"), + (r"([0-9]{3,4}(?:p|i))", None, "lower"), # Generic pattern for resolutions like 480p, 720p, 1080p, etc. 
+ (r"(SD)", "480p"), # Pattern for Standard Definition + (r"(qHD)", "540p"), # Pattern for quarter High Definition + (r"(HD|1280{d}?x{d}?720p?)".format(d=delimiters), "720p"), # Pattern for HD / 720p + (r"(Full HD|FHD|1920{d}?x{d}?1080p?)".format(d=delimiters), "1080p"), # Pattern for Full HD / 1080p + (r"(2K|2048{d}?x{d}?1080p?)".format(d=delimiters), "2K"), # Pattern for 2K + (r"(QHD|QuadHD|WQHD|2560{d}?x{d}?1440p?)".format(d=delimiters), "1440p"), # Pattern for QHD / 1440p + (r"(4K UHD|UHD|3840{d}?x{d}?2160p?)".format(d=delimiters), "2160p"), # Pattern for 4K UHD / 2160p + (r"(4K|4096{d}?x{d}?2160p?)".format(d=delimiters), "4K"), # Pattern for 4K / Cinema 4K + (r"(5K|5120{d}?x{d}?2880p?)".format(d=delimiters), "5K"), # Pattern for 5K + (r"(8K|7680{d}?x{d}?4320p?)".format(d=delimiters), "8K"), # Pattern for 8K ] patterns["quality"] = [ ("WEB[ -\.]?DL(?:Rip|Mux)?|HDRip", "WEB-DL"), From 74de8567bfa0a4468b31f25f75455829d5fb6a9e Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Thu, 4 Jan 2024 11:23:26 +0530 Subject: [PATCH 11/21] reorder the pattern from highest to lowest. --- PTN/patterns.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/PTN/patterns.py b/PTN/patterns.py index 3d33aeb..6a0932d 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -136,19 +136,20 @@ patterns["day"] = "(?:{year}){d}(?:{month}){d}({day})".format( d=delimiters, year=year_pattern, month=month_pattern, day=day_pattern ) -# resolution pattern according to https://ihax.io/display-resolution-explained/ and GPT4 +# resolution pattern according to https://ihax.io/display-resolution-explained/ and GPT4. +# order from highest to lowest due to some torrent name having '4K HD' in them but its technically 4K. also this will temporarily fix for QHD. patterns["resolution"] = [ (r"([0-9]{3,4}(?:p|i))", None, "lower"), # Generic pattern for resolutions like 480p, 720p, 1080p, etc. 
- (r"(SD)", "480p"), # Pattern for Standard Definition - (r"(qHD)", "540p"), # Pattern for quarter High Definition - (r"(HD|1280{d}?x{d}?720p?)".format(d=delimiters), "720p"), # Pattern for HD / 720p - (r"(Full HD|FHD|1920{d}?x{d}?1080p?)".format(d=delimiters), "1080p"), # Pattern for Full HD / 1080p - (r"(2K|2048{d}?x{d}?1080p?)".format(d=delimiters), "2K"), # Pattern for 2K - (r"(QHD|QuadHD|WQHD|2560{d}?x{d}?1440p?)".format(d=delimiters), "1440p"), # Pattern for QHD / 1440p + (r"(8K|7680{d}?x{d}?4320p?)".format(d=delimiters), "8K"), # Pattern for 8K + (r"(5K|5120{d}?x{d}?2880p?)".format(d=delimiters), "5K"), # Pattern for 5K (r"(4K UHD|UHD|3840{d}?x{d}?2160p?)".format(d=delimiters), "2160p"), # Pattern for 4K UHD / 2160p (r"(4K|4096{d}?x{d}?2160p?)".format(d=delimiters), "4K"), # Pattern for 4K / Cinema 4K - (r"(5K|5120{d}?x{d}?2880p?)".format(d=delimiters), "5K"), # Pattern for 5K - (r"(8K|7680{d}?x{d}?4320p?)".format(d=delimiters), "8K"), # Pattern for 8K + (r"(QHD|QuadHD|WQHD|2560{d}?x{d}?1440p?)".format(d=delimiters), "1440p"), # Pattern for QHD / 1440p + (r"(2K|2048{d}?x{d}?1080p?)".format(d=delimiters), "2K"), # Pattern for 2K + (r"(Full HD|FHD|1920{d}?x{d}?1080p?)".format(d=delimiters), "1080p"), # Pattern for Full HD / 1080p + (r"(HD|1280{d}?x{d}?720p?)".format(d=delimiters), "720p"), # Pattern for HD / 720p + (r"(qHD)", "540p"), # Pattern for quarter High Definition + (r"(SD)", "480p"), # Pattern for Standard Definition ] patterns["quality"] = [ ("WEB[ -\.]?DL(?:Rip|Mux)?|HDRip", "WEB-DL"), From c3002b767de54ffdcf633e60a16567b49cc7e0e6 Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Thu, 4 Jan 2024 11:40:36 +0530 Subject: [PATCH 12/21] Added new test title --- tests/files/input.json | 3 ++- tests/files/output_raw.json | 13 +++++++++++++ tests/files/output_standard.json | 7 +++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/files/input.json b/tests/files/input.json index dd96722..9190e06 100644 --- a/tests/files/input.json +++ 
b/tests/files/input.json @@ -400,5 +400,6 @@ "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras", "The.Walking.Dead.S06E07.SUBFRENCH.HDTV.x264-AMB3R.mkv", "The Good German (2006).VOSTFR.720p.WEBDL.h264.aac.mkv", - "www.Torrenting.com - Anatomy Of A Fall (2023)" + "www.Torrenting.com - Anatomy Of A Fall (2023)", + "www.1TamilBlasters.lat - Thuritham (2023) [Tamil - 2K QHD AVC UNTOUCHED - x264 - AAC - 3.4GB - ESub].mkv" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 6c953f7..4f7e075 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3666,5 +3666,18 @@ "site": "www.Torrenting.com", "title": "Anatomy Of A Fall", "year": 2023 + }, + { + "audio": "AAC", + "codec": "x264", + "filetype": "mkv", + "language": "Tamil", + "resolution": "QHD", + "site": "www.1TamilBlasters.lat", + "size": "3.4GB", + "subtitles": "ESub", + "title": "Thuritham", + "untouched": true, + "year": 2023 } ] \ No newline at end of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index fd37acb..b3d5803 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2152,5 +2152,12 @@ }, { "title": "Anatomy Of A Fall" + }, + { + "codec": "H.264", + "filetype": "MKV", + "resolution": "1440p", + "subtitles": "English", + "title": "Thuritham" } ] \ No newline at end of file From d9a21d2fdc18471eef54fb39a5a7be37335afbf8 Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Thu, 25 Jan 2024 08:48:54 +0530 Subject: [PATCH 13/21] fix torrent name and site parsing --- PTN/patterns.py | 2 +- PTN/post.py | 2 +- tests/files/input.json | 6 ++++-- tests/files/output_raw.json | 23 +++++++++++++++++++++++ tests/files/output_standard.json | 12 ++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/PTN/patterns.py b/PTN/patterns.py index 8fb8f0d..5aa874f 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -291,7 +291,7 @@ (r"\.?(iso)$", 
"ISO"), ] patterns["widescreen"] = "WS" -patterns["site"] = [r"^(\[ ?([^\]]+?)\s?\])", r"^((?:www\.)?[\w-]+\.[\w]{2,4})\s+-\s*"] +patterns["site"] = [r"^(www\.[\w-]+\.[\w-]+)\s+-\s*", r"^((?:www\.)?[\w-]+\.[\w-]+(?:\.[\w-]+)?)\s+-\s*", r"^(\[ ?([^\]]+?)\s?\])"] lang_list_pattern = ( r"\b(?:" diff --git a/PTN/post.py b/PTN/post.py index 498e759..aae8082 100644 --- a/PTN/post.py +++ b/PTN/post.py @@ -76,7 +76,7 @@ def try_encoder_before_site(self, unmatched): "site", (match_s + len(encoder_raw), match_e), self._clean_string(site_raw), - overwrite=True, + overwrite=False, ) unmatched = unmatched.replace(match.group(0), "") diff --git a/tests/files/input.json b/tests/files/input.json index 5a17e46..22803e6 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -402,5 +402,7 @@ "The Good German (2006).VOSTFR.720p.WEBDL.h264.aac.mkv", "www.Torrenting.com - Anatomy Of A Fall (2023)", "Eu.gosto.do.Homem-Aranha.e.dai.1080p.AAC2.0.H264.mkv", - "www.1TamilBlasters.lat - Thuritham (2023) [Tamil - 2K QHD AVC UNTOUCHED - x264 - AAC - 3.4GB - ESub].mkv" -] + "www.1TamilBlasters.lat - Thuritham (2023) [Tamil - 2K QHD AVC UNTOUCHED - x264 - AAC - 3.4GB - ESub].mkv", + "www.1TamilMV.world - Raja Vikramarka (2024) Tamil HQ HDRip - 400MB - x264 - AAC - ESub.mkv", + "www.1TamilMV.world - Kotha Rangula Prapancham (2024) Telugu HQ PreDVD - 700MB - x264 - HQ Clean Aud.mkv" +] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 67a6b76..7a6d3cd 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3682,5 +3682,28 @@ "title": "Thuritham", "untouched": true, "year": 2023 + }, + { + "audio": "AAC", + "codec": "x264", + "encoder": "HQ", + "filetype": "mkv", + "language": "Tamil", + "quality": "HDRip", + "site": "www.1TamilMV.world", + "size": "400MB", + "subtitles": "ESub", + "title": "Raja Vikramarka", + "year": 2024 + }, + { + "codec": "x264", + "encoder": "Clean", + "filetype": "mkv", + "language": 
"Telugu", + "site": "www.1TamilMV.world", + "size": "700MB", + "title": "Kotha Rangula Prapancham", + "year": 2024 } ] \ No newline at end of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 80165be..d76159e 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2164,5 +2164,17 @@ "resolution": "1440p", "subtitles": "English", "title": "Thuritham" + }, + { + "codec": "H.264", + "filetype": "MKV", + "quality": "WEB-DL", + "subtitles": "English", + "title": "Raja Vikramarka" + }, + { + "codec": "H.264", + "filetype": "MKV", + "title": "Kotha Rangula Prapancham" } ] \ No newline at end of file From 5f4c12b3ac6ed108a68258f1c67e69d4c12a3c71 Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Thu, 15 Feb 2024 12:46:21 +0530 Subject: [PATCH 14/21] Add site regex description --- PTN/patterns.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PTN/patterns.py b/PTN/patterns.py index 219e229..9ba0bee 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -298,6 +298,8 @@ (r"\.?(iso)$", "ISO"), ] patterns["widescreen"] = "WS" + +# Valid the sites with strict URL rules and then other possible sites with more relaxed rules patterns["site"] = [r"^(www\.[\w-]+\.[\w-]+)\s+-\s*", r"^((?:www\.)?[\w-]+\.[\w-]+(?:\.[\w-]+)?)\s+-\s*", r"^(\[ ?([^\]]+?)\s?\])"] lang_list_pattern = ( From 1e4137d1658a58a288928edea2c5735d094bcdcc Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Mon, 27 May 2024 07:28:17 +0530 Subject: [PATCH 15/21] Add more language patterns --- PTN/extras.py | 57 ++++++++++++++--------- tests/files/input.json | 11 ++++- tests/files/output_raw.json | 78 ++++++++++++++++++++++++++++++++ tests/files/output_standard.json | 50 ++++++++++++++++++++ 4 files changed, 174 insertions(+), 22 deletions(-) diff --git a/PTN/extras.py b/PTN/extras.py index e80dda3..ab8f841 100644 --- a/PTN/extras.py +++ b/PTN/extras.py @@ -5,35 +5,51 @@ delimiters = "[\.\s\-\+_\/(),]" langs = [ - ("rus(?:sian)?", "Russian"), - 
("(?:True)?fre?(?:nch)?", "French"), + ("rus(?:sian)?|russo", "Russian"), + ("(?:True)?fre?(?:nch)?|fr(?:ench|a|e|anc[eê]s)?", "French"), ("(?:nu)?ita(?:liano?)?", "Italian"), ("castellano|spa(?:nish)?|esp?", "Spanish"), ("swedish", "Swedish"), ("dk|dan(?:ish)?", "Danish"), - ("ger(?:man)?|deu(?:tsch)?", "German"), + ("ger(?:man)?|deu(?:tsch)?|alem[aã]o", "German"), ("nordic", "Nordic"), ("exyu", "ExYu"), - ("chs|chi(?:nese)?", "Chinese"), + ("chs|chi(?:nese)?|(?:mand[ae]rin|ch[sn])|chin[eê]s|zh-hans", "Chinese"), ("hin(?:di)?", "Hindi"), ("polish|poland|pl", "Polish"), - ("mandarin", "Mandarin"), - ("kor(?:ean)?", "Korean"), + ("kor(?:ean)?|coreano", "Korean"), ("ben(?:gali)?|bangla", "Bengali"), ("kan(?:nada)?", "Kannada"), - ("tam(?:il)?", "Tamil"), + ("t[aâ]m(?:il)?", "Tamil"), ("tel(?:ugu)?", "Telugu"), ("mar(?:athi)?", "Marathi"), ("mal(?:ayalam)?", "Malayalam"), - ("japanese|ja?p", "Japanese"), + ("guj(?:arati)?", "Gujarati"), + ("pun(?:jabi)?", "Punjabi"), + ("ori(?:ya)?", "Oriya"), + ("japanese|ja?p|jpn|japon[eê]s", "Japanese"), ("interslavic", "Interslavic"), ("ara(?:bic)?", "Arabic"), ("urdu", "Urdu"), - ("punjabi", "Punjabi"), - ("portuguese", "Portuguese"), - ("albanian?", "Albanian"), - ("egypt(?:ian)?", "Egyptian"), - ("en?(?:g(?:lish)?)?", "English"), # Must be at end, matches just an 'e' + ("tur(?:kish)?|tr", "Turkish"), + ("tailand[eê]s|thai?", "Thai"), + ("tagalog", "Tagalog"), + ("ind(?:onesian)?", "Indonesian"), + ("vie(?:tnamese)?", "Vietnamese"), + ("heb(?:rew)?", "Hebrew"), + ("gre(?:ek)?", "Greek"), + ("cz(?:ech)?", "Czech"), + ("hun(?:garian)?", "Hungarian"), + ("ukr(?:ainian)?", "Ukrainian"), + ("fin(?:nish)?", "Finnish"), + ("nor(?:wegian)?", "Norwegian"), + ("sin(?:hala)?", "Sinhala"), + ("dutch|nl", "Dutch"), + ("p[ua]n(?:jabi)?", "Punjabi"), + ("por(?:tuguese)?|portugu[eèê]s[ea]?|p[rt]|port?", "Portuguese"), + ("alb(?:anian?)?|albanais", "Albanian"), + ("egypt(?:ian)?|egy", "Egyptian"), + ("en?(?:g(?:lish)?)?|ing(?:l[eéê]s)?", 
"English"), # Must be at end, matches just an 'e' ] genres = [ @@ -88,7 +104,6 @@ "extended": [r"(EXTENDED{d}(?!(?:CUT|EDITIONS?)))".format(d=delimiters)], } - channels = [(1, 0), (2, 0), (5, 0), (5, 1), (6, 1), (7, 1)] @@ -182,12 +197,12 @@ def link_patterns(pattern_options): return ( "(?:" + "|".join( - [ - pattern_option[0] - if isinstance(pattern_option, tuple) - else pattern_option - for pattern_option in pattern_options - ] - ) + [ + pattern_option[0] + if isinstance(pattern_option, tuple) + else pattern_option + for pattern_option in pattern_options + ] + ) + ")" ) diff --git a/tests/files/input.json b/tests/files/input.json index 2f1f159..ee338ca 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -405,5 +405,14 @@ "www.1TamilBlasters.lat - Thuritham (2023) [Tamil - 2K QHD AVC UNTOUCHED - x264 - AAC - 3.4GB - ESub].mkv", "www.1TamilMV.world - Raja Vikramarka (2024) Tamil HQ HDRip - 400MB - x264 - AAC - ESub.mkv", "www.1TamilMV.world - Kotha Rangula Prapancham (2024) Telugu HQ PreDVD - 700MB - x264 - HQ Clean Aud.mkv", - "The.Lord.of.the.Rings.Extended.Edition.2001.1080p.BluRay.x264.DTS-WiKi" + "The.Lord.of.the.Rings.Extended.Edition.2001.1080p.BluRay.x264.DTS-WiKi", + "Deadpool 2016 1080p BluRay DTS Rus Ukr 3xEng HDCL", + "127.Heures.FRENCH.DVDRip.AC3.XViD-DVDFR", + "Men in Black International 2019 (ingl\u00eas portugu\u00eas)", + "Quarantine [2008] [DVDRiP.XviD-M14CH0] [Lektor PL] [Arx]", + "All.Love.E146.KOR.HDTV.XViD-DeBTV", + "Atonement.2017.KOREAN.ENSUBBED.1080p.WEBRip.x264-VXTT", + "Fauda.S01.HEBREW.1080p.NF.WEBRip.DD5.1.x264-TrollHD[rartv]", + "Chinese Zodiac (2012) 1080p BrRip x264 - YIFY", + "Thai Massage (2022) 720p PDVDRip x264 AAC.mkv" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index ab9aa08..a1daca4 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -3715,5 +3715,83 @@ "resolution": "1080p", "title": "The Lord of the Rings", "year": 2001 + }, + 
{ + "audio": "DTS", + "encoder": "3xEng", + "language": ["Rus","Ukr"], + "quality": "BluRay", + "resolution": "1080p", + "site": "HDCL", + "title": "Deadpool", + "year": 2016 + }, + { + "audio": "AC3", + "codec": "XViD", + "encoder": "DVDFR", + "language": "FRENCH", + "quality": "DVDRip", + "title": "127 Heures" + }, + { + "language": ["inglês","português"], + "title": "Men in Black International", + "year": 2019 + }, + { + "codec": "XviD", + "language": "PL", + "quality": "DVDRiP", + "site": "Arx", + "title": "Quarantine", + "year": 2008 + }, + { + "codec": "XViD", + "encoder": "DeBTV", + "episode": 146, + "language": "KOR", + "quality": "HDTV", + "title": "All Love" + }, + { + "codec": "x264", + "encoder": "VXTT", + "language": "KOREAN", + "quality": "WEBRip", + "resolution": "1080p", + "subtitles": "ENSUBBED", + "title": "Atonement", + "year": 2017 + }, + { + "audio": "DD5.1", + "codec": "x264", + "encoder": "TrollHD", + "language": "HEBREW", + "network": "NF", + "quality": "WEBRip", + "resolution": "1080p", + "season": 1, + "site": "rartv", + "title": "Fauda" + }, + { + "codec": "x264", + "encoder": "YIFY", + "quality": "BrRip", + "resolution": "1080p", + "title": "Chinese Zodiac", + "year": 2012 + }, + { + "audio": "AAC", + "codec": "x264", + "encoder": "PDVDRip", + "filetype": "mkv", + "resolution": "720p", + "title": "Thai Massage", + "year": 2022 } ] \ No newline at end of file diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 79a6112..94a94cd 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -2181,5 +2181,55 @@ "codec": "H.264", "quality": "Blu-ray", "title": "The Lord of the Rings" + }, + { + "language": ["Russian","Ukrainian"], + "quality": "Blu-ray", + "title": "Deadpool" + }, + { + "audio": "Dolby Digital", + "codec": "Xvid", + "language": "French", + "quality": "DVD-Rip", + "title": "127 Heures" + }, + { + "language": ["English","Portuguese"], + "title": "Men in Black 
International" + }, + { + "codec": "Xvid", + "language": "Polish", + "quality": "DVD-Rip", + "title": "Quarantine" + }, + { + "codec": "Xvid", + "language": "Korean", + "title": "All Love" + }, + { + "codec": "H.264", + "language": "Korean", + "subtitles": "English", + "title": "Atonement" + }, + { + "audio": "Dolby Digital 5.1", + "codec": "H.264", + "language": "Hebrew", + "network": "Netflix", + "title": "Fauda" + }, + { + "codec": "H.264", + "quality": "BRRip", + "title": "Chinese Zodiac" + }, + { + "codec": "H.264", + "filetype": "MKV", + "title": "Thai Massage" } ] \ No newline at end of file From e6daab6f3cb85e687cfaf65c58e7694d3e0aa587 Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Mon, 27 May 2024 13:46:44 +0530 Subject: [PATCH 16/21] rename test data generator to not trigger unit test by default --- README.md | 2 +- tests/{test_generator.py => generate_test_data.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{test_generator.py => generate_test_data.py} (100%) diff --git a/README.md b/README.md index 5a4a626..3799e7e 100644 --- a/README.md +++ b/README.md @@ -218,7 +218,7 @@ $ python cli.py --coherent-types 'A freakishly cool movie or TV episode' Submit a PR on the `dev` branch. If you have changed the regex for a pattern, I can assume this is because you had a title that was being incorrectly processed, and your change fixes it. Please add the title to the test suite! To add new titles to the tests, you have 2 options (the first is easier): -- Add the titles to `tests/test_generator`'s main method (in `add_titles()`), and run it. When asked for input, type 's', and it will automatically add what's needed to `files/input.json`, `files/output_raw.json`, and `files/output_standard.json`. The fields `encoder`, `excess`, `site`, and `episodeName` don't always have to be correct - if they're giving you issues, or seem wrong, feel free to manually remove them from the output test files. 
+- Add the titles to `tests/generate_test_data.py`'s main method (in `add_titles()`), and run it. When asked for input, type 's', and it will automatically add what's needed to `files/input.json`, `files/output_raw.json`, and `files/output_standard.json`. The fields `encoder`, `excess`, `site`, and `episodeName` don't always have to be correct - if they're giving you issues, or seem wrong, feel free to manually remove them from the output test files. - Otherwise, you must add input torrent names to `tests/files/input.json` and full output json objects (with `standardise=False`) to `tests/files/output_raw.json`. Also add the standardised output to `tests/files/output_standard.json`, only including fields that are different from `output_raw.json`, along with `title`. diff --git a/tests/test_generator.py b/tests/generate_test_data.py similarity index 100% rename from tests/test_generator.py rename to tests/generate_test_data.py From c91e1c1cfc402ae89581787dc501531070e2903d Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Mon, 27 May 2024 13:49:08 +0530 Subject: [PATCH 17/21] #64: Add support for parsing non-english chars along with english title --- PTN/parse.py | 31 ++++++++++++++----- PTN/patterns.py | 16 ++++++++++ tests/files/input.json | 9 +++++- tests/files/output_raw.json | 53 ++++++++++++++++++++++++++++++++ tests/files/output_standard.json | 29 +++++++++++++++++ 5 files changed, 130 insertions(+), 8 deletions(-) diff --git a/PTN/parse.py b/PTN/parse.py index a2639d3..2bd8f93 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -36,21 +36,27 @@ def _part(self, name, match_slice, clean, overwrite=False): self.match_slices.append(match_slice) @staticmethod - def _clean_string(string): - clean = re.sub(r"^( -|\(|\[)", "", string) - if clean.find(" ") == -1 and clean.find(".") != -1: + def _clean_dots(string: str) -> str: + if string.find(" ") == -1 and string.find(".") != -1: # 4 dots likely means we want an ellipsis and a space - clean = re.sub(r"\.{4,}", "... 
", clean) + string = re.sub(r"\.{4,}", "... ", string) # Replace any instances of less than 3 dots with a space # Lookarounds are used to prevent the 3-dots (ellipses) from being replaced - clean = re.sub(r"(? Date: Fri, 12 Jul 2024 20:56:03 +0530 Subject: [PATCH 18/21] Standardize PTN parsers --- PTN/extras.py | 2 +- PTN/parse.py | 17 +- PTN/patterns.py | 46 +- PTN/post.py | 61 +- README.md | 29 +- tests/files/input.json | 36 +- tests/files/output_raw.json | 2203 +++++++++++++++++++++++------- tests/files/output_standard.json | 769 ++++++++--- 8 files changed, 2332 insertions(+), 831 deletions(-) diff --git a/PTN/extras.py b/PTN/extras.py index ab8f841..4db4543 100644 --- a/PTN/extras.py +++ b/PTN/extras.py @@ -94,7 +94,7 @@ # or a season. So if we have a language in the title it won't cause issues by getting matched. # Empty list indicates to always do so, as opposed to matching specific regexes. patterns_ignore_title = { - "language": [], + "languages": [], "audio": ["LiNE"], "network": ["Hallmark"], "untouched": [], diff --git a/PTN/parse.py b/PTN/parse.py index 2bd8f93..5a25ebd 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -15,7 +15,7 @@ def __init__(self): self.coherent_types = None self.post_title_pattern = "(?:{}|{}|720p|1080p)".format( - link_patterns(patterns["season"]), link_patterns(patterns["year"]) + link_patterns(patterns["seasons"]), link_patterns(patterns["year"]) ) def _part(self, name, match_slice, clean, overwrite=False): @@ -24,9 +24,6 @@ def _part(self, name, match_slice, clean, overwrite=False): if name not in ["title", "episodeName"] and not isinstance(clean, bool): if not isinstance(clean, list): clean = [clean] - else: - if isinstance(clean, list) and len(clean) == 1: - clean = clean[0] # Avoids making a list if it only has 1 element self.parts[name] = clean self.part_slices[name] = match_slice @@ -72,7 +69,7 @@ def parse(self, name, standardise, coherent_types): pattern_options = self.normalise_pattern_options(pattern_options) for 
(pattern, replace, transforms) in pattern_options: - if key not in ("season", "episode", "site", "language", "genre"): + if key not in ("seasons", "episodes", "site", "languages", "genres"): pattern = r"\b(?:{})\b".format(pattern) clean_name = re.sub(r"_", " ", self.torrent_name) @@ -101,11 +98,11 @@ def parse(self, name, standardise, coherent_types): index = self.get_match_indexes(match) - if key in ("season", "episode"): + if key in ("seasons", "episodes"): clean = self.get_season_episode(match) elif key == "subtitles": clean = self.get_subtitles(match) - elif key in ("language", "genre"): + elif key in ("languages", "genres"): clean = self.split_multi(match) elif key in types.keys() and types[key] == "boolean": clean = True @@ -242,7 +239,7 @@ def get_season_episode(match): elif len(match) > 1 and match[1] and m: clean = list(range(int(m[0]), int(match[1]) + 1)) elif m: - clean = int(m[0]) + clean = [int(m[0])] return clean @@ -277,11 +274,11 @@ def standardise_clean(self, clean, key, replace, transforms): # For python2 compatibility, we're not able to simply pass functions as str.upper # means different things in 2.7 and 3.5. clean = getattr(clean, transform[0])(*transform[1]) - if key == "language" or key == "subtitles": + if key == "languages" or key == "subtitles": clean = self.standardise_languages(clean) if not clean: clean = "Available" - if key == "genre": + if key == "genres": clean = self.standardise_genres(clean) return clean diff --git a/PTN/patterns.py b/PTN/patterns.py index 7137f63..fa96a67 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -47,8 +47,8 @@ patterns_ordered = [ "resolution", "quality", - "season", - "episode", + "seasons", + "episodes", "year", "month", "day", @@ -64,7 +64,7 @@ "sbs", "site", "documentary", - "language", + "languages", "subtitles", "unrated", "size", @@ -82,42 +82,42 @@ "untouched", "remux", "internationalCut", - "genre", + "genres", ] -# Some patterns overlap with others. Season & episode do this a lot. 
Without something like this, we'd get issues like +# Some patterns overlap with others. Seasons & episodes do this a lot. Without something like this, we'd get issues like # the Avatar test: ... Complete Series 1080p ... 'Series 10' would be matched as a season, but the 10 is # from 1080p, which also gets matched. patterns_allow_overlap = [ - "season", - "episode", - "language", + "seasons", + "episodes", + "languages", "subtitles", "sbs" ] patterns = {} -patterns["episode"] = [ +patterns["episodes"] = [ r"(? 1 ): - self._part("language", None, self.parts["subtitles"][0]) + self._part("languages", None, self.parts["subtitles"][:1]) self._part("subtitles", None, self.parts["subtitles"][1:], overwrite=True) -# Language matches, to support multi-language releases that have the audio with each -# language, will contain audio info (or simply extra strings like 'dub'). +# Language matches, to support multi-language releases that have the audio with each +# language, will contain audio info (or simply extra strings like 'dub'). # We remove non-lang matching items from this list. 
def filter_non_languages(self): - if "language" in self.parts and isinstance(self.parts["language"], list): - languages = list(self.parts["language"]) - for lang in self.parts["language"]: + if "languages" in self.parts and isinstance(self.parts["languages"], list): + languages = list(self.parts["languages"]) + for lang in self.parts["languages"]: matched = False for (lang_regex, lang_clean) in langs: if re.match(lang_regex, lang, re.IGNORECASE): @@ -187,22 +169,37 @@ def filter_non_languages(self): if not matched: languages.remove(lang) - self._part("language", self.part_slices["language"], languages, overwrite=True) + self._part("languages", self.part_slices["languages"], languages, overwrite=True) + + +def is_subtitle_available(self): + if "subtitles" not in self.parts: + return + + languages = self.parts.get("languages") + subtitles = self.parts.get("subtitles") + + self.parts["is_subtitle_available"] = bool(subtitles) + + if "Available" == subtitles and languages: + self._part("subtitles", self.part_slices["subtitles"], languages, overwrite=True) + elif "Available" == subtitles: + self.parts.pop("subtitles") def try_vague_season_episode(self): title = self.parts["title"] m = re.search("(\d{1,2})-(\d{1,2})$", title) if m: - if "season" not in self.parts and "episode" not in self.parts: + if "seasons" not in self.parts and "episodes" not in self.parts: new_title = title[: m.start()] offset = self.part_slices["title"][0] # Setting the match slices here doesn't actually matter, but good practice. 
self._part( - "season", (offset + m.start(1), offset + m.end(1)), int(m.group(1)) + "seasons", (offset + m.start(1), offset + m.end(1)), [int(m.group(1))] ) self._part( - "episode", (offset + m.start(2), offset + m.end(2)), int(m.group(2)) + "episodes", (offset + m.start(2), offset + m.end(2)), [int(m.group(2))] ) self._part( "title", @@ -231,9 +228,9 @@ def remove_empty_parts(self): post_processing_after_excess = [ try_encoder, try_site, - fix_same_subtitles_language_match, fix_subtitles_no_language, filter_non_languages, + is_subtitle_available, try_vague_season_episode, use_year_as_title_if_absent, remove_empty_parts, diff --git a/README.md b/README.md index 3799e7e..dc45410 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,8 @@ PTN.parse('The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]') # { # 'encoder': 'ASAP', # 'title': 'The Walking Dead', -# 'season': 5, -# 'episode': 3, +# 'seasons': [5], +# 'episodes': [3], # 'resolution': '720p', # 'codec': 'H.264', # 'quality': 'HDTV', @@ -74,8 +74,9 @@ PTN.parse('Vacancy (2007) 720p Bluray Dual Audio [Hindi + English] ⭐800 MB⭐ # 'year': 2007, # 'audio': 'Dolby Digital 2.0', # 'quality': 'Blu-ray', -# 'language': ['Hindi', 'English'], -# 'subtitles': 'Available', +# 'languages': ['Hindi', 'English'], +# 'subtitles': ['Hindi', 'English'], +# 'is_subtitle_available': True, # 'size': 800MB, # 'website': BonsaiHD # 'excess': '⭐⭐' @@ -89,8 +90,8 @@ PTN.parse('Deadliest.Catch.S00E66.No.Safe.Passage.720p.AMZN.WEB-DL.DDP2.0.H.264- # 'codec': 'H.264', # 'audio' : 'Dolby Digital Plus 2.0', # 'network': 'Amazon Studios', -# 'season': 0, -# 'episode': 66, +# 'seasons': [0], +# 'episodes': [66], # 'quality': 'WEB-DL', # 'episodeName': 'No Safe Passage', # 'website': 'TGx' @@ -101,7 +102,7 @@ PTN.parse('Insecure.S04.COMPLETE.720p.AMZN.WEBRip.x264-GalaxyTV') # 'title': 'Insecure' # 'encoder': 'GalaxyTV', # 'codec': 'H.264', -# 'season': 4, +# 'seasons': [4], # 'resolution': '720p', # 'network': 'Amazon Studios', # 'quality': 
'WEBRip', @@ -121,7 +122,7 @@ $ python cli.py 'Insecure.S04.COMPLETE.720p.AMZN.WEBRip.x264-GalaxyTV' 'title': 'Insecure' 'encoder': 'GalaxyTV', 'codec': 'H.264', - 'season': 4, + 'seasons': [4], 'resolution': '720p', 'network': 'Amazon Studios', 'quality': 'WEBRip', @@ -154,7 +155,7 @@ $ python cli.py --raw 'A freakishly cool movie or TV episode' The types of parts can be strings, integers, booleans, or lists of the first 2. To simplify this, you can enable the `coherent_types` flag. This will override the types described below according to these rules: - `title` and `episodeName` will always be strings. -- All other non-boolean fields will become lists of the type they currently are. For example, `language` will always be a list of strings, and `episode` a list of episodes. This can be weird for some fields, but it avoids a lot of `isinstance` calls - just always use `x in y` and you should be fine. +- All other non-boolean fields will become lists of the type they currently are. For example, `languages` will always be a list of strings, and `episodes` a list of episodes. This can be weird for some fields, but it avoids a lot of `isinstance` calls - just always use `x in y` and you should be fine. - Boolean types will remain as booleans. 
To enable this flag: @@ -177,18 +178,18 @@ $ python cli.py --coherent-types 'A freakishly cool movie or TV episode' * **directorsCut** *(boolean)* * **documentary** *(boolean)* * **encoder** *(string)* -* **episode** *(integer, integer list)* +* **episodes** *(integer list)* * **episodeName** *(string)* * **excess** *(string, string list)* * **extended** *(boolean)* * **filetype** *(string)* * **fps** *(integer)* -* **genre** *(string, string list)* +* **genres** *(string list)* * **hardcoded** *(boolean)* * **hdr** *(boolean)* * **internal** *(boolean)* * **internationalCut** *(boolean)* -* **language** *(string, string list)* +* **languages** *(string list)* * **limited** *(boolean)* * **month** *(integer)* * **network** *(string)* @@ -201,10 +202,10 @@ $ python cli.py --coherent-types 'A freakishly cool movie or TV episode' * **repack** *(boolean)* * **resolution** *(string)* * **sbs** *(string)* -* **season** *(integer, integer list)* +* **seasons** *(integer list)* * **site** *(string)* * **size** *(string)* -* **subtitles** *(string, string list)* +* **subtitles** *(string list)* * **title** *(string)* * **unrated** *(boolean)* * **untouched** *(boolean)* diff --git a/tests/files/input.json b/tests/files/input.json index 98a77e9..74cd68c 100644 --- a/tests/files/input.json +++ b/tests/files/input.json @@ -139,7 +139,7 @@ "Diabolique (1996).720p.H264.ita.eng.Ac3-5.1.sub.ita.eng-MIRCrew", "Road.House.2.Last.Call.2006.Unrated.1080p.HDTV.H264.AC3.DD2.0.Will1869", "We.Summon.The.Darkness.2020.1080p.Bluray.Atmos.TrueHD.7.1.x264-EVO[TGx]", - "37\u00b02 le matin - Betty Blue (1986) Director's Cut.720p.H264.ita.fre.sub.Eng-MIRCrew", + "37°2 le matin - Betty Blue (1986) Director's Cut.720p.H264.ita.fre.sub.Eng-MIRCrew", "Non-Fiction.2018.1080p.BluRay.x264-USURY", "Spring.Night.Summer.Night.1967.BDRip.x264-GHOULS[TGx]", "Insecure.S04.COMPLETE.720p.AMZN.WEBRip.x264-GalaxyTV", @@ -162,13 +162,13 @@ "Kitsutsuki Tanteidokoro - 10 (720p)(Multiple Subtitle)-Erai-raws[TGx]", 
"Princess Connect! Re-Dive - 11 (720p)(Multiple Subtitle)-Erai-raws[TGx]", "Fruits Basket S2 (2019) - 11 (720p)-HorribleSubs[TGx]", - "Kadakh (2020) Hindi 720p SonyLiv WEB-DL \u2b501.1 GB\u2b50 AAC DD- 2.0 ESub x264 - Shadow (BonsaiHD)", + "Kadakh (2020) Hindi 720p SonyLiv WEB-DL ⭐1.1 GB⭐ AAC DD- 2.0 ESub x264 - Shadow (BonsaiHD)", "Satyagraha (2013) (1080p BluRay x265 10bit HEVC AAC 5.1 RONIN)", "Shuddh Desi Romance 2013 Hindi 720p BluRay x264 AAC 5.1 MSubs - LOKiHD - Telly", "Face 2 Face (2019) Kannada HDRip - 720p - x264 - DD5.1 - 1.1GB - ESub - TamilMV", - "Chaman Bahar (2020) Hindi 720p NF WEBRip \u2b50800 MB\u2b50 DD- 5.1 ESub x264 - Shadow (BonsaiHD)", - "Piprabidya (2013) Bengali 720p Hoichoi WEB-DL \u2b50650 MB\u2b50 AAC DD- 2.0 ESub x264 - Shadow (BonsaiHD)", - "Penguin (2020) Tamil 720p AMZN WEBRip \u2b501.1 GB\u2b50 AAC DD- 5.1 ESub x264 - Shadow (BonsaiHD)", + "Chaman Bahar (2020) Hindi 720p NF WEBRip ⭐800 MB⭐ DD- 5.1 ESub x264 - Shadow (BonsaiHD)", + "Piprabidya (2013) Bengali 720p Hoichoi WEB-DL ⭐650 MB⭐ AAC DD- 2.0 ESub x264 - Shadow (BonsaiHD)", + "Penguin (2020) Tamil 720p AMZN WEBRip ⭐1.1 GB⭐ AAC DD- 5.1 ESub x264 - Shadow (BonsaiHD)", "Kadakh 2020 Hindi 1080p WEBRip x264 AC3 ESubs - LOKiHD - Telly", "Satyagraha (2013) (1080p BluRay x265 10bit HEVC AAC 5.1 RONIN)", "Kavacham (2018) Proper HDRip - x264 - [Tamil + Telugu + Hindi] - 750MB - ESub - TamilMV", @@ -237,7 +237,7 @@ "Mystery Diners S08 Season 8 Complete x265 720P", "Amar (2017) HDRip 720p Hindi + Spanish 800MB[MB].", "Oolu (2019)[Malayalam 720p HDTV UNTOUCHED - x264 1.4GB[MB]", - "Vacancy (2007) 720p Bluray Dual Audio [Hindi + English] \u2b50800 MB\u2b50 DD - 2.0 MSub x264 - Shadow (BonsaiHD)", + "Vacancy (2007) 720p Bluray Dual Audio [Hindi + English] ⭐800 MB⭐ DD - 2.0 MSub x264 - Shadow (BonsaiHD)", "Darkness Falls (2020) HDRip 720p [Hindi-Dub] Dual-Audio x264 - 1XCinema", "Wasp Network (2020) ITA-ENG Ac3 5.1 WEBRip 1080p H264 [ArMor]", "Darlin (2019) ITA-ENG Bluray 1080p - L@Z59 - iDN 
CreW.mkv", @@ -273,7 +273,7 @@ "Moothon: The Elder One (2019) Malayalam UNTOUCHED 720p WEB-DL - 2.5 GB - (DD- 2.0) ESub x264 - Shadow (BonsaiHD)", "Doppia Pelle - Le Daim (2019) BluRay 1080p.H264 Ita Fre AC3 5.1 Sub Ita Eng MIRCrew", "The Twilight Saga: Breaking Down - Parte 2 (2012) - 720p H264 Ita Eng DTS HD Masters 5.1 Sub Ita Eng by SnakeSPL MIRCrew", - "The Painted Bird (2019) Interslavic 720p Bluray \u2b501.3 GB\u2b50 DD- 2.0 ESub x264 - Shadow (BonsaiHD)", + "The Painted Bird (2019) Interslavic 720p Bluray ⭐1.3 GB⭐ DD- 2.0 ESub x264 - Shadow (BonsaiHD)", "Proximity.2020.1080p.Bluray.DTS-HD.MA.5.1.X264-EVO[TGx]", "Soviet Cinema - Provintsialki 1990 SATRip XviD x263-NOGROUP", "Just Mercy - Il diritto di opporsi (2019) AC3 5.1 ITA.ENG 1080p H265 sub NUita.eng Sp33dy94 MIRCrew", @@ -282,14 +282,14 @@ "War.2019.LIMITED.720p.BluRay.x264-Chakra[TGx]", "X-Men.2000.REMASTERED.BRRip.XviD.B4ND1T69", "X2.X-Men.United.2003.REMASTERED.BRRip.XviD.B4ND1T69", - "#Yaaram (2019) Hindi 720p DC WEBRip \u2b501.5 GB\u2b50 (DD- 2.0) HC ESub x264 - Shadow (BonsaiHD)", + "#Yaaram (2019) Hindi 720p DC WEBRip ⭐1.5 GB⭐ (DD- 2.0) HC ESub x264 - Shadow (BonsaiHD)", "Nessuno sa chi io sono qui-Nadie sabe que estoy aqui (2020) ITA-SPA Ac3 5.1 WEBRip 1080p H264 [ArMor]", "L.ultima corve (1973) ITA-ENG Ac3 2.0 BDRip 1080p H264 [ArMor]", "La signora di Shanghai-The lady from Shanghai (1947) ITA-ENG Ac3 2.0 BDRip 1080p H264 [ArMor]", "The Four (2012) BluRay - 720p - [Telugu + Tamil + Hindi + Chi] - 1.1GB - ESub - TamilMV", - "Samrat & Co. (2014) Hindi 720p AMZN WEBRip \u2b501.2 GB\u2b50 2CH ESub x264 - Shadow (BonsaiHD)", + "Samrat & Co. 
(2014) Hindi 720p AMZN WEBRip ⭐1.2 GB⭐ 2CH ESub x264 - Shadow (BonsaiHD)", "Tulips in Spring 2016 Hallmark 720p HDRip X264 Solar", - "Adu (2020) 720p NF WEB-DL Dual Audio [English + spanish] \u2b50950 MB\u2b50 DD- 5.1 x265 - Shadow (BonsaiHD)", + "Adu (2020) 720p NF WEB-DL Dual Audio [English + spanish] ⭐950 MB⭐ DD- 5.1 x265 - Shadow (BonsaiHD)", "When Sparks Fly 2014 Hallmark 720P HDTV X264 Solar", "The Fxxk-It List (2020) [English+Hindi - 720p - WEB HDRip - x264 - DD 5.1 - MSub - 2GB] - MAZE", "The Twilight Saga Breaking Dawn - Part 2 (2012) (1080p BDRip x265 10bit DTS-HD MA 7.1 - r0b0t) [TAoE].mkv", @@ -408,18 +408,18 @@ "The.Lord.of.the.Rings.Extended.Edition.2001.1080p.BluRay.x264.DTS-WiKi", "Deadpool 2016 1080p BluRay DTS Rus Ukr 3xEng HDCL", "127.Heures.FRENCH.DVDRip.AC3.XViD-DVDFR", - "Men in Black International 2019 (ingl\u00eas portugu\u00eas)", + "Men in Black International 2019 (inglês português)", "Quarantine [2008] [DVDRiP.XviD-M14CH0] [Lektor PL] [Arx]", "All.Love.E146.KOR.HDTV.XViD-DeBTV", "Atonement.2017.KOREAN.ENSUBBED.1080p.WEBRip.x264-VXTT", "Fauda.S01.HEBREW.1080p.NF.WEBRip.DD5.1.x264-TrollHD[rartv]", "Chinese Zodiac (2012) 1080p BrRip x264 - YIFY", "Thai Massage (2022) 720p PDVDRip x264 AAC.mkv", - "\u6740\u624b\u4e4b\u738b [\u6e2f\u7248\u539f\u76d8/\u56fd\u7ca4\u53cc\u8bed\u4e2d\u5b57].Hitman.1998.1080p.HKG.Blu-ray.AVC.TrueHD.7.1-TAG", - "[www.arabp2p.net]_-_\u062a\u0631\u0643\u064a \u0645\u062a\u0631\u062c\u0645 \u0648\u0645\u062f\u0628\u0644\u062c Last.Call.for.Istanbul.2023.1080p.NF.WEB-DL.DDP5.1.H.264.MKV.torrent", - "\u0413\u043e\u043b\u0443\u0431\u0430\u044f \u0432\u043e\u043b\u043d\u0430 / Blue Crush (2002) DVDRip", - "\u3010\u55b5\u840c\u5976\u8336\u5c4b\u3011\u260501\u6708\u65b0\u756a\u2605[Rebirth][01][720p][\u7b80\u4f53][\u62db\u52df\u7ffb\u8bd1]", - "08.\u041f\u043b\u0430\u043d\u0435\u0442\u0430.\u043e\u0431\u0435\u0437\u044c\u044f\u043d.\u0420\u0435\u0432\u043e\u043b\u044e\u0446\u0438\u044f.2014.BDRip-HEVC.1080p.mkv", - 
"\u0413\u0440\u0435\u0447\u0435\u0441\u043a\u0430\u044f \u0441\u043c\u043e\u043a\u043e\u0432\u043d\u0438\u0446\u0430 / The fruit is ripe / Griechische Feigen (Siggi G\u00f6tz) [1976, \u0413\u0435\u0440\u043c\u0430\u043d\u0438\u044f, \u042d\u0440\u043e\u0442\u0438\u0447\u0435\u0441\u043a\u0430\u044f \u043a\u043e\u043c\u0435\u0434\u0438\u044f, DVDRip]", - "\u041a\u043d\u0438\u0433\u043e\u043d\u043e\u0448\u0438 / \u041a\u043di\u0433\u0430\u043d\u043e\u0448\u044b (1987) TVRip \u043e\u0442 AND03AND | BLR" + "杀手之王 [港版原盘/国粤双语中字].Hitman.1998.1080p.HKG.Blu-ray.AVC.TrueHD.7.1-TAG", + "[www.arabp2p.net]_-_تركي مترجم ومدبلج Last.Call.for.Istanbul.2023.1080p.NF.WEB-DL.DDP5.1.H.264.MKV.torrent", + "Голубая волна / Blue Crush (2002) DVDRip", + "【喵萌奶茶屋】★01月新番★[Rebirth][01][720p][简体][招募翻译]", + "08.Планета.обезьян.Революция.2014.BDRip-HEVC.1080p.mkv", + "Греческая смоковница / The fruit is ripe / Griechische Feigen (Siggi Götz) [1976, Германия, Эротическая комедия, DVDRip]", + "Книгоноши / Кнiганошы (1987) TVRip от AND03AND | BLR" ] \ No newline at end of file diff --git a/tests/files/output_raw.json b/tests/files/output_raw.json index 67c78b5..16fac85 100644 --- a/tests/files/output_raw.json +++ b/tests/files/output_raw.json @@ -2,10 +2,14 @@ { "codec": "x264", "encoder": "ASAP", - "episode": 3, + "episodes": [ + 3 + ], "quality": "HDTV", "resolution": "720p", - "season": 5, + "seasons": [ + 5 + ], "site": "ettv", "title": "The Walking Dead" }, @@ -24,9 +28,13 @@ }, { "codec": "XviD", - "episode": 6, + "episodes": [ + 6 + ], "quality": "HDTV", - "season": 8, + "seasons": [ + 8 + ], "title": "The Big Bang Theory" }, { @@ -75,9 +83,13 @@ }, { "codec": "x264", - "episode": 5, + "episodes": [ + 5 + ], "quality": "HDTV", - "season": 2, + "seasons": [ + 2 + ], "title": "Marvel's Agents of S.H.I.E.L.D." 
}, { @@ -98,19 +110,27 @@ }, { "audio": "DD5.1", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Shadows", "quality": "WEB-DL", "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "title": "Marvel's Agents of S.H.I.E.L.D." }, { "codec": "x264", "encoder": "KILLERS", - "episode": 6, + "episodes": [ + 6 + ], "quality": "HDTV", - "season": 2, + "seasons": [ + 2 + ], "site": "ettv", "title": "Marvels Agents of S.H.I.E.L.D." }, @@ -123,10 +143,14 @@ "audio": "DD5.1", "codec": "H.264", "encoder": "Cyphanix", - "episode": 3, + "episodes": [ + 3 + ], "quality": "WEB-DL", "resolution": "1080p", - "season": 5, + "seasons": [ + 5 + ], "title": "The Walking Dead" }, { @@ -151,9 +175,13 @@ }, { "codec": "x264", - "episode": 6, + "episodes": [ + 6 + ], "quality": "HDTV", - "season": 5, + "seasons": [ + 5 + ], "title": "Downton Abbey" }, { @@ -173,32 +201,48 @@ }, { "codec": "x264", - "episode": 4, + "episodes": [ + 4 + ], "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "title": "The Flash", "year": 2014 }, { "codec": "x264", - "episode": 5, + "episodes": [ + 5 + ], "quality": "HDTV", - "season": 18, + "seasons": [ + 18 + ], "title": "South Park" }, { "codec": "x264", - "episode": 3, + "episodes": [ + 3 + ], "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "title": "The Flash", "year": 2014 }, { "codec": "x264", - "episode": 1, + "episodes": [ + 1 + ], "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "title": "The Flash", "year": 2014 }, @@ -220,10 +264,14 @@ }, { "codec": "x264", - "episode": 5, + "episodes": [ + 5 + ], "proper": true, "quality": "HDTV", - "season": 26, + "seasons": [ + 26 + ], "title": "The Simpsons" }, { @@ -235,10 +283,14 @@ }, { "codec": "x264", - "episode": 1, + "episodes": [ + 1 + ], "quality": "HDTV", "repack": true, - "season": 12, + "seasons": [ + 12 + ], "site": "eztv", "title": "Two and a Half Men" }, @@ -275,10 +327,14 @@ { "audio": "AAC", "codec": "x264", - "episode": 5, + "episodes": [ + 5 + ], 
"episodeName": "Viper", "quality": "WEB-DL", - "season": 1, + "seasons": [ + 1 + ], "title": "Gotham" }, { @@ -338,20 +394,28 @@ }, { "codec": "x264", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Pilot", "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "title": "The Missing" }, { "codec": "x264", "encoder": "FoV", - "episode": 11, + "episodes": [ + 11 + ], "episodeName": "Dark Water", "quality": "HDTV", "resolution": "720p", - "season": 8, + "seasons": [ + 8 + ], "site": "rartv", "title": "Doctor Who", "year": 2005 @@ -359,10 +423,14 @@ { "audio": "AAC", "codec": "x264", - "episode": 7, + "episodes": [ + 7 + ], "episodeName": "Penguins Umbrella", "quality": "WEB-DL", - "season": 1, + "seasons": [ + 1 + ], "title": "Gotham" }, { @@ -374,7 +442,9 @@ { "audio": "AAC", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "DvDScr", "title": "The Shaukeens", "year": 2014 @@ -398,7 +468,9 @@ { "audio": "AAC", "codec": "x264", - "language": "ENG", + "languages": [ + "ENG" + ], "quality": "CAM", "title": "Interstellar", "year": 2014 @@ -429,59 +501,90 @@ "year": 2014 }, { - "episode": 3, - "season": 1, + "episodes": [ + 3 + ], + "seasons": [ + 1 + ], "title": "Sons of Anarchy" }, { "codec": "x264", "encoder": "fov", - "episode": 12, + "episodes": [ + 12 + ], "episodeName": "death in heaven", "quality": "hdtv", "resolution": "720p", - "season": 8, + "seasons": [ + 8 + ], "title": "doctor who", "year": 2005 }, { "codec": "x264", - "episode": 1, + "episodes": [ + 1 + ], "quality": "bluray", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "title": "breaking bad" }, { - "episode": 3, + "episodes": [ + 3 + ], "episodeName": "Breaker of Chains", - "season": 4, + "seasons": [ + 4 + ], "title": "Game of Thrones" }, { "codec": "x264", - "episode": 10, + "episodes": [ + 10 + ], "quality": "BluRay", "resolution": "480p", - "season": 5, + "seasons": [ + 5 + ], "site": "720pMkv.Com", "title": "sons of anarchy" }, { "codec": 
"X264", "encoder": "DIMENSION", - "episode": 7, + "episodes": [ + 7 + ], "quality": "HDTV", "resolution": "720p", - "season": 7, + "seasons": [ + 7 + ], "site": "www.Speed.cd", "title": "Sons of Anarchy" }, { - "episode": 20, - "language": ["rus","eng"], + "episodes": [ + 20 + ], + "languages": [ + "rus", + "eng" + ], "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "title": "Community" }, { @@ -563,7 +666,9 @@ { "audio": "AC3 - 5.1", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "resolution": "720p", "title": "Akira", "upscaled": true, @@ -588,24 +693,46 @@ "encoder": "RAPiDCOWS", "quality": "BluRay", "resolution": "720p", - "season": 1, - "subtitles": "DKsubs", - "title": "The X-Files" + "seasons": [ + 1 + ], + "subtitles": [ + "DKsubs" + ], + "title": "The X-Files", + "is_subtitle_available": true }, { "codec": "x265", "quality": "BluRay", "resolution": "1080p", - "season": [1,2,3], - "subtitles": "DKsubs", - "title": "The X-Files" + "seasons": [ + 1, + 2, + 3 + ], + "subtitles": [ + "DKsubs" + ], + "title": "The X-Files", + "is_subtitle_available": true }, { "codec": "x264", "encoder": "GECKOS", "quality": "BluRay", "resolution": "1080p", - "season": [1,2,3,4,5,6,7,8,9], + "seasons": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], "title": "The X-Files" }, { @@ -613,15 +740,22 @@ "encoder": "Scene", "quality": "HDTV", "resolution": "720p", - "season": 3, + "seasons": [ + 3 + ], "title": "The Flash", "year": 2014 }, { "encoder": "ZhuixinFan", - "episode": 7, + "episodes": [ + 7 + ], "filetype": "mp4", - "language": ["Chi","Jap"], + "languages": [ + "Chi", + "Jap" + ], "quality": "HDTVrip", "resolution": "1280X720", "title": "Boku Unmei no Hito desu" @@ -630,7 +764,9 @@ "audio": "DTS5.1", "codec": "x264", "encoder": "TWA", - "language": "NORDiC", + "languages": [ + "NORDiC" + ], "quality": "BluRay", "resolution": "720p", "title": "Blind", @@ -639,7 +775,9 @@ { "codec": "x264", "resolution": "720p", - "season": 17, + 
"seasons": [ + 17 + ], "title": "Family Guy" }, { @@ -647,22 +785,30 @@ "network": "AMZN", "quality": "WEB-DL", "resolution": "720p", - "season": 23, + "seasons": [ + 23 + ], "title": "South Park" }, { "codec": "x264", "quality": "DVDrip", - "season": 1, + "seasons": [ + 1 + ], "title": "Borgen", "year": 2010 }, { "codec": "XviD", "encoder": "Rezar1337", - "language": "SWEDiSH", + "languages": [ + "SWEDiSH" + ], "quality": "DVDRip", - "season": 2, + "seasons": [ + 2 + ], "title": "Bumbibjornarna" }, { @@ -670,15 +816,22 @@ "codec": "x264", "quality": "HDRip", "resolution": "540p", - "subtitles": "KORSUB", + "subtitles": [ + "KORSUB" + ], "title": "The Martian", - "year": 2015 + "year": 2015, + "is_subtitle_available": true }, { - "episode": 9, + "episodes": [ + 9 + ], "episodeName": "Divide and Rule", "filetype": "mp4", - "season": 1, + "seasons": [ + 1 + ], "title": "Borgen" }, { @@ -708,72 +861,148 @@ { "audio": "AAC5.1", "codec": "X264", - "language": "Polish", + "languages": [ + "Polish" + ], "network": "NF", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "1983" }, { "audio": "AAC", "codec": "x264", "quality": "BRRip", - "season": 1, + "seasons": [ + 1 + ], "site": "GWC", - "subtitles": "E", - "title": "The Bridge" + "subtitles": [ + "E" + ], + "title": "The Bridge", + "is_subtitle_available": true }, { "codec": "x264", "quality": "HDRip", "resolution": "1080p", - "subtitles": "ExYu", + "subtitles": [ + "ExYu" + ], "title": "The Meg", - "year": 2018 + "year": 2018, + "is_subtitle_available": true }, { "codec": "x264", "quality": "CAM-Rip", - "subtitles": "English", + "subtitles": [ + "English" + ], "title": "Dragon Ball Super: Broly", - "year": 2018 + "year": 2018, + "is_subtitle_available": true }, { "codec": "x264", "hardcoded": true, - "language": "English", + "languages": [ + "English" + ], "quality": "HDRip", "resolution": "720p", - "subtitles": ["Hindi","Eng","Multi"], + "subtitles": [ + "Hindi", + "Eng", + 
"Multi" + ], "title": "Joker", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "AAC", "codec": "x264", "hardcoded": true, - "language": "Mandarin", + "languages": [ + "Mandarin" + ], "quality": "HDRip", "resolution": "1080p", - "subtitles": ["CHS","ENG"], + "subtitles": [ + "CHS", + "ENG" + ], "title": "IP Man And Four Kings", - "year": 2019 - }, - { - "season": [1,2,3,4,5,6,7,8,9,10,11,12,13], + "year": 2019, + "is_subtitle_available": true + }, + { + "seasons": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13 + ], "title": "American Dad!" }, { - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "DVDrip", - "season": 1, + "seasons": [ + 1 + ], "title": "The Simpsons" }, { "quality": "DVDRip", "resolution": "1080p", - "season": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28], + "seasons": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28 + ], "title": "The Simpsons" }, { @@ -801,24 +1030,38 @@ "network": "AMZN", "quality": "WEBRip", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "title": "Tom Clancys Jack Ryan" }, { "audio": "DDP 5.1", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "resolution": "720p", - "season": 1, - "subtitles": "ESub", + "seasons": [ + 1 + ], + "subtitles": [ + "ESub" + ], "title": "Sacred Games", - "year": 2018 + "year": 2018, + "is_subtitle_available": true }, { "codec": "X264", "quality": "HDTV", "resolution": "720p", - "season": [1,2,3,4], + "seasons": [ + 1, + 2, + 3, + 4 + ], "title": "Homeland" }, { @@ -828,44 +1071,66 @@ "network": "ATVP", "quality": "Webrip", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Home Before Dark", "year": 2020 }, { "codec": "x264", - "episode": 3, + "episodes": [ + 3 + ], "quality": "HDTV", - "season": 5, + "seasons": [ + 5 + ], 
"title": "Our Girl" }, { "codec": "x264", - "episode": 4, + "episodes": [ + 4 + ], "resolution": "480p", - "season": 2, + "seasons": [ + 2 + ], "title": "Roswell New Mexico" }, { "codec": "x264", "encoder": "ROBOTS", - "episode": 0, + "episodes": [ + 0 + ], "episodeName": "Coyotes Journal Coyote And His Faithful Crew", "internal": true, "quality": "WEB", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "Coyote Peterson-Brave the Wild" }, { "audio": "DD5.1", "codec": "H264", - "episode": [5,6], - "language": ["ITA","ENG"], + "episodes": [ + 5, + 6 + ], + "languages": [ + "ITA", + "ENG" + ], "network": "AMZN", "quality": "WEB-DLMux", "resolution": "1080p", - "season": 7, + "seasons": [ + 7 + ], "title": "The Blacklist" }, { @@ -875,23 +1140,42 @@ "network": "AMZN", "quality": "WEB-DL", "resolution": "576p", - "season": [1,2,3,4,5,6,7,8,9,10], + "seasons": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], "title": "Are You Being Served", "year": 1972 }, { "codec": "H264", - "episode": 16, + "episodes": [ + 16 + ], "quality": "WEB", - "season": 6, + "seasons": [ + 6 + ], "site": "TGx", "title": "Empire", "year": 2015 }, { "codec": "XviD", - "episode": 20, - "season": 1, + "episodes": [ + 20 + ], + "seasons": [ + 1 + ], "title": "Mixed-ish" }, { @@ -899,7 +1183,9 @@ "filetype": "MP4", "quality": "WEB-DL", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "size": "4.3GB", "title": "Marvels Iron Fist" }, @@ -934,10 +1220,15 @@ { "codec": "h264", "encoder": "FaiLED", - "episode": [1,2], + "episodes": [ + 1, + 2 + ], "quality": "WEB", "resolution": "720p", - "season": 3, + "seasons": [ + 3 + ], "site": "TGx", "title": "A Touch Of Cloth" }, @@ -952,20 +1243,35 @@ { "audio": "Ac3", "codec": "XviD", - "language": ["Italian","English"], - "season": 1, - "subtitles": ["ita","eng"], + "languages": [ + "Italian", + "English" + ], + "seasons": [ + 1 + ], + "subtitles": [ + "ita", + "eng" + ], "title": "Z Nation", - "year": 2014 + "year": 2014, + 
"is_subtitle_available": true }, { "audio": "AC3", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "BluRay Rip", "resolution": "1080p", - "subtitles": "SUBS", + "subtitles": [ + "SUBS" + ], "title": "1917", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "codec": "x264", @@ -1005,16 +1311,23 @@ "codec": "H264", "encoder": "BONE", "quality": "DVDRip", - "season": [1,2], + "seasons": [ + 1, + 2 + ], "title": "The Thin Blue Line", "year": 1995 }, { "codec": "x264", "encoder": "mSD", - "episode": 10, + "episodes": [ + 10 + ], "resolution": "480p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Heavy Rescue 401" }, @@ -1028,12 +1341,16 @@ "audio": "DDP2.0", "codec": "H.264", "encoder": "NTb", - "episode": 66, + "episodes": [ + 66 + ], "episodeName": "No Safe Passage", "network": "AMZN", "quality": "WEB-DL", "resolution": "720p", - "season": 0, + "seasons": [ + 0 + ], "site": "TGx", "title": "Deadliest Catch" }, @@ -1042,16 +1359,22 @@ "network": "NF", "quality": "WEB", "resolution": "1080p", - "season": 7, + "seasons": [ + 7 + ], "title": "New Girl" }, { "codec": "x264", "encoder": "CRiMSON", - "episode": 7, + "episodes": [ + 7 + ], "episodeName": "Murdered His Mother or Falsely Accused Pt2", "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "Accused Guilty or Innocent" }, @@ -1069,22 +1392,36 @@ "audio": "Ac3", "codec": "H264", "encoder": "MIRCrew", - "language": ["ita","eng"], + "languages": [ + "ita", + "eng" + ], "resolution": "720p", - "subtitles": ["ita","eng"], + "subtitles": [ + "ita", + "eng" + ], "title": "The Big Bus - Il fantabus", - "year": 1976 + "year": 1976, + "is_subtitle_available": true }, { "audio": "AC3 5.1", "codec": "H264", "encoder": "MIRCrew", - "language": ["Ita","Eng"], + "languages": [ + "Ita", + "Eng" + ], "quality": "BluRay", "resolution": "1080p", - "subtitles": ["Ita","Eng"], + "subtitles": [ + "Ita", + "Eng" + ], "title": "The Hunt", - 
"year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "AAC", @@ -1116,11 +1453,18 @@ "audio": "Ac3-5.1", "codec": "H264", "encoder": "MIRCrew", - "language": ["ita","eng"], + "languages": [ + "ita", + "eng" + ], "resolution": "720p", - "subtitles": ["ita","eng"], + "subtitles": [ + "ita", + "eng" + ], "title": "Diabolique", - "year": 1996 + "year": 1996, + "is_subtitle_available": true }, { "audio": "DD2.0", @@ -1145,11 +1489,17 @@ { "codec": "H264", "directorsCut": true, - "language": ["ita","fre"], + "languages": [ + "ita", + "fre" + ], "resolution": "720p", - "subtitles": "Eng", + "subtitles": [ + "Eng" + ], "title": "37°2 le matin - Betty Blue", - "year": 1986 + "year": 1986, + "is_subtitle_available": true }, { "codec": "x264", @@ -1173,7 +1523,9 @@ "network": "AMZN", "quality": "WEBRip", "resolution": "720p", - "season": 4, + "seasons": [ + 4 + ], "title": "Insecure" }, { @@ -1204,7 +1556,9 @@ "network": "AMZN", "quality": "WEBRip", "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "site": "rartv", "title": "Starhunter ReduX" }, @@ -1222,8 +1576,12 @@ }, { "encoder": "Judas", - "episode": 12, - "season": 1, + "episodes": [ + 12 + ], + "seasons": [ + 1 + ], "site": "TGx", "title": "Kami no Tou" }, @@ -1240,15 +1598,21 @@ { "codec": "h264", "encoder": "TRUMP", - "episode": 7, + "episodes": [ + 7 + ], "quality": "WEB", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Lost Gold of World War II" }, { "encoder": "HorribleSubs", - "episode": 23, + "episodes": [ + 23 + ], "resolution": "360p", "site": "TGx", "title": "Plunderer" @@ -1256,7 +1620,9 @@ { "audio": "AAC", "codec": "x264", - "episode": 8, + "episodes": [ + 8 + ], "filetype": "mkv", "resolution": "1080p", "site": "Golumpa", @@ -1265,79 +1631,116 @@ }, { "encoder": "HorribleSubs", - "episode": 12, + "episodes": [ + 12 + ], "resolution": "480p", "site": "TGx", "title": "Tower of God" }, { "encoder": "Erai-raws", - "episode": 12, + "episodes": [ + 12 + ], 
"resolution": "720p", "site": "TGx", - "subtitles": "Multiple", - "title": "Kami no Tou" + "subtitles": [ + "Multiple" + ], + "title": "Kami no Tou", + "is_subtitle_available": true }, { "bitDepth": 10, "codec": "x265", "encoder": "Judas", - "episode": 23, + "episodes": [ + 23 + ], "resolution": "1080p", "site": "TGx", - "subtitles": "Eng", - "title": "Plunderer" + "subtitles": [ + "Eng" + ], + "title": "Plunderer", + "is_subtitle_available": true }, { "encoder": "HorribleSubs", - "episode": 12, + "episodes": [ + 12 + ], "resolution": "360p", "site": "TGx", "title": "Tamayomi" }, { "encoder": "HorribleSubs", - "episode": 36, + "episodes": [ + 36 + ], "resolution": "480p", "site": "TGx", "title": "Ahiru no Sora" }, { "encoder": "Erai-raws", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "720p", "site": "TGx", - "subtitles": "Multiple", - "title": "Shadowverse" + "subtitles": [ + "Multiple" + ], + "title": "Shadowverse", + "is_subtitle_available": true }, { "encoder": "HorribleSubs", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "360p", "site": "TGx", "title": "A3! Season Spring & Summer" }, { "encoder": "Erai-raws", - "episode": 10, + "episodes": [ + 10 + ], "resolution": "720p", "site": "TGx", - "subtitles": "Multiple", - "title": "Kitsutsuki Tanteidokoro" + "subtitles": [ + "Multiple" + ], + "title": "Kitsutsuki Tanteidokoro", + "is_subtitle_available": true }, { "encoder": "Erai-raws", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "720p", "site": "TGx", - "subtitles": "Multiple", - "title": "Princess Connect! Re-Dive" + "subtitles": [ + "Multiple" + ], + "title": "Princess Connect! 
Re-Dive", + "is_subtitle_available": true }, { "encoder": "HorribleSubs", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Fruits Basket", "year": 2019 @@ -1345,15 +1748,20 @@ { "audio": "DD- 2.0", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "SonyLiv", "quality": "WEB-DL", "resolution": "720p", "site": "BonsaiHD", "size": "1.1 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Kadakh", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "AAC 5.1", @@ -1369,76 +1777,106 @@ "audio": "AAC 5.1", "codec": "x264", "encoder": "LOKiHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "BluRay", "resolution": "720p", "site": "Telly", - "subtitles": "MSubs", + "subtitles": [ + "MSubs" + ], "title": "Shuddh Desi Romance", - "year": 2013 + "year": 2013, + "is_subtitle_available": true }, { "audio": "DD5.1", "codec": "x264", "encoder": "TamilMV", - "language": "Kannada", + "languages": [ + "Kannada" + ], "quality": "HDRip", "resolution": "720p", "size": "1.1GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Face 2 Face", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "DD- 5.1", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "NF", "quality": "WEBRip", "resolution": "720p", "site": "BonsaiHD", "size": "800 MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Chaman Bahar", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "DD- 2.0", "codec": "x264", - "language": "Bengali", + "languages": [ + "Bengali" + ], "network": "Hoichoi", "quality": "WEB-DL", "resolution": "720p", "site": "BonsaiHD", "size": "650 MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Piprabidya", - "year": 2013 + "year": 2013, + "is_subtitle_available": true }, { "audio": "DD- 5.1", "codec": 
"x264", - "language": "Tamil", + "languages": [ + "Tamil" + ], "network": "AMZN", "quality": "WEBRip", "resolution": "720p", "site": "BonsaiHD", "size": "1.1 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Penguin", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "AC3", "codec": "x264", "encoder": "LOKiHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "WEBRip", "resolution": "1080p", "site": "Telly", - "subtitles": "ESubs", + "subtitles": [ + "ESubs" + ], "title": "Kadakh", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "AAC 5.1", @@ -1453,87 +1891,131 @@ { "codec": "x264", "encoder": "TamilMV", - "language": ["Tamil","Telugu","Hindi"], + "languages": [ + "Tamil", + "Telugu", + "Hindi" + ], "proper": true, "quality": "HDRip", "size": "750MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Kavacham", - "year": 2018 + "year": 2018, + "is_subtitle_available": true }, { "codec": "x264", "encoder": "TamilMV", - "language": ["Telugu","Tamil"], + "languages": [ + "Telugu", + "Tamil" + ], "quality": "BR-Rip", "size": "450MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "M.S. Dhoni: The Untold Story", - "year": 2016 + "year": 2016, + "is_subtitle_available": true }, { "audio": "DD5.1", "encoder": "TamilMV", - "language": ["Telugu","Tamil","Hindi"], + "languages": [ + "Telugu", + "Tamil", + "Hindi" + ], "quality": "BluRay", "resolution": "720p", "size": "1.6GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "M.S. 
Dhoni: The Untold Story", - "year": 2016 + "year": 2016, + "is_subtitle_available": true }, { "audio": "DTS 5.1", "codec": "x264", "encoder": "LOKiHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "BluRay", "resolution": "720p", "site": "Telly", - "subtitles": "MSubs", + "subtitles": [ + "MSubs" + ], "title": "Detective Byomkesh Bakshy", - "year": 2015 + "year": 2015, + "is_subtitle_available": true }, { "audio": "AAC", "codec": "x264", "filetype": "mkv", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "WebRip", "resolution": "720p", "site": "Telly", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Kasganj", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "AAC 2.0", "codec": "AVC", "encoder": "Telly", "filetype": "mkv", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "Zee5", "quality": "WebDL", "resolution": "1080p", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Kasganj", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "DD 5.1", "codec": "x264", "encoder": "MAZE", - "language": ["Tam","Tel","Mal"], + "languages": [ + "Tam", + "Tel", + "Mal" + ], "quality": "HDRip", "resolution": "720p", "size": "2GB", - "subtitles": "MSub", + "subtitles": [ + "MSub" + ], "title": "Penguin", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "encoder": "Erai-raws", - "episode": 12, + "episodes": [ + 12 + ], "resolution": "720p", "site": "TGx", "title": "Kakushigoto" @@ -1543,16 +2025,23 @@ "codec": "x265", "encoder": "Judas", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", - "subtitles": "Eng", - "title": "Shaman King" + "subtitles": [ + "Eng" + ], + "title": "Shaman King", + "is_subtitle_available": true }, { "audio": "DDP5.1", "codec": "x264", "encoder": "NTG", - "language": "JAPANESE", + "languages": [ + "JAPANESE" + ], "network": "NF", "quality": "WEBRip", "resolution": "1080p", @@ 
-1565,7 +2054,9 @@ "codec": "x265", "encoder": "rmteam", "filetype": "mkv", - "language": "japanese", + "languages": [ + "japanese" + ], "quality": "bluray", "resolution": "1080p", "title": "liz and the blue bird", @@ -1574,15 +2065,22 @@ { "codec": "H.264", "encoder": "MeM", - "episode": 13, + "episodes": [ + 13 + ], "episodeName": "Il mio amico Grodd", "filetype": "mkv", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "network": "AMZN", "quality": "WEB-DLMux", "repack": true, "resolution": "1080p", - "season": 6, + "seasons": [ + 6 + ], "title": "The Flash", "year": 2014 }, @@ -1590,22 +2088,30 @@ "audio": "DDP5.1", "codec": "H264", "encoder": "NTb", - "episode": 12, + "episodes": [ + 12 + ], "episodeName": "Lovers Quarrel", "network": "DCU", "quality": "WEB-DL", "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Harley Quinn" }, { "codec": "x264", "encoder": "CRiMSON", - "episode": 5, + "episodes": [ + 5 + ], "episodeName": "Homicide at Home", "quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "The Killer Truth" }, @@ -1617,7 +2123,9 @@ "network": "HMAX", "quality": "Webrip", "resolution": "1080p", - "season": 6, + "seasons": [ + 6 + ], "site": "TAoE", "title": "The Flintstones", "year": 1960 @@ -1625,9 +2133,13 @@ { "codec": "H264", "encoder": "DENTiST", - "episode": 4, + "episodes": [ + 4 + ], "quality": "WEB", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Jamies Super Food" }, @@ -1635,12 +2147,16 @@ "audio": "DDP5.1", "codec": "H.264", "encoder": "NTb", - "episode": 6, + "episodes": [ + 6 + ], "episodeName": "Fire and Brimstone", "network": "AMZN", "quality": "WEB-DL", "resolution": "1080p", - "season": 5, + "seasons": [ + 5 + ], "site": "TGx", "title": "Blindspot" }, @@ -1648,12 +2164,16 @@ "audio": "DDP5.1", "codec": "H.264", "encoder": "NTb", - "episode": 10, + "episodes": [ + 10 + ], "episodeName": "The Last Dance", "network": "AMZN", "quality": 
"WEB-DL", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "In the Dark", "year": 2019 @@ -1664,14 +2184,18 @@ "network": "NF", "quality": "WEBRip", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "title": "Babies" }, { "codec": "x264", "day": 16, "encoder": "SPORTY", - "language": "German", + "languages": [ + "German" + ], "month": 6, "quality": "HDTV", "resolution": "720p", @@ -1700,10 +2224,14 @@ { "codec": "h264", "encoder": "ROBOTS", - "episode": 3, + "episodes": [ + 3 + ], "episodeName": "Major Bonus Room", "quality": "WEB", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "Design at Your Door" }, @@ -1723,11 +2251,26 @@ { "audio": "AAC", "codec": "x264", - "episode": [1,2,3,4,5,6,7,8,9,10], - "language": "Hindi", + "episodes": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "languages": [ + "Hindi" + ], "quality": "WebRip", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "site": "Telly", "title": "Lalbazaar" }, @@ -1752,7 +2295,9 @@ "codec": "x264", "encoder": "CAFFEiNE", "quality": "WEBRip", - "season": 1, + "seasons": [ + 1 + ], "title": "Still A Mystery" }, { @@ -1779,8 +2324,11 @@ { "encoder": "BigJ0554", "filetype": "MP4", - "subtitles": "subs", - "title": "Ella Fitzgerald - Just One of Those Things" + "subtitles": [ + "subs" + ], + "title": "Ella Fitzgerald - Just One of Those Things", + "is_subtitle_available": true }, { "codec": "x264", @@ -1803,11 +2351,15 @@ { "codec": "H264", "encoder": "GHOSTS", - "episode": 3, + "episodes": [ + 3 + ], "proper": true, "quality": "WEB", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "Trackers" }, @@ -1824,19 +2376,27 @@ { "codec": "x264", "encoder": "Nemo", - "episode": 6, + "episodes": [ + 6 + ], "quality": "WEBRip", "resolution": "720p", - "season": 12, + "seasons": [ + 12 + ], "title": "Big Brother AU" }, { "codec": "h264", "encoder": "LiGATE", - "episode": 20, + "episodes": [ + 20 + ], 
"episodeName": "Opioids Inc", "quality": "WEB", - "season": 38, + "seasons": [ + 38 + ], "site": "TGx", "title": "Frontline" }, @@ -1844,27 +2404,41 @@ "audio": "DD5.1", "codec": "H.264", "encoder": "MeM", - "episode": 3, + "episodes": [ + 3 + ], "episodeName": "Comunisti alieni dal futuro", "filetype": "mkv", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "network": "AMZN", "quality": "WEB-DLMux", "resolution": "1080p", - "season": 7, + "seasons": [ + 7 + ], "title": "Marvel's Agents of S.H.I.E.L.D." }, { "audio": "AAC", "codec": "x264", "encoder": "LOKiHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "WEBRip", "resolution": "720p", - "season": 1, - "subtitles": "ESubs", + "seasons": [ + 1 + ], + "subtitles": [ + "ESubs" + ], "title": "Your Honor", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "codec": "x264", @@ -1881,59 +2455,83 @@ "codec": "x264", "encoder": "StB", "resolution": "720p", - "season": 1, - "subtitles": "MultiSub", - "title": "Thirteen" + "seasons": [ + 1 + ], + "subtitles": [ + "MultiSub" + ], + "title": "Thirteen", + "is_subtitle_available": true }, { "audio": "AAC 5.1", "codec": "x264", "encoder": "LOKiHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "NF", "quality": "WEBRip", "resolution": "720p", "site": "Telly", - "subtitles": "ESubs", + "subtitles": [ + "ESubs" + ], "title": "Paan Singh Tomar", - "year": 2012 + "year": 2012, + "is_subtitle_available": true }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "AJP69", - "episode": 20, + "episodes": [ + 20 + ], "episodeName": "The Army-Navy Game", "network": "HULU", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "site": "eztv", "title": "MASH" }, { "codec": "h264", "encoder": "ROBOTS", - "episode": 12, + "episodes": [ + 12 + ], "episodeName": "Family Favorites with Allie", "internal": true, "quality": "WEB", - "season": 16, + "seasons": [ + 16 + ], "site": "eztv", "title": 
"Trishas Southern Kitchen" }, { "codec": "Xvid", "encoder": "ETRG", - "language": "Eng", + "languages": [ + "Eng" + ], "title": "Split Image", "year": 1982 }, { "encoder": "HorribleSubs", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Kaguya-sama wa Kokurasetai" }, @@ -1941,7 +2539,9 @@ "audio": "1Ch.Audio", "codec": "XViD", "encoder": "ETRG", - "language": "Eng", + "languages": [ + "Eng" + ], "quality": "DVDRip", "title": "Five", "year": 1951 @@ -1949,25 +2549,35 @@ { "codec": "H264", "encoder": "OATH", - "episode": 1, + "episodes": [ + 1 + ], "quality": "WEB", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "NOS4A2" }, { "codec": "x264", "encoder": "LiGATE", - "episode": 12, + "episodes": [ + 12 + ], "episodeName": "Sicilian and Seafood", "quality": "WEBRip", - "season": 31, + "seasons": [ + 31 + ], "site": "TGx", "title": "Diners Drive-Ins and Dives" }, { - "episode": 12, + "episodes": [ + 12 + ], "filetype": "mkv", "resolution": "720p", "site": "Erai-raws", @@ -1975,9 +2585,13 @@ }, { "encoder": "Erai-raws", - "episode": 11, + "episodes": [ + 11 + ], "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Kaguya-sama wa Kokurasetai! 
Tensai-tachi no Renai Zunousen" }, @@ -1992,21 +2606,33 @@ "year": 2020 }, { - "episode": 12, + "episodes": [ + 12 + ], "filetype": "mkv", "resolution": "720p", "site": "Erai-raws", - "subtitles": "Multiple", - "title": "Yesterday o Utatte" + "subtitles": [ + "Multiple" + ], + "title": "Yesterday o Utatte", + "is_subtitle_available": true }, { - "episode": 12, + "episodes": [ + 12 + ], "filetype": "mkv", "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "site": "Erai-raws", - "subtitles": "Multiple", - "title": "Honzuki no Gekokujou - Shisho ni Naru Tame ni wa Shudan wo Erandeiraremasen" + "subtitles": [ + "Multiple" + ], + "title": "Honzuki no Gekokujou - Shisho ni Naru Tame ni wa Shudan wo Erandeiraremasen", + "is_subtitle_available": true }, { "quality": "WebRip", @@ -2025,11 +2651,15 @@ { "codec": "h264", "encoder": "ROBOTS", - "episode": 0, + "episodes": [ + 0 + ], "episodeName": "Clothed and Opinionated Part 2", "quality": "WEB", "resolution": "720p", - "season": 6, + "seasons": [ + 6 + ], "site": "eztv", "title": "Naked and Afraid XL" }, @@ -2042,16 +2672,22 @@ "quality": "Webrip", "repack": true, "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "site": "TAoE", "title": "The Fresh Prince of Bel-Air", "year": 1990 }, { "encoder": "HorribleSubs", - "episode": 12, + "episodes": [ + 12 + ], "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": "TGx", "title": "Tsugumomo" }, @@ -2059,26 +2695,39 @@ "fps": 50, "network": "BBC", "resolution": "1280x720p", - "season": 1, - "subtitles": "Eng", + "seasons": [ + 1 + ], + "subtitles": [ + "Eng" + ], "title": "Black Hollywood: 'They've Gotta Have Us'", - "year": 2018 + "year": 2018, + "is_subtitle_available": true }, { "codec": "x264", "encoder": "FiHTV", - "episode": 13, + "episodes": [ + 13 + ], "quality": "HDTV", - "season": 27, + "seasons": [ + 27 + ], "site": "TGx", "title": "Police Ten 7" }, { "codec": "x264", "encoder": "FiHTV", - "episode": 2, + "episodes": [ + 2 + ], 
"quality": "HDTV", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "2nd Chance Charlie" }, @@ -2095,11 +2744,16 @@ { "codec": "x265", "resolution": "720P", - "season": 8, + "seasons": [ + 8 + ], "title": "Mystery Diners" }, { - "language": ["Hindi","Spanish"], + "languages": [ + "Hindi", + "Spanish" + ], "quality": "HDRip", "resolution": "720p", "size": "800MB", @@ -2108,7 +2762,9 @@ }, { "codec": "x264", - "language": "Malayalam", + "languages": [ + "Malayalam" + ], "quality": "HDTV", "resolution": "720p", "size": "1.4GB", @@ -2119,20 +2775,28 @@ { "audio": "DD - 2.0", "codec": "x264", - "language": ["Hindi","English"], + "languages": [ + "Hindi", + "English" + ], "quality": "Bluray", "resolution": "720p", "site": "BonsaiHD", "size": "800 MB", - "subtitles": "MSub", + "subtitles": [ + "MSub" + ], "title": "Vacancy", - "year": 2007 + "year": 2007, + "is_subtitle_available": true }, { "audio": "Dual-Audio", "codec": "x264", "encoder": "1XCinema", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "HDRip", "resolution": "720p", "title": "Darkness Falls", @@ -2141,7 +2805,10 @@ { "audio": "Ac3 5.1", "codec": "H264", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "WEBRip", "resolution": "1080p", "site": "ArMor", @@ -2151,7 +2818,10 @@ { "encoder": "iDN", "filetype": "mkv", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "Bluray", "resolution": "1080p", "title": "Darlin", @@ -2170,7 +2840,10 @@ { "audio": "Ac3 5.1", "codec": "H264", - "language": ["ITA","DAN"], + "languages": [ + "ITA", + "DAN" + ], "quality": "BDRip", "resolution": "1080p", "site": "ArMor", @@ -2180,7 +2853,9 @@ { "codec": "x264", "encoder": "Flizmovies", - "language": "HIndi", + "languages": [ + "HIndi" + ], "quality": "WEB-DL", "resolution": "720p", "title": "Perversion", @@ -2202,19 +2877,27 @@ "audio": "5.1Ch", "quality": "Web-DL", "resolution": "720p", - "season": 2, + "seasons": [ + 2 + ], "site": 
"zooqle.com", "title": "Parks and Recreation" }, { "audio": "AAC 5.1", "codec": "x264", - "language": ["Hindi","English"], + "languages": [ + "Hindi", + "English" + ], "quality": "BluRay", "resolution": "720p", - "subtitles": "MSubs", + "subtitles": [ + "MSubs" + ], "title": "The Amazing Spider-Man 2", - "year": 2014 + "year": 2014, + "is_subtitle_available": true }, { "audio": "EAC3 5.1", @@ -2230,17 +2913,26 @@ }, { "encoder": "TamilMV", - "language": ["Tamil","Hindi","Spanish"], + "languages": [ + "Tamil", + "Hindi", + "Spanish" + ], "quality": "BluRay", "resolution": "720p", "size": "950MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Mother [Madre]", - "year": 2016 + "year": 2016, + "is_subtitle_available": true }, { "encoder": "ORG", - "language": "Bangla", + "languages": [ + "Bangla" + ], "quality": "HDRip", "size": "800MB", "title": "Ridoy Jure", @@ -2249,12 +2941,22 @@ { "codec": "H264", "filetype": "mp4", - "genre": "Drama", - "language": ["Eng","Rus"], + "genres": [ + "Drama" + ], + "languages": [ + "Eng", + "Rus" + ], "resolution": "720p", - "subtitles": "Multi", + "subtitles": [ + "Eng", + "Rus", + "Multi" + ], "title": "The Deep Blue Sea", - "year": 2011 + "year": 2011, + "is_subtitle_available": true }, { "audio": "TrueHD 5.1", @@ -2268,7 +2970,9 @@ "year": 2006 }, { - "genre": "Western", + "genres": [ + "Western" + ], "title": "Rustlers on Horseback", "year": 1950 }, @@ -2282,7 +2986,9 @@ "year": 2020 }, { - "language": "BANGLA", + "languages": [ + "BANGLA" + ], "quality": "HDRIP", "title": "SHAHENSHA", "year": 2020 @@ -2291,12 +2997,17 @@ "audio": "DD2.0", "codec": "x264", "encoder": "CineVood", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "AMZN", "quality": "WEB-DL", - "subtitles": "Esub", + "subtitles": [ + "Esub" + ], "title": "Rasbhari", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "AAC", @@ -2320,7 +3031,10 @@ "audio": "AAC 5.1", "codec": "H265", "encoder": "CalicoSkies", 
- "language": ["EN","JP"], + "languages": [ + "EN", + "JP" + ], "resolution": "4K", "title": "L.A. Story", "upscaled": true, @@ -2329,18 +3043,26 @@ { "codec": "x265", "encoder": "MeGusta", - "episode": 14, + "episodes": [ + 14 + ], "resolution": "1080p", - "season": 8, + "seasons": [ + 8 + ], "site": "eztv", "title": "Monk" }, { "codec": "x265", "encoder": "MeGusta", - "episode": 2, + "episodes": [ + 2 + ], "resolution": "1080p", - "season": 21, + "seasons": [ + 21 + ], "site": "eztv", "title": "24 Hours In A And E" }, @@ -2359,11 +3081,15 @@ { "codec": "h264", "encoder": "LiGATE", - "episode": 15, + "episodes": [ + 15 + ], "episodeName": "Toni Morrison The Pieces I Am", "quality": "WEB", "resolution": "720p", - "season": 33, + "seasons": [ + 33 + ], "site": "eztv", "title": "American Masters" }, @@ -2384,25 +3110,40 @@ "encoder": "Judas", "quality": "BD", "resolution": "1080p", - "season": [1,2], + "seasons": [ + 1, + 2 + ], "site": "TGx", - "subtitles": "Eng", - "title": "Black Lagoon" + "subtitles": [ + "Eng" + ], + "title": "Black Lagoon", + "is_subtitle_available": true }, { "bitDepth": 10, "codec": "x265", "encoder": "Judas", - "episode": 927, + "episodes": [ + 927 + ], "resolution": "1080p", "site": "TGx", - "subtitles": "Multi", - "title": "One Piece" + "subtitles": [ + "Multi" + ], + "title": "One Piece", + "is_subtitle_available": true }, { "encoder": "Judas", - "episode": 1, - "season": 5, + "episodes": [ + 1 + ], + "seasons": [ + 5 + ], "site": "TGx", "title": "Shokugeki No Soma" }, @@ -2410,7 +3151,9 @@ "audio": "DTS-HDMA 5.1", "codec": "x264", "encoder": "Hon3yHD", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "BluRay", "resolution": "1080p", "title": "Dil Chahta Hai", @@ -2420,72 +3163,110 @@ "audio": "AAC", "codec": "x264", "encoder": "CineVood", - "language": ["Hindi","Tamil","Bengali"], + "languages": [ + "Hindi", + "Tamil", + "Bengali" + ], "network": "AMZN", "quality": "WEB-DL", "resolution": "720p", - "subtitles": "Esub", + 
"subtitles": [ + "Esub" + ], "title": "NOS4A2", "untouched": true, - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "bitDepth": 10, "codec": "x265", "encoder": "Judas", - "episode": 1, + "episodes": [ + 1 + ], "resolution": "1080p", - "season": 4, + "seasons": [ + 4 + ], "site": "TGx", - "subtitles": "Multi", - "title": "Haikyuu!!" + "subtitles": [ + "Multi" + ], + "title": "Haikyuu!!", + "is_subtitle_available": true }, { "audio": "DD- 2.0", "codec": "x264", - "language": "Malayalam", + "languages": [ + "Malayalam" + ], "quality": "WEB-DL", "resolution": "720p", "site": "BonsaiHD", "size": "2.5 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Moothon: The Elder One", "untouched": true, - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "AC3 5.1", "codec": "H264", "encoder": "MIRCrew", - "language": ["Ita","Fre"], + "languages": [ + "Ita", + "Fre" + ], "quality": "BluRay", "resolution": "1080p", - "subtitles": ["Ita","Eng"], + "subtitles": [ + "Ita", + "Eng" + ], "title": "Doppia Pelle - Le Daim", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "DTS HD Masters 5.1", "codec": "H264", "encoder": "SnakeSPL", - "language": ["Ita","Eng"], + "languages": [ + "Ita", + "Eng" + ], "resolution": "720p", "site": "MIRCrew", - "subtitles": ["Ita","Eng"], + "subtitles": [ + "Ita", + "Eng" + ], "title": "The Twilight Saga: Breaking Down - Parte 2", - "year": 2012 + "year": 2012, + "is_subtitle_available": true }, { "audio": "DD- 2.0", "codec": "x264", - "language": "Interslavic", + "languages": [ + "Interslavic" + ], "quality": "Bluray", "resolution": "720p", "site": "BonsaiHD", "size": "1.3 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "The Painted Bird", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "DTS-HD.MA.5.1", @@ -2507,12 +3288,19 @@ "audio": "AC3 5.1", "codec": "H265", "encoder": "Sp33dy94", - "language": ["ITA","ENG"], + 
"languages": [ + "ITA", + "ENG" + ], "resolution": "1080p", "site": "MIRCrew", - "subtitles": ["NUita","eng"], + "subtitles": [ + "NUita", + "eng" + ], "title": "Just Mercy - Il diritto di opporsi", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "codec": "x264", @@ -2526,12 +3314,17 @@ }, { "documentary": true, - "language": "eng", + "languages": [ + "eng" + ], "network": "BBC", "site": "06", - "subtitles": "ara", + "subtitles": [ + "ara" + ], "title": "The Ottomans: Europe's Muslim Emperors", - "year": 2013 + "year": 2013, + "is_subtitle_available": true }, { "codec": "x264", @@ -2564,19 +3357,27 @@ "codec": "x264", "directorsCut": true, "hardcoded": true, - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "WEBRip", "resolution": "720p", "site": "BonsaiHD", "size": "1.5 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "#Yaaram", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "Ac3 5.1", "codec": "H264", - "language": ["ITA","SPA"], + "languages": [ + "ITA", + "SPA" + ], "quality": "WEBRip", "resolution": "1080p", "site": "ArMor", @@ -2586,7 +3387,10 @@ { "audio": "Ac3 2.0", "codec": "H264", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "BDRip", "resolution": "1080p", "site": "ArMor", @@ -2596,7 +3400,10 @@ { "audio": "Ac3 2.0", "codec": "H264", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "BDRip", "resolution": "1080p", "site": "ArMor", @@ -2605,26 +3412,39 @@ }, { "encoder": "TamilMV", - "language": ["Telugu","Tamil","Hindi","Chi"], + "languages": [ + "Telugu", + "Tamil", + "Hindi", + "Chi" + ], "quality": "BluRay", "resolution": "720p", "size": "1.1GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "The Four", - "year": 2012 + "year": 2012, + "is_subtitle_available": true }, { "audio": "2CH", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "network": "AMZN", "quality": 
"WEBRip", "resolution": "720p", "site": "BonsaiHD", "size": "1.2 GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Samrat & Co.", - "year": 2014 + "year": 2014, + "is_subtitle_available": true }, { "codec": "X264", @@ -2638,7 +3458,10 @@ { "audio": "DD- 5.1", "codec": "x265", - "language": ["English","spanish"], + "languages": [ + "English", + "spanish" + ], "network": "NF", "quality": "WEB-DL", "resolution": "720p", @@ -2660,13 +3483,19 @@ "audio": "DD 5.1", "codec": "x264", "encoder": "MAZE", - "language": ["English","Hindi"], + "languages": [ + "English", + "Hindi" + ], "quality": "HDRip", "resolution": "720p", "size": "2GB", - "subtitles": "MSub", + "subtitles": [ + "MSub" + ], "title": "The Fxxk-It List", - "year": 2020 + "year": 2020, + "is_subtitle_available": true }, { "audio": "DTS-HD MA 7.1", @@ -2696,12 +3525,17 @@ { "audio": "AC3", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "WEBRip", "resolution": "1080p", - "subtitles": "ESubs", + "subtitles": [ + "ESubs" + ], "title": "Sarkar 3", - "year": 2017 + "year": 2017, + "is_subtitle_available": true }, { "audio": "DD.2.0", @@ -2717,37 +3551,53 @@ "audio": "AAC", "codec": "x264", "encoder": "MVGroup.org", - "episode": 11, + "episodes": [ + 11 + ], "episodeName": "Oceans Most Powerful", "filetype": "mp4", "quality": "HDTV", "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "title": "Sea Monsters" }, { "audio": "DD 5.1", "codec": "x264", "encoder": "LOKiHD", - "language": "Urdu", + "languages": [ + "Urdu" + ], "quality": "BluRay", "resolution": "1080p", "site": "Telly", - "subtitles": "ESubs", + "subtitles": [ + "ESubs" + ], "title": "Dukhtar", - "year": 2014 + "year": 2014, + "is_subtitle_available": true }, { "encoder": "LMH123", - "language": "Punjabi", + "languages": [ + "Punjabi" + ], "quality": "WEBRip", "resolution": "720p", - "subtitles": "ESubs", + "subtitles": [ + "ESubs" + ], "title": "Main Teri Tu Mera", - "year": 2016 + "year": 
2016, + "is_subtitle_available": true }, { - "language": "Kannada", + "languages": [ + "Kannada" + ], "quality": "HDRip", "resolution": "720p", "title": "RangiTaranga", @@ -2774,7 +3624,9 @@ { "codec": "x265", "encoder": "VXT", - "language": "JAPANESE", + "languages": [ + "JAPANESE" + ], "quality": "BluRay", "resolution": "1080p", "title": "Lupin III The First", @@ -2783,25 +3635,34 @@ { "codec": "x265", "encoder": "VXT", - "language": "JAPANESE", + "languages": [ + "JAPANESE" + ], "quality": "WEBRip", "resolution": "1080p", - "subtitles": "ENSUBBED", + "subtitles": [ + "ENSUBBED" + ], "title": "The Rule for a Vagabond", - "year": 1965 + "year": 1965, + "is_subtitle_available": true }, { "audio": "AAC", "codec": "H264", "encoder": "VXT", - "language": "PORTUGUESE", + "languages": [ + "PORTUGUESE" + ], "quality": "BluRay", "resolution": "1080p", "title": "Cousins", "year": 2019 }, { - "genre": "Drama", + "genres": [ + "Drama" + ], "quality": "BrRip", "resolution": "720p", "title": "Easy Rider", @@ -2811,18 +3672,25 @@ "audio": "DD+5.1", "codec": "x264", "encoder": "TamilMV", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "HDRip", "resolution": "720p", "size": "1.3GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "PENALTY", - "year": 2019 + "year": 2019, + "is_subtitle_available": true }, { "audio": "Dual-Audio", "codec": "x264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "HDRip", "resolution": "720p", "title": "Wasp Network", @@ -2838,33 +3706,47 @@ { "codec": "h264", "encoder": "WEBTUBE", - "episode": 1, + "episodes": [ + 1 + ], "quality": "WEB", - "season": 1, + "seasons": [ + 1 + ], "site": "TGx", "title": "Nicos Menu Mission" }, { "codec": "x264", "encoder": "mSD", - "episode": 11, + "episodes": [ + 11 + ], "episodeName": "I Need A Hero", "resolution": "480p", - "season": 5, + "seasons": [ + 5 + ], "site": "eztv", "title": "If Loving You Is Wrong" }, { "encoder": "Judas", - "episode": 5, - "season": 1, 
+ "episodes": [ + 5 + ], + "seasons": [ + 1 + ], "site": "TGx", "title": "Digimon Adventure", "year": 2020 }, { "encoder": "MovCr", - "language": "Hindi", + "languages": [ + "Hindi" + ], "resolution": "720p", "size": "450MB", "title": "Kavali", @@ -2873,49 +3755,74 @@ { "codec": "x264", "encoder": "Morpheus", - "episode": 1, + "episodes": [ + 1 + ], "filetype": "mkv", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "AHDTVMux", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Gangs Of London" }, { "codec": "x264", "encoder": "MeM", - "episode": 9, + "episodes": [ + 9 + ], "episodeName": "Canale 4", "filetype": "mkv", - "language": ["ITA","ENG"], + "languages": [ + "ITA", + "ENG" + ], "quality": "Bluray", "resolution": "1080p", - "season": 5, + "seasons": [ + 5 + ], "title": "Fear the Walking Dead" }, { "codec": "x264", "encoder": "FiHTV", - "episode": 5, + "episodes": [ + 5 + ], "quality": "HDTV", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "site": "eztv", "title": "2nd Chance Charlie" }, { "codec": "x264", "encoder": "FiHTV", - "episode": 15, + "episodes": [ + 15 + ], "quality": "HDTV", - "season": 27, + "seasons": [ + 27 + ], "site": "eztv", "title": "Police Ten 7" }, { "audio": "Dual Audios", "internationalCut": true, - "language": ["HIN","MARATHI"], + "languages": [ + "HIN", + "MARATHI" + ], "quality": "WEB Rip", "resolution": "720p", "title": "Nude", @@ -2932,7 +3839,10 @@ "year": 2006 }, { - "genre": ["Comedy","West"], + "genres": [ + "Comedy", + "West" + ], "resolution": "720p", "title": "The Kissing Bandit", "year": 1948 @@ -2942,7 +3852,9 @@ "year": 1982 }, { - "genre": "comedy", + "genres": [ + "comedy" + ], "title": "The Mouse on the Moon", "year": 1963 }, @@ -2958,19 +3870,26 @@ "year": 2019 }, { - "genre": ["Action","Western"], + "genres": [ + "Action", + "Western" + ], "title": "Sugarfoot", "year": 1951 }, { "codec": "h264", "encoder": "ROBOTS", - "episode": 10, + "episodes": [ + 10 
+ ], "episodeName": "Lights Camera Eat", "internal": true, "quality": "WEB", "resolution": "720p", - "season": 11, + "seasons": [ + 11 + ], "site": "eztv", "title": "Valeries Home Cooking" }, @@ -2978,12 +3897,16 @@ "audio": "AAC2.0", "codec": "x264", "encoder": "BOOP", - "episode": 6, + "episodes": [ + 6 + ], "episodeName": "My Body", "network": "ID", "quality": "WEB-DL", "resolution": "720p", - "season": 5, + "seasons": [ + 5 + ], "site": "eztv", "title": "American Monster" }, @@ -2991,12 +3914,20 @@ "audio": "Ac3 5.1", "codec": "H264", "encoder": "SnakeSPL", - "language": ["Ita","Eng","Deu","Esp"], + "languages": [ + "Ita", + "Eng", + "Deu", + "Esp" + ], "quality": "DVDRip", "site": "MIRCrew", - "subtitles": "Multisub", + "subtitles": [ + "Multisub" + ], "title": "Professor Marston and the Wonder Women", - "year": 2017 + "year": 2017, + "is_subtitle_available": true }, { "audio": "DTS-HD.MA.5.1", @@ -3012,10 +3943,14 @@ { "codec": "x264", "encoder": "SYNCOPY", - "episode": 4, + "episodes": [ + 4 + ], "quality": "HDTV", "resolution": "720p", - "season": 3, + "seasons": [ + 3 + ], "title": "Magnum P.I.", "year": 2018 }, @@ -3023,11 +3958,16 @@ "audio": "DD5.1", "codec": "x264", "encoder": "NTb", - "episode": [23,24], + "episodes": [ + 23, + 24 + ], "filetype": "mkv", "quality": "BluRay", "resolution": "720p", - "season": 9, + "seasons": [ + 9 + ], "title": "Friends" }, { @@ -3055,7 +3995,9 @@ "network": "Amazon", "quality": "WEB-DL", "resolution": "1080p", - "season": 12, + "seasons": [ + 12 + ], "title": "Its Always Sunny In Philadelphia" }, { @@ -3065,7 +4007,9 @@ "network": "HBO", "quality": "WEBRip", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Animals" }, { @@ -3074,7 +4018,9 @@ "encoder": "NOGRP", "network": "iT", "quality": "WEB-DL", - "season": 2, + "seasons": [ + 2 + ], "title": "SpongeBob SquarePants" }, { @@ -3084,7 +4030,9 @@ "network": "AS", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], 
"title": "The Shivering Truth" }, { @@ -3094,7 +4042,9 @@ "network": "CRAV", "quality": "WEB-DL", "resolution": "720p", - "season": 5, + "seasons": [ + 5 + ], "title": "Letterkenny" }, { @@ -3104,7 +4054,9 @@ "network": "CC", "quality": "WEBRip", "resolution": "1080p", - "season": 7, + "seasons": [ + 7 + ], "title": "Workaholics" }, { @@ -3114,7 +4066,9 @@ "network": "SESO", "quality": "WEBRip", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "HarmonQuest" }, { @@ -3123,7 +4077,9 @@ "network": "VRV", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Bee and Puppycat" }, { @@ -3133,20 +4089,26 @@ "network": "PCOK", "quality": "WEB-DL", "resolution": "1080p", - "season": 3, + "seasons": [ + 3 + ], "title": "A P Bio" }, { "audio": "AAC2.0", "codec": "x264", "encoder": "AJP69", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "The Vulcan Hello", "filetype": "mkv", "network": "CBS", "quality": "WEB-DL", "resolution": "540p", - "season": 1, + "seasons": [ + 1 + ], "title": "Star Trek Discovery" }, { @@ -3175,7 +4137,9 @@ "quality": "BluRay", "remux": true, "resolution": "1080p", - "season": 2, + "seasons": [ + 2 + ], "title": "Archer" }, { @@ -3184,7 +4148,9 @@ "encoder": "DON", "quality": "BluRay", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "title": "Peaky Blinders" }, { @@ -3192,95 +4158,125 @@ "codec": "x264", "encoder": "Exiled-Destiny", "quality": "DVDRip", - "season": 1, + "seasons": [ + 1 + ], "title": "Ghost Stories" }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "RTN", - "episode": 1, + "episodes": [ + 1 + ], "filetype": "mkv", "network": "iP", "quality": "WEB-DL", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "title": "Rick Steins Road To Mexico" }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "BTW", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Rangers vs Sabres Part1", "filetype": "mkv", "network": "NBC", "quality": "WEB-DL", 
"repack": true, "resolution": "1080p", - "season": 7, + "seasons": [ + 7 + ], "title": "Road to the NHL Winter Classic" }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "BOOP", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "The Mighty Misfits Who Made Marvel", "filetype": "mkv", "network": "AMC", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Robert Kirkmans Secret History of Comics" }, { "audio": "AAC2.0", "codec": "x264", "encoder": "SynHD", - "episode": 12, + "episodes": [ + 12 + ], "episodeName": "Chore Day", "filetype": "mkv", "network": "PBS", "quality": "WEBRip", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "title": "Ready Jet Go" }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "BTW", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "The Hood Maker", "filetype": "mkv", "network": "STAN", "quality": "WEB-DL", - "season": 1, + "seasons": [ + 1 + ], "title": "Philip K Dicks Electric Dreams" }, { "audio": "AAC2.0", "codec": "x264", "encoder": "RTN", - "episode": 15, + "episodes": [ + 15 + ], "episodeName": "The Great Swamp Search", "filetype": "mkv", "network": "DSNY", "quality": "WEBRip", "resolution": "720p", - "season": 4, + "seasons": [ + 4 + ], "title": "Octonauts" }, { "audio": "AAC2.0", "codec": "H.264", "encoder": "RTN", - "episode": 1, + "episodes": [ + 1 + ], "filetype": "mkv", "network": "RTE", "quality": "WEB-DL", - "season": 1, + "seasons": [ + 1 + ], "title": "Nowhere Fast" }, { @@ -3289,45 +4285,59 @@ "encoder": "HorribleSubs", "network": "CR", "quality": "WEB-DL", - "season": 2, + "seasons": [ + 2 + ], "title": "New Game" }, { "audio": "AAC2.0", "codec": "x264", "encoder": "BOOP", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Shark Island", "filetype": "mkv", "network": "ANPL", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Mystery of the Lost Islands" }, { "audio": "DD2.0", "codec": "x264", "encoder": 
"BTW", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Pilot", "filetype": "mkv", "network": "DTV", "quality": "WEB-DL", - "season": 1, + "seasons": [ + 1 + ], "title": "Mr Mercedes" }, { "audio": "AAC2.0", "codec": "x264", "encoder": "BOOP", - "episode": 1, + "episodes": [ + 1 + ], "episodeName": "Treat Yo Self", "filetype": "mkv", "network": "VICE", "quality": "WEB-DL", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "title": "Most Expensivest" }, { @@ -3361,20 +4371,29 @@ "quality": "Blu-Ray", "remux": true, "resolution": "1080p", - "season": [1,2,3], + "seasons": [ + 1, + 2, + 3 + ], "title": "Attack on Titan" }, { "quality": "USBD", "remux": true, - "season": 1, + "seasons": [ + 1 + ], "title": "Black Clover" }, { "quality": "BD", "remux": true, "resolution": "1080p", - "season": [1,2], + "seasons": [ + 1, + 2 + ], "site": "npz", "title": "Hero Mask" }, @@ -3383,7 +4402,10 @@ "codec": "x264", "quality": "BDrip", "resolution": "1920x1080", - "season": [1,2], + "seasons": [ + 1, + 2 + ], "site": "CBT", "title": "Nisekoi" }, @@ -3395,9 +4417,13 @@ }, { "audio": "Dual-Audio", - "episode": 24, + "episodes": [ + 24 + ], "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "site": "FFF-Remux", "title": "Accel World" }, @@ -3410,25 +4436,39 @@ "year": 2021 }, { - "genre": "drama", - "language": "Albania", + "genres": [ + "drama" + ], + "languages": [ + "Albania" + ], "title": "Hive", "year": 2021 }, { - "genre": "drama", - "language": "Egypt", + "genres": [ + "drama" + ], + "languages": [ + "Egypt" + ], "title": "Souad", "year": 2021 }, { - "genre": "drama", - "language": "Poland", + "genres": [ + "drama" + ], + "languages": [ + "Poland" + ], "title": "Swinki", "year": 2009 }, { - "genre": "thriller", + "genres": [ + "thriller" + ], "title": "Dead Heat on a Merry Go Round", "year": 1966 }, @@ -3437,7 +4477,14 @@ "codec": "x264", "filetype": "Mp4", "resolution": "1080p", - "season": [1,2,3,4,5,6], + "seasons": [ + 1, + 2, + 3, + 4, + 5, 
+ 6 + ], "title": "Justified" }, { @@ -3450,7 +4497,9 @@ }, { "codec": "x264", - "language": "French", + "languages": [ + "French" + ], "quality": "BluRay", "resolution": "720p", "site": "MoviesFD", @@ -3493,7 +4542,9 @@ { "codec": "x264", "resolution": "1280 x 720", - "season": 3, + "seasons": [ + 3 + ], "title": "Sons of Anarchy" }, { @@ -3510,13 +4561,17 @@ "codec": "x265", "quality": "BluRay", "resolution": "720p", - "season": 1, + "seasons": [ + 1 + ], "site": "Crazy4TV.com", "title": "Dark Matter" }, { "codec": "H264", - "language": "FRENCH", + "languages": [ + "FRENCH" + ], "quality": "WEB", "resolution": "1080p", "site": "www.Torrenting.com", @@ -3527,34 +4582,61 @@ "audio": "DDP5.1", "codec": "x265", "filetype": "mkv", - "language": "Tamil", + "languages": [ + "Tamil" + ], "quality": "HDRip", "resolution": "720p", "sbs": "sbs", "site": "www.1TamilBlasters.sbs", "size": "900MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Lucky Man", - "year": 2023 + "year": 2023, + "is_subtitle_available": true }, { "audio": "AAC", "codec": "x264", - "episode": [1,2,3,4,5,6,7,8,9], - "language": ["Tam","Mal","Tel","Kan"], + "episodes": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "languages": [ + "Tam", + "Mal", + "Tel", + "Kan" + ], "quality": "HDRip", - "season": 1, + "seasons": [ + 1 + ], "site": "www.1TamilBlasters.art", "size": "1.2GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Sultan of Delhi", - "year": 2023 + "year": 2023, + "is_subtitle_available": true }, { "audio": "Custom.Audio", "encoder": "Spedboy", "filetype": "mkv", - "language": "PL", + "languages": [ + "PL" + ], "resolution": "1080p", "title": "Mission Impossible", "year": 1996 @@ -3574,7 +4656,9 @@ "audio": "DD.EX.5.1", "codec": "AVC", "encoder": "SnOoP-UPR", - "excess": "MULTI", + "excess": [ + "MULTI" + ], "filetype": "iso", "quality": "BluRay", "resolution": "1080p", @@ -3585,7 +4669,9 @@ "audio": "DTS-X7.1", "codec": "HEVC", "encoder": "DENDA", - 
"excess": "MULTi", + "excess": [ + "MULTi" + ], "filetype": "mkv", "hdr": true, "quality": "Blu-ray", @@ -3598,13 +4684,18 @@ "audio": "HE-AAC2.0", "codec": "H264", "encoder": "Ralf", - "episode": [23,24], + "episodes": [ + 23, + 24 + ], "episodeName": "The Pilot", "filetype": "mkv", "network": "NF", "quality": "WEB-DL", "resolution": "1080p", - "season": 4, + "seasons": [ + 4 + ], "title": "Seinfeld" }, { @@ -3612,7 +4703,9 @@ "codec": "AV1", "extended": true, "filetype": "mkv", - "language": "PL", + "languages": [ + "PL" + ], "quality": "BRRip", "resolution": "1080p", "title": "Escape Room Tournament of Champions", @@ -3635,18 +4728,32 @@ "title": "The Inbetweeners" }, { - "season": [1,2,3,4,5,6], + "seasons": [ + 1, + 2, + 3, + 4, + 5, + 6 + ], "title": "The Sopranos" }, { "codec": "x264", "encoder": "AMB3R", - "episode": 7, + "episodes": [ + 7 + ], "filetype": "mkv", "quality": "HDTV", - "season": 6, - "subtitles": "SUBFRENCH", - "title": "The Walking Dead" + "seasons": [ + 6 + ], + "subtitles": [ + "SUBFRENCH" + ], + "title": "The Walking Dead", + "is_subtitle_available": true }, { "audio": "aac", @@ -3654,9 +4761,12 @@ "filetype": "mkv", "quality": "WEBDL", "resolution": "720p", - "subtitles": "VOSTFR", + "subtitles": [ + "VOSTFR" + ], "title": "The Good German", - "year": 2006 + "year": 2006, + "is_subtitle_available": true }, { "site": "www.Torrenting.com", @@ -3674,33 +4784,45 @@ "audio": "AAC", "codec": "x264", "filetype": "mkv", - "language": "Tamil", + "languages": [ + "Tamil" + ], "resolution": "QHD", "site": "www.1TamilBlasters.lat", "size": "3.4GB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], "title": "Thuritham", "untouched": true, - "year": 2023 + "year": 2023, + "is_subtitle_available": true }, { "audio": "AAC", "codec": "x264", "encoder": "HQ", "filetype": "mkv", - "language": "Tamil", + "languages": [ + "Tamil" + ], "quality": "HDRip", "site": "www.1TamilMV.world", "size": "400MB", - "subtitles": "ESub", + "subtitles": [ + "ESub" + ], 
"title": "Raja Vikramarka", - "year": 2024 + "year": 2024, + "is_subtitle_available": true }, { "codec": "x264", "encoder": "Clean", "filetype": "mkv", - "language": "Telugu", + "languages": [ + "Telugu" + ], "site": "www.1TamilMV.world", "size": "700MB", "title": "Kotha Rangula Prapancham", @@ -3719,7 +4841,10 @@ { "audio": "DTS", "encoder": "3xEng", - "language": ["Rus","Ukr"], + "languages": [ + "Rus", + "Ukr" + ], "quality": "BluRay", "resolution": "1080p", "site": "HDCL", @@ -3730,18 +4855,25 @@ "audio": "AC3", "codec": "XViD", "encoder": "DVDFR", - "language": "FRENCH", + "languages": [ + "FRENCH" + ], "quality": "DVDRip", "title": "127 Heures" }, { - "language": ["inglês","português"], + "languages": [ + "inglês", + "português" + ], "title": "Men in Black International", "year": 2019 }, { "codec": "XviD", - "language": "PL", + "languages": [ + "PL" + ], "quality": "DVDRiP", "site": "Arx", "title": "Quarantine", @@ -3750,30 +4882,43 @@ { "codec": "XViD", "encoder": "DeBTV", - "episode": 146, - "language": "KOR", + "episodes": [ + 146 + ], + "languages": [ + "KOR" + ], "quality": "HDTV", "title": "All Love" }, { "codec": "x264", "encoder": "VXTT", - "language": "KOREAN", + "languages": [ + "KOREAN" + ], "quality": "WEBRip", "resolution": "1080p", - "subtitles": "ENSUBBED", + "subtitles": [ + "ENSUBBED" + ], "title": "Atonement", - "year": 2017 + "year": 2017, + "is_subtitle_available": true }, { "audio": "DD5.1", "codec": "x264", "encoder": "TrollHD", - "language": "HEBREW", + "languages": [ + "HEBREW" + ], "network": "NF", "quality": "WEBRip", "resolution": "1080p", - "season": 1, + "seasons": [ + 1 + ], "site": "rartv", "title": "Fauda" }, diff --git a/tests/files/output_standard.json b/tests/files/output_standard.json index 48190e3..9d86874 100644 --- a/tests/files/output_standard.json +++ b/tests/files/output_standard.json @@ -263,7 +263,9 @@ { "audio": "AAC", "codec": "H.264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "quality": "Screener", 
"title": "The Shaukeens" }, @@ -281,7 +283,9 @@ { "audio": "AAC", "codec": "H.264", - "language": "English", + "languages": [ + "English" + ], "quality": "Cam", "title": "Interstellar" }, @@ -330,7 +334,10 @@ "title": "Sons of Anarchy" }, { - "language": ["Russian","English"], + "languages": [ + "Russian", + "English" + ], "title": "Community" }, { @@ -388,7 +395,9 @@ { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "title": "Akira" }, { @@ -406,14 +415,20 @@ { "codec": "H.264", "quality": "Blu-ray", - "subtitles": "Danish", - "title": "The X-Files" + "subtitles": [ + "Danish" + ], + "title": "The X-Files", + "is_subtitle_available": true }, { "codec": "H.265", "quality": "Blu-ray", - "subtitles": "Danish", - "title": "The X-Files" + "subtitles": [ + "Danish" + ], + "title": "The X-Files", + "is_subtitle_available": true }, { "codec": "H.264", @@ -427,7 +442,10 @@ }, { "filetype": "MP4", - "language": ["Chinese","Japanese"], + "languages": [ + "Chinese", + "Japanese" + ], "quality": "HDTV", "resolution": "720p", "title": "Boku Unmei no Hito desu" @@ -435,7 +453,9 @@ { "audio": "DTS 5.1", "codec": "H.264", - "language": "Nordic", + "languages": [ + "Nordic" + ], "quality": "Blu-ray", "title": "Blind" }, @@ -456,7 +476,9 @@ }, { "codec": "Xvid", - "language": "Swedish", + "languages": [ + "Swedish" + ], "quality": "DVD-Rip", "title": "Bumbibjornarna" }, @@ -464,8 +486,11 @@ "audio": "AAC 2.0", "codec": "H.264", "quality": "WEB-DL", - "subtitles": "Korean", - "title": "The Martian" + "subtitles": [ + "Korean" + ], + "title": "The Martian", + "is_subtitle_available": true }, { "filetype": "MP4", @@ -491,7 +516,9 @@ { "audio": "AAC 5.1", "codec": "H.264", - "language": "Polish", + "languages": [ + "Polish" + ], "network": "Netflix", "quality": "WEB-DL", "title": "1983" @@ -500,41 +527,65 @@ "audio": "AAC", "codec": "H.264", "quality": "BRRip", - "subtitles": "English", - "title": "The Bridge" + "subtitles": [ + 
"English" + ], + "title": "The Bridge", + "is_subtitle_available": true }, { "codec": "H.264", "quality": "WEB-DL", - "subtitles": "ExYu", - "title": "The Meg" + "subtitles": [ + "ExYu" + ], + "title": "The Meg", + "is_subtitle_available": true }, { "codec": "H.264", "quality": "Cam", - "subtitles": "English", - "title": "Dragon Ball Super: Broly" + "subtitles": [ + "English" + ], + "title": "Dragon Ball Super: Broly", + "is_subtitle_available": true }, { "codec": "H.264", - "language": "English", + "languages": [ + "English" + ], "quality": "WEB-DL", - "subtitles": ["Hindi","English"], - "title": "Joker" + "subtitles": [ + "Hindi", + "English" + ], + "title": "Joker", + "is_subtitle_available": true }, { "audio": "AAC", "codec": "H.264", - "language": "Mandarin", + "languages": [ + "Chinese" + ], "quality": "WEB-DL", - "subtitles": ["Chinese","English"], - "title": "IP Man And Four Kings" + "subtitles": [ + "Chinese", + "English" + ], + "title": "IP Man And Four Kings", + "is_subtitle_available": true }, { "title": "American Dad!" 
}, { - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "DVD-Rip", "title": "The Simpsons" }, @@ -562,9 +613,14 @@ { "audio": "Dolby Digital Plus 5.1", "codec": "H.264", - "language": "Hindi", - "subtitles": "English", - "title": "Sacred Games" + "languages": [ + "Hindi" + ], + "subtitles": [ + "English" + ], + "title": "Sacred Games", + "is_subtitle_available": true }, { "codec": "H.264", @@ -596,7 +652,10 @@ { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "network": "Amazon Studios", "quality": "WEB-DL", "title": "The Blacklist" @@ -655,16 +714,30 @@ { "audio": "Dolby Digital", "codec": "Xvid", - "language": ["Italian","English"], - "subtitles": ["Italian","English"], - "title": "Z Nation" + "languages": [ + "Italian", + "English" + ], + "subtitles": [ + "Italian", + "English" + ], + "title": "Z Nation", + "is_subtitle_available": true }, { "audio": "Dolby Digital", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "BDRip", - "subtitles": "Available", - "title": "1917" + "subtitles": [ + "Italian", + "English" + ], + "title": "1917", + "is_subtitle_available": true }, { "codec": "H.264", @@ -727,17 +800,31 @@ { "audio": "Dolby Digital", "codec": "H.264", - "language": ["Italian","English"], - "subtitles": ["Italian","English"], - "title": "The Big Bus - Il fantabus" + "languages": [ + "Italian", + "English" + ], + "subtitles": [ + "Italian", + "English" + ], + "title": "The Big Bus - Il fantabus", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "Blu-ray", - "subtitles": ["Italian","English"], - "title": "The Hunt" + "subtitles": [ + "Italian", + "English" + ], + "title": "The Hunt", + "is_subtitle_available": true }, { "codec": "H.264", @@ -757,9 +844,16 @@ { 
"audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","English"], - "subtitles": ["Italian","English"], - "title": "Diabolique" + "languages": [ + "Italian", + "English" + ], + "subtitles": [ + "Italian", + "English" + ], + "title": "Diabolique", + "is_subtitle_available": true }, { "audio": "Dolby Digital 2.0", @@ -774,9 +868,15 @@ }, { "codec": "H.264", - "language": ["Italian","French"], - "subtitles": "English", - "title": "37°2 le matin - Betty Blue" + "languages": [ + "Italian", + "French" + ], + "subtitles": [ + "English" + ], + "title": "37°2 le matin - Betty Blue", + "is_subtitle_available": true }, { "codec": "H.264", @@ -837,14 +937,18 @@ "title": "Tower of God" }, { - "episode": 12, - "subtitles": "Available", + "episodes": [ + 12 + ], "title": "Kami no Tou" }, { "codec": "H.265", - "subtitles": "English", - "title": "Plunderer" + "subtitles": [ + "English" + ], + "title": "Plunderer", + "is_subtitle_available": true }, { "title": "Tamayomi" @@ -853,18 +957,15 @@ "title": "Ahiru no Sora" }, { - "subtitles": "Available", "title": "Shadowverse" }, { "title": "A3! Season Spring & Summer" }, { - "subtitles": "Available", "title": "Kitsutsuki Tanteidokoro" }, { - "subtitles": "Available", "title": "Princess Connect! 
Re-Dive" }, { @@ -875,8 +976,11 @@ "codec": "H.264", "network": "SONY LIV", "size": "1.1GB", - "subtitles": "English", - "title": "Kadakh" + "subtitles": [ + "English" + ], + "title": "Kadakh", + "is_subtitle_available": true }, { "codec": "H.265", @@ -886,44 +990,58 @@ { "codec": "H.264", "quality": "Blu-ray", - "subtitles": "Available", "title": "Shuddh Desi Romance" }, { "audio": "Dolby Digital 5.1", "codec": "H.264", "quality": "WEB-DL", - "subtitles": "English", - "title": "Face 2 Face" + "subtitles": [ + "English" + ], + "title": "Face 2 Face", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", "network": "Netflix", "size": "800MB", - "subtitles": "English", - "title": "Chaman Bahar" + "subtitles": [ + "English" + ], + "title": "Chaman Bahar", + "is_subtitle_available": true }, { "audio": "Dolby Digital 2.0", "codec": "H.264", "size": "650MB", - "subtitles": "English", - "title": "Piprabidya" + "subtitles": [ + "English" + ], + "title": "Piprabidya", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", "network": "Amazon Studios", "size": "1.1GB", - "subtitles": "English", - "title": "Penguin" + "subtitles": [ + "English" + ], + "title": "Penguin", + "is_subtitle_available": true }, { "audio": "Dolby Digital", "codec": "H.264", - "subtitles": "English", - "title": "Kadakh" + "subtitles": [ + "English" + ], + "title": "Kadakh", + "is_subtitle_available": true }, { "codec": "H.265", @@ -933,62 +1051,90 @@ { "codec": "H.264", "quality": "WEB-DL", - "subtitles": "English", - "title": "Kavacham" + "subtitles": [ + "English" + ], + "title": "Kavacham", + "is_subtitle_available": true }, { "codec": "H.264", "quality": "BRRip", - "subtitles": "English", - "title": "M.S. Dhoni: The Untold Story" + "subtitles": [ + "English" + ], + "title": "M.S. Dhoni: The Untold Story", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "quality": "Blu-ray", - "subtitles": "English", - "title": "M.S. 
Dhoni: The Untold Story" + "subtitles": [ + "English" + ], + "title": "M.S. Dhoni: The Untold Story", + "is_subtitle_available": true }, { "codec": "H.264", "quality": "Blu-ray", - "subtitles": "Available", "title": "Detective Byomkesh Bakshy" }, { "codec": "H.264", "filetype": "MKV", "quality": "WEBRip", - "subtitles": "English", - "title": "Kasganj" + "subtitles": [ + "English" + ], + "title": "Kasganj", + "is_subtitle_available": true }, { "codec": "H.264", "filetype": "MKV", "network": "ZEE5", "quality": "WEB-DL", - "subtitles": "English", - "title": "Kasganj" + "subtitles": [ + "English" + ], + "title": "Kasganj", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Tamil","Telugu","Malayalam"], + "languages": [ + "Tamil", + "Telugu", + "Malayalam" + ], "quality": "WEB-DL", - "subtitles": "Available", - "title": "Penguin" + "subtitles": [ + "Tamil", + "Telugu", + "Malayalam" + ], + "title": "Penguin", + "is_subtitle_available": true }, { "title": "Kakushigoto" }, { "codec": "H.265", - "subtitles": "English", - "title": "Shaman King" + "subtitles": [ + "English" + ], + "title": "Shaman King", + "is_subtitle_available": true }, { "audio": "Dolby Digital Plus 5.1", "codec": "H.264", - "language": "Japanese", + "languages": [ + "Japanese" + ], "network": "Netflix", "title": "A Whisker Away" }, @@ -996,13 +1142,18 @@ "audio": "Dolby Digital 5.1", "codec": "H.265", "filetype": "MKV", - "language": "Japanese", + "languages": [ + "Japanese" + ], "quality": "Blu-ray", "title": "liz and the blue bird" }, { "filetype": "MKV", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "network": "Amazon Studios", "quality": "WEB-DL", "title": "The Flash" @@ -1096,7 +1247,6 @@ "title": "And We Go Green" }, { - "subtitles": "Available", "title": "Ella Fitzgerald - Just One of Those Things" }, { @@ -1129,15 +1279,21 @@ { "audio": "Dolby Digital 5.1", "filetype": "MKV", - "language": 
["Italian","English"], + "languages": [ + "Italian", + "English" + ], "network": "Amazon Studios", "quality": "WEB-DL", "title": "Marvel's Agents of S.H.I.E.L.D." }, { "codec": "H.264", - "subtitles": "English", - "title": "Your Honor" + "subtitles": [ + "English" + ], + "title": "Your Honor", + "is_subtitle_available": true }, { "codec": "H.264", @@ -1146,14 +1302,16 @@ }, { "codec": "H.264", - "subtitles": "Available", "title": "Thirteen" }, { "codec": "H.264", "network": "Netflix", - "subtitles": "English", - "title": "Paan Singh Tomar" + "subtitles": [ + "English" + ], + "title": "Paan Singh Tomar", + "is_subtitle_available": true }, { "audio": "AAC 2.0", @@ -1166,7 +1324,9 @@ "title": "Trishas Southern Kitchen" }, { - "language": "English", + "languages": [ + "English" + ], "title": "Split Image" }, { @@ -1175,7 +1335,9 @@ { "audio": "Mono", "codec": "Xvid", - "language": "English", + "languages": [ + "English" + ], "quality": "DVD-Rip", "title": "Five" }, @@ -1203,12 +1365,10 @@ }, { "filetype": "MKV", - "subtitles": "Available", "title": "Yesterday o Utatte" }, { "filetype": "MKV", - "subtitles": "Available", "title": "Honzuki no Gekokujou - Shisho ni Naru Tame ni wa Shudan wo Erandeiraremasen" }, { @@ -1236,8 +1396,11 @@ }, { "resolution": "720p", - "subtitles": "English", - "title": "Black Hollywood: 'They've Gotta Have Us'" + "subtitles": [ + "English" + ], + "title": "Black Hollywood: 'They've Gotta Have Us'", + "is_subtitle_available": true }, { "codec": "H.264", @@ -1270,7 +1433,6 @@ "codec": "H.264", "quality": "Blu-ray", "size": "800MB", - "subtitles": "Available", "title": "Vacancy" }, { @@ -1282,12 +1444,18 @@ { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "title": "Wasp Network" }, { "filetype": "MKV", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "Blu-ray", "title": "Darlin" }, @@ -1300,12 +1468,17 @@ { "audio": 
"Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","Danish"], + "languages": [ + "Italian", + "Danish" + ], "title": "Infection-What We Become" }, { "codec": "H.264", - "language": "Hindi", + "languages": [ + "Hindi" + ], "title": "Perversion" }, { @@ -1321,7 +1494,6 @@ { "codec": "H.264", "quality": "Blu-ray", - "subtitles": "Available", "title": "The Amazing Spider-Man 2" }, { @@ -1332,20 +1504,32 @@ }, { "quality": "Blu-ray", - "subtitles": "English", - "title": "Mother [Madre]" + "subtitles": [ + "English" + ], + "title": "Mother [Madre]", + "is_subtitle_available": true }, { - "language": "Bengali", + "languages": [ + "Bengali" + ], "quality": "WEB-DL", "title": "Ridoy Jure" }, { "codec": "H.264", "filetype": "MP4", - "language": ["English","Russian"], - "subtitles": "Available", - "title": "The Deep Blue Sea" + "languages": [ + "English", + "Russian" + ], + "subtitles": [ + "English", + "Russian" + ], + "title": "The Deep Blue Sea", + "is_subtitle_available": true }, { "audio": "Dolby TrueHD 5.1", @@ -1362,7 +1546,9 @@ "title": "Athlete A" }, { - "language": "Bengali", + "languages": [ + "Bengali" + ], "quality": "WEB-DL", "title": "SHAHENSHA" }, @@ -1370,8 +1556,11 @@ "audio": "Dolby Digital 2.0", "codec": "H.264", "network": "Amazon Studios", - "subtitles": "English", - "title": "Rasbhari" + "subtitles": [ + "English" + ], + "title": "Rasbhari", + "is_subtitle_available": true }, { "codec": "H.264", @@ -1384,7 +1573,10 @@ }, { "codec": "H.265", - "language": ["English","Japanese"], + "languages": [ + "English", + "Japanese" + ], "title": "L.A. 
Story" }, { @@ -1412,12 +1604,14 @@ "audio": "Dual", "codec": "H.265", "quality": "Blu-ray", - "subtitles": "English", - "title": "Black Lagoon" + "subtitles": [ + "English" + ], + "title": "Black Lagoon", + "is_subtitle_available": true }, { "codec": "H.265", - "subtitles": "Available", "title": "One Piece" }, { @@ -1432,43 +1626,65 @@ { "codec": "H.264", "network": "Amazon Studios", - "subtitles": "English", - "title": "NOS4A2" + "subtitles": [ + "English" + ], + "title": "NOS4A2", + "is_subtitle_available": true }, { "codec": "H.265", - "subtitles": "Available", "title": "Haikyuu!!" }, { "audio": "Dolby Digital 2.0", "codec": "H.264", "size": "2.5GB", - "subtitles": "English", - "title": "Moothon: The Elder One" + "subtitles": [ + "English" + ], + "title": "Moothon: The Elder One", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","French"], + "languages": [ + "Italian", + "French" + ], "quality": "Blu-ray", - "subtitles": ["Italian","English"], - "title": "Doppia Pelle - Le Daim" + "subtitles": [ + "Italian", + "English" + ], + "title": "Doppia Pelle - Le Daim", + "is_subtitle_available": true }, { "audio": "DTS-HD MA 5.1", "codec": "H.264", - "language": ["Italian","English"], - "subtitles": ["Italian","English"], - "title": "The Twilight Saga: Breaking Down - Parte 2" + "languages": [ + "Italian", + "English" + ], + "subtitles": [ + "Italian", + "English" + ], + "title": "The Twilight Saga: Breaking Down - Parte 2", + "is_subtitle_available": true }, { "audio": "Dolby Digital 2.0", "codec": "H.264", "quality": "Blu-ray", "size": "1.3GB", - "subtitles": "English", - "title": "The Painted Bird" + "subtitles": [ + "English" + ], + "title": "The Painted Bird", + "is_subtitle_available": true }, { "audio": "DTS-HD MA 5.1", @@ -1484,18 +1700,30 @@ { "audio": "Dolby Digital 5.1", "codec": "H.265", - "language": ["Italian","English"], - "subtitles": ["Italian","English"], - "title": "Just Mercy - Il diritto 
di opporsi" + "languages": [ + "Italian", + "English" + ], + "subtitles": [ + "Italian", + "English" + ], + "title": "Just Mercy - Il diritto di opporsi", + "is_subtitle_available": true }, { "codec": "H.264", "title": "Mae West Dirty Blonde" }, { - "language": "English", - "subtitles": "Arabic", - "title": "The Ottomans: Europe's Muslim Emperors" + "languages": [ + "English" + ], + "subtitles": [ + "Arabic" + ], + "title": "The Ottomans: Europe's Muslim Emperors", + "is_subtitle_available": true }, { "codec": "H.264", @@ -1514,40 +1742,63 @@ "audio": "Dolby Digital 2.0", "codec": "H.264", "size": "1.5GB", - "subtitles": "English", - "title": "#Yaaram" + "subtitles": [ + "English" + ], + "title": "#Yaaram", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","Spanish"], + "languages": [ + "Italian", + "Spanish" + ], "title": "Nessuno sa chi io sono qui-Nadie sabe que estoy aqui" }, { "audio": "Dolby Digital 2.0", "codec": "H.264", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "title": "L.ultima corve" }, { "audio": "Dolby Digital 2.0", "codec": "H.264", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "title": "La signora di Shanghai-The lady from Shanghai" }, { - "language": ["Telugu","Tamil","Hindi","Chinese"], + "languages": [ + "Telugu", + "Tamil", + "Hindi", + "Chinese" + ], "quality": "Blu-ray", - "subtitles": "English", - "title": "The Four" + "subtitles": [ + "English" + ], + "title": "The Four", + "is_subtitle_available": true }, { "audio": "Dual", "codec": "H.264", "network": "Amazon Studios", "size": "1.2GB", - "subtitles": "English", - "title": "Samrat & Co." 
+ "subtitles": [ + "English" + ], + "title": "Samrat & Co.", + "is_subtitle_available": true }, { "codec": "H.264", @@ -1557,7 +1808,10 @@ { "audio": "Dolby Digital 5.1", "codec": "H.265", - "language": ["English","Spanish"], + "languages": [ + "English", + "Spanish" + ], "network": "Netflix", "size": "950MB", "title": "Adu" @@ -1571,7 +1825,6 @@ "audio": "Dolby Digital 5.1", "codec": "H.264", "quality": "WEB-DL", - "subtitles": "Available", "title": "The Fxxk-It List" }, { @@ -1588,8 +1841,11 @@ { "audio": "Dolby Digital", "codec": "H.264", - "subtitles": "English", - "title": "Sarkar 3" + "subtitles": [ + "English" + ], + "title": "Sarkar 3", + "is_subtitle_available": true }, { "audio": "Dolby Digital 2.0", @@ -1607,12 +1863,18 @@ "audio": "Dolby Digital 5.1", "codec": "H.264", "quality": "Blu-ray", - "subtitles": "English", - "title": "Dukhtar" + "subtitles": [ + "English" + ], + "title": "Dukhtar", + "is_subtitle_available": true }, { - "subtitles": "English", - "title": "Main Teri Tu Mera" + "subtitles": [ + "English" + ], + "title": "Main Teri Tu Mera", + "is_subtitle_available": true }, { "quality": "WEB-DL", @@ -1631,19 +1893,28 @@ }, { "codec": "H.265", - "language": "Japanese", + "languages": [ + "Japanese" + ], "quality": "Blu-ray", "title": "Lupin III The First" }, { "codec": "H.265", - "language": "Japanese", - "subtitles": "English", - "title": "The Rule for a Vagabond" + "languages": [ + "Japanese" + ], + "subtitles": [ + "English" + ], + "title": "The Rule for a Vagabond", + "is_subtitle_available": true }, { "codec": "H.264", - "language": "Portuguese", + "languages": [ + "Portuguese" + ], "quality": "Blu-ray", "title": "Cousins" }, @@ -1655,8 +1926,11 @@ "audio": "Dolby Digital 5.1", "codec": "H.264", "quality": "WEB-DL", - "subtitles": "English", - "title": "PENALTY" + "subtitles": [ + "English" + ], + "title": "PENALTY", + "is_subtitle_available": true }, { "audio": "Dual", @@ -1686,14 +1960,20 @@ { "codec": "H.264", "filetype": "MKV", - 
"language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "AHDTV", "title": "Gangs Of London" }, { "codec": "H.264", "filetype": "MKV", - "language": ["Italian","English"], + "languages": [ + "Italian", + "English" + ], "quality": "Blu-ray", "title": "Fear the Walking Dead" }, @@ -1707,7 +1987,10 @@ }, { "audio": "Dual", - "language": ["Hindi","Marathi"], + "languages": [ + "Hindi", + "Marathi" + ], "quality": "WEBRip", "title": "Nude" }, @@ -1717,14 +2000,19 @@ "title": "X-men The Last Stand" }, { - "genre": ["Comedy","Western"], + "genres": [ + "Comedy", + "Western" + ], "title": "The Kissing Bandit" }, { "title": "The King of Comedy" }, { - "genre": "Comedy", + "genres": [ + "Comedy" + ], "title": "The Mouse on the Moon" }, { @@ -1750,10 +2038,21 @@ { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": ["Italian","English","German","Spanish"], + "languages": [ + "Italian", + "English", + "German", + "Spanish" + ], "quality": "DVD-Rip", - "subtitles": "Available", - "title": "Professor Marston and the Wonder Women" + "subtitles": [ + "Italian", + "English", + "German", + "Spanish" + ], + "title": "Professor Marston and the Wonder Women", + "is_subtitle_available": true }, { "audio": "DTS-HD MA 5.1", @@ -1984,22 +2283,36 @@ "title": "The Djinn" }, { - "genre": "Drama", - "language": "Albanian", + "genres": [ + "Drama" + ], + "languages": [ + "Albanian" + ], "title": "Hive" }, { - "genre": "Drama", - "language": "Egyptian", + "genres": [ + "Drama" + ], + "languages": [ + "Egyptian" + ], "title": "Souad" }, { - "genre": "Drama", - "language": "Polish", + "genres": [ + "Drama" + ], + "languages": [ + "Polish" + ], "title": "Swinki" }, { - "genre": "Thriller", + "genres": [ + "Thriller" + ], "title": "Dead Heat on a Merry Go Round" }, { @@ -2060,7 +2373,9 @@ }, { "codec": "H.264", - "language": "French", + "languages": [ + "French" + ], "quality": "WEBRip", "title": "Presque" }, @@ -2070,20 +2385,33 @@ "filetype": "MKV", 
"quality": "WEB-DL", "sbs": "SBS", - "subtitles": "English", - "title": "Lucky Man" + "subtitles": [ + "English" + ], + "title": "Lucky Man", + "is_subtitle_available": true }, { "codec": "H.264", - "language": ["Tamil","Malayalam","Telugu","Kannada"], + "languages": [ + "Tamil", + "Malayalam", + "Telugu", + "Kannada" + ], "quality": "WEB-DL", - "subtitles": "English", - "title": "Sultan of Delhi" + "subtitles": [ + "English" + ], + "title": "Sultan of Delhi", + "is_subtitle_available": true }, { "audio": "Custom", "filetype": "MKV", - "language": "Polish", + "languages": [ + "Polish" + ], "title": "Mission Impossible" }, { @@ -2116,7 +2444,9 @@ { "audio": "HE-AAC v2", "filetype": "MKV", - "language": "Polish", + "languages": [ + "Polish" + ], "title": "Escape Room Tournament of Champions" }, { @@ -2138,16 +2468,22 @@ { "codec": "H.264", "filetype": "MKV", - "subtitles": "French", - "title": "The Walking Dead" + "subtitles": [ + "French" + ], + "title": "The Walking Dead", + "is_subtitle_available": true }, { "audio": "AAC", "codec": "H.264", "filetype": "MKV", "quality": "WEB-DL", - "subtitles": "French", - "title": "The Good German" + "subtitles": [ + "French" + ], + "title": "The Good German", + "is_subtitle_available": true }, { "title": "Anatomy Of A Fall" @@ -2162,15 +2498,21 @@ "codec": "H.264", "filetype": "MKV", "resolution": "1440p", - "subtitles": "English", - "title": "Thuritham" + "subtitles": [ + "English" + ], + "title": "Thuritham", + "is_subtitle_available": true }, { "codec": "H.264", "filetype": "MKV", "quality": "WEB-DL", - "subtitles": "English", - "title": "Raja Vikramarka" + "subtitles": [ + "English" + ], + "title": "Raja Vikramarka", + "is_subtitle_available": true }, { "codec": "H.264", @@ -2183,42 +2525,61 @@ "title": "The Lord of the Rings" }, { - "language": ["Russian","Ukrainian"], + "languages": [ + "Russian", + "Ukrainian" + ], "quality": "Blu-ray", "title": "Deadpool" }, { "audio": "Dolby Digital", "codec": "Xvid", - "language": 
"French", + "languages": [ + "French" + ], "quality": "DVD-Rip", "title": "127 Heures" }, { - "language": ["English","Portuguese"], + "languages": [ + "English", + "Portuguese" + ], "title": "Men in Black International" }, { "codec": "Xvid", - "language": "Polish", + "languages": [ + "Polish" + ], "quality": "DVD-Rip", "title": "Quarantine" }, { "codec": "Xvid", - "language": "Korean", + "languages": [ + "Korean" + ], "title": "All Love" }, { "codec": "H.264", - "language": "Korean", - "subtitles": "English", - "title": "Atonement" + "languages": [ + "Korean" + ], + "subtitles": [ + "English" + ], + "title": "Atonement", + "is_subtitle_available": true }, { "audio": "Dolby Digital 5.1", "codec": "H.264", - "language": "Hebrew", + "languages": [ + "Hebrew" + ], "network": "Netflix", "title": "Fauda" }, From 4997a6fce81b38f7f784ba0cf33c4cb740ab9052 Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Sat, 13 Jul 2024 07:23:13 +0530 Subject: [PATCH 19/21] Refactor & Cleanup & Remove support for python 2 --- PTN/__init__.py | 21 ++- PTN/extras.py | 180 ++++++++++-------------- PTN/parse.py | 302 ++++++++++++++++++----------------------- PTN/patterns.py | 353 ++++++++++++++++++++---------------------------- PTN/post.py | 94 ++++--------- 5 files changed, 390 insertions(+), 560 deletions(-) diff --git a/PTN/__init__.py b/PTN/__init__.py index ed7821c..ab71830 100644 --- a/PTN/__init__.py +++ b/PTN/__init__.py @@ -1,15 +1,6 @@ #!/usr/bin/env python -import pkgutil -import sys - -# Regex in python 2 is very slow so we check if the faster 'regex' library is available. 
-faster_regex = pkgutil.find_loader("regex") -if faster_regex is not None and sys.version_info[0] < 3: - re = faster_regex.load_module("regex") -else: - re = pkgutil.find_loader("re").load_module("re") - +import re from .parse import PTN __author__ = "Giorgio Momigliano" @@ -18,5 +9,13 @@ __license__ = "MIT" -def parse(name, standardise=True, coherent_types=False): +def parse(name: str, standardise: bool = True, coherent_types: bool = False) -> dict: + """ + Parse the torrent title into its components. + + :param name: The torrent name to parse. + :param standardise: Whether to standardise the parsed values. + :param coherent_types: Whether to ensure coherent types in the parsed results. + :return: A dictionary of parsed components. + """ return PTN().parse(name, standardise, coherent_types) diff --git a/PTN/extras.py b/PTN/extras.py index 4db4543..a708b0b 100644 --- a/PTN/extras.py +++ b/PTN/extras.py @@ -1,65 +1,67 @@ #!/usr/bin/env python +import re +from typing import List, Tuple, Union, Optional # Helper functions and constants for patterns.py -delimiters = "[\.\s\-\+_\/(),]" +delimiters = r"[\.\s\-\+_\/(),]" langs = [ - ("rus(?:sian)?|russo", "Russian"), - ("(?:True)?fre?(?:nch)?|fr(?:ench|a|e|anc[eê]s)?", "French"), - ("(?:nu)?ita(?:liano?)?", "Italian"), - ("castellano|spa(?:nish)?|esp?", "Spanish"), - ("swedish", "Swedish"), - ("dk|dan(?:ish)?", "Danish"), - ("ger(?:man)?|deu(?:tsch)?|alem[aã]o", "German"), - ("nordic", "Nordic"), - ("exyu", "ExYu"), - ("chs|chi(?:nese)?|(?:mand[ae]rin|ch[sn])|chin[eê]s|zh-hans", "Chinese"), - ("hin(?:di)?", "Hindi"), - ("polish|poland|pl", "Polish"), - ("kor(?:ean)?|coreano", "Korean"), - ("ben(?:gali)?|bangla", "Bengali"), - ("kan(?:nada)?", "Kannada"), - ("t[aâ]m(?:il)?", "Tamil"), - ("tel(?:ugu)?", "Telugu"), - ("mar(?:athi)?", "Marathi"), - ("mal(?:ayalam)?", "Malayalam"), - ("guj(?:arati)?", "Gujarati"), - ("pun(?:jabi)?", "Punjabi"), - ("ori(?:ya)?", "Oriya"), - ("japanese|ja?p|jpn|japon[eê]s", "Japanese"), - 
("interslavic", "Interslavic"), - ("ara(?:bic)?", "Arabic"), - ("urdu", "Urdu"), - ("tur(?:kish)?|tr", "Turkish"), - ("tailand[eê]s|thai?", "Thai"), - ("tagalog", "Tagalog"), - ("ind(?:onesian)?", "Indonesian"), - ("vie(?:tnamese)?", "Vietnamese"), - ("heb(?:rew)?", "Hebrew"), - ("gre(?:ek)?", "Greek"), - ("cz(?:ech)?", "Czech"), - ("hun(?:garian)?", "Hungarian"), - ("ukr(?:ainian)?", "Ukrainian"), - ("fin(?:nish)?", "Finnish"), - ("nor(?:wegian)?", "Norwegian"), - ("sin(?:hala)?", "Sinhala"), - ("dutch|nl", "Dutch"), - ("p[ua]n(?:jabi)?", "Punjabi"), - ("por(?:tuguese)?|portugu[eèê]s[ea]?|p[rt]|port?", "Portuguese"), - ("alb(?:anian?)?|albanais", "Albanian"), - ("egypt(?:ian)?|egy", "Egyptian"), - ("en?(?:g(?:lish)?)?|ing(?:l[eéê]s)?", "English"), # Must be at end, matches just an 'e' + (r"rus(?:sian)?|russo", "Russian"), + (r"(?:True)?fre?(?:nch)?|fr(?:ench|a|e|anc[eê]s)?", "French"), + (r"(?:nu)?ita(?:liano?)?", "Italian"), + (r"castellano|spa(?:nish)?|esp?", "Spanish"), + (r"swedish", "Swedish"), + (r"dk|dan(?:ish)?", "Danish"), + (r"ger(?:man)?|deu(?:tsch)?|alem[aã]o", "German"), + (r"nordic", "Nordic"), + (r"exyu", "ExYu"), + (r"chs|chi(?:nese)?|(?:mand[ae]rin|ch[sn])|chin[eê]s|zh-hans", "Chinese"), + (r"hin(?:di)?", "Hindi"), + (r"polish|poland|pl", "Polish"), + (r"kor(?:ean)?|coreano", "Korean"), + (r"ben(?:gali)?|bangla", "Bengali"), + (r"kan(?:nada)?", "Kannada"), + (r"t[aâ]m(?:il)?", "Tamil"), + (r"tel(?:ugu)?", "Telugu"), + (r"mar(?:athi)?", "Marathi"), + (r"mal(?:ayalam)?", "Malayalam"), + (r"guj(?:arati)?", "Gujarati"), + (r"pun(?:jabi)?", "Punjabi"), + (r"ori(?:ya)?", "Oriya"), + (r"japanese|ja?p|jpn|japon[eê]s", "Japanese"), + (r"interslavic", "Interslavic"), + (r"ara(?:bic)?", "Arabic"), + (r"urdu", "Urdu"), + (r"tur(?:kish)?|tr", "Turkish"), + (r"tailand[eê]s|thai?", "Thai"), + (r"tagalog", "Tagalog"), + (r"ind(?:onesian)?", "Indonesian"), + (r"vie(?:tnamese)?", "Vietnamese"), + (r"heb(?:rew)?", "Hebrew"), + (r"gre(?:ek)?", "Greek"), + 
(r"cz(?:ech)?", "Czech"), + (r"hun(?:garian)?", "Hungarian"), + (r"ukr(?:ainian)?", "Ukrainian"), + (r"fin(?:nish)?", "Finnish"), + (r"nor(?:wegian)?", "Norwegian"), + (r"sin(?:hala)?", "Sinhala"), + (r"dutch|nl", "Dutch"), + (r"p[ua]n(?:jabi)?", "Punjabi"), + (r"por(?:tuguese)?|portugu[eèê]s[ea]?|p[rt]|port?", "Portuguese"), + (r"alb(?:anian?)?|albanais", "Albanian"), + (r"egypt(?:ian)?|egy", "Egyptian"), + (r"en?(?:g(?:lish)?)?|ing(?:l[eéê]s)?", "English"), # Must be at end, matches just an 'e' ] genres = [ - ("Sci-?Fi", "Sci-Fi"), - ("Drama", "Drama"), - ("Comedy", "Comedy"), - ("West(?:\.|ern)?", "Western"), - ("Action", "Action"), - ("Adventure", "Adventure"), - ("Thriller", "Thriller"), + (r"Sci-?Fi", "Sci-Fi"), + (r"Drama", "Drama"), + (r"Comedy", "Comedy"), + (r"West(?:\.|ern)?", "Western"), + (r"Action", "Action"), + (r"Adventure", "Adventure"), + (r"Thriller", "Thriller"), ] # Match strings like "complete series" for tv seasons/series, matching within the final title string. @@ -101,37 +103,29 @@ "internal": [], "limited": [], "proper": [], - "extended": [r"(EXTENDED{d}(?!(?:CUT|EDITIONS?)))".format(d=delimiters)], + "extended": [rf"(EXTENDED{delimiters}(?!(?:CUT|EDITIONS?)))"], } channels = [(1, 0), (2, 0), (5, 0), (5, 1), (6, 1), (7, 1)] # Return tuple with regexes for audio name with appended channel types, and without any channels -def get_channel_audio_options(patterns_with_names): +def get_channel_audio_options(patterns_with_names: List[Tuple[str, str]]) -> List[Tuple[str, str]]: options = [] - for (audio_pattern, name) in patterns_with_names: - for (speakers, subwoofers) in channels: + for audio_pattern, name in patterns_with_names: + for speakers, subwoofers in channels: options.append( ( - "((?:{}){}*{}[. \-]?{}(?:ch)?)".format( - audio_pattern, delimiters, speakers, subwoofers - ), - "{} {}.{}".format(name, speakers, subwoofers), + rf"((?:{audio_pattern}){delimiters}*{speakers}[. 
\-]?{subwoofers}(?:ch)?)", + f"{name} {speakers}.{subwoofers}", ) ) - options.append( - ("({})".format(audio_pattern), name) - ) # After for loop, would match first - + options.append((rf"({audio_pattern})", name)) # After for loop, would match first return options -def prefix_pattern_with(prefixes, pattern_options, between="", optional=False): - if optional: - optional_char = "?" - else: - optional_char = "" +def prefix_pattern_with(prefixes: Union[str, List[str]], pattern_options: Union[str, List[Union[str, Tuple]]], between: str = "", optional: bool = False) -> List[Union[str, Tuple]]: + optional_char = "?" if optional else "" options = [] if not isinstance(prefixes, list): prefixes = [prefixes] @@ -141,28 +135,19 @@ def prefix_pattern_with(prefixes, pattern_options, between="", optional=False): for pattern_option in pattern_options: if isinstance(pattern_option, str): options.append( - "(?:{}){}(?:{})?({})".format( - prefix, optional_char, between, pattern_option - ) + rf"(?:{prefix}){optional_char}(?:{between})?({pattern_option})" ) else: options.append( ( - "(?:{}){}(?:{})?({})".format( - prefix, optional_char, between, pattern_option[0] - ), - ) - + pattern_option[1:] + rf"(?:{prefix}){optional_char}(?:{between})?({pattern_option[0]})", + ) + pattern_option[1:] ) - return options -def suffix_pattern_with(suffixes, pattern_options, between="", optional=False): - if optional: - optional_char = "?" - else: - optional_char = "" +def suffix_pattern_with(suffixes: Union[str, List[str]], pattern_options: Union[str, List[Union[str, Tuple]]], between: str = "", optional: bool = False) -> List[Union[str, Tuple]]: + optional_char = "?" 
if optional else "" options = [] if not isinstance(suffixes, list): suffixes = [suffixes] @@ -173,36 +158,19 @@ def suffix_pattern_with(suffixes, pattern_options, between="", optional=False): if isinstance(pattern_option, tuple): options.append( ( - "({})(?:{})?(?:{}){}".format( - pattern_option[0], between, suffix, optional_char - ), - ) - + pattern_option[1:] + rf"({pattern_option[0]})(?:{between})?(?:{suffix}){optional_char}", + ) + pattern_option[1:] ) else: options.append( - "({})(?:{})?(?:{}){}".format( - pattern_option, between, suffix, optional_char - ) + rf"({pattern_option})(?:{between})?(?:{suffix}){optional_char}" ) - return options -# Link a regex-tuple list into a single regex (to be able to use elsewhere while -# maintaining standardisation functionality). -def link_patterns(pattern_options): +def link_patterns(pattern_options: Union[str, List[Union[str, Tuple]]]) -> str: if not isinstance(pattern_options, list): return pattern_options return ( - "(?:" - + "|".join( - [ - pattern_option[0] - if isinstance(pattern_option, tuple) - else pattern_option - for pattern_option in pattern_options - ] - ) - + ")" + rf"(?:{'|'.join([pattern_option[0] if isinstance(pattern_option, tuple) else pattern_option for pattern_option in pattern_options])})" ) diff --git a/PTN/parse.py b/PTN/parse.py index 5a25ebd..5816fc1 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -1,30 +1,40 @@ #!/usr/bin/env python -from . 
import re -from .extras import exceptions, genres, langs, link_patterns, patterns_ignore_title -from .patterns import delimiters, patterns, patterns_ordered, types, patterns_allow_overlap +import re +from typing import Dict, List, Tuple, Union, Optional, Any +from .extras import ( + delimiters, + langs, + genres, + exceptions, + complete_series, + patterns_ignore_title, + get_channel_audio_options, + prefix_pattern_with, + suffix_pattern_with, + link_patterns, +) +from .patterns import patterns, patterns_ordered, types, patterns_allow_overlap from .post import post_processing_after_excess, post_processing_before_excess -class PTN(object): +class PTN: def __init__(self): self.torrent_name = None - self.parts = None - self.part_slices = None - self.match_slices = None - self.standardise = None - self.coherent_types = None + self.parts: Dict[str, Union[str, int, List[int], bool]] = {} + self.part_slices: Dict[str, Tuple[int, int]] = {} + self.match_slices: List[Tuple[int, int]] = [] + self.standardise = False + self.coherent_types = False + self.post_title_pattern = self._generate_post_title_pattern() - self.post_title_pattern = "(?:{}|{}|720p|1080p)".format( - link_patterns(patterns["seasons"]), link_patterns(patterns["year"]) - ) + def _generate_post_title_pattern(self) -> str: + return f"(?:{link_patterns(patterns['seasons'])}|{link_patterns(patterns['year'])}|720p|1080p)" - def _part(self, name, match_slice, clean, overwrite=False): + def _part(self, name: str, match_slice: Optional[Tuple[int, int]], clean: Union[str, int, List[int], bool], overwrite: bool = False) -> None: if overwrite or name not in self.parts: - if self.coherent_types: - if name not in ["title", "episodeName"] and not isinstance(clean, bool): - if not isinstance(clean, list): - clean = [clean] - + if self.coherent_types and name not in ["title", "episodeName"] and not isinstance(clean, bool): + if not isinstance(clean, list): + clean = [clean] self.parts[name] = clean self.part_slices[name] = 
match_slice @@ -34,8 +44,7 @@ def _part(self, name, match_slice, clean, overwrite=False): @staticmethod def _clean_dots(string: str) -> str: - if string.find(" ") == -1 and string.find(".") != -1: - # 4 dots likely means we want an ellipsis and a space + if ' ' not in string and '.' in string: string = re.sub(r"\.{4,}", "... ", string) # Replace any instances of less than 3 dots with a space @@ -44,7 +53,7 @@ def _clean_dots(string: str) -> str: string = re.sub(r"(? str: clean = re.sub(r"^( -|\(|\[)", "", string) clean = self._clean_dots(clean) @@ -56,77 +65,17 @@ def _clean_string(self, string): clean = self._clean_dots(clean).strip() return clean - def parse(self, name, standardise, coherent_types): - name = name.strip() + def parse(self, name: str, standardise: bool, coherent_types: bool) -> Dict[str, Union[str, int, List[int], bool]]: + self.torrent_name = name.strip() self.parts = {} self.part_slices = {} - self.torrent_name = name self.match_slices = [] self.standardise = standardise self.coherent_types = coherent_types - for key, pattern_options in [(key, patterns[key]) for key in patterns_ordered]: - pattern_options = self.normalise_pattern_options(pattern_options) - - for (pattern, replace, transforms) in pattern_options: - if key not in ("seasons", "episodes", "site", "languages", "genres"): - pattern = r"\b(?:{})\b".format(pattern) - - clean_name = re.sub(r"_", " ", self.torrent_name) - matches = self.get_matches(pattern, clean_name, key) - - if not matches: - continue - - # With multiple matches, we will usually want to use the first match. - # For 'year', we instead use the last instance of a year match since, - # if a title includes a year, we don't want to use this for the year field. 
- match_index = 0 - if key == "year": - match_index = -1 - - match = matches[match_index]["match"] - match_start, match_end = ( - matches[match_index]["start"], - matches[match_index]["end"], - ) - if ( - key in self.parts - ): # We can skip ahead if we already have a matched part - self._part(key, (match_start, match_end), None, overwrite=False) - continue - - index = self.get_match_indexes(match) - - if key in ("seasons", "episodes"): - clean = self.get_season_episode(match) - elif key == "subtitles": - clean = self.get_subtitles(match) - elif key in ("languages", "genres"): - clean = self.split_multi(match) - elif key in types.keys() and types[key] == "boolean": - clean = True - else: - clean = match[index["clean"]] - if key in types.keys() and types[key] == "integer": - clean = int(clean) - - if self.standardise: - clean = self.standardise_clean(clean, key, replace, transforms) - - part_overlaps = False - for part, part_slices in self.part_slices.items(): - if part not in patterns_allow_overlap: - # Strict smaller/larger than since punctuation can overlap. - if ( - (part_slices[0] < match_start < part_slices[1]) - or (part_slices[0] < match_end < part_slices[1]) - ): - part_overlaps = True - break - - if not part_overlaps: - self._part(key, (match_start, match_end), clean) + for key in patterns_ordered: + pattern_options = self.normalise_pattern_options(patterns[key]) + self._apply_patterns(key, pattern_options) self.process_title() self.fix_known_exceptions() @@ -135,7 +84,6 @@ def parse(self, name, standardise, coherent_types): for f in post_processing_before_excess: unmatched = f(self, unmatched) - # clean_unmatched() depends on the before_excess methods adding more match slices. 
cleaned_unmatched = self.clean_unmatched() if cleaned_unmatched: self._part("excess", None, cleaned_unmatched) @@ -145,9 +93,58 @@ def parse(self, name, standardise, coherent_types): return self.parts + def _apply_patterns(self, key: str, pattern_options: List[Tuple[str, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]) -> None: + for pattern, replace, transforms in pattern_options: + if key not in ("seasons", "episodes", "site", "languages", "genres"): + pattern = rf"\b(?:{pattern})\b" + + clean_name = re.sub(r"_", " ", self.torrent_name) + matches = self.get_matches(pattern, clean_name, key) + + if not matches: + continue + + # With multiple matches, we will usually want to use the first match. + # For 'year', we instead use the last instance of a year match since, + # if a title includes a year, we don't want to use this for the year field. + match_index = -1 if key == "year" else 0 + match = matches[match_index]["match"] + match_start, match_end = matches[match_index]["start"], matches[match_index]["end"] + + if key in self.parts: # We can skip ahead if we already have a matched part + self._part(key, (match_start, match_end), None, overwrite=False) + continue + + index = self.get_match_indexes(match) + + if key in ("seasons", "episodes"): + clean = self.get_season_episode(match) + elif key == "subtitles": + clean = self.get_subtitles(match) + elif key in ("languages", "genres"): + clean = self.split_multi(match) + elif key in types and types[key] == "boolean": + clean = True + else: + clean = match[index["clean"]] + if key in types and types[key] == "integer": + clean = int(clean) + + if self.standardise: + clean = self.standardise_clean(clean, key, replace, transforms) + + part_overlaps = any( + self._is_overlap(part_slice, (match_start, match_end)) + for part, part_slice in self.part_slices.items() + if part not in patterns_allow_overlap + ) + + if not part_overlaps: + self._part(key, (match_start, match_end), clean) + # Handles all the 
optional/missing tuple elements into a consistent list. @staticmethod - def normalise_pattern_options(pattern_options): + def normalise_pattern_options(pattern_options: Union[str, Tuple, List[Union[str, Tuple]]]) -> List[Tuple[str, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]: pattern_options_norm = [] if isinstance(pattern_options, tuple): @@ -171,10 +168,9 @@ def normalise_pattern_options(pattern_options): else: pattern_options_norm.append((options, None, None)) - pattern_options = pattern_options_norm - return pattern_options + return pattern_options_norm - def get_matches(self, pattern, clean_name, key): + def get_matches(self, pattern: str, clean_name: str, key: str) -> List[Dict[str, Union[str, int]]]: grouped_matches = [] matches = list(re.finditer(pattern, clean_name, re.IGNORECASE)) for m in matches: @@ -198,7 +194,7 @@ def get_matches(self, pattern, clean_name, key): # Only use part of the torrent name after the (guessed) title (split at a season or year) # to avoid matching certain patterns that could show up in a release title. 
- def ignore_before_index(self, clean_name, key): + def ignore_before_index(self, clean_name: str, key: str) -> int: match = None if key in patterns_ignore_title: patterns_ignored = patterns_ignore_title[key] @@ -207,16 +203,14 @@ def ignore_before_index(self, clean_name, key): else: for ignore_pattern in patterns_ignored: if re.findall(ignore_pattern, clean_name, re.IGNORECASE): - match = re.search( - self.post_title_pattern, clean_name, re.IGNORECASE - ) + match = re.search(self.post_title_pattern, clean_name, re.IGNORECASE) if match: return match.start() return 0 @staticmethod - def get_match_indexes(match): + def get_match_indexes(match: List[str]) -> Dict[str, int]: index = {"raw": 0, "clean": 0} if len(match) > 1: @@ -230,30 +224,25 @@ def get_match_indexes(match): return index @staticmethod - def get_season_episode(match): - clean = None + def get_season_episode(match: List[str]) -> Optional[List[int]]: m = re.findall(r"[0-9]+", match[0]) if m and len(m) > 1: - clean = list(range(int(m[0]), int(m[-1]) + 1)) - # This elif exists entirely for the Seasons 1, 2, 3, 4, etc. case. No other regex gives a number in match[1]. + return list(range(int(m[0]), int(m[-1]) + 1)) elif len(match) > 1 and match[1] and m: - clean = list(range(int(m[0]), int(match[1]) + 1)) + return list(range(int(m[0]), int(match[1]) + 1)) elif m: - clean = [int(m[0])] - - return clean + return [int(m[0])] + return None @staticmethod - def split_multi(match): - m = re.split(r"{}+".format(delimiters), match[0]) - clean = list(filter(None, m)) - - return clean + def split_multi(match: List[str]) -> List[str]: + m = re.split(rf"{delimiters}+", match[0]) + return list(filter(None, m)) @staticmethod - def get_subtitles(match): + def get_subtitles(match: List[str]) -> List[str]: # handle multi subtitles - m = re.split(r"{}+".format(delimiters), match[0]) + m = re.split(rf"{delimiters}+", match[0]) m = list(filter(None, m)) clean = [] # If it's only 1 result, it's fine if it's just 'subs'. 
@@ -263,18 +252,15 @@ def get_subtitles(match): for x in m: if not re.match("subs?|soft", x, re.I): clean.append(x) - return clean - def standardise_clean(self, clean, key, replace, transforms): + def standardise_clean(self, clean: Union[str, List[str]], key: str, replace: Optional[str], transforms: Optional[Union[str, List[Tuple[str, List[Any]]]]]) -> Union[str, List[str]]: if replace: clean = replace if transforms: for transform in filter(lambda t: t[0], transforms): - # For python2 compatibility, we're not able to simply pass functions as str.upper - # means different things in 2.7 and 3.5. clean = getattr(clean, transform[0])(*transform[1]) - if key == "languages" or key == "subtitles": + if key in ["languages", "subtitles"]: clean = self.standardise_languages(clean) if not clean: clean = "Available" @@ -283,27 +269,20 @@ def standardise_clean(self, clean, key, replace, transforms): return clean @staticmethod - def standardise_languages(clean): + def standardise_languages(clean: List[str]) -> List[str]: cleaned_langs = [] for lang in clean: - for (lang_regex, lang_clean) in langs: - if re.match( - lang_regex, - re.sub( - link_patterns(patterns["subtitles"][-2:]), "", lang, flags=re.I - ), - re.IGNORECASE, - ): + for lang_regex, lang_clean in langs: + if re.match(lang_regex, re.sub(link_patterns(patterns["subtitles"][-2:]), "", lang, flags=re.I), re.IGNORECASE): cleaned_langs.append(lang_clean) break - clean = cleaned_langs - return clean + return cleaned_langs @staticmethod - def standardise_genres(clean): + def standardise_genres(clean: List[str]) -> List[str]: standard_genres = [] for genre in clean: - for (regex, clean) in genres: + for regex, clean in genres: if re.match(regex, genre, re.IGNORECASE): standard_genres.append(clean) break @@ -311,25 +290,23 @@ def standardise_genres(clean): # Merge all the match slices (such as when they overlap), then remove # them from excess. 
- def merge_match_slices(self): + def merge_match_slices(self) -> None: matches = sorted(self.match_slices, key=lambda match: match[0]) - - i = 0 slices = [] + i = 0 while i < len(matches): start, end = matches[i] i += 1 - for (next_start, next_end) in matches[i:]: + for next_start, next_end in matches[i:]: if next_start <= end: end = max(end, next_end) i += 1 else: break slices.append((start, end)) - self.match_slices = slices - def process_title(self): + def process_title(self) -> None: unmatched = self.unmatched_list(keep_punctuation=False) # Use the first one as the title @@ -339,11 +316,7 @@ def process_title(self): # If our unmatched is after the first 3 matches, we assume the title is missing # (or more likely got parsed as something else), as no torrents have it that # far away from the beginning of the release title. - if ( - len(self.part_slices) > 3 - and title_start - > sorted(self.part_slices.values(), key=lambda s: s[0])[3][0] - ): + if len(self.part_slices) > 3 and title_start > sorted(self.part_slices.values(), key=lambda s: s[0])[3][0]: self._part("title", None, "") raw = self.torrent_name[title_start:title_end] @@ -367,57 +340,46 @@ def process_title(self): else: self._part("title", None, "") - def unmatched_list(self, keep_punctuation=True): + def unmatched_list(self, keep_punctuation: bool = True) -> List[Tuple[int, int]]: self.merge_match_slices() unmatched = [] prev_start = 0 # A default so the last append won't crash if nothing has matched end = len(self.torrent_name) # Find all unmatched strings that aren't just punctuation - for (start, end) in self.match_slices: - if keep_punctuation or not re.match( - delimiters + r"*\Z", self.torrent_name[prev_start:start] - ): + for start, end in self.match_slices: + if keep_punctuation or not re.match(rf"{delimiters}*\Z", self.torrent_name[prev_start:start]): unmatched.append((prev_start, start)) prev_start = end # Add the last unmatched slice - if keep_punctuation or not re.match( - delimiters + 
r"*\Z", self.torrent_name[end:] - ): + if keep_punctuation or not re.match(rf"{delimiters}*\Z", self.torrent_name[end:]): unmatched.append((end, len(self.torrent_name))) # If nothing matched, assume the whole thing is the title if not self.match_slices: unmatched.append((0, len(self.torrent_name))) - return unmatched - def fix_known_exceptions(self): + def fix_known_exceptions(self) -> None: # Considerations for results that are known to cause issues, such # as media with years in them but without a release year. for exception in exceptions: incorrect_key, incorrect_value = exception["incorrect_parse"] - if ( - self.parts["title"] == exception["parsed_title"] - and incorrect_key in self.parts - ): - if self.parts[incorrect_key] == incorrect_value or ( - self.coherent_types and incorrect_value in self.parts[incorrect_key] - ): + if self.parts["title"] == exception["parsed_title"] and incorrect_key in self.parts: + if self.parts[incorrect_key] == incorrect_value or (self.coherent_types and incorrect_value in self.parts[incorrect_key]): self.parts.pop(incorrect_key) self._part("title", None, exception["actual_title"], overwrite=True) - def get_unmatched(self): + def get_unmatched(self) -> str: unmatched = "" - for (start, end) in self.unmatched_list(): + for start, end in self.unmatched_list(): unmatched += self.torrent_name[start:end] - return unmatched - def clean_unmatched(self): + def clean_unmatched(self) -> List[str]: unmatched = [] - for (start, end) in self.unmatched_list(): + for start, end in self.unmatched_list(): unmatched.append(self.torrent_name[start:end]) unmatched_clean = [] @@ -429,21 +391,21 @@ def clean_unmatched(self): filtered = [] for extra in unmatched_clean: # re.fullmatch() is not available in python 2.7, so we manually do it with \Z. 
- if not re.match( - r"(?:Complete|Season|Full)?[\]\[,.+\- ]*(?:Complete|Season|Full)?\Z", - extra, - re.IGNORECASE, - ): + if not re.match(rf"(?:Complete|Season|Full)?[\]\[,.+\- ]*(?:Complete|Season|Full)?\Z", extra, re.IGNORECASE): filtered.append(extra) return filtered @staticmethod - def clean_title(raw_title): - cleaned_title = raw_title - cleaned_title = cleaned_title.replace(r"[[(]movie[)\]]", "") # clear movie indication flag + def clean_title(raw_title: str) -> str: + cleaned_title = raw_title.replace(r"[[(]movie[)\]]", "") # clear movie indication flag cleaned_title = re.sub(patterns["RUSSIAN_CAST_REGEX"], " ", cleaned_title) # clear russian cast information cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_START"], r"\1", cleaned_title) # remove release group markings sections from the start cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_END"], r"\1", cleaned_title) # remove unneeded markings section at the end if present cleaned_title = re.sub(patterns["ALT_TITLES_REGEX"], "", cleaned_title) # remove alt language titles cleaned_title = re.sub(patterns["NOT_ONLY_NON_ENGLISH_REGEX"], "", cleaned_title) # remove non english chars if they are not the only ones left return cleaned_title + + @staticmethod + def _is_overlap(part_slice: Tuple[int, int], match_slice: Tuple[int, int]) -> bool: + # Strict smaller/larger than since punctuation can overlap. + return (part_slice[0] < match_slice[0] < part_slice[1]) or (part_slice[0] < match_slice[1] < part_slice[1]) diff --git a/PTN/patterns.py b/PTN/patterns.py index fa96a67..b5be9ba 100644 --- a/PTN/patterns.py +++ b/PTN/patterns.py @@ -1,13 +1,5 @@ #!/usr/bin/env python -# Patterns are either just a regex, or a tuple (or list of tuples) that contain the regex -# to match, (optional) what it should be replaced with (None if to not replace), and -# (optional) a string function's name to transform the value after everything (None if -# to do nothing). 
The transform can also be a tuple (or list of tuples) with function names -# and list of arguments. -# The list of regexes all get matched, but only the first gets added to the returning info, -# the rest are just matched to be removed from `excess`. - from .extras import ( delimiters, genres, @@ -17,29 +9,24 @@ suffix_pattern_with, ) +# Patterns are either just a regex, or a tuple (or list of tuples) that contain the regex +# to match, (optional) what it should be replaced with (None if to not replace), and +# (optional) a string function's name to transform the value after everything (None if +# to do nothing). The transform can also be a tuple (or list of tuples) with function names +# and list of arguments. +# The list of regexes all get matched, but only the first gets added to the returning info, +# the rest are just matched to be removed from `excess`. + + season_range_pattern = ( - "(?:Complete" - + delimiters - + "*)?" - + delimiters - + "*(?:s(?:easons?)?)" - + delimiters - + "*(?:s?[0-9]{1,2}[\s]*(?:(?:\-|(?:\s*to\s*))[\s]*s?[0-9]{1,2}))(?:" - + delimiters - + "*Complete)?" + rf"(?:Complete{delimiters}*)?{delimiters}*(?:s(?:easons?)?){delimiters}*(?:s?[0-9]{{1,2}}[\s]*(?:(?:\-|(?:\s*to\s*))[\s]*s?[0-9]{{1,2}}))(?:{delimiters}*Complete)?" 
) -year_pattern = "(?:19[0-9]|20[0-2])[0-9]" -month_pattern = "0[1-9]|1[0-2]" -day_pattern = "[0-2][0-9]|3[01]" +year_pattern = r"(?:19[0-9]|20[0-2])[0-9]" +month_pattern = r"0[1-9]|1[0-2]" +day_pattern = r"[0-2][0-9]|3[01]" -episode_name_pattern = ( - "((?:[Pp](?:ar)?t" - + delimiters - + "*[0-9]|(?:[A-Za-z]|[0-9])[a-z]*(?:" - + delimiters - + "|$))+)" -) +episode_name_pattern = rf"((?:[Pp](?:ar)?t{delimiters}*[0-9]|(?:[A-Za-z]|[0-9])[a-z]*(?:{delimiters}|$))+)" pre_website_encoder_pattern = r"[^\s\.\[\]\-\(\)]+\)\s*\[[^\s\-]+\]|[^\s\.\[\]\-\(\)]+\s*(?:-\s)?[^\s\.\[\]\-]+$" # Forces an order to go by the regexes, as we want this to be deterministic (different @@ -94,7 +81,7 @@ "episodes", "languages", "subtitles", - "sbs" + "sbs", ] patterns = {} @@ -103,86 +90,70 @@ # Very specific as it could match too liberally r"\s\-\s\d{1,3}\s", r"\b[0-9]{1,2}x([0-9]{2})\b", - r"\bepisod(?:e|io)" + delimiters + r"\d{1,2}\b", + rf"\bepisod(?:e|io){delimiters}\d{{1,2}}\b", ] # If adding seasons patterns, remember to look at episodes, as it uses the last few! patterns["seasons"] = [ - r"\b(?:Seasons?)" - + delimiters - + r"(\d{1,2})" + "(?:(?:" + delimiters + r"|&|and|to){1,3}(\d{1,2})){2,}\b", + rf"\b(?:Seasons?){delimiters}(\d{{1,2}})(?:(?:{delimiters}|&|and|to){{1,3}}(\d{{1,2}})){{2,}}\b", r"\ss?(\d{1,2})\s\-\s\d{1,2}\s", # Avoids matching some anime releases seasons and episodes as a season range - r"\b" + season_range_pattern + r"\b", # Describes season ranges + rf"\b{season_range_pattern}\b", # Describes season ranges r"(?:s\d{1,2}[.+\s]*){2,}\b", # for S01.S02.etc. 
patterns # Describes season, optionally with complete or episodes - r"\b(?:Complete" - + delimiters - + ")?s([0-9]{1,2})" - + link_patterns(patterns["episodes"]) - + r"?\b", + rf"\b(?:Complete{delimiters})?s([0-9]{{1,2}}){link_patterns(patterns['episodes'])}?\b", r"\b([0-9]{1,2})x[0-9]{2}\b", # Describes 5x02, 12x15 type descriptions - "[0-9]{1,2}(?:st|nd|rd|th)" + delimiters + "season", - "Series" + delimiters + "\d{1,2}", - r"\b(?:Complete" - + delimiters - + r")?Season[\. -][0-9]{1,2}\b", # Describes Season.15 type descriptions + rf"[0-9]{{1,2}}(?:st|nd|rd|th){delimiters}season", + rf"Series{delimiters}\d{{1,2}}", + rf"\b(?:Complete{delimiters})?Season[\. -][0-9]{{1,2}}\b", # Describes Season.15 type descriptions ] # The first 4 season regexes won't have 'Part' in them. patterns["episodes"] += [ - link_patterns(patterns["seasons"][6:]) - + delimiters - + "*P(?:ar)?t" - + delimiters - + "*(\d{1,3})" + rf"{link_patterns(patterns['seasons'][6:])}{delimiters}*P(?:ar)?t{delimiters}*(\d{{1,3}})", ] patterns["year"] = year_pattern -patterns["month"] = "(?:{year}){d}({month}){d}(?:{day})".format( - d=delimiters, year=year_pattern, month=month_pattern, day=day_pattern -) -patterns["day"] = "(?:{year}){d}(?:{month}){d}({day})".format( - d=delimiters, year=year_pattern, month=month_pattern, day=day_pattern -) +patterns["month"] = rf"(?:{year_pattern}){delimiters}({month_pattern}){delimiters}(?:{day_pattern})" +patterns["day"] = rf"(?:{year_pattern}){delimiters}(?:{month_pattern}){delimiters}({day_pattern})" # resolution pattern according to https://ihax.io/display-resolution-explained/ and GPT4. # order from highest to lowest due to some torrent name having '4K HD' in them but its technically 4K. patterns["resolution"] = [ (r"([0-9]{3,4}(?:p|i))", None, "lower"), # Generic pattern for resolutions like 480p, 720p, 1080p, etc. 
- (r"(8K|7680{d}?x{d}?4320p?)".format(d=delimiters), "8K"), # Pattern for 8K - (r"(5K|5120{d}?x{d}?2880p?)".format(d=delimiters), "5K"), # Pattern for 5K - (r"(4K UHD|UHD|3840{d}?x{d}?2160p?)".format(d=delimiters), "2160p"), # Pattern for 4K UHD / 2160p - (r"(4K|4096{d}?x{d}?2160p?)".format(d=delimiters), "4K"), # Pattern for 4K / Cinema 4K - (r"(QHD|QuadHD|WQHD|2560{d}?x{d}?1440p?)".format(d=delimiters), "1440p"), # Pattern for QHD / 1440p - (r"(2K|2048{d}?x{d}?1080p?)".format(d=delimiters), "2K"), # Pattern for 2K - (r"(Full HD|FHD|1920{d}?x{d}?1080p?)".format(d=delimiters), "1080p"), # Pattern for Full HD / 1080p - (r"(HD|1280{d}?x{d}?720p?)".format(d=delimiters), "720p"), # Pattern for HD / 720p + (rf"(8K|7680{delimiters}?x{delimiters}?4320p?)", "8K"), # Pattern for 8K + (rf"(5K|5120{delimiters}?x{delimiters}?2880p?)", "5K"), # Pattern for 5K + (rf"(4K UHD|UHD|3840{delimiters}?x{delimiters}?2160p?)", "2160p"), # Pattern for 4K UHD / 2160p + (rf"(4K|4096{delimiters}?x{delimiters}?2160p?)", "4K"), # Pattern for 4K / Cinema 4K + (rf"(QHD|QuadHD|WQHD|2560{delimiters}?x{delimiters}?1440p?)", "1440p"), # Pattern for QHD / 1440p + (rf"(2K|2048{delimiters}?x{delimiters}?1080p?)", "2K"), # Pattern for 2K + (rf"(Full HD|FHD|1920{delimiters}?x{delimiters}?1080p?)", "1080p"), # Pattern for Full HD / 1080p + (rf"(HD|1280{delimiters}?x{delimiters}?720p?)", "720p"), # Pattern for HD / 720p (r"(qHD)", "540p"), # Pattern for quarter High Definition (r"(SD)", "480p"), # Pattern for Standard Definition ] patterns["quality"] = [ - ("WEB[ -\.]?DL(?:Rip|Mux)?|HDRip", "WEB-DL"), + (r"WEB[ -\.]?DL(?:Rip|Mux)?|HDRip", "WEB-DL"), # Match WEB-DL's first as they can show up with others. 
- ("WEB[ -]?Cap", "WEBCap"), - ("W[EB]B[ -]?(?:Rip)|WEB", "WEBRip"), - ("(?:HD)?CAM(?:-?Rip)?", "Cam"), - ("(?:HD)?TS|TELESYNC|PDVD|PreDVDRip", "Telesync"), - ("WP|WORKPRINT", "Workprint"), - ("(?:HD)?TC|TELECINE", "Telecine"), - ("(?:DVD)?SCR(?:EENER)?|BDSCR", "Screener"), - ("DDC", "Digital Distribution Copy"), - ("DVD-?(?:Rip|Mux)", "DVD-Rip"), - ("DVDR|DVD-Full|Full-rip", "DVD-R"), - ("PDTV|DVBRip", "PDTV"), - ("DSR(?:ip)?|SATRip|DTHRip", "DSRip"), - ("AHDTV(?:Mux)?", "AHDTV"), - ("HDTV(?:Rip)?", "HDTV"), - ("D?TVRip|DVBRip", "TVRip"), - ("VODR(?:ip)?", "VODRip"), - ("HD-Rip", "HD-Rip"), - ("Blu-?Ray{d}Rip|BDR(?:ip)?".format(d=delimiters), "BDRip"), - ("Blu-?Ray|(?:US|JP)?BD(?:remux)?", "Blu-ray"), - ("BR-?Rip", "BRRip"), - ("HDDVD", "HD DVD"), + (r"WEB[ -]?Cap", "WEBCap"), + (r"W[EB]B[ -]?(?:Rip)|WEB", "WEBRip"), + (r"(?:HD)?CAM(?:-?Rip)?", "Cam"), + (r"(?:HD)?TS|TELESYNC|PDVD|PreDVDRip", "Telesync"), + (r"WP|WORKPRINT", "Workprint"), + (r"(?:HD)?TC|TELECINE", "Telecine"), + (r"(?:DVD)?SCR(?:EENER)?|BDSCR", "Screener"), + (r"DDC", "Digital Distribution Copy"), + (r"DVD-?(?:Rip|Mux)", "DVD-Rip"), + (r"DVDR|DVD-Full|Full-rip", "DVD-R"), + (r"PDTV|DVBRip", "PDTV"), + (r"DSR(?:ip)?|SATRip|DTHRip", "DSRip"), + (r"AHDTV(?:Mux)?", "AHDTV"), + (r"HDTV(?:Rip)?", "HDTV"), + (r"D?TVRip|DVBRip", "TVRip"), + (r"VODR(?:ip)?", "VODRip"), + (r"HD-Rip", "HD-Rip"), + (rf"Blu-?Ray{delimiters}Rip|BDR(?:ip)?", "BDRip"), + (r"Blu-?Ray|(?:US|JP)?BD(?:remux)?", "Blu-ray"), + (r"BR-?Rip", "BRRip"), + (r"HDDVD", "HD DVD"), # Match this last as it can show up with others. 
- ("PPV(?:Rip)?", "Pay-Per-View Rip"), + (r"PPV(?:Rip)?", "Pay-Per-View Rip"), ] patterns["network"] = [ ("ATVP", "Apple TV+"), @@ -229,160 +200,127 @@ ("Hallmark", "Hallmark"), ("Sony\s?LIV", "SONY LIV"), ] + patterns["codec"] = [ - ("xvid", "Xvid"), - ("av1", "AV1"), - ("[hx]{d}?264".format(d=delimiters), "H.264"), - ("AVC", "H.264"), - ("HEVC(?:{d}Main{d}?10P?)".format(d=delimiters), "H.265 Main 10"), - ( - "[hx]{d}?265".format(d=delimiters), - "H.265", - ), # Separate from HEVC so if both are present, it won't pollute excess. - ("HEVC", "H.265"), - ("[h]{d}?263".format(d=delimiters), "H.263"), - ("VC-1", "VC-1"), - ("MPEG{d}?2".format(d=delimiters), "MPEG-2"), + (r"xvid", "Xvid"), + (r"av1", "AV1"), + (rf"[hx]{delimiters}?264", "H.264"), + (r"AVC", "H.264"), + (rf"HEVC(?:{delimiters}Main{delimiters}?10P?)", "H.265 Main 10"), + (rf"[hx]{delimiters}?265", "H.265"), # Separate from HEVC so if both are present, it won't pollute excess. + (r"HEVC", "H.265"), + (rf"[h]{delimiters}?263", "H.263"), + (r"VC-1", "VC-1"), + (rf"MPEG{delimiters}?2", "MPEG-2"), ] + patterns["audio"] = get_channel_audio_options( [ - ("TrueHD", "Dolby TrueHD"), - ("Atmos", "Dolby Atmos"), - ("DD{d}?EX".format(d=delimiters), "Dolby Digital EX"), - ("DD|AC{d}?3|DolbyD".format(d=delimiters), "Dolby Digital"), - ("DDP|E{d}?AC{d}?3|EC{d}?3".format(d=delimiters), "Dolby Digital Plus"), - ( - "DTS{d}?HD(?:{d}?(?:MA|Masters?(?:{d}Audio)?))".format(d=delimiters), - "DTS-HD MA", - ), - ("DTSMA", "DTS-HD MA"), - ("DTS{d}?HD".format(d=delimiters), "DTS-HD"), - ("DTS{d}?ES".format(d=delimiters), "DTS-ES"), - ("DTS{d}?EX".format(d=delimiters), "DTS-EX"), - ("DTS{d}?X".format(d=delimiters), "DTS:X"), - ("DTS", "DTS"), - ("HE{d}?AAC".format(d=delimiters), "HE-AAC"), - ("HE{d}?AACv2".format(d=delimiters), "HE-AAC v2"), - ("AAC{d}?LC".format(d=delimiters), "AAC-LC"), - ("AAC", "AAC"), - ("Dual{d}Audios?".format(d=delimiters), "Dual"), - ("Custom{d}Audios?".format(d=delimiters), "Custom"), - ("FLAC", "FLAC"), - 
("OGG", "OGG"), + (r"TrueHD", "Dolby TrueHD"), + (r"Atmos", "Dolby Atmos"), + (rf"DD{delimiters}?EX", "Dolby Digital EX"), + (rf"DD|AC{delimiters}?3|DolbyD", "Dolby Digital"), + (rf"DDP|E{delimiters}?AC{delimiters}?3|EC{delimiters}?3", "Dolby Digital Plus"), + (rf"DTS{delimiters}?HD(?:{delimiters}?(?:MA|Masters?(?:{delimiters}Audio)?))", "DTS-HD MA"), + (r"DTSMA", "DTS-HD MA"), + (rf"DTS{delimiters}?HD", "DTS-HD"), + (rf"DTS{delimiters}?ES", "DTS-ES"), + (rf"DTS{delimiters}?EX", "DTS-EX"), + (rf"DTS{delimiters}?X", "DTS:X"), + (r"DTS", "DTS"), + (rf"HE{delimiters}?AAC", "HE-AAC"), + (rf"HE{delimiters}?AACv2", "HE-AAC v2"), + (rf"AAC{delimiters}?LC", "AAC-LC"), + (r"AAC", "AAC"), + (rf"Dual{delimiters}Audios?", "Dual"), + (rf"Custom{delimiters}Audios?", "Custom"), + (r"FLAC", "FLAC"), + (r"OGG", "OGG"), ] ) + [ - ("7.1(?:{d}?ch(?:annel)?(?:{d}?Audio)?)?".format(d=delimiters), "7.1"), - ("5.1(?:{d}?ch(?:annel)?(?:{d}?Audio)?)?".format(d=delimiters), "5.1"), - ("MP3", None, "upper"), - ("2.0(?:{d}?ch(?:annel)?(?:{d}?Audio)?)?|2CH|stereo".format(d=delimiters), "Dual"), - ("1{d}?Ch(?:annel)?(?:{d}?Audio)?".format(d=delimiters), "Mono"), - ("(?:Original|Org)" + delimiters + "Aud(?:io)?", "Original"), - ("LiNE", "LiNE"), + (rf"7.1(?:{delimiters}?ch(?:annel)?(?:{delimiters}?Audio)?)?", "7.1"), + (rf"5.1(?:{delimiters}?ch(?:annel)?(?:{delimiters}?Audio)?)?", "5.1"), + (r"MP3", None, "upper"), + (rf"2.0(?:{delimiters}?ch(?:annel)?(?:{delimiters}?Audio)?)?|2CH|stereo", "Dual"), + (rf"1{delimiters}?Ch(?:annel)?(?:{delimiters}?Audio)?", "Mono"), + (rf"(?:Original|Org){delimiters}Aud(?:io)?", "Original"), + (r"LiNE", "LiNE"), ] -patterns["region"] = ("R[0-9]", None, "upper") + +patterns["region"] = (r"R[0-9]", None, "upper") # If changing below, remember to change patterns_ignore_title (in extras.py) too. 
patterns["extended"] = [ - "(EXTENDED)", - "(EXTENDED{d}(?:(?:CUT|EDITIONS?)))".format(d=delimiters), - ] - + r"(EXTENDED)", + rf"(EXTENDED{delimiters}(?:(?:CUT|EDITIONS?)))", +] -patterns["hardcoded"] = "HC" -patterns["proper"] = "PROPER" -patterns["repack"] = "REPACK" -patterns["fps"] = "([1-9][0-9]{1,2})" + delimiters + "*fps" +patterns["hardcoded"] = r"HC" +patterns["proper"] = r"PROPER" +patterns["repack"] = r"REPACK" +patterns["fps"] = rf"([1-9][0-9]{{1,2}}){delimiters}*fps" patterns["filetype"] = [ (r"\.?(MKV|AVI|(?:SRT|SUB|SSA)$)", None, "upper"), - ("MP-?4", "MP4"), + (r"MP-?4", "MP4"), (r"\.?(iso)$", "ISO"), ] -patterns["widescreen"] = "WS" +patterns["widescreen"] = r"WS" # Valid the sites with strict URL rules and then other possible sites with more relaxed rules -patterns["site"] = [r"^(www\.[\w-]+\.[\w-]+)\s+-\s*", r"^((?:www\.)?[\w-]+\.[\w-]+(?:\.[\w-]+)?)\s+-\s*", r"^(\[ ?([^\]]+?)\s?\])"] +patterns["site"] = [ + r"^(www\.[\w-]+\.[\w-]+)\s+-\s*", + r"^((?:www\.)?[\w-]+\.[\w-]+(?:\.[\w-]+)?)\s+-\s*", + r"^(\[ ?([^\]]+?)\s?\])" +] lang_list_pattern = ( - r"\b(?:" - + link_patterns(langs) - + "(?:" - + delimiters - + "+(?:dub(?:bed)?|" - + link_patterns(patterns["audio"]) - + "))?" 
- + "(?:" - + delimiters - + r"+|\b))" + rf"\b(?:{link_patterns(langs)}(?:{delimiters}+(?:dub(?:bed)?|{link_patterns(patterns['audio'])}))?(?:{delimiters}+|\b))" ) -subs_list_pattern = r"(?:" + link_patterns(langs) + delimiters + "*)" +subs_list_pattern = rf"(?:{link_patterns(langs)}{delimiters}*)" patterns["subtitles"] = [ # Below must stay first, see patterns["languages"] - "sub(?:title|bed)?s?{d}*{langs}+".format(d=delimiters, langs=subs_list_pattern), - "(?:soft{d}*)?{langs}+(?:(?:m(?:ulti(?:ple)?)?{d}*)?sub(?:title|bed)?s?)".format( - d=delimiters, langs=subs_list_pattern - ), + rf"sub(?:title|bed)?s?{delimiters}*{subs_list_pattern}+", + rf"(?:soft{delimiters}*)?{subs_list_pattern}+(?:(?:m(?:ulti(?:ple)?)?{delimiters}*)?sub(?:title|bed)?s?)", ("VOSTFR", ["French"]), # The following are patterns just for the 'subs' strings. Add normal sub stuff above. # Need a pattern just for subs, and can't just make above regexes * over + as we want # just 'subs' to match last. # The second-last one must stay second-last, see patterns["languages"] - "(?:m(?:ulti(?:ple)?)?{d}*)sub(?:title|bed)?s?".format(d=delimiters), - "(?:m(?:ulti(?:ple)?)?[\.\s\-\+_\/]*)?sub(?:title|bed)?s?{d}*".format(d=delimiters), + rf"(?:m(?:ulti(?:ple)?)?{delimiters}*)sub(?:title|bed)?s?", + rf"(?:m(?:ulti(?:ple)?)?[\.\s\-\+_\/]*)?sub(?:title|bed)?s?{delimiters}*", ] # Language takes precedence over subs when ambiguous - if we have a lang match, and # then a subtitles match starting with subs, the first langs are languages, and the # rest will be left as subtitles. Otherwise, don't match if there are subtitles matches # after the langs. patterns["languages"] = [ - "(" - + lang_list_pattern - + "+)(?:" - + delimiters - + "*" - + patterns["subtitles"][0] - + ")", - "(" - + lang_list_pattern - + "+)" - + "" - + "(?!" 
- + delimiters - + "*" - + link_patterns(patterns["subtitles"]) - + ")", - "(" - + lang_list_pattern - + "+)(?:" - + delimiters - + "*" - + patterns["subtitles"][-2] - + ")", + rf"({lang_list_pattern}+)(?:{delimiters}*{patterns['subtitles'][0]})", + rf"({lang_list_pattern}+)(?!{delimiters}*{link_patterns(patterns['subtitles'])})", + rf"({lang_list_pattern}+)(?:{delimiters}*{patterns['subtitles'][-2]})", ] patterns["sbs"] = [("Half-SBS", "Half SBS"), ("SBS", None, "upper")] -patterns["unrated"] = "UNRATED" +patterns["unrated"] = r"UNRATED" patterns["size"] = ( - "\d+(?:\.\d+)?\s?(?:GB|MB)", + r"\d+(?:\.\d+)?\s?(?:GB|MB)", None, [("upper", []), ("replace", [" ", ""])], ) -patterns["bitDepth"] = "(8|10)-?bits?" -patterns["3d"] = "3D" -patterns["internal"] = "iNTERNAL" -patterns["readnfo"] = "READNFO" -patterns["hdr"] = "HDR(?:10)?" -patterns["documentary"] = "DOCU(?:menta?ry)?" -patterns["limited"] = "LIMITED" -patterns["remastered"] = "REMASTERED" -patterns["directorsCut"] = "DC|Director'?s.?Cut" -patterns["upscaled"] = "(?:AI{d}*)?upscaled?".format(d=delimiters) -patterns["untouched"] = "untouched" -patterns["remux"] = "REMUX" -patterns["internationalCut"] = "International{d}Cut".format(d=delimiters) +patterns["bitDepth"] = r"(8|10)-?bits?" +patterns["3d"] = r"3D" +patterns["internal"] = r"iNTERNAL" +patterns["readnfo"] = r"READNFO" +patterns["hdr"] = r"HDR(?:10)?" +patterns["documentary"] = r"DOCU(?:menta?ry)?" +patterns["limited"] = r"LIMITED" +patterns["remastered"] = r"REMASTERED" +patterns["directorsCut"] = r"DC|Director'?s.?Cut" +patterns["upscaled"] = rf"(?:AI{delimiters}*)?upscaled?" 
+patterns["untouched"] = r"untouched" +patterns["remux"] = r"REMUX" +patterns["internationalCut"] = rf"International{delimiters}Cut" # Spaces are only allowed before the genres list if after a word boundary or punctuation -patterns["genres"] = ( - r"\b\s*[\(\-\]]+\s*((?:" + link_patterns(genres) + delimiters + r"?)+)\b" -) +patterns["genres"] = rf"\b\s*[\(\-\]]+\s*((?:{link_patterns(genres)}{delimiters}?)+)\b" types = { "seasons": "integer", @@ -412,16 +350,17 @@ "internationalCut": "boolean", } -patterns["NON_ENGLISH_CHARS"] = "\u3040-\u30ff" # Japanese characters -patterns["NON_ENGLISH_CHARS"] += "\u3400-\u4dbf" # Chinese characters -patterns["NON_ENGLISH_CHARS"] += "\u4e00-\u9fff" # Chinese characters -patterns["NON_ENGLISH_CHARS"] += "\uf900-\ufaff" # CJK Compatibility Ideographs -patterns["NON_ENGLISH_CHARS"] += "\uff66-\uff9f" # Halfwidth Katakana Japanese characters -patterns["NON_ENGLISH_CHARS"] += "\u0400-\u04ff" # Cyrillic characters (Russian) -patterns["NON_ENGLISH_CHARS"] += "\u0600-\u06ff" # Arabic characters - +patterns["NON_ENGLISH_CHARS"] = ( + "\u3040-\u30ff" # Japanese characters + "\u3400-\u4dbf" # Chinese characters + "\u4e00-\u9fff" # Chinese characters + "\uf900-\ufaff" # CJK Compatibility Ideographs + "\uff66-\uff9f" # Halfwidth Katakana Japanese characters + "\u0400-\u04ff" # Cyrillic characters (Russian) + "\u0600-\u06ff" # Arabic characters +) patterns["RUSSIAN_CAST_REGEX"] = r"\([^)]*[\u0400-\u04ff][^)]*\)$|\/.*\((.*)\)$" -patterns["ALT_TITLES_REGEX"] = f"[^/|(]*[{patterns['NON_ENGLISH_CHARS']}][^/|]*/|[/|][^/|(]*[{patterns['NON_ENGLISH_CHARS']}][^/|]*" +patterns["ALT_TITLES_REGEX"] = rf"[^/|(]*[{patterns['NON_ENGLISH_CHARS']}][^/|]*/|[/|][^/|(]*[{patterns['NON_ENGLISH_CHARS']}][^/|]*" patterns["NOT_ONLY_NON_ENGLISH_REGEX"] = 
rf"(?:[a-zA-Z][^{patterns['NON_ENGLISH_CHARS']}]+|^)[{patterns['NON_ENGLISH_CHARS']}].*[{patterns['NON_ENGLISH_CHARS']}]|[{patterns['NON_ENGLISH_CHARS']}].*[{patterns['NON_ENGLISH_CHARS']}](?=[^{patterns['NON_ENGLISH_CHARS']}]+[a-zA-Z])" patterns["NOT_ALLOWED_SYMBOLS_AT_START_AND_END"] = rf"^[^\w{patterns['NON_ENGLISH_CHARS']}#[【★]+|[ \-:/\\\[|{{(#$&^]+$" patterns["REMAINING_NOT_ALLOWED_SYMBOLS_AT_START_AND_END"] = rf"^[^\w{patterns['NON_ENGLISH_CHARS']}#]+|]$" diff --git a/PTN/post.py b/PTN/post.py index c6daabf..7f64c52 100644 --- a/PTN/post.py +++ b/PTN/post.py @@ -1,10 +1,11 @@ #!/usr/bin/env python -# Post-processing functions that run after the main parsing. +import re +from typing import List, Dict, Any +from .extras import link_patterns, complete_series, langs +from .patterns import episode_name_pattern, patterns, pre_website_encoder_pattern -from . import re -from .extras import link_patterns, complete_series -from .patterns import episode_name_pattern, langs, patterns, pre_website_encoder_pattern +# Post-processing functions that run after the main parsing. # Before excess functions (before we split what was unmatched in the title into a list). # They all take in the parse object and what was unmatched, and must return the latter minus @@ -12,46 +13,29 @@ # Try and find the episode name. -def try_episode_name(self, unmatched): +def try_episode_name(self: Any, unmatched: str) -> str: match = re.findall(episode_name_pattern, unmatched) - # First we see if there's a match in unmatched, then we look if it's after an episode, a day, - # or a year, in the full release title. 
if match: match = re.search( - "(?:" - + link_patterns(patterns["episodes"]) - + "|" - + patterns["day"] - + "|" - + patterns["year"] - + r")[._\-\s+]*(" - + re.escape(match[0]) - + ")", + rf"(?:{link_patterns(patterns['episodes'])}|{patterns['day']}|{patterns['year']})[._\-\s+]*({re.escape(match[0])})", self.torrent_name, re.IGNORECASE, ) if match: - match_s, match_e = match.start(len(match.groups())), match.end( - len(match.groups()) - ) + match_s, match_e = match.start(len(match.groups())), match.end(len(match.groups())) match = match.groups()[-1] self._part("episodeName", (match_s, match_e), self._clean_string(match)) unmatched = unmatched.replace(match, "") return unmatched -def try_encoder_before_site(self, unmatched): +def try_encoder_before_site(self: Any, unmatched: str) -> str: match = re.findall(pre_website_encoder_pattern, unmatched.strip()) - if match: found_match = None for m in match: full_title_match = re.search( - r"[\s\-](" - + re.escape(m) - + ")(?:\." - + link_patterns(patterns["filetype"]) - + ")?$", + rf"[\s\-]({re.escape(m)})(?:\.{link_patterns(patterns['filetype'])})?$", self.torrent_name, re.I, ) @@ -61,37 +45,23 @@ def try_encoder_before_site(self, unmatched): match = found_match if match: match_s, match_e = match.start(0), match.end(0) - encoder_and_site = list( - filter(None, re.split(r"[\-\s\)]", match.groups()[0])) - ) + encoder_and_site = list(filter(None, re.split(r"[\-\s\)]", match.groups()[0]))) if len(encoder_and_site) == 2: - encoder_raw = encoder_and_site[0] - site_raw = encoder_and_site[1] - self._part( - "encoder", - (match_s, match_e - len(site_raw)), - self._clean_string(encoder_raw), - ) - self._part( - "site", - (match_s + len(encoder_raw), match_e), - self._clean_string(site_raw), - overwrite=False, - ) + encoder_raw, site_raw = encoder_and_site + self._part("encoder", (match_s, match_e - len(site_raw)), self._clean_string(encoder_raw)) + self._part("site", (match_s + len(encoder_raw), match_e), 
self._clean_string(site_raw), overwrite=False) unmatched = unmatched.replace(match.group(0), "") - return unmatched -def remove_complete_series_string(self, unmatched): +def remove_complete_series_string(self: Any, unmatched: str) -> str: if "title" in self.parts: complete_series_regex = link_patterns(complete_series) complete_match = re.search(complete_series_regex, self.parts["title"], flags=re.I) if complete_match: title = self.parts["title"] - title = title[: complete_match.start()] + title[complete_match.end() :] + title = title[:complete_match.start()] + title[complete_match.end():] self._part("title", (complete_match.start(), complete_match.end()), self._clean_string(title), overwrite=True) - return unmatched @@ -106,7 +76,7 @@ def remove_complete_series_string(self, unmatched): # encoder is assumed to be the last element of `excess`, if not already added. -def try_encoder(self): +def try_encoder(self: Any) -> None: if "excess" not in self.parts or "encoder" in self.parts: return excess = self.parts["excess"] @@ -124,8 +94,8 @@ def try_encoder(self): # Split encoder name and site, adding the latter to self.parts -def try_site(self): - if "encoder" not in self.parts or "website" in self.parts: +def try_site(self: Any) -> None: + if "encoder" not in self.parts or "site" in self.parts: return encoder = self.parts["encoder"] if self.coherent_types: @@ -143,7 +113,7 @@ def try_site(self): # If there are no languages, but subtitles were matched, we should assume the first lang # is the actual languages, and remove it from the subtitles. -def fix_subtitles_no_language(self): +def fix_subtitles_no_language(self: Any) -> None: if ( "languages" not in self.parts and "subtitles" in self.parts @@ -157,22 +127,21 @@ def fix_subtitles_no_language(self): # Language matches, to support multi-languages releases that have the audio with each # languages, will contain audio info (or simply extra strings like 'dub'). # We remove non-lang matching items from this list. 
-def filter_non_languages(self): +def filter_non_languages(self: Any) -> None: if "languages" in self.parts and isinstance(self.parts["languages"], list): languages = list(self.parts["languages"]) for lang in self.parts["languages"]: matched = False - for (lang_regex, lang_clean) in langs: + for lang_regex, lang_clean in langs: if re.match(lang_regex, lang, re.IGNORECASE): matched = True break if not matched: languages.remove(lang) - self._part("languages", self.part_slices["languages"], languages, overwrite=True) -def is_subtitle_available(self): +def is_subtitle_available(self: Any) -> None: if "subtitles" not in self.parts: return @@ -187,9 +156,9 @@ def is_subtitle_available(self): self.parts.pop("subtitles") -def try_vague_season_episode(self): +def try_vague_season_episode(self: Any) -> None: title = self.parts["title"] - m = re.search("(\d{1,2})-(\d{1,2})$", title) + m = re.search(r"(\d{1,2})-(\d{1,2})$", title) if m: if "seasons" not in self.parts and "episodes" not in self.parts: new_title = title[: m.start()] @@ -210,20 +179,13 @@ def try_vague_season_episode(self): # Probably for movies like 1917, where the title is just the year (would need the release year to also be absent) -def use_year_as_title_if_absent(self): +def use_year_as_title_if_absent(self: Any) -> None: if "year" in self.parts and not self.parts.get("title"): self._part("title", None, str(self.parts["year"]), overwrite=True) self.parts.pop("year") - -def remove_empty_parts(self): - non_empty_parts = {} - for part in self.parts: - if self.parts[part] != "": - non_empty_parts[part] = self.parts[part] - - self.parts = non_empty_parts - +def remove_empty_parts(self: Any) -> None: + self.parts = {part: value for part, value in self.parts.items() if value != ""} post_processing_after_excess = [ try_encoder, From 98005161bbb3bba89eca788b5f89db44d010b16c Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Sat, 13 Jul 2024 08:28:11 +0530 Subject: [PATCH 20/21] Further improvements --- PTN/extras.py | 11 
+- PTN/parse.py | 241 ++++++++++++++++---------------------------- PTN/post.py | 92 ++++++----------- tests/test_parse.py | 70 ++++++------- 4 files changed, 157 insertions(+), 257 deletions(-) diff --git a/PTN/extras.py b/PTN/extras.py index a708b0b..be699b5 100644 --- a/PTN/extras.py +++ b/PTN/extras.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import re -from typing import List, Tuple, Union, Optional +from typing import List, Tuple, Union # Helper functions and constants for patterns.py @@ -97,8 +96,8 @@ # Empty list indicates to always do so, as opposed to matching specific regexes. patterns_ignore_title = { "languages": [], - "audio": ["LiNE"], - "network": ["Hallmark"], + "audio": [r"LiNE"], + "network": [r"Hallmark"], "untouched": [], "internal": [], "limited": [], @@ -171,6 +170,4 @@ def suffix_pattern_with(suffixes: Union[str, List[str]], pattern_options: Union[ def link_patterns(pattern_options: Union[str, List[Union[str, Tuple]]]) -> str: if not isinstance(pattern_options, list): return pattern_options - return ( - rf"(?:{'|'.join([pattern_option[0] if isinstance(pattern_option, tuple) else pattern_option for pattern_option in pattern_options])})" - ) + return rf"(?:{'|'.join([pattern_option[0] if isinstance(pattern_option, tuple) else pattern_option for pattern_option in pattern_options])})" diff --git a/PTN/parse.py b/PTN/parse.py index 5816fc1..e670fb7 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -1,16 +1,13 @@ #!/usr/bin/env python import re from typing import Dict, List, Tuple, Union, Optional, Any + from .extras import ( delimiters, langs, genres, exceptions, - complete_series, patterns_ignore_title, - get_channel_audio_options, - prefix_pattern_with, - suffix_pattern_with, link_patterns, ) from .patterns import patterns, patterns_ordered, types, patterns_allow_overlap @@ -56,7 +53,6 @@ def _clean_dots(string: str) -> str: def _clean_string(self, string: str) -> str: clean = re.sub(r"^( -|\(|\[)", "", string) clean = self._clean_dots(clean) - 
clean = re.sub(r"_", " ", clean) clean = re.sub(r"([\[)_\]]|- )$", "", clean).strip() clean = clean.strip(" _-") @@ -98,7 +94,7 @@ def _apply_patterns(self, key: str, pattern_options: List[Tuple[str, Optional[st if key not in ("seasons", "episodes", "site", "languages", "genres"): pattern = rf"\b(?:{pattern})\b" - clean_name = re.sub(r"_", " ", self.torrent_name) + clean_name = self.torrent_name.replace("_", " ") matches = self.get_matches(pattern, clean_name, key) if not matches: @@ -116,143 +112,79 @@ def _apply_patterns(self, key: str, pattern_options: List[Tuple[str, Optional[st continue index = self.get_match_indexes(match) - - if key in ("seasons", "episodes"): - clean = self.get_season_episode(match) - elif key == "subtitles": - clean = self.get_subtitles(match) - elif key in ("languages", "genres"): - clean = self.split_multi(match) - elif key in types and types[key] == "boolean": - clean = True - else: - clean = match[index["clean"]] - if key in types and types[key] == "integer": - clean = int(clean) + clean = self._get_clean_value(key, match, index) if self.standardise: clean = self.standardise_clean(clean, key, replace, transforms) - part_overlaps = any( - self._is_overlap(part_slice, (match_start, match_end)) - for part, part_slice in self.part_slices.items() - if part not in patterns_allow_overlap - ) - - if not part_overlaps: + if not self._has_overlap(match_start, match_end): self._part(key, (match_start, match_end), clean) # Handles all the optional/missing tuple elements into a consistent list. 
@staticmethod def normalise_pattern_options(pattern_options: Union[str, Tuple, List[Union[str, Tuple]]]) -> List[Tuple[str, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]: - pattern_options_norm = [] - - if isinstance(pattern_options, tuple): + if isinstance(pattern_options, (str, tuple)): pattern_options = [pattern_options] - elif not isinstance(pattern_options, list): - pattern_options = [(pattern_options, None, None)] - for options in pattern_options: - if len(options) == 2: # No transformation - pattern_options_norm.append(options + (None,)) - elif isinstance(options, tuple): - if isinstance(options[2], tuple): - pattern_options_norm.append( - tuple(list(options[:2]) + [[options[2]]]) - ) - elif isinstance(options[2], list): - pattern_options_norm.append(options) - else: - pattern_options_norm.append( - tuple(list(options[:2]) + [[(options[2], [])]]) - ) - + normalized = [] + for option in pattern_options: + if isinstance(option, str): + normalized.append((option, None, None)) + elif len(option) == 2: + normalized.append(option + (None,)) else: - pattern_options_norm.append((options, None, None)) - return pattern_options_norm + transforms = option[2] + if isinstance(transforms, tuple): + transforms = [transforms] + elif not isinstance(transforms, list): + transforms = [(transforms, [])] + normalized.append((option[0], option[1], transforms)) + return normalized def get_matches(self, pattern: str, clean_name: str, key: str) -> List[Dict[str, Union[str, int]]]: - grouped_matches = [] - matches = list(re.finditer(pattern, clean_name, re.IGNORECASE)) - for m in matches: - if m.start() < self.ignore_before_index(clean_name, key): - continue - groups = m.groups() - if not groups: - grouped_matches.append((m.group(), m.start(), m.end())) - else: - grouped_matches.append((groups, m.start(), m.end())) - - parsed_matches = [] - for match in grouped_matches: - m = match[0] - if isinstance(m, tuple): - m = list(m) - else: - m = [m] - 
parsed_matches.append({"match": m, "start": match[1], "end": match[2]}) - return parsed_matches + compiled_pattern = re.compile(pattern, re.IGNORECASE) + matches = compiled_pattern.finditer(clean_name) + grouped_matches = [ + {"match": (m.groups() if m.groups() else [m.group()]), "start": m.start(), "end": m.end()} + for m in matches if m.start() >= self.ignore_before_index(clean_name, key) + ] + return grouped_matches - # Only use part of the torrent name after the (guessed) title (split at a season or year) - # to avoid matching certain patterns that could show up in a release title. def ignore_before_index(self, clean_name: str, key: str) -> int: - match = None - if key in patterns_ignore_title: - patterns_ignored = patterns_ignore_title[key] - if not patterns_ignored: - match = re.search(self.post_title_pattern, clean_name, re.IGNORECASE) - else: - for ignore_pattern in patterns_ignored: - if re.findall(ignore_pattern, clean_name, re.IGNORECASE): - match = re.search(self.post_title_pattern, clean_name, re.IGNORECASE) - - if match: - return match.start() - return 0 + if key not in patterns_ignore_title: + return 0 + patterns_ignored = patterns_ignore_title[key] + match = re.search(self.post_title_pattern, clean_name, re.IGNORECASE) if not patterns_ignored else None + if not match: + for ignore_pattern in patterns_ignored: + if re.findall(ignore_pattern, clean_name, re.IGNORECASE): + match = re.search(self.post_title_pattern, clean_name, re.IGNORECASE) + if match: + break + return match.start() if match else 0 @staticmethod def get_match_indexes(match: List[str]) -> Dict[str, int]: - index = {"raw": 0, "clean": 0} - - if len(match) > 1: - # for season we might have it in index 1 or index 2 - # e.g. 
"5x09" TODO is this weirdness necessary - for i in range(1, len(match)): - if match[i]: - index["clean"] = i - break - - return index + return {"raw": 0, "clean": next((i for i in range(1, len(match)) if match[i]), 0)} @staticmethod def get_season_episode(match: List[str]) -> Optional[List[int]]: m = re.findall(r"[0-9]+", match[0]) if m and len(m) > 1: return list(range(int(m[0]), int(m[-1]) + 1)) - elif len(match) > 1 and match[1] and m: + if len(match) > 1 and match[1] and m: return list(range(int(m[0]), int(match[1]) + 1)) - elif m: + if m: return [int(m[0])] return None @staticmethod def split_multi(match: List[str]) -> List[str]: - m = re.split(rf"{delimiters}+", match[0]) - return list(filter(None, m)) + return list(filter(None, re.split(rf"{delimiters}+", match[0]))) @staticmethod def get_subtitles(match: List[str]) -> List[str]: - # handle multi subtitles - m = re.split(rf"{delimiters}+", match[0]) - m = list(filter(None, m)) - clean = [] - # If it's only 1 result, it's fine if it's just 'subs'. - if len(m) == 1: - clean = m - else: - for x in m: - if not re.match("subs?|soft", x, re.I): - clean.append(x) - return clean + m = list(filter(None, re.split(rf"{delimiters}+", match[0]))) + return m if len(m) == 1 else [x for x in m if not re.match("subs?|soft", x, re.I)] def standardise_clean(self, clean: Union[str, List[str]], key: str, replace: Optional[str], transforms: Optional[Union[str, List[Tuple[str, List[Any]]]]]) -> Union[str, List[str]]: if replace: @@ -282,40 +214,29 @@ def standardise_languages(clean: List[str]) -> List[str]: def standardise_genres(clean: List[str]) -> List[str]: standard_genres = [] for genre in clean: - for regex, clean in genres: + for regex, clean_genre in genres: if re.match(regex, genre, re.IGNORECASE): - standard_genres.append(clean) + standard_genres.append(clean_genre) break return standard_genres - # Merge all the match slices (such as when they overlap), then remove - # them from excess. 
def merge_match_slices(self) -> None: - matches = sorted(self.match_slices, key=lambda match: match[0]) - slices = [] + self.match_slices.sort(key=lambda match: match[0]) + merged = [] i = 0 - while i < len(matches): - start, end = matches[i] + while i < len(self.match_slices): + start, end = self.match_slices[i] i += 1 - for next_start, next_end in matches[i:]: - if next_start <= end: - end = max(end, next_end) - i += 1 - else: - break - slices.append((start, end)) - self.match_slices = slices + while i < len(self.match_slices) and self.match_slices[i][0] <= end: + end = max(end, self.match_slices[i][1]) + i += 1 + merged.append((start, end)) + self.match_slices = merged def process_title(self) -> None: unmatched = self.unmatched_list(keep_punctuation=False) - - # Use the first one as the title if unmatched: - title_start, title_end = unmatched[0][0], unmatched[0][1] - - # If our unmatched is after the first 3 matches, we assume the title is missing - # (or more likely got parsed as something else), as no torrents have it that - # far away from the beginning of the release title. 
+ title_start, title_end = unmatched[0] if len(self.part_slices) > 3 and title_start > sorted(self.part_slices.values(), key=lambda s: s[0])[3][0]: self._part("title", None, "") @@ -372,40 +293,48 @@ def fix_known_exceptions(self) -> None: self._part("title", None, exception["actual_title"], overwrite=True) def get_unmatched(self) -> str: - unmatched = "" - for start, end in self.unmatched_list(): - unmatched += self.torrent_name[start:end] - return unmatched + return "".join([self.torrent_name[start:end] for start, end in self.unmatched_list()]) def clean_unmatched(self) -> List[str]: - unmatched = [] - for start, end in self.unmatched_list(): - unmatched.append(self.torrent_name[start:end]) - + unmatched = [self.torrent_name[start:end] for start, end in self.unmatched_list()] unmatched_clean = [] for raw in unmatched: clean = re.sub(r"(^[-_.\s(),]+)|([-.\s,]+$)", "", raw) clean = re.sub(r"[()/]", " ", clean) - unmatched_clean += re.split(r"\.\.+|\s+", clean) - - filtered = [] - for extra in unmatched_clean: - # re.fullmatch() is not available in python 2.7, so we manually do it with \Z. 
- if not re.match(rf"(?:Complete|Season|Full)?[\]\[,.+\- ]*(?:Complete|Season|Full)?\Z", extra, re.IGNORECASE): - filtered.append(extra) - return filtered + unmatched_clean.extend(re.split(r"\.\.+|\s+", clean)) + return [extra for extra in unmatched_clean if not re.match(rf"(?:Complete|Season|Full)?[\]\[,.+\- ]*(?:Complete|Season|Full)?\Z", extra, re.IGNORECASE)] @staticmethod def clean_title(raw_title: str) -> str: - cleaned_title = raw_title.replace(r"[[(]movie[)\]]", "") # clear movie indication flag - cleaned_title = re.sub(patterns["RUSSIAN_CAST_REGEX"], " ", cleaned_title) # clear russian cast information - cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_START"], r"\1", cleaned_title) # remove release group markings sections from the start - cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_END"], r"\1", cleaned_title) # remove unneeded markings section at the end if present - cleaned_title = re.sub(patterns["ALT_TITLES_REGEX"], "", cleaned_title) # remove alt language titles - cleaned_title = re.sub(patterns["NOT_ONLY_NON_ENGLISH_REGEX"], "", cleaned_title) # remove non english chars if they are not the only ones left + cleaned_title = raw_title.replace(r"[[(]movie[)\]]", "") + cleaned_title = re.sub(patterns["RUSSIAN_CAST_REGEX"], " ", cleaned_title) + cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_START"], r"\1", cleaned_title) + cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_END"], r"\1", cleaned_title) + cleaned_title = re.sub(patterns["ALT_TITLES_REGEX"], "", cleaned_title) + cleaned_title = re.sub(patterns["NOT_ONLY_NON_ENGLISH_REGEX"], "", cleaned_title) return cleaned_title @staticmethod def _is_overlap(part_slice: Tuple[int, int], match_slice: Tuple[int, int]) -> bool: - # Strict smaller/larger than since punctuation can overlap. 
return (part_slice[0] < match_slice[0] < part_slice[1]) or (part_slice[0] < match_slice[1] < part_slice[1]) + + def _get_clean_value(self, key: str, match: List[str], index: Dict[str, int]) -> Union[str, int, List[int], bool]: + if key in ("seasons", "episodes"): + return self.get_season_episode(match) + if key == "subtitles": + return self.get_subtitles(match) + if key in ("languages", "genres"): + return self.split_multi(match) + if key in types and types[key] == "boolean": + return True + clean = match[index["clean"]] + if key in types and types[key] == "integer": + return int(clean) + return clean + + def _has_overlap(self, match_start: int, match_end: int) -> bool: + return any( + self._is_overlap(part_slice, (match_start, match_end)) + for part, part_slice in self.part_slices.items() + if part not in patterns_allow_overlap + ) diff --git a/PTN/post.py b/PTN/post.py index 7f64c52..61f43af 100644 --- a/PTN/post.py +++ b/PTN/post.py @@ -1,17 +1,18 @@ #!/usr/bin/env python import re -from typing import List, Dict, Any +from typing import Any + from .extras import link_patterns, complete_series, langs from .patterns import episode_name_pattern, patterns, pre_website_encoder_pattern + # Post-processing functions that run after the main parsing. # Before excess functions (before we split what was unmatched in the title into a list). # They all take in the parse object and what was unmatched, and must return the latter minus # what they used. - # Try and find the episode name. 
def try_episode_name(self: Any, unmatched: str) -> str: match = re.findall(episode_name_pattern, unmatched) @@ -23,16 +24,14 @@ def try_episode_name(self: Any, unmatched: str) -> str: ) if match: match_s, match_e = match.start(len(match.groups())), match.end(len(match.groups())) - match = match.groups()[-1] - self._part("episodeName", (match_s, match_e), self._clean_string(match)) - unmatched = unmatched.replace(match, "") + self._part("episodeName", (match_s, match_e), self._clean_string(match.group(len(match.groups())))) + unmatched = unmatched.replace(match.group(len(match.groups())), "") return unmatched def try_encoder_before_site(self: Any, unmatched: str) -> str: match = re.findall(pre_website_encoder_pattern, unmatched.strip()) if match: - found_match = None for m in match: full_title_match = re.search( rf"[\s\-]({re.escape(m)})(?:\.{link_patterns(patterns['filetype'])})?$", @@ -40,17 +39,14 @@ def try_encoder_before_site(self: Any, unmatched: str) -> str: re.I, ) if full_title_match: - found_match = full_title_match + match_s, match_e = full_title_match.start(0), full_title_match.end(0) + encoder_and_site = list(filter(None, re.split(r"[\-\s\)]", full_title_match.group(1)))) + if len(encoder_and_site) == 2: + encoder_raw, site_raw = encoder_and_site + self._part("encoder", (match_s, match_e - len(site_raw)), self._clean_string(encoder_raw)) + self._part("site", (match_s + len(encoder_raw), match_e), self._clean_string(site_raw), overwrite=False) + unmatched = unmatched.replace(full_title_match.group(0), "") break - match = found_match - if match: - match_s, match_e = match.start(0), match.end(0) - encoder_and_site = list(filter(None, re.split(r"[\-\s\)]", match.groups()[0]))) - if len(encoder_and_site) == 2: - encoder_raw, site_raw = encoder_and_site - self._part("encoder", (match_s, match_e - len(site_raw)), self._clean_string(encoder_raw)) - self._part("site", (match_s + len(encoder_raw), match_e), self._clean_string(site_raw), overwrite=False) - 
unmatched = unmatched.replace(match.group(0), "") return unmatched @@ -74,7 +70,6 @@ def remove_complete_series_string(self: Any, unmatched: str) -> str: # After excess functions take in just the parse object, and shouldn't return anything. - # encoder is assumed to be the last element of `excess`, if not already added. def try_encoder(self: Any) -> None: if "excess" not in self.parts or "encoder" in self.parts: @@ -82,11 +77,9 @@ def try_encoder(self: Any) -> None: excess = self.parts["excess"] if not isinstance(excess, list): excess = [excess] - if excess: encoder = excess.pop() self._part("encoder", None, encoder, overwrite=True) - if not excess: self.parts.pop("excess") else: @@ -100,15 +93,12 @@ def try_site(self: Any) -> None: encoder = self.parts["encoder"] if self.coherent_types: encoder = encoder[0] - pat = r"(\[(.*)\])" - match = re.findall(pat, encoder, re.IGNORECASE) + match = re.findall(r"(\[(.*)\])", encoder, re.IGNORECASE) if match: - match = match[0] - raw = match[0] - if match: - if not re.match(r"[\[\],.+\-]*\Z", match[1], re.IGNORECASE): - self._part("site", None, match[1]) - self._part("encoder", None, encoder.replace(raw, ""), overwrite=True) + raw, site = match[0] + if not re.match(r"[\[\],.+\-]*\Z", site, re.IGNORECASE): + self._part("site", None, site) + self._part("encoder", None, encoder.replace(raw, ""), overwrite=True) # If there are no languages, but subtitles were matched, we should assume the first lang @@ -129,53 +119,33 @@ def fix_subtitles_no_language(self: Any) -> None: # We remove non-lang matching items from this list. 
def filter_non_languages(self: Any) -> None: if "languages" in self.parts and isinstance(self.parts["languages"], list): - languages = list(self.parts["languages"]) - for lang in self.parts["languages"]: - matched = False - for lang_regex, lang_clean in langs: - if re.match(lang_regex, lang, re.IGNORECASE): - matched = True - break - if not matched: - languages.remove(lang) + languages = [ + lang for lang in self.parts["languages"] + if any(re.match(lang_regex, lang, re.IGNORECASE) for lang_regex, _ in langs) + ] self._part("languages", self.part_slices["languages"], languages, overwrite=True) def is_subtitle_available(self: Any) -> None: if "subtitles" not in self.parts: return - - languages = self.parts.get("languages") subtitles = self.parts.get("subtitles") - self.parts["is_subtitle_available"] = bool(subtitles) - - if "Available" == subtitles and languages: - self._part("subtitles", self.part_slices["subtitles"], languages, overwrite=True) - elif "Available" == subtitles: + if subtitles == "Available": + self._part("subtitles", self.part_slices["subtitles"], self.parts.get("languages", []), overwrite=True) + elif subtitles == "Available": self.parts.pop("subtitles") def try_vague_season_episode(self: Any) -> None: title = self.parts["title"] m = re.search(r"(\d{1,2})-(\d{1,2})$", title) - if m: - if "seasons" not in self.parts and "episodes" not in self.parts: - new_title = title[: m.start()] - offset = self.part_slices["title"][0] - # Setting the match slices here doesn't actually matter, but good practice. 
- self._part( - "seasons", (offset + m.start(1), offset + m.end(1)), [int(m.group(1))] - ) - self._part( - "episodes", (offset + m.start(2), offset + m.end(2)), [int(m.group(2))] - ) - self._part( - "title", - (offset, offset + len(new_title)), - self._clean_string(new_title), - overwrite=True, - ) + if m and "seasons" not in self.parts and "episodes" not in self.parts: + offset = self.part_slices["title"][0] + self._part("seasons", (offset + m.start(1), offset + m.end(1)), [int(m.group(1))]) + self._part("episodes", (offset + m.start(2), offset + m.end(2)), [int(m.group(2))]) + new_title = title[:m.start()] + self._part("title", (offset, offset + len(new_title)), self._clean_string(new_title), overwrite=True) # Probably for movies like 1917, where the title is just the year (would need the release year to also be absent) @@ -184,9 +154,11 @@ def use_year_as_title_if_absent(self: Any) -> None: self._part("title", None, str(self.parts["year"]), overwrite=True) self.parts.pop("year") + def remove_empty_parts(self: Any) -> None: self.parts = {part: value for part, value in self.parts.items() if value != ""} + post_processing_after_excess = [ try_encoder, try_site, diff --git a/tests/test_parse.py b/tests/test_parse.py index 86cdcef..866ef00 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -11,24 +11,14 @@ def load_json_file(file_name): return json.load(input_file) -def get_raw_data(): - json_input = os.path.join(os.path.dirname(__file__), "files/input.json") +def get_test_data(file_name_input, file_name_output): + json_input = os.path.join(os.path.dirname(__file__), file_name_input) torrents = load_json_file(json_input) - json_output = os.path.join(os.path.dirname(__file__), "files/output_raw.json") + json_output = os.path.join(os.path.dirname(__file__), file_name_output) expected_results = load_json_file(json_output) - return zip(torrents, expected_results) - - -def get_standard_data(): - json_input = os.path.join(os.path.dirname(__file__), 
"files/input.json") - torrents = load_json_file(json_input) - - json_output = os.path.join(os.path.dirname(__file__), "files/output_standard.json") - expected_results = load_json_file(json_output) - - return zip(torrents, expected_results) + return list(zip(torrents, expected_results)) class TestTorrentParser: @@ -40,30 +30,42 @@ def setup_class(cls): @classmethod def teardown_class(cls): - print("\nExcess elements total: {}".format(cls.total_excess)) + print(f"\nExcess elements total: {cls.total_excess}") - @pytest.mark.parametrize("torrent,expected_result", get_raw_data()) + @pytest.mark.parametrize("torrent,expected_result", get_test_data("files/input.json", "files/output_raw.json")) def test_all_raw(self, torrent, expected_result): + print(f"Testing raw: {torrent}") + print(f"Expected raw result: {expected_result}") result = PTN.parse(torrent, standardise=False) + print(f"Parsed raw result: {result}") + self._check_excess(result) + self._assert_results(result, expected_result, torrent, check_extras=True) + + @pytest.mark.parametrize("torrent,expected_result", get_test_data("files/input.json", "files/output_standard.json")) + def test_standardised(self, torrent, expected_result): + print(f"Testing standardised: {torrent}") + print(f"Expected standardised result: {expected_result}") + result = PTN.parse(torrent, standardise=True) + print(f"Parsed standardised result: {result}") + self._assert_results(result, expected_result, torrent, check_extras=False) + + def _check_excess(self, result): if "excess" in result: if isinstance(result["excess"], list): - TestTorrentParser.total_excess += len(result["excess"]) + self.total_excess += len(result["excess"]) else: - TestTorrentParser.total_excess += 1 - for key in expected_result: - assert key in result, "'{}' was missing for \n{}".format(key, torrent) - assert result[key] == expected_result[key], "'{}' failed for \n{}".format( - key, torrent - ) - for key in result.keys(): - if key not in ("encoder", "excess", "site"): 
# Not needed in tests - assert key in expected_result - - @pytest.mark.parametrize("torrent,expected_result", get_standard_data()) - def test_standardised(self, torrent, expected_result): - result = PTN.parse(torrent, standardise=True) + self.total_excess += 1 + + def _assert_results(self, result, expected_result, torrent, check_extras): for key in expected_result: - assert key in result, "'{}' was missing for \n{}".format(key, torrent) - assert result[key] == expected_result[key], "'{}' failed for \n{}".format( - key, torrent - ) + assert key in result, f"'{key}' was missing for \n{torrent}" + assert result[key] == expected_result[key], f"'{key}' failed for \n{torrent}\nExpected: {expected_result[key]}\nFound: {result[key]}" + + if check_extras: + # Check that there are no unexpected keys in the result for raw test cases + unexpected_keys = set(result.keys()) - set(expected_result.keys()) - {"encoder", "excess", "site"} + assert not unexpected_keys, f"Unexpected keys found in result for \n{torrent}: {unexpected_keys}" + + +if __name__ == "__main__": + pytest.main() From 9e259e9287135ae6911eed86b25eab735d1897ff Mon Sep 17 00:00:00 2001 From: mhdzumair Date: Sat, 13 Jul 2024 08:58:01 +0530 Subject: [PATCH 21/21] Compile the regex for Improve performance & doc strings --- PTN/__init__.py | 6 ++- PTN/parse.py | 120 ++++++++++++++++++++++++++++++++++++++++-------- PTN/post.py | 27 ++++++----- 3 files changed, 119 insertions(+), 34 deletions(-) diff --git a/PTN/__init__.py b/PTN/__init__.py index ab71830..25ed6d9 100644 --- a/PTN/__init__.py +++ b/PTN/__init__.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import re from .parse import PTN __author__ = "Giorgio Momigliano" @@ -8,6 +7,9 @@ __version__ = "2.8.2" __license__ = "MIT" +# Singleton instance of PTN +_ptn_instance = PTN() + def parse(name: str, standardise: bool = True, coherent_types: bool = False) -> dict: """ @@ -18,4 +20,4 @@ def parse(name: str, standardise: bool = True, coherent_types: bool = False) -> :param 
coherent_types: Whether to ensure coherent types in the parsed results. :return: A dictionary of parsed components. """ - return PTN().parse(name, standardise, coherent_types) + return _ptn_instance.parse(name, standardise, coherent_types) diff --git a/PTN/parse.py b/PTN/parse.py index e670fb7..ddc475a 100644 --- a/PTN/parse.py +++ b/PTN/parse.py @@ -15,19 +15,33 @@ class PTN: + """ + Class to parse torrent names into meaningful components. + """ + def __init__(self): - self.torrent_name = None - self.parts: Dict[str, Union[str, int, List[int], bool]] = {} - self.part_slices: Dict[str, Tuple[int, int]] = {} - self.match_slices: List[Tuple[int, int]] = [] - self.standardise = False - self.coherent_types = False - self.post_title_pattern = self._generate_post_title_pattern() + self.compiled_patterns = self._compile_patterns() def _generate_post_title_pattern(self) -> str: + """ + Generate the pattern to match post titles. + """ return f"(?:{link_patterns(patterns['seasons'])}|{link_patterns(patterns['year'])}|720p|1080p)" + def _compile_patterns(self) -> Dict[str, List[Tuple[re.Pattern, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]]: + """ + Compile all regex patterns for better performance. + """ + compiled_patterns = {} + for key in patterns: + pattern_options = self.normalise_pattern_options(patterns[key]) + compiled_patterns[key] = [(re.compile(opt[0], re.IGNORECASE), opt[1], opt[2]) for opt in pattern_options] + return compiled_patterns + def _part(self, name: str, match_slice: Optional[Tuple[int, int]], clean: Union[str, int, List[int], bool], overwrite: bool = False) -> None: + """ + Add a part to the parts dictionary. 
+ """ if overwrite or name not in self.parts: if self.coherent_types and name not in ["title", "episodeName"] and not isinstance(clean, bool): if not isinstance(clean, list): @@ -41,6 +55,9 @@ def _part(self, name: str, match_slice: Optional[Tuple[int, int]], clean: Union[ @staticmethod def _clean_dots(string: str) -> str: + """ + Clean dots in a string. + """ if ' ' not in string and '.' in string: string = re.sub(r"\.{4,}", "... ", string) @@ -51,6 +68,9 @@ def _clean_dots(string: str) -> str: return string def _clean_string(self, string: str) -> str: + """ + Clean a string. + """ clean = re.sub(r"^( -|\(|\[)", "", string) clean = self._clean_dots(clean) clean = re.sub(r"_", " ", clean) @@ -61,16 +81,20 @@ def _clean_string(self, string: str) -> str: clean = self._clean_dots(clean).strip() return clean - def parse(self, name: str, standardise: bool, coherent_types: bool) -> Dict[str, Union[str, int, List[int], bool]]: + def parse(self, name: str, standardise: bool = False, coherent_types: bool = False) -> Dict[str, Union[str, int, List[int], bool]]: + """ + Parse a torrent name into its components. 
+ """ self.torrent_name = name.strip() self.parts = {} self.part_slices = {} self.match_slices = [] self.standardise = standardise self.coherent_types = coherent_types + self.post_title_pattern = self._generate_post_title_pattern() for key in patterns_ordered: - pattern_options = self.normalise_pattern_options(patterns[key]) + pattern_options = self.compiled_patterns[key] self._apply_patterns(key, pattern_options) self.process_title() @@ -89,10 +113,13 @@ def parse(self, name: str, standardise: bool, coherent_types: bool) -> Dict[str, return self.parts - def _apply_patterns(self, key: str, pattern_options: List[Tuple[str, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]) -> None: + def _apply_patterns(self, key: str, pattern_options: List[Tuple[re.Pattern, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]) -> None: + """ + Apply patterns to the torrent name. + """ for pattern, replace, transforms in pattern_options: if key not in ("seasons", "episodes", "site", "languages", "genres"): - pattern = rf"\b(?:{pattern})\b" + pattern = re.compile(rf"\b(?:{pattern.pattern})\b", re.IGNORECASE) clean_name = self.torrent_name.replace("_", " ") matches = self.get_matches(pattern, clean_name, key) @@ -120,9 +147,11 @@ def _apply_patterns(self, key: str, pattern_options: List[Tuple[str, Optional[st if not self._has_overlap(match_start, match_end): self._part(key, (match_start, match_end), clean) - # Handles all the optional/missing tuple elements into a consistent list. @staticmethod def normalise_pattern_options(pattern_options: Union[str, Tuple, List[Union[str, Tuple]]]) -> List[Tuple[str, Optional[str], Optional[Union[str, List[Tuple[str, List[Any]]]]]]]: + """ + Normalise pattern options. 
+ """ if isinstance(pattern_options, (str, tuple)): pattern_options = [pattern_options] normalized = [] @@ -140,9 +169,11 @@ def normalise_pattern_options(pattern_options: Union[str, Tuple, List[Union[str, normalized.append((option[0], option[1], transforms)) return normalized - def get_matches(self, pattern: str, clean_name: str, key: str) -> List[Dict[str, Union[str, int]]]: - compiled_pattern = re.compile(pattern, re.IGNORECASE) - matches = compiled_pattern.finditer(clean_name) + def get_matches(self, pattern: re.Pattern, clean_name: str, key: str) -> List[Dict[str, Union[str, int]]]: + """ + Get all matches for a pattern in the clean_name. + """ + matches = pattern.finditer(clean_name) grouped_matches = [ {"match": (m.groups() if m.groups() else [m.group()]), "start": m.start(), "end": m.end()} for m in matches if m.start() >= self.ignore_before_index(clean_name, key) @@ -150,6 +181,9 @@ def get_matches(self, pattern: str, clean_name: str, key: str) -> List[Dict[str, return grouped_matches def ignore_before_index(self, clean_name: str, key: str) -> int: + """ + Ignore matches before a certain index to avoid false positives. + """ if key not in patterns_ignore_title: return 0 patterns_ignored = patterns_ignore_title[key] @@ -164,10 +198,16 @@ def ignore_before_index(self, clean_name: str, key: str) -> int: @staticmethod def get_match_indexes(match: List[str]) -> Dict[str, int]: + """ + Get the indexes of raw and clean matches. + """ return {"raw": 0, "clean": next((i for i in range(1, len(match)) if match[i]), 0)} @staticmethod def get_season_episode(match: List[str]) -> Optional[List[int]]: + """ + Get season or episode numbers from a match. + """ m = re.findall(r"[0-9]+", match[0]) if m and len(m) > 1: return list(range(int(m[0]), int(m[-1]) + 1)) @@ -179,14 +219,23 @@ def get_season_episode(match: List[str]) -> Optional[List[int]]: @staticmethod def split_multi(match: List[str]) -> List[str]: + """ + Split multiple values in a match. 
+ """ return list(filter(None, re.split(rf"{delimiters}+", match[0]))) @staticmethod def get_subtitles(match: List[str]) -> List[str]: + """ + Get subtitles from a match. + """ m = list(filter(None, re.split(rf"{delimiters}+", match[0]))) return m if len(m) == 1 else [x for x in m if not re.match("subs?|soft", x, re.I)] def standardise_clean(self, clean: Union[str, List[str]], key: str, replace: Optional[str], transforms: Optional[Union[str, List[Tuple[str, List[Any]]]]]) -> Union[str, List[str]]: + """ + Standardise the clean value. + """ if replace: clean = replace if transforms: @@ -202,6 +251,9 @@ def standardise_clean(self, clean: Union[str, List[str]], key: str, replace: Opt @staticmethod def standardise_languages(clean: List[str]) -> List[str]: + """ + Standardise language names. + """ cleaned_langs = [] for lang in clean: for lang_regex, lang_clean in langs: @@ -212,6 +264,9 @@ def standardise_languages(clean: List[str]) -> List[str]: @staticmethod def standardise_genres(clean: List[str]) -> List[str]: + """ + Standardise genre names. + """ standard_genres = [] for genre in clean: for regex, clean_genre in genres: @@ -221,6 +276,9 @@ def standardise_genres(clean: List[str]) -> List[str]: return standard_genres def merge_match_slices(self) -> None: + """ + Merge overlapping match slices. + """ self.match_slices.sort(key=lambda match: match[0]) merged = [] i = 0 @@ -234,6 +292,9 @@ def merge_match_slices(self) -> None: self.match_slices = merged def process_title(self) -> None: + """ + Process the title from unmatched parts. + """ unmatched = self.unmatched_list(keep_punctuation=False) if unmatched: title_start, title_end = unmatched[0] @@ -262,6 +323,9 @@ def process_title(self) -> None: self._part("title", None, "") def unmatched_list(self, keep_punctuation: bool = True) -> List[Tuple[int, int]]: + """ + Get list of unmatched parts of the torrent name. 
+ """ self.merge_match_slices() unmatched = [] prev_start = 0 @@ -283,19 +347,27 @@ def unmatched_list(self, keep_punctuation: bool = True) -> List[Tuple[int, int]] return unmatched def fix_known_exceptions(self) -> None: - # Considerations for results that are known to cause issues, such - # as media with years in them but without a release year. + """ + Fix known exceptions in the parsing. + Considerations for results that are known to cause issues, such as media with years in them but without a release year. + """ for exception in exceptions: incorrect_key, incorrect_value = exception["incorrect_parse"] - if self.parts["title"] == exception["parsed_title"] and incorrect_key in self.parts: + if self.parts.get("title") == exception["parsed_title"] and incorrect_key in self.parts: if self.parts[incorrect_key] == incorrect_value or (self.coherent_types and incorrect_value in self.parts[incorrect_key]): self.parts.pop(incorrect_key) self._part("title", None, exception["actual_title"], overwrite=True) def get_unmatched(self) -> str: + """ + Get the unmatched parts of the torrent name. + """ return "".join([self.torrent_name[start:end] for start, end in self.unmatched_list()]) def clean_unmatched(self) -> List[str]: + """ + Clean the unmatched parts of the torrent name. + """ unmatched = [self.torrent_name[start:end] for start, end in self.unmatched_list()] unmatched_clean = [] for raw in unmatched: @@ -306,6 +378,9 @@ def clean_unmatched(self) -> List[str]: @staticmethod def clean_title(raw_title: str) -> str: + """ + Clean the title string. + """ cleaned_title = raw_title.replace(r"[[(]movie[)\]]", "") cleaned_title = re.sub(patterns["RUSSIAN_CAST_REGEX"], " ", cleaned_title) cleaned_title = re.sub(patterns["RELEASE_GROUP_REGEX_START"], r"\1", cleaned_title) @@ -316,9 +391,15 @@ def clean_title(raw_title: str) -> str: @staticmethod def _is_overlap(part_slice: Tuple[int, int], match_slice: Tuple[int, int]) -> bool: + """ + Check if two slices overlap. 
+ """ return (part_slice[0] < match_slice[0] < part_slice[1]) or (part_slice[0] < match_slice[1] < part_slice[1]) def _get_clean_value(self, key: str, match: List[str], index: Dict[str, int]) -> Union[str, int, List[int], bool]: + """ + Get clean value based on key and match. + """ if key in ("seasons", "episodes"): return self.get_season_episode(match) if key == "subtitles": @@ -333,6 +414,9 @@ def _get_clean_value(self, key: str, match: List[str], index: Dict[str, int]) -> return clean def _has_overlap(self, match_start: int, match_end: int) -> bool: + """ + Check if there is an overlap with existing parts. + """ return any( self._is_overlap(part_slice, (match_start, match_end)) for part, part_slice in self.part_slices.items() diff --git a/PTN/post.py b/PTN/post.py index 61f43af..4330e13 100644 --- a/PTN/post.py +++ b/PTN/post.py @@ -6,6 +6,12 @@ from .extras import link_patterns, complete_series, langs from .patterns import episode_name_pattern, patterns, pre_website_encoder_pattern +# Compile regex patterns once for reuse +episode_name_compiled = re.compile(episode_name_pattern) +pre_website_encoder_compiled = re.compile(pre_website_encoder_pattern.strip(), re.IGNORECASE) +complete_series_compiled = re.compile(link_patterns(complete_series), re.IGNORECASE) +filetype_pattern_compiled = re.compile(link_patterns(patterns['filetype']), re.IGNORECASE) + # Post-processing functions that run after the main parsing. @@ -15,13 +21,10 @@ # Try and find the episode name. 
def try_episode_name(self: Any, unmatched: str) -> str: - match = re.findall(episode_name_pattern, unmatched) + match = episode_name_compiled.findall(unmatched) if match: - match = re.search( - rf"(?:{link_patterns(patterns['episodes'])}|{patterns['day']}|{patterns['year']})[._\-\s+]*({re.escape(match[0])})", - self.torrent_name, - re.IGNORECASE, - ) + pattern = rf"(?:{link_patterns(patterns['episodes'])}|{patterns['day']}|{patterns['year']})[._\-\s+]*({re.escape(match[0])})" + match = re.search(pattern, self.torrent_name, re.IGNORECASE) if match: match_s, match_e = match.start(len(match.groups())), match.end(len(match.groups())) self._part("episodeName", (match_s, match_e), self._clean_string(match.group(len(match.groups())))) @@ -30,14 +33,11 @@ def try_episode_name(self: Any, unmatched: str) -> str: def try_encoder_before_site(self: Any, unmatched: str) -> str: - match = re.findall(pre_website_encoder_pattern, unmatched.strip()) + match = pre_website_encoder_compiled.findall(unmatched.strip()) if match: for m in match: - full_title_match = re.search( - rf"[\s\-]({re.escape(m)})(?:\.{link_patterns(patterns['filetype'])})?$", - self.torrent_name, - re.I, - ) + pattern = rf"[\s\-]({re.escape(m)})(?:\.{filetype_pattern_compiled.pattern})?$" + full_title_match = re.search(pattern, self.torrent_name, re.I) if full_title_match: match_s, match_e = full_title_match.start(0), full_title_match.end(0) encoder_and_site = list(filter(None, re.split(r"[\-\s\)]", full_title_match.group(1)))) @@ -52,8 +52,7 @@ def try_encoder_before_site(self: Any, unmatched: str) -> str: def remove_complete_series_string(self: Any, unmatched: str) -> str: if "title" in self.parts: - complete_series_regex = link_patterns(complete_series) - complete_match = re.search(complete_series_regex, self.parts["title"], flags=re.I) + complete_match = complete_series_compiled.search(self.parts["title"]) if complete_match: title = self.parts["title"] title = title[:complete_match.start()] + 
title[complete_match.end():]