From 016bf0f11b4d2436ad2cee6ccc949afc56fe9516 Mon Sep 17 00:00:00 2001 From: allejok96 Date: Sun, 21 Mar 2021 11:47:05 +0100 Subject: [PATCH] New --update flag + bugfixes and improvements of output writing --- jwb-index | 13 ++++ jwlib/common.py | 3 +- jwlib/output.py | 198 ++++++++++++++++++++++++++++-------------------- jwlib/parse.py | 39 +++++++++- 4 files changed, 166 insertions(+), 87 deletions(-) diff --git a/jwb-index b/jwb-index index 4e55376..3f43c6b 100755 --- a/jwb-index +++ b/jwb-index @@ -110,6 +110,8 @@ p.add_argument('--since', metavar='YYYY-MM-DD', dest='min_date', p.add_argument('--sort', choices=['newest', 'oldest', 'name', 'random'], help='sort output') +p.add_argument('--update', action='store_true', + help='update existing categories with the latest videos (implies --append --latest --sort=newest)') p.add_argument('positional_arguments', nargs='*', metavar='DIR|FILE|COMMAND', help='where to send output (depends on mode)') @@ -123,11 +125,22 @@ if s.print_category: s.print_category = True parse_broadcasting(s) exit() + # Required arguments if not (s.mode or s.download or s.download_subtitles or s.import_dir): msg('please use --mode or --download') exit(1) +# Implicit arguments +if s.update: + if s.mode != 'filesystem' and not s.mode.endswith('_tree') and not s.mode.endswith('_multi'): + msg("this mode does not support --update") + exit(1) + s.append = True + s.include_categories = ['LatestVideos'] + if not s.sort: + s.sort = 'newest' + # Handle positional arguments depending on mode # COMMAND [ARGS] if s.mode == 'run': diff --git a/jwlib/common.py b/jwlib/common.py index 53fd3e4..7a125e5 100644 --- a/jwlib/common.py +++ b/jwlib/common.py @@ -58,9 +58,10 @@ class Settings: # Output stuff append = False clean_all_symlinks = False + update = False mode = '' safe_filenames = False - sort = 'none' + sort = '' def __setattr__(self, key, value): # This will raise an error if the attribute we are trying to set doesn't already exist diff --git 
a/jwlib/output.py b/jwlib/output.py index a199e09..407d5a7 100644 --- a/jwlib/output.py +++ b/jwlib/output.py @@ -1,10 +1,11 @@ +import glob import html import os import subprocess from random import shuffle from typing import List, Type -from .parse import Category, Media +from .parse import Category, Media, CategoryError from .common import Settings, msg pj = os.path.join @@ -17,14 +18,17 @@ def __init__(self, name: str, source: str, duration=0): self.duration = duration -class BaseOutputWriter: - """Base class for creation of output files +class AbstractOutputWriter: + """Base class for generation of output - Usage: - 1. Load existing file data (stripping start and end). - 2. Keep history of URLs so we don't include doublets. - 3. Add lines to a queue. - 4. Write out the queue to wherever (reversing happens here). + Use add_to_queue() to add lines. Doublets will be skipped. + Define what to do with the queue in dump_queue(). Reversal should happen there too. + Some data members are only for file writing sub-classes, but defined here to avoid type errors. 
+ + CLASS VARIABLES: + - start_string: first string in file + - end_string: last string in file + - ext: file name extension (dot included) """ start_string = '' end_string = '' @@ -35,16 +39,71 @@ def __init__(self, s: Settings, filename: str): :param filename: File name (can also be a relative path) """ self.quiet = s.quiet - self.append = s.append # append instead of overwriting - self.reverse = s.sort == 'newest' # prepend instead of append - self.file = pj(s.work_dir, filename) + self.reverse = s.sort == 'newest' self.queue = [] self.history = set() + def add_to_history(self, string: str): + """Return False if the string has been added before""" + + if string in self.history: + return False + else: + self.history.add(string) + return True + + def add_to_queue(self, entry: PlaylistEntry): + """Adds a line to the queue""" + + if self.add_to_history(entry.source): + self.queue.append(self.string_format(entry)) + + def string_format(self, entry: PlaylistEntry) -> str: + """Turn a playlist entry into a string""" + + return entry.source + + def string_parse(self, string: str) -> str: + """Extract URL from a string""" + + return string + + def dump_queue(self): + """Should do something with the queue (reversing happens here)""" + + raise NotImplementedError + + +class TxtWriter(AbstractOutputWriter): + """ Base class for writing text files + + Usage: + 1. Load existing file data (stripping start and end). + 2. Keep history of URLs so we don't include doublets. + 3. Add lines to a queue. + 4. Write out the queue to wherever (reversing happens here). 
+ """ + + def __init__(self, s, filename): + super().__init__(s, filename) + self.append = s.append + + # File name expansion + if '*' in filename: + matches = glob.glob(pj(glob.escape(s.work_dir), filename)) + if len(matches) == 1: + self.file = matches[0] + elif len(matches) == 0: + raise CategoryError("no matching file") + else: + raise CategoryError("multiple matching files") + else: + self.file = pj(s.work_dir, filename) + # Get existing lines from file self.loaded_data = '' - if self.append or self.reverse: + if (s.append or s.sort == 'newest') and self.file: self.load_existing() def load_existing(self): @@ -52,11 +111,11 @@ def load_existing(self): try: with open(self.file, 'r', encoding='utf-8') as file: - data = file.read() + data = file.read().rstrip('\n') if data.startswith(self.start_string): data = data[len(self.start_string):] + # Note: don't run with empty end string, because list[:-0] -> [] if self.end_string and data.endswith(self.end_string): - # Note: don't run with empty end string, because list[:-0] -> [] data = data[:-len(self.end_string)] self.loaded_data = data # Generate history from loaded data @@ -65,25 +124,10 @@ def load_existing(self): except OSError: pass - def add_to_history(self, string: str): - """Return False if the string has been added before""" - - if string in self.history: - return False - else: - self.history.add(string) - return True - - def add_to_queue(self, entry: PlaylistEntry): - """Adds a line to the queue""" - - if self.add_to_history(entry.source): - self.queue.append(self.string_format(entry)) - def dump_queue(self): """Create dir and write out queue to file""" - if not self.queue: + if not self.queue or not self.file: return if self.reverse: self.queue.reverse() @@ -92,10 +136,8 @@ def dump_queue(self): os.makedirs(d, exist_ok=True) # All IO is done in binary mode, since text mode is not seekable - try: - if self.reverse or not self.append or not self.loaded_data: - raise OSError # file must be overwritten - + if 
self.append and not self.reverse and self.loaded_data: + # Note: loaded_data ensures that the file exists before trying to open it with 'r+b' file = open(self.file, 'r+b') if self.quiet < 1: msg('extending: {}'.format(self.file)) @@ -109,13 +151,16 @@ def dump_queue(self): else: raise OSError except OSError: - raise RuntimeError(self.file + ': refusing to append to file which does not match output format') - - except OSError: + msg('file does not match output format: ' + self.file) + return + else: # Overwrite with start string file = open(self.file, 'wb') if self.quiet < 1: - msg('writing: {}'.format(self.file)) + if self.loaded_data: + msg('updating: {}'.format(self.file)) + else: + msg('creating: {}'.format(self.file)) file.write(self.start_string.encode('utf-8')) with file: @@ -128,20 +173,8 @@ def dump_queue(self): # End string file.write(self.end_string.encode('utf-8')) - def string_format(self, entry: PlaylistEntry) -> str: - """Turn a playlist entry into a string""" - return entry.source - - def string_parse(self, string: str) -> str: - """Extract URL from a string""" - return string - - -class TxtWriter(BaseOutputWriter): - pass - -class M3uWriter(BaseOutputWriter): +class M3uWriter(TxtWriter): start_string = '#EXTM3U\n' ext = '.m3u' @@ -153,7 +186,7 @@ def string_parse(self, string): return string -class HtmlWriter(BaseOutputWriter): +class HtmlWriter(TxtWriter): start_string = '\n\n' end_string = '' ext = '.html' @@ -166,14 +199,10 @@ def string_format(self, entry): def string_parse(self, string): # The thing between the first quotes is an URL - html.unescape(string.split('"')[1]) - + return html.unescape(string.split('"')[1]) -class StdoutWriter(BaseOutputWriter): - def load_existing(self): - """Don't read files""" - pass +class StdoutWriter(AbstractOutputWriter): def dump_queue(self): """Write to stdout""" @@ -183,14 +212,11 @@ def dump_queue(self): print(line) -class CommandWriter(BaseOutputWriter): +class CommandWriter(AbstractOutputWriter): def
__init__(self, s, filename): super().__init__(s, filename) self.command = s.command - def load_existing(self): - pass - def dump_queue(self): """Run a program with queue entries as arguments""" @@ -206,10 +232,10 @@ def dump_queue(self): self.queue = self.queue[300:] -def sort_media(media_list: list, sort: str): +def sort_media(media_list: List[Media], sort: str): """Sort a list of Media objects in place""" - if sort == 'none': + if sort in ('none', ''): return elif sort == 'name': media_list.sort(key=lambda x: x.name) @@ -239,8 +265,7 @@ def create_output(s: Settings, data: List[Category]): elif s.mode.startswith('txt'): writer = TxtWriter else: - # Note: ArgumentParser will make sure we never end up here - raise RuntimeError('invalid mode') + raise RuntimeError if s.mode.endswith('multi'): output_multi(s, data, writer, tree=False) @@ -250,15 +275,15 @@ def create_output(s: Settings, data: List[Category]): output_single(s, data, writer) -def output_single(s: Settings, data: List[Category], writercls: Type[BaseOutputWriter]): +def output_single(s: Settings, data: List[Category], writercls: Type[AbstractOutputWriter]): """Create a concatenated output file""" all_media = [item for category in data for item in category.contents if isinstance(item, Media)] sort_media(all_media, s.sort) # Filename falls back to the name of the first category - filename = s.output_filename or (data[0].safe_name + writercls.ext) - writer = writercls(s, filename) + # Note: CategoryError will be prevented in argument handling + writer = writercls(s, s.output_filename or data[0].safe_name + writercls.ext) for media in all_media: if media.exists_in(pj(s.work_dir, s.sub_dir)): @@ -270,7 +295,7 @@ def output_single(s: Settings, data: List[Category], writercls: Type[BaseOutputW writer.dump_queue() -def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWriter], tree=True): +def output_multi(s: Settings, data: List[Category], writercls: Type[AbstractOutputWriter], 
tree=True): """Create a tree of output files :keyword writercls: a PlaylistWriter class @@ -285,6 +310,7 @@ def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWr # Output file is outside subdir, with nice name # Links point inside the subdir source_prepend_dir = sd + # Note: CategoryError cannot occur on home categories writer = writercls(s, category.safe_name + writercls.ext) elif tree: # For sub-categories in a tree: @@ -297,27 +323,31 @@ def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWr # Output file is outside subdir, has both ugly and nice name # Links point inside subdir source_prepend_dir = sd - writer = writercls(s, category.key + ' - ' + category.safe_name + writercls.ext) + try: + writer = writercls(s, category.key + ' - ' + category.optional_name + writercls.ext) + except CategoryError as e: + if s.quiet < 1: + msg("{}: {}".format(e.message, category.key)) + continue - media_entries = [] + # All categories go on top of the queue for item in category.contents: if isinstance(item, Category): # Only link to categories if we are creating a tree structure if tree: source = pj('.', source_prepend_dir, item.key + writer.ext) - # Categories go on top of the queue + writer.add_to_queue(PlaylistEntry(item.name.upper(), source)) - else: - if item.exists_in(pj(wd, sd)): - source = pj('.', source_prepend_dir, item.filename) - else: - source = item.url - # Hold on to media links so we can sort them later - media_entries.append(PlaylistEntry(item.name, source, item.duration)) - sort_media(media_entries, s.sort) - for entry in media_entries: - writer.add_to_queue(entry) + media_items = [m for m in category.contents if isinstance(m, Media)] + sort_media(media_items, s.sort) + + for media in media_items: + if media.exists_in(pj(wd, sd)): + source = pj('.', source_prepend_dir, media.filename) + else: + source = media.url + writer.add_to_queue(PlaylistEntry(media.name, source, media.duration)) writer.dump_queue() @@ 
-339,6 +369,7 @@ def output_filesystem(s: Settings, data: List[Category]): # Index/starting/home categories: create link outside subdir if category.home: + # Note: CategoryError cannot occur on home categories link = pj(wd, category.safe_name) if s.safe_filenames: source = pj(wd, sd, category.key) @@ -361,6 +392,7 @@ def output_filesystem(s: Settings, data: List[Category]): source = d else: source = pj('..', item.key) + # Note: CategoryError cannot occur on categories inside other categories contents link = pj(output_dir, item.safe_name) else: diff --git a/jwlib/parse.py b/jwlib/parse.py index 40683ff..77ccefd 100644 --- a/jwlib/parse.py +++ b/jwlib/parse.py @@ -13,6 +13,11 @@ FRIENDLY_FILENAMES = False +class CategoryError(Exception): + def __init__(self, message: str = None): + self.message = message or "requested name of unnamed category" + + class Category: """Object to put category info in.""" key = '' @@ -28,8 +33,19 @@ def __repr__(self): @property def safe_name(self): + """Returns name with special characters removed, or raises CategoryError if unset""" + if not self.name: + raise CategoryError return format_filename(self.name) + @property + def optional_name(self): + """Returns name with special characters removed, or '*' if unset""" + try: + return self.safe_name + except CategoryError: + return '*' + class Media: """Object to put media info in.""" @@ -83,7 +99,8 @@ def format_filename(string): forbidden = '<>|?\\*/\0\n' else: # Unix forbidden characters - forbidden = '/\0' + # Remove asterisk as this is used by glob expansion later in the script + forbidden = '/\0*' return ''.join(x for x in string if x not in forbidden) @@ -150,10 +167,11 @@ def parse_broadcasting(s: Settings): raise e cat = Category() - result.append(cat) cat.key = j['category']['key'] cat.name = j['category']['name'] cat.home = cat.key in s.include_categories + if not s.update: + result.append(cat) if s.quiet < 1: if s.print_category: @@ -169,6 +187,7 @@ def parse_broadcasting(s: 
Settings): if s.print_category: print(j_sub['key']) continue + sub = Category() sub.key = j_sub['key'] sub.name = j_sub['name'] @@ -233,6 +252,20 @@ def parse_broadcasting(s: Settings): if s.quiet < 1: msg('could not get timestamp on: {}'.format(j_media['title'])) - cat.contents.append(media) + if s.update: + try: + # Find a previously added category + pcat = next(c for c in result if c.key == j_media["primaryCategory"]) + except StopIteration: + # Create a new homeless category + pcat = Category() + pcat.key = j_media["primaryCategory"] + pcat.home = False + result.append(pcat) + # Add media to its primary category + pcat.contents.append(media) + else: + # Add media to current category + cat.contents.append(media) return result