From 016bf0f11b4d2436ad2cee6ccc949afc56fe9516 Mon Sep 17 00:00:00 2001
From: allejok96 <allejok96@gmail.com>
Date: Sun, 21 Mar 2021 11:47:05 +0100
Subject: [PATCH] New --update flag + bugfixes and improvements of output
 writing

---
 jwb-index       |  13 ++++
 jwlib/common.py |   3 +-
 jwlib/output.py | 198 ++++++++++++++++++++++++++++--------------------
 jwlib/parse.py  |  39 +++++++++-
 4 files changed, 166 insertions(+), 87 deletions(-)

diff --git a/jwb-index b/jwb-index
index 4e55376..3f43c6b 100755
--- a/jwb-index
+++ b/jwb-index
@@ -110,6 +110,8 @@ p.add_argument('--since', metavar='YYYY-MM-DD', dest='min_date',
 p.add_argument('--sort',
                choices=['newest', 'oldest', 'name', 'random'],
                help='sort output')
+p.add_argument('--update', action='store_true',
+               help='update existing categories with the latest videos (implies --append --latest --sort=newest)')
 p.add_argument('positional_arguments', nargs='*', metavar='DIR|FILE|COMMAND',
                help='where to send output (depends on mode)')
 
@@ -123,11 +125,22 @@ if s.print_category:
         s.print_category = True
     parse_broadcasting(s)
     exit()
+
 # Required arguments
 if not (s.mode or s.download or s.download_subtitles or s.import_dir):
     msg('please use --mode or --download')
     exit(1)
 
+# Implicit arguments
+if s.update:
+    if s.mode != 'filesystem' and not s.mode.endswith('_tree') and not s.mode.endswith('_multi'):
+        msg("this mode does not support --update")
+        exit(1)
+    s.append = True
+    s.include_categories = ['LatestVideos']
+    if not s.sort:
+        s.sort = 'newest'
+
 # Handle positional arguments depending on mode
 # COMMAND [ARGS]
 if s.mode == 'run':
diff --git a/jwlib/common.py b/jwlib/common.py
index 53fd3e4..7a125e5 100644
--- a/jwlib/common.py
+++ b/jwlib/common.py
@@ -58,9 +58,10 @@ class Settings:
     # Output stuff
     append = False
     clean_all_symlinks = False
+    update = False
     mode = ''
     safe_filenames = False
-    sort = 'none'
+    sort = ''
 
     def __setattr__(self, key, value):
         # This will raise an error if the attribute we are trying to set doesn't already exist
diff --git a/jwlib/output.py b/jwlib/output.py
index a199e09..407d5a7 100644
--- a/jwlib/output.py
+++ b/jwlib/output.py
@@ -1,10 +1,11 @@
+import glob
 import html
 import os
 import subprocess
 from random import shuffle
 from typing import List, Type
 
-from .parse import Category, Media
+from .parse import Category, Media, CategoryError
 from .common import Settings, msg
 
 pj = os.path.join
@@ -17,14 +18,17 @@ def __init__(self, name: str, source: str, duration=0):
         self.duration = duration
 
 
-class BaseOutputWriter:
-    """Base class for creation of output files
+class AbstractOutputWriter:
+    """Base class for generation of output
 
-    Usage:
-    1. Load existing file data (stripping start and end).
-    2. Keep history of URLs so we don't include doublets.
-    3. Add lines to a queue.
-    4. Write out the queue to wherever (reversing happens here).
+    Use add_to_queue() to add lines. Duplicates will be skipped.
+    Define what to do with the queue in dump_queue(). Reversal should happen there too.
+    Some data members are only for file writing sub-classes, but defined here to avoid type errors.
+
+    CLASS VARIABLES:
+     - start_string: first string in file
+     - end_string: last string in file
+     - ext: file name extension (dot included)
     """
     start_string = ''
     end_string = ''
@@ -35,16 +39,71 @@ def __init__(self, s: Settings, filename: str):
         :param filename: File name (can also be a relative path)
         """
         self.quiet = s.quiet
-        self.append = s.append  # append instead of overwriting
-        self.reverse = s.sort == 'newest'  # prepend instead of append
-        self.file = pj(s.work_dir, filename)
+        self.reverse = s.sort == 'newest'
 
         self.queue = []
         self.history = set()
 
+    def add_to_history(self, string: str):
+        """Return False if the string has been added before"""
+
+        if string in self.history:
+            return False
+        else:
+            self.history.add(string)
+            return True
+
+    def add_to_queue(self, entry: PlaylistEntry):
+        """Adds a line to the queue"""
+
+        if self.add_to_history(entry.source):
+            self.queue.append(self.string_format(entry))
+
+    def string_format(self, entry: PlaylistEntry) -> str:
+        """Turn a playlist entry into a string"""
+
+        return entry.source
+
+    def string_parse(self, string: str) -> str:
+        """Extract URL from a string"""
+
+        return string
+
+    def dump_queue(self):
+        """Should do something with the queue (reversing happens here)"""
+
+        raise NotImplementedError
+
+
+class TxtWriter(AbstractOutputWriter):
+    """ Base class for writing text files
+
+    Usage:
+    1. Load existing file data (stripping start and end).
+    2. Keep history of URLs so we don't include duplicates.
+    3. Add lines to a queue.
+    4. Write out the queue to wherever (reversing happens here).
+    """
+
+    def __init__(self, s, filename):
+        super().__init__(s, filename)
+        self.append = s.append
+
+        # File name expansion
+        if '*' in filename:
+            matches = glob.glob(pj(glob.escape(s.work_dir), filename))
+            if len(matches) == 1:
+                self.file = matches[0]
+            elif len(matches) == 0:
+                raise CategoryError("no matching file")
+            else:
+                raise CategoryError("multiple matching files")
+        else:
+            self.file = pj(s.work_dir, filename)
+
         # Get existing lines from file
         self.loaded_data = ''
-        if self.append or self.reverse:
+        if (s.append or s.sort == 'newest') and self.file:
             self.load_existing()
 
     def load_existing(self):
@@ -52,11 +111,11 @@ def load_existing(self):
 
         try:
             with open(self.file, 'r', encoding='utf-8') as file:
-                data = file.read()
+                data = file.read().rstrip('\n')
                 if data.startswith(self.start_string):
                     data = data[len(self.start_string):]
+                # Note: don't run with empty end string, because list[:-0] -> []
                 if self.end_string and data.endswith(self.end_string):
-                    # Note: don't run with empty end string, because list[:-0] -> []
                     data = data[:-len(self.end_string)]
                 self.loaded_data = data
                 # Generate history from loaded data
@@ -65,25 +124,10 @@ def load_existing(self):
         except OSError:
             pass
 
-    def add_to_history(self, string: str):
-        """Return False if the string has been added before"""
-
-        if string in self.history:
-            return False
-        else:
-            self.history.add(string)
-            return True
-
-    def add_to_queue(self, entry: PlaylistEntry):
-        """Adds a line to the queue"""
-
-        if self.add_to_history(entry.source):
-            self.queue.append(self.string_format(entry))
-
     def dump_queue(self):
         """Create dir and write out queue to file"""
 
-        if not self.queue:
+        if not self.queue or not self.file:
             return
         if self.reverse:
             self.queue.reverse()
@@ -92,10 +136,8 @@ def dump_queue(self):
         os.makedirs(d, exist_ok=True)
 
         # All IO is done in binary mode, since text mode is not seekable
-        try:
-            if self.reverse or not self.append or not self.loaded_data:
-                raise OSError  # file must be overwritten
-
+        if self.append and not self.reverse and self.loaded_data:
+            # Note: non-empty loaded_data ensures that the file exists before trying to open it with 'r+b'
             file = open(self.file, 'r+b')
             if self.quiet < 1:
                 msg('extending: {}'.format(self.file))
@@ -109,13 +151,16 @@ def dump_queue(self):
                 else:
                     raise OSError
             except OSError:
-                raise RuntimeError(self.file + ': refusing to append to file which does not match output format')
-
-        except OSError:
+                msg('file does not match output format: ' + self.file)
+                return
+        else:
             # Overwrite with start string
             file = open(self.file, 'wb')
             if self.quiet < 1:
-                msg('writing: {}'.format(self.file))
+                if self.loaded_data:
+                    msg('updating: {}'.format(self.file))
+                else:
+                    msg('creating: {}'.format(self.file))
             file.write(self.start_string.encode('utf-8'))
 
         with file:
@@ -128,20 +173,8 @@ def dump_queue(self):
             # End string
             file.write(self.end_string.encode('utf-8'))
 
-    def string_format(self, entry: PlaylistEntry) -> str:
-        """Turn a playlist entry into a string"""
-        return entry.source
-
-    def string_parse(self, string: str) -> str:
-        """Extract URL from a string"""
-        return string
-
-
-class TxtWriter(BaseOutputWriter):
-    pass
 
-
-class M3uWriter(BaseOutputWriter):
+class M3uWriter(TxtWriter):
     start_string = '#EXTM3U\n'
     ext = '.m3u'
 
@@ -153,7 +186,7 @@ def string_parse(self, string):
             return string
 
 
-class HtmlWriter(BaseOutputWriter):
+class HtmlWriter(TxtWriter):
     start_string = '<!DOCTYPE html>\n<html><head><meta charset="utf-8"/></head><body>\n'
     end_string = '</body></html>'
     ext = '.html'
@@ -166,14 +199,10 @@ def string_format(self, entry):
 
     def string_parse(self, string):
         # The thing between the first quotes is an URL
-        html.unescape(string.split('"')[1])
-
+        return html.unescape(string.split('"')[1])
 
-class StdoutWriter(BaseOutputWriter):
-    def load_existing(self):
-        """Don't read files"""
-        pass
 
+class StdoutWriter(AbstractOutputWriter):
     def dump_queue(self):
         """Write to stdout"""
 
@@ -183,14 +212,11 @@ def dump_queue(self):
             print(line)
 
 
-class CommandWriter(BaseOutputWriter):
+class CommandWriter(AbstractOutputWriter):
     def __init__(self, s, filename):
         super().__init__(s, filename)
         self.command = s.command
 
-    def load_existing(self):
-        pass
-
     def dump_queue(self):
         """Run a program with queue entries as arguments"""
 
@@ -206,10 +232,10 @@ def dump_queue(self):
             self.queue = self.queue[300:]
 
 
-def sort_media(media_list: list, sort: str):
+def sort_media(media_list: List[Media], sort: str):
     """Sort a list of Media objects in place"""
 
-    if sort == 'none':
+    if sort in ('none', ''):
         return
     elif sort == 'name':
         media_list.sort(key=lambda x: x.name)
@@ -239,8 +265,7 @@ def create_output(s: Settings, data: List[Category]):
     elif s.mode.startswith('txt'):
         writer = TxtWriter
     else:
-        # Note: ArgumentParser will make sure we never end up here
-        raise RuntimeError('invalid mode')
+        raise RuntimeError
 
     if s.mode.endswith('multi'):
         output_multi(s, data, writer, tree=False)
@@ -250,15 +275,15 @@ def create_output(s: Settings, data: List[Category]):
         output_single(s, data, writer)
 
 
-def output_single(s: Settings, data: List[Category], writercls: Type[BaseOutputWriter]):
+def output_single(s: Settings, data: List[Category], writercls: Type[AbstractOutputWriter]):
     """Create a concatenated output file"""
 
     all_media = [item for category in data for item in category.contents if isinstance(item, Media)]
     sort_media(all_media, s.sort)
 
     # Filename falls back to the name of the first category
-    filename = s.output_filename or (data[0].safe_name + writercls.ext)
-    writer = writercls(s, filename)
+    # Note: CategoryError will be prevented in argument handling
+    writer = writercls(s, s.output_filename or data[0].safe_name + writercls.ext)
 
     for media in all_media:
         if media.exists_in(pj(s.work_dir, s.sub_dir)):
@@ -270,7 +295,7 @@ def output_single(s: Settings, data: List[Category], writercls: Type[BaseOutputW
     writer.dump_queue()
 
 
-def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWriter], tree=True):
+def output_multi(s: Settings, data: List[Category], writercls: Type[AbstractOutputWriter], tree=True):
     """Create a tree of output files
 
     :keyword writercls: a PlaylistWriter class
@@ -285,6 +310,7 @@ def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWr
             # Output file is outside subdir, with nice name
             # Links point inside the subdir
             source_prepend_dir = sd
+            # Note: CategoryError cannot occur on home categories
             writer = writercls(s, category.safe_name + writercls.ext)
         elif tree:
             # For sub-categories in a tree:
@@ -297,27 +323,31 @@ def output_multi(s: Settings, data: List[Category], writercls: Type[BaseOutputWr
             # Output file is outside subdir, has both ugly and nice name
             # Links point inside subdir
             source_prepend_dir = sd
-            writer = writercls(s, category.key + ' - ' + category.safe_name + writercls.ext)
+            try:
+                writer = writercls(s, category.key + ' - ' + category.optional_name + writercls.ext)
+            except CategoryError as e:
+                if s.quiet < 1:
+                    msg("{}: {}".format(e.message, category.key))
+                continue
 
-        media_entries = []
+        # All categories go on top of the queue
         for item in category.contents:
             if isinstance(item, Category):
                 # Only link to categories if we are creating a tree structure
                 if tree:
                     source = pj('.', source_prepend_dir, item.key + writer.ext)
-                    # Categories go on top of the queue
+
                     writer.add_to_queue(PlaylistEntry(item.name.upper(), source))
-            else:
-                if item.exists_in(pj(wd, sd)):
-                    source = pj('.', source_prepend_dir, item.filename)
-                else:
-                    source = item.url
-                # Hold on to media links so we can sort them later
-                media_entries.append(PlaylistEntry(item.name, source, item.duration))
 
-        sort_media(media_entries, s.sort)
-        for entry in media_entries:
-            writer.add_to_queue(entry)
+        media_items = [m for m in category.contents if isinstance(m, Media)]
+        sort_media(media_items, s.sort)
+
+        for media in media_items:
+            if media.exists_in(pj(wd, sd)):
+                source = pj('.', source_prepend_dir, media.filename)
+            else:
+                source = media.url
+            writer.add_to_queue(PlaylistEntry(media.name, source, media.duration))
 
         writer.dump_queue()
 
@@ -339,6 +369,7 @@ def output_filesystem(s: Settings, data: List[Category]):
 
         # Index/starting/home categories: create link outside subdir
         if category.home:
+            # Note: CategoryError cannot occur on home categories
             link = pj(wd, category.safe_name)
             if s.safe_filenames:
                 source = pj(wd, sd, category.key)
@@ -361,6 +392,7 @@ def output_filesystem(s: Settings, data: List[Category]):
                     source = d
                 else:
                     source = pj('..', item.key)
+                # Note: CategoryError cannot occur on categories inside other categories' contents
                 link = pj(output_dir, item.safe_name)
 
             else:
diff --git a/jwlib/parse.py b/jwlib/parse.py
index 40683ff..77ccefd 100644
--- a/jwlib/parse.py
+++ b/jwlib/parse.py
@@ -13,6 +13,11 @@
 FRIENDLY_FILENAMES = False
 
 
+class CategoryError(Exception):
+    def __init__(self, message: str = None):
+        self.message = message or "requested name of unnamed category"
+
+
 class Category:
     """Object to put category info in."""
     key = ''
@@ -28,8 +33,19 @@ def __repr__(self):
 
     @property
     def safe_name(self):
+        """Returns name with special characters removed, or raises CategoryError if unset"""
+        if not self.name:
+            raise CategoryError
         return format_filename(self.name)
 
+    @property
+    def optional_name(self):
+        """Returns name with special characters removed, or '*' if unset"""
+        try:
+            return self.safe_name
+        except CategoryError:
+            return '*'
+
 
 class Media:
     """Object to put media info in."""
@@ -83,7 +99,8 @@ def format_filename(string):
         forbidden = '<>|?\\*/\0\n'
     else:
         # Unix forbidden characters
-        forbidden = '/\0'
+        # Remove asterisk as this is used by glob expansion later in the script
+        forbidden = '/\0*'
 
     return ''.join(x for x in string if x not in forbidden)
 
@@ -150,10 +167,11 @@ def parse_broadcasting(s: Settings):
                 raise e
 
         cat = Category()
-        result.append(cat)
         cat.key = j['category']['key']
         cat.name = j['category']['name']
         cat.home = cat.key in s.include_categories
+        if not s.update:
+            result.append(cat)
 
         if s.quiet < 1:
             if s.print_category:
@@ -169,6 +187,7 @@ def parse_broadcasting(s: Settings):
             if s.print_category:
                 print(j_sub['key'])
                 continue
+
             sub = Category()
             sub.key = j_sub['key']
             sub.name = j_sub['name']
@@ -233,6 +252,20 @@ def parse_broadcasting(s: Settings):
                     if s.quiet < 1:
                         msg('could not get timestamp on: {}'.format(j_media['title']))
 
-            cat.contents.append(media)
+            if s.update:
+                try:
+                    # Find a previously added category
+                    pcat = next(c for c in result if c.key == j_media["primaryCategory"])
+                except StopIteration:
+                    # Create a new homeless category
+                    pcat = Category()
+                    pcat.key = j_media["primaryCategory"]
+                    pcat.home = False
+                    result.append(pcat)
+                # Add media to its primary category
+                pcat.contents.append(media)
+            else:
+                # Add media to current category
+                cat.contents.append(media)
 
     return result