Skip to content

Commit

Permalink
Add tags to jobs and a command line arg to select them (#789)
Browse files Browse the repository at this point in the history
This allows the user to selectively run jobs based on a tag for use
cases where you might want to run urlwatch on different schedules for
different jobs and other use cases.

---------

Signed-off-by: James Hewitt <[email protected]>
Co-authored-by: Thomas Perl <[email protected]>
  • Loading branch information
Jamstah and thp authored Apr 24, 2024
1 parent 342dd6a commit 1b045b6
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
- New `enabled` option for all jobs. Set to false to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah)
- New option `ignore_incomplete_reads` (Requested in #725 by wschoot, contributed in #787 by wfrisch)
- New option `wait_for` in browser jobs (Requested in #763 by yuis-ice, contributed in #810 by jamstah)
- Added tags to jobs and the ability to select them at the command line (#789 by jamstah)

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/source/jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ Optional keys for all job types
-------------------------------

- ``name``: Human-readable name/label of the job
- ``tags``: Array of tags; jobs can then be selected by tag on the command line with ``--tags``
- ``filter``: :doc:`filters` (if any) to apply to the output (can be tested with ``--test-filter``)
- ``max_tries``: After this many sequential failed runs, the error will be reported rather than ignored
- ``diff_tool``: Command to a custom tool for generating diff text
Expand Down
8 changes: 6 additions & 2 deletions docs/source/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ This manpage describes the CLI tool.

positional arguments:
JOB
index of job(s) to run, as numbered according to the --list command.
If none are specified, then all jobs will be run.
indexes or tags of job(s) to run.
If --tags is set, each JOB is a tag;
otherwise, each JOB is an index as numbered by the --list command.

optional arguments:
-h, --help
show this help message and exit

--tags
use tags instead of indexes to select jobs to run

--version
show program's version number and exit

Expand Down
8 changes: 4 additions & 4 deletions lib/urlwatch/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,16 @@ def show_features(self):
return 0

def list_urls(self):
for idx, job in enumerate(self.urlwatcher.jobs):
for idx, job in enumerate(self.urlwatcher.jobs, 1):
if self.urlwatch_config.verbose:
print('%d: %s' % (idx + 1, repr(job)))
print('%d: %s' % (idx, repr(job)))
else:
pretty_name = job.pretty_name()
location = job.get_location()
if pretty_name != location:
print('%d: %s ( %s )' % (idx + 1, pretty_name, location))
print('%d: %s ( %s )' % (idx, pretty_name, location))
else:
print('%d: %s' % (idx + 1, pretty_name))
print('%d: %s' % (idx, pretty_name))
return 0

def _find_job(self, query):
Expand Down
19 changes: 16 additions & 3 deletions lib/urlwatch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,13 @@ def __init__(self, args, pkgname, urlwatch_dir, prefix, config, urls, hooks, cac
self.parse_args(args)

def parse_args(self, cmdline_args):

parser = argparse.ArgumentParser(description=urlwatch.__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('joblist', metavar='JOB', type=int, nargs="*", help='index of job(s) to run, as numbered according to the --list command. If none specified, then all jobs will be run.')
parser.add_argument('joblist', metavar='JOB', type=str, nargs="*", help='indexes or tags of job(s) to run, depending on --tags. If using indexes, they are as numbered according to the --list command. If none are specified, then all jobs will be run.')
parser.add_argument('--tags', action='store_true', help='Use tags instead of indexes to select jobs to run')
parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')

group = parser.add_argument_group('files and directories')
group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE',
default=self.urls)
Expand All @@ -95,17 +96,29 @@ def parse_args(self, cmdline_args):
group.add_argument('--test-diff-filter', metavar='JOB',
help='test diff filter output of job by location or index (needs at least 2 snapshots)')
group.add_argument('--dump-history', metavar='JOB', help='dump historical cached data for a job')

group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
group.add_argument('--edit', action='store_true', help='edit URL/job list')
group.add_argument('--edit-config', action='store_true', help='edit configuration file')
group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')

group = parser.add_argument_group('miscellaneous')
group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
group.add_argument('--gc-cache', metavar='RETAIN_LIMIT', type=int, help='remove old cache entries, keeping the latest RETAIN_LIMIT (default: 1)',
nargs='?', const=1)

args = parser.parse_args(cmdline_args)

for i, arg in enumerate(vars(args)):
if args.tags:
if not args.joblist:
raise SystemExit("No tags specified")
self.tag_set = frozenset(args.joblist)
else:
try:
self.idx_set = frozenset(int(s) for s in args.joblist)
except ValueError as e:
parser.error(e)

for arg in vars(args):
argval = getattr(args, arg)
setattr(self, arg, argval)
17 changes: 16 additions & 1 deletion lib/urlwatch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import re
import subprocess
import textwrap
from typing import Iterable, Optional, Set, FrozenSet

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
Expand Down Expand Up @@ -196,7 +197,10 @@ def ignore_error(self, exception):

class Job(JobBase):
__required__ = ()
__optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url')
__optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url', 'tags')

def matching_tags(self, tags: Set[str]) -> Set[str]:
    """Return the tags this job has in common with ``tags``.

    ``self.tags`` is optional and may be ``None``; such a job matches
    nothing, so an empty set is returned instead of letting
    ``None & tags`` raise ``TypeError``.

    :param tags: the tags to match against
    :returns: the (possibly empty) intersection of the job's tags and ``tags``
    """
    own_tags = self.tags
    if own_tags is None:
        return frozenset()
    return own_tags & tags

# determine if hyperlink "a" tag is used in HtmlReporter
def location_is_url(self):
Expand All @@ -208,6 +212,17 @@ def pretty_name(self):
def is_enabled(self):
return self.enabled is None or self.enabled

@property
def tags(self) -> Optional[FrozenSet[str]]:
    """The job's tags as a frozenset, or ``None`` if no tags were set."""
    return self._tags

@tags.setter
def tags(self, value: Optional[Iterable[str]]):
    """Store ``value`` as an immutable set of tags.

    ``None`` is kept as-is. A bare string is treated as a single tag:
    handing it straight to ``frozenset()`` would iterate over it and
    produce one "tag" per character.

    :param value: ``None``, a single tag string, or an iterable of tag strings
    """
    if value is None:
        self._tags = None
    elif isinstance(value, str):
        self._tags = frozenset((value,))
    else:
        self._tags = frozenset(value)


class ShellJob(Job):
"""Run a shell command and get its standard output"""
Expand Down
15 changes: 15 additions & 0 deletions lib/urlwatch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ def __init__(self, urlwatch_config, config_storage, cache_storage, urls_storage)
if hasattr(self.urlwatch_config, 'migrate_urls'):
self.urlwatch_config.migrate_cache(self)

def should_run(self, idx, job):
    """Decide whether ``job`` takes part in the current run.

    Disabled jobs never run. Otherwise the selection given on the command
    line is applied: in tag mode the job must share at least one tag with
    the requested tags, in index mode its index must be among the requested
    indexes. If nothing was selected, every enabled job runs.

    :param idx: index of the job as supplied by the caller
    :param job: the job to check
    :returns: ``True`` if the job should run, ``False`` otherwise
    """
    if not job.is_enabled():
        return False

    config = self.urlwatch_config

    # Tag mode and tag(s) were specified
    if config.tags and config.tag_set:
        # matching_tags() yields a set; normalise so that this method
        # returns a bool on every path.
        return bool(job.matching_tags(config.tag_set))

    # Index mode and index(es) were specified
    if not config.tags and config.idx_set:
        return idx in config.idx_set

    # Either mode, and no jobs were specified
    return True

def check_directories(self):
if not os.path.exists(self.urlwatch_config.config):
self.config_storage.write_default_config(self.urlwatch_config.config)
Expand Down
4 changes: 2 additions & 2 deletions lib/urlwatch/reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def submit(self):
sep = (line_length * '=') or None
yield from (part for part in itertools.chain(
(sep,),
('%02d. %s' % (idx + 1, line) for idx, line in enumerate(summary)),
('%02d. %s' % (idx, line) for idx, line in enumerate(summary, 1)),
(sep, ''),
) if part is not None)

Expand Down Expand Up @@ -860,7 +860,7 @@ def _render(cls, max_length, summary=None, details=None, footer=None):
# The footer/summary lengths are the sum of the length of their parts
# plus the space taken up by newlines.
if summary:
summary = ['%d. %s' % (idx + 1, line) for idx, line in enumerate(summary)]
summary = ['%d. %s' % (idx, line) for idx, line in enumerate(summary, 1)]
summary_len = sum(len(part) for part in summary) + len(summary) - 1
else:
summary_len = 0
Expand Down
17 changes: 17 additions & 0 deletions lib/urlwatch/tests/data/jobs-with-tags.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
name: UTC
command: date -u
tags:
- arg
- utc
---
name: RFC
command: date -R
tags:
- arg
- rfc
---
name: Local
command: date
tags:
- local
108 changes: 106 additions & 2 deletions lib/urlwatch/tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def test_load_hooks_py():


class ConfigForTest(CommandConfig):
def __init__(self, config, urls, cache, hooks, verbose):
super().__init__([], 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)
def __init__(self, config, urls, cache, hooks, verbose, args=()):
super().__init__(args, 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)


@contextlib.contextmanager
Expand Down Expand Up @@ -112,6 +112,110 @@ def test_run_watcher():
cache_storage.close()


def prepare_tags_test(args):
    """Build an ``Urlwatch`` instance over the tagged test jobs.

    :param args: command line arguments handed on to ``ConfigForTest``
    :returns: tuple ``(urlwatcher, cache_storage)``
    """
    def data_path(filename):
        return os.path.join(here, 'data', filename)

    urls_file = data_path('jobs-with-tags.yaml')
    config_file = data_path('urlwatch.yaml')
    cache_file = data_path('cache.db')

    config_storage = YamlConfigStorage(config_file)
    urls_storage = UrlsYaml(urls_file)
    cache_storage = CacheMiniDBStorage(cache_file)

    # No hooks file ('') and verbose output enabled (True)
    watcher = Urlwatch(
        ConfigForTest(config_file, urls_file, cache_file, '', True, args=args),
        config_storage,
        cache_storage,
        urls_storage,
    )

    return watcher, cache_storage


def test_idxs_none():
    """With no JOB argument, all three jobs of the data file are run."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test([])
        with contextlib.closing(cache_storage):
            urlwatcher.run_jobs()

            job_states = urlwatcher.report.job_states
            assert len(job_states) == 3


def test_idxs_zero():
    """Selecting index 0 makes ``run_jobs`` raise ``ValueError``."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['0'])
        with contextlib.closing(cache_storage), pytest.raises(ValueError):
            urlwatcher.run_jobs()


def test_idxs_massive():
    """Selecting index 99999 makes ``run_jobs`` raise ``ValueError``."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['99999'])
        with contextlib.closing(cache_storage), pytest.raises(ValueError):
            urlwatcher.run_jobs()


def test_idxs_nan():
    """A non-numeric JOB argument in index mode ends in ``SystemExit``."""
    with teardown_func(), pytest.raises(SystemExit):
        ConfigForTest('', '', '', '', True, ['NaN'])


def test_idxs_one():
    """Selecting index 1 runs only the first job, named "UTC"."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['1'])
        with contextlib.closing(cache_storage):
            urlwatcher.run_jobs()

            job_states = urlwatcher.report.job_states
            assert len(job_states) == 1
            assert job_states[0].job.name == "UTC"


def test_tags_empty():
    """``--tags`` without any tag ends in ``SystemExit``."""
    with teardown_func(), pytest.raises(SystemExit):
        ConfigForTest('', '', '', '', True, ['--tags'])


def test_tags_no_match():
    """A tag that no job carries selects no jobs."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'foo'])
        with contextlib.closing(cache_storage):
            urlwatcher.run_jobs()

            job_states = urlwatcher.report.job_states
            assert len(job_states) == 0


def test_tags_single():
    """The tag "arg" is carried by two jobs, so two jobs run."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'arg'])
        with contextlib.closing(cache_storage):
            urlwatcher.run_jobs()

            job_states = urlwatcher.report.job_states
            assert len(job_states) == 2


def test_tags_multiple():
    """Several tags select every job carrying at least one of them."""
    with teardown_func():
        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'utc', 'local'])
        with contextlib.closing(cache_storage):
            urlwatcher.run_jobs()

            job_states = urlwatcher.report.job_states
            assert len(job_states) == 2


def test_disabled_job():
with teardown_func():
urls = os.path.join(here, 'data', 'disabled-job.yaml')
Expand Down
6 changes: 4 additions & 2 deletions lib/urlwatch/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def run_parallel(func, items):


def run_jobs(urlwatcher):
if not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.joblist):
if not urlwatcher.urlwatch_config.tags and not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.idx_set):
raise ValueError(f'All job indices must be between 1 and {len(urlwatcher.jobs)}: {urlwatcher.urlwatch_config.joblist}')
cache_storage = urlwatcher.cache_storage
jobs = [job.with_defaults(urlwatcher.config_storage.config)
for (idx, job) in enumerate(urlwatcher.jobs) if job.is_enabled() and ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))]
for (idx, job) in enumerate(urlwatcher.jobs, 1)
if urlwatcher.should_run(idx, job)
]
report = urlwatcher.report

logger.debug('Processing %d jobs (out of %d)', len(jobs), len(urlwatcher.jobs))
Expand Down

0 comments on commit 1b045b6

Please sign in to comment.