Skip to content

Commit 0c2fafe

Browse files
committed
fix CLI and cleanup code and add tests
1 parent d758a83 commit 0c2fafe

File tree

13 files changed

+290
-245
lines changed

13 files changed

+290
-245
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
* Adding an end date to `CMIP6_UofT`'s temporal extent for better rendering in STAC Browser
1212
* Updates to datacube extension helper routines for `CMIP6_UofT`.
1313
* Make pyessv-archive a requirement for *only* the cmip6 implementation instead of for the whole CLI
14+
* Fix bug where logger setup failed
15+
* Simplify CLI argument constructor code (for cleaner and more testable code)
16+
* Add tests for CLI and implementations when invoked through the CLI
17+
* Refactor code dealing with requests and authentication into the `requests.py` file
1418

1519
## [0.6.0](https://github.com/crim-ca/stac-populator/tree/0.6.0) (2024-02-22)
1620

STACpopulator/cli.py

Lines changed: 38 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -1,217 +1,67 @@
11
import argparse
2-
import glob
2+
import functools
33
import importlib
44
import logging
5-
import os
65
import sys
6+
from types import ModuleType
77
import warnings
8-
from datetime import datetime
9-
from http import cookiejar
10-
from typing import Callable, Optional
8+
from datetime import datetime, timezone
9+
from typing import Callable
1110

12-
import requests
13-
from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth
14-
from requests.sessions import Session
15-
16-
from STACpopulator import __version__
11+
from STACpopulator import __version__, implementations
1712
from STACpopulator.exceptions import STACPopulatorError
1813
from STACpopulator.logging import setup_logging
1914

20-
POPULATORS = {}
21-
22-
23-
class HTTPBearerTokenAuth(AuthBase):
24-
def __init__(self, token: str) -> None:
25-
self._token = token
26-
27-
def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
28-
r.headers["Authorization"] = f"Bearer {self._token}"
29-
return r
30-
31-
32-
class HTTPCookieAuth(cookiejar.MozillaCookieJar):
33-
"""
34-
Employ a cookie-jar file for authorization.
35-
36-
Examples of useful command:
37-
38-
.. code-block:: shell
39-
40-
curl --cookie-jar /path/to/cookie-jar.txt [authorization-provider-arguments]
41-
42-
curl \
43-
-k \
44-
-X POST \
45-
--cookie-jar /tmp/magpie-cookie.txt \
46-
-d '{"user_name":"...","password":"..."}' \
47-
-H 'Accept:application/json' \
48-
-H 'Content-Type:application/json' \
49-
'https://{hostname}/magpie/signin'
50-
51-
.. note::
52-
Due to implementation details with :mod:`requests`, this must be passed directly to the ``cookies``
53-
attribute rather than ``auth`` as in the case for other authorization handlers.
54-
"""
55-
56-
57-
def add_request_options(parser: argparse.ArgumentParser) -> None:
58-
"""
59-
Adds arguments to a parser to allow update of a request session definition used across a populator procedure.
60-
"""
61-
parser.add_argument(
62-
"--no-verify",
63-
"--no-ssl",
64-
"--no-ssl-verify",
65-
dest="verify",
66-
action="store_false",
67-
help="Disable SSL verification (not recommended unless for development/test servers).",
68-
)
69-
parser.add_argument("--cert", type=argparse.FileType(), required=False, help="Path to a certificate file to use.")
70-
parser.add_argument(
71-
"--auth-handler",
72-
choices=["basic", "digest", "bearer", "proxy", "cookie"],
73-
required=False,
74-
help="Authentication strategy to employ for the requests session.",
75-
)
76-
parser.add_argument(
77-
"--auth-identity",
78-
required=False,
79-
help="Bearer token, cookie-jar file or proxy/digest/basic username:password for selected authorization handler.",
80-
)
81-
8215

83-
def apply_request_options(session: Session, namespace: argparse.Namespace) -> None:
84-
"""
85-
Applies the relevant request session options from parsed input arguments.
86-
"""
87-
session.verify = namespace.verify
88-
session.cert = namespace.cert
89-
if namespace.auth_handler in ["basic", "digest", "proxy"]:
90-
usr, pwd = namespace.auth_identity.split(":", 1)
91-
if namespace.auth_handler == "basic":
92-
session.auth = HTTPBasicAuth(usr, pwd)
93-
elif namespace.auth_handler == "digest":
94-
session.auth = HTTPDigestAuth(usr, pwd)
95-
else:
96-
session.auth = HTTPProxyAuth(usr, pwd)
97-
elif namespace.auth_handler == "bearer":
98-
session.auth = HTTPBearerTokenAuth(namespace.auth_identity)
99-
elif namespace.auth_handler == "cookie":
100-
session.cookies = HTTPCookieAuth(namespace.auth_identity)
101-
session.cookies.load(namespace.auth_identity)
102-
103-
104-
def make_main_parser() -> argparse.ArgumentParser:
105-
parser = argparse.ArgumentParser(prog="stac-populator", description="STACpopulator operations.")
16+
def add_parser_args(parser: argparse.ArgumentParser) -> dict[str, Callable]:
10617
parser.add_argument(
10718
"--version",
10819
"-V",
10920
action="version",
11021
version=f"%(prog)s {__version__}",
11122
help="prints the version of the library and exits",
11223
)
113-
commands = parser.add_subparsers(title="command", dest="command", description="STAC populator command to execute.")
114-
115-
run_cmd_parser = make_run_command_parser(parser.prog)
116-
commands.add_parser(
117-
"run",
118-
prog=f"{parser.prog} {run_cmd_parser.prog}",
119-
parents=[run_cmd_parser],
120-
formatter_class=run_cmd_parser.formatter_class,
121-
usage=run_cmd_parser.usage,
122-
add_help=False,
123-
help=run_cmd_parser.description,
124-
description=run_cmd_parser.description,
24+
parser.add_argument("--debug", action="store_const", const=logging.DEBUG, help="set logger level to debug")
25+
parser.add_argument(
26+
"--log_file", help="file to write log output to. By default logs will be written to the current directory."
12527
)
28+
commands_subparser = parser.add_subparsers(
29+
title="command", dest="command", description="STAC populator command to execute.", required=True
30+
)
31+
run_parser = commands_subparser.add_parser("run", description="Run a STACpopulator implementation")
32+
populators_subparser = run_parser.add_subparsers(
33+
title="populator", dest="populator", description="Implementation to run."
34+
)
35+
for implementation_module_name, module in implementation_modules().items():
36+
implementation_parser = populators_subparser.add_parser(implementation_module_name)
37+
module.add_parser_args(implementation_parser)
12638

127-
# add more commands as needed...
128-
parser.add_argument("--debug", action="store_true", help="Set logger level to debug")
129-
130-
return parser
131-
132-
133-
def make_run_command_parser(parent) -> argparse.ArgumentParser:
134-
"""
135-
Groups all sub-populator CLI listed in :py:mod:`STACpopulator.implementations` as a common ``stac-populator`` CLI.
136-
137-
Dispatches the provided arguments to the appropriate sub-populator CLI as requested. Each sub-populator CLI must
138-
implement functions ``make_parser`` and ``main`` to generate the arguments and dispatch them to the corresponding
139-
caller. The ``main`` function should accept a sequence of string arguments, which can be passed to the parser
140-
obtained from ``make_parser``.
14139

142-
An optional ``runner`` can also be defined in each populator module. If provided, the namespace arguments that have
143-
already been parsed to resolve the populator to run will be used directly, avoiding parsing arguments twice.
144-
"""
145-
parser = argparse.ArgumentParser(prog="run", description="STACpopulator implementation runner.")
146-
subparsers = parser.add_subparsers(title="populator", dest="populator", description="Implementation to run.")
147-
populators_impl = "implementations"
148-
populators_dir = os.path.join(os.path.dirname(__file__), populators_impl)
149-
populator_mods = glob.glob(f"{populators_dir}/**/[!__init__]*.py", recursive=True) # potential candidate scripts
150-
for populator_path in sorted(populator_mods):
151-
populator_script = populator_path.split(populators_dir, 1)[1][1:]
152-
populator_py_mod = os.path.splitext(populator_script)[0].replace(os.sep, ".")
153-
populator_name, pop_mod_file = populator_py_mod.rsplit(".", 1)
154-
populator_root = f"STACpopulator.{populators_impl}.{populator_name}"
155-
pop_mod_file_loc = f"{populator_root}.{pop_mod_file}"
40+
@functools.cache
41+
def implementation_modules() -> dict[str, ModuleType]:
42+
modules = {}
43+
for implementation_module_name in implementations.__all__:
15644
try:
157-
populator_module = importlib.import_module(pop_mod_file_loc, populator_root)
158-
except STACPopulatorError as e:
159-
warnings.warn(f"Could not load extension {populator_name} because of error {e}")
160-
continue
161-
parser_maker: Callable[[], argparse.ArgumentParser] = getattr(populator_module, "make_parser", None)
162-
populator_runner = getattr(populator_module, "runner", None) # optional, call main directly if not available
163-
populator_caller = getattr(populator_module, "main", None)
164-
if callable(parser_maker) and callable(populator_caller):
165-
populator_parser = parser_maker()
166-
populator_prog = f"{parent} {parser.prog} {populator_name}"
167-
subparsers.add_parser(
168-
populator_name,
169-
prog=populator_prog,
170-
parents=[populator_parser],
171-
formatter_class=populator_parser.formatter_class,
172-
add_help=False, # add help disabled otherwise conflicts with this main populator help
173-
help=populator_parser.description,
174-
description=populator_parser.description,
175-
usage=populator_parser.usage,
45+
modules[implementation_module_name] = importlib.import_module(
46+
f".{implementation_module_name}", implementations.__package__
17647
)
177-
POPULATORS[populator_name] = {
178-
"name": populator_name,
179-
"caller": populator_caller,
180-
"parser": populator_parser,
181-
"runner": populator_runner,
182-
}
183-
return parser
48+
except STACPopulatorError as e:
49+
warnings.warn(f"Could not load extension {implementation_module_name} because of error {e}")
50+
return modules
18451

18552

186-
def main(*args: str) -> Optional[int]:
187-
parser = make_main_parser()
188-
args = args or sys.argv[1:] # same as was parse args does, but we must provide them to subparser
189-
ns = parser.parse_args(args=args) # if 'command' or 'populator' unknown, auto prints the help message with exit(2)
190-
params = vars(ns)
191-
populator_cmd = params.pop("command")
192-
if not populator_cmd:
193-
parser.print_help()
194-
return 0
195-
result = None
196-
if populator_cmd == "run":
197-
populator_name = params.pop("populator")
53+
def run(ns: argparse.Namespace) -> int:
54+
if ns.command == "run":
55+
logfile_name = ns.log_file or f"{ns.populator}_log_{datetime.now(timezone.utc).isoformat() + 'Z'}.jsonl"
56+
setup_logging(logfile_name, ns.debug or logging.INFO)
57+
return implementation_modules()[ns.populator].runner(ns) or 0
19858

199-
# Setup the application logger:
200-
fname = f"{populator_name}_log_{datetime.utcnow().isoformat() + 'Z'}.jsonl"
201-
log_level = logging.DEBUG if ns.debug else logging.INFO
202-
setup_logging(fname, log_level)
20359

204-
if not populator_name:
205-
parser.print_help()
206-
return 0
207-
populator_args = args[2:] # skip [command] [populator]
208-
populator_caller = POPULATORS[populator_name]["caller"]
209-
populator_runner = POPULATORS[populator_name]["runner"]
210-
if populator_runner:
211-
result = populator_runner(ns)
212-
else:
213-
result = populator_caller(*populator_args)
214-
return 0 if result is None else result
60+
def main(*args: str) -> int:
61+
parser = argparse.ArgumentParser()
62+
add_parser_args(parser)
63+
ns = parser.parse_args(args or None)
64+
return run(ns)
21565

21666

21767
if __name__ == "__main__":
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .add_CMIP6 import add_parser_args, runner
2+
3+
__all__ = ["add_parser_args", "runner"]

STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
import json
33
import logging
44
import os
5-
from typing import Any, MutableMapping, NoReturn, Optional, Union
5+
import sys
6+
from typing import Any, MutableMapping, Optional, Union
67

78
from pystac import STACValidationError
89
from pystac.extensions.datacube import DatacubeExtension
910
from requests.sessions import Session
1011

11-
from STACpopulator.cli import add_request_options, apply_request_options
12+
from STACpopulator.requests import add_request_options, apply_request_options
1213
from STACpopulator.extensions.cmip6 import CMIP6Helper, CMIP6Properties
1314
from STACpopulator.extensions.datacube import DataCubeHelper
1415
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
@@ -78,17 +79,17 @@ def create_stac_item(
7879

7980
try:
8081
item.validate()
81-
except STACValidationError:
82+
except STACValidationError as e:
8283
raise Exception("Failed to validate STAC item") from e
8384

8485
# print(json.dumps(item.to_dict()))
8586
return json.loads(json.dumps(item.to_dict()))
8687

8788

88-
def make_parser() -> argparse.ArgumentParser:
89-
parser = argparse.ArgumentParser(description="CMIP6 STAC populator from a THREDDS catalog or NCML XML.")
90-
parser.add_argument("stac_host", type=str, help="STAC API address")
91-
parser.add_argument("href", type=str, help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
89+
def add_parser_args(parser: argparse.ArgumentParser) -> None:
90+
parser.description="CMIP6 STAC populator from a THREDDS catalog or NCML XML."
91+
parser.add_argument("stac_host", help="STAC API URL")
92+
parser.add_argument("href", help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
9293
parser.add_argument("--update", action="store_true", help="Update collection and its items")
9394
parser.add_argument(
9495
"--mode",
@@ -105,10 +106,9 @@ def make_parser() -> argparse.ArgumentParser:
105106
),
106107
)
107108
add_request_options(parser)
108-
return parser
109109

110110

111-
def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
111+
def runner(ns: argparse.Namespace) -> int:
112112
LOGGER.info(f"Arguments to call: {vars(ns)}")
113113

114114
with Session() as session:
@@ -123,13 +123,14 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
123123
ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config, log_debug=ns.debug
124124
)
125125
c.ingest()
126+
return 0
126127

127128

128-
def main(*args: str) -> Optional[int]:
129-
parser = make_parser()
129+
def main(*args: str) -> int:
130+
parser = argparse.ArgumentParser()
130131
ns = parser.parse_args(args or None)
131132
return runner(ns)
132133

133134

134135
if __name__ == "__main__":
135-
main()
136+
sys.exit(main())
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .crawl_directory import add_parser_args, runner
2+
3+
__all__ = ["add_parser_args", "runner"]

0 commit comments

Comments
 (0)