diff --git a/.gitignore b/.gitignore index 3abdcc3..ab39d9e 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,7 @@ docs/_build *~ .*.sw[po] .build +.vscode .ve .env .cache diff --git a/src/dsdk/model.py b/src/dsdk/model.py index c2f519a..33641d9 100644 --- a/src/dsdk/model.py +++ b/src/dsdk/model.py @@ -4,16 +4,15 @@ from __future__ import annotations from abc import ABC -from logging import NullHandler, getLogger +from logging import INFO from typing import TYPE_CHECKING, Optional, cast from configargparse import ArgParser as ArgumentParser from .service import Model, Service -from .utils import load_pickle_file +from .utils import get_logger, load_pickle_file -logger = getLogger(__name__) -logger.addHandler(NullHandler()) +logger = get_logger(__name__, INFO) if TYPE_CHECKING: diff --git a/src/dsdk/mongo.py b/src/dsdk/mongo.py index bda9555..52c1a52 100644 --- a/src/dsdk/mongo.py +++ b/src/dsdk/mongo.py @@ -5,7 +5,7 @@ from abc import ABC from contextlib import contextmanager -from logging import NullHandler, getLogger +from logging import INFO from typing import ( TYPE_CHECKING, Any, @@ -19,7 +19,7 @@ from configargparse import ArgParser as ArgumentParser from .service import Batch, Model, Service -from .utils import retry +from .utils import get_logger, retry try: # Since not everyone will use mongo @@ -34,9 +34,7 @@ Database = None AutoReconnect = None - -logger = getLogger(__name__) -logger.addHandler(NullHandler()) +logger = get_logger(__name__, INFO) if TYPE_CHECKING: @@ -102,13 +100,22 @@ def open_batch( with super().open_batch(key) as batch: doc = batch.as_insert_doc(model) # <- model dependency with self.open_mongo() as database: - insert_one(database.batches, doc) - + key = insert_one(database.batches, doc) + logger.info( + f'"action": "insert", ' + f'"database": "{database.name}", ' + f'"collection": "{database.collection.name}"' + ) yield batch key, doc = batch.as_update_doc() with self.open_mongo() as database: update_one(database.batches, key, 
doc) + logger.info( + f'"action": "update", ' + f'"database": "{database.name}", ' + f'"collection": "{database.collection.name}"' + ) def store_evidence(self, batch: Batch, *args, **kwargs) -> None: """Store Evidence.""" @@ -123,10 +130,21 @@ def store_evidence(self, batch: Batch, *args, **kwargs) -> None: docs = columns.to_dict(orient="records") with self.open_mongo() as database: result = insert_many(database[key], docs) - assert columns.shape[0] == len( - result.inserted_ids - ) # TODO: Better exception + assert columns.shape[0] == len(result.inserted_ids), ( + '"action": "insert_many", "database": "%s", "collection": \ + "%s", "message": "columns.shape[0] != \ + len(result.inserted_ids)"' + % (database.name, database.collection.name) + ) + + # TODO: Better exception df.drop(columns=["batch_id"], inplace=True) + logger.info( + f'"action": "insert_many", ' + f'"database": "{database.name}", ' + f'"collection": "{database.collection.name}", ' + f'"count": {len(df.index)}' + ) @contextmanager diff --git a/src/dsdk/mssql.py b/src/dsdk/mssql.py index 86d03b1..02bb709 100644 --- a/src/dsdk/mssql.py +++ b/src/dsdk/mssql.py @@ -5,12 +5,15 @@ from abc import ABC from contextlib import contextmanager -from logging import NullHandler, getLogger +from logging import INFO from typing import TYPE_CHECKING, Generator, Optional, cast from configargparse import ArgParser as ArgumentParser from .service import Service +from .utils import get_logger + +logger = get_logger(__name__, INFO) try: # Since not everyone will use mssql @@ -19,10 +22,6 @@ create_engine = None -logger = getLogger(__name__) -logger.addHandler(NullHandler()) - - if TYPE_CHECKING: BaseMixin = Service else: @@ -67,3 +66,4 @@ def open_mssql(self) -> Generator: """Open mssql.""" with self._mssql.connect() as con: yield con + logger.info('"action": "connect"') diff --git a/src/dsdk/service.py b/src/dsdk/service.py index 72ba818..01a8907 100644 --- a/src/dsdk/service.py +++ b/src/dsdk/service.py @@ -6,15 +6,16 @@
from collections import OrderedDict from contextlib import contextmanager from datetime import datetime, timezone -from logging import NullHandler, getLogger +from logging import INFO from sys import argv as sys_argv from typing import Any, Dict, Generator, Optional, Sequence, Tuple, cast from configargparse import ArgParser as ArgumentParser from configargparse import Namespace -logger = getLogger(__name__) -logger.addHandler(NullHandler()) +from .utils import get_logger + +logger = get_logger(__name__, INFO) class Interval: # pylint: disable=too-few-public-methods @@ -123,6 +124,12 @@ def __call__(self) -> Batch: with self.open_batch() as batch: for task in self.pipeline: task(batch, self) + logger.info( + '"pipeline": "%s"', + ", ".join( + map(lambda s: str(s).split(" ")[0][1:], self.pipeline) + ), + ) return batch def check(self) -> None: @@ -154,6 +161,7 @@ def open_batch( # pylint: disable=no-self-use,unused-argument record = Interval(on=datetime.now(timezone.utc), end=None) yield Batch(key, record) record.end = datetime.now(timezone.utc) + logger.info(f'"action": "open_batch", ' f'"key": "{key}"') def store_evidence( # pylint: disable=no-self-use,unused-argument self, batch: Batch, *args, **kwargs @@ -162,6 +170,11 @@ def store_evidence( # pylint: disable=no-self-use,unused-argument while args: key, df, *args = args # type: ignore batch.evidence[key] = df + logger.info( + f'"action": "store_evidence", ' + f'"key": "{key}", ' + f'"count": {len(batch.evidence)}' + ) class Task: # pylint: disable=too-few-public-methods diff --git a/src/dsdk/utils.py b/src/dsdk/utils.py index 50ff444..e59472a 100644 --- a/src/dsdk/utils.py +++ b/src/dsdk/utils.py @@ -6,17 +6,54 @@ from functools import wraps from json import dump as json_dump from json import load as json_load -from logging import NullHandler, getLogger +from logging import INFO, Formatter, LoggerAdapter, StreamHandler, getLogger from pickle import dump as pickle_dump from pickle import load as pickle_load +from 
sys import stdout from time import sleep as default_sleep from typing import Any, Callable, Dict, List, Optional, Sequence from pandas import DataFrame from pandas import concat as pd_concat -logger = getLogger(__name__) -logger.addHandler(NullHandler()) + +def get_logger(name, level=INFO): + """Get logger. + + Actual handlers are typically set by the application. + Libraries (like DSDK) typically use a NullHandler, so that the application + logger configuration is used. + + Use this function to hide the logger implementation/config for now. + Show that the conventions demonstrated here work for the applications. + """ + # TODO Pass calling function from parent application + defaults = {"callingfunc": ""} + formatter_string = " - ".join( + ( + "%(asctime)-15s", + "%(name)s", + "%(levelname)s", + ", ".join( + ( + '{"callingfunc": "%(callingfunc)s"', + '"module": "%(module)s"', + '"function": "%(funcName)s"', + "%(message)s}", + ) + ), + ) + ) + handler = StreamHandler(stdout) + handler.setLevel(level) + handler.setFormatter(Formatter(formatter_string)) + result = getLogger(name) + result.propagate = False + result.addHandler(handler) + return LoggerAdapter(result, defaults) + + +logger = get_logger(__name__) def chunks(sequence: Sequence[Any], n: int):