diff --git a/README.md b/README.md index 3ba4ee51b..1df9a2ea3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/DataProfiler) ![GitHub](https://img.shields.io/github/license/CapitalOne/DataProfiler) ![GitHub last commit](https://img.shields.io/github/last-commit/CapitalOne/DataProfiler) +[![Downloads](https://static.pepy.tech/badge/dataprofiler)](https://pepy.tech/project/dataprofiler)

diff --git a/dataprofiler/profilers/json_encoder.py b/dataprofiler/profilers/json_encoder.py index 4e12eb649..cf0227f64 100644 --- a/dataprofiler/profilers/json_encoder.py +++ b/dataprofiler/profilers/json_encoder.py @@ -1,6 +1,7 @@ """Contains ProfilerEncoder class.""" import json +from datetime import datetime import numpy as np import pandas as pd @@ -52,7 +53,7 @@ def default(self, to_serialize): return int(to_serialize) elif isinstance(to_serialize, np.ndarray): return to_serialize.tolist() - elif isinstance(to_serialize, pd.Timestamp): + elif isinstance(to_serialize, (pd.Timestamp, datetime)): return to_serialize.isoformat() elif isinstance(to_serialize, BaseDataLabeler): # TODO: This does not allow the user to serialize a model if it is loaded diff --git a/dataprofiler/tests/labelers/test_labeler_utils.py b/dataprofiler/tests/labelers/test_labeler_utils.py index dcfb75020..f59a43e3f 100644 --- a/dataprofiler/tests/labelers/test_labeler_utils.py +++ b/dataprofiler/tests/labelers/test_labeler_utils.py @@ -235,8 +235,9 @@ def test_verbose(self): self.assertIn("f1-score ", log_output) self.assertIn("F1 Score: ", log_output) + @mock.patch("dataprofiler.labelers.labeler_utils.classification_report") @mock.patch("pandas.DataFrame") - def test_save_conf_mat(self, mock_dataframe): + def test_save_conf_mat(self, mock_dataframe, mock_report): # ideally mock out the actual contents written to file, but # would be difficult to get this completely worked out. 
diff --git a/dataprofiler/tests/profilers/test_datetime_column_profile.py b/dataprofiler/tests/profilers/test_datetime_column_profile.py
index c00ac8e0d..dca3a8773 100644
--- a/dataprofiler/tests/profilers/test_datetime_column_profile.py
+++ b/dataprofiler/tests/profilers/test_datetime_column_profile.py
@@ -501,6 +501,49 @@ def test_json_encode_after_update(self):
 
         self.assertEqual(serialized, expected)
 
+    def test_json_encode_datetime(self):
+        """Check ProfileEncoder output for a DateTimeColumn after an update.
+
+        `_combine_unique_sets` and `time.time` are patched to fixed return
+        values before the column is updated with a single-value series.
+        """
+        raw_value = "1209214"
+        iso_value = "9214-01-20T00:00:00"
+        expected_date_formats = [
+            "%Y-%m-%d %H:%M:%S",
+            "%b %d, %Y",
+            "%m/%d/%y %H:%M",
+        ]
+
+        column = DateTimeColumn("0")
+        formats_patch = patch.object(
+            column, "_combine_unique_sets", return_value=expected_date_formats
+        )
+        clock_patch = patch("time.time", return_value=0.0)
+        with formats_patch, clock_patch:
+            column.update(pd.Series([raw_value]))
+
+        serialized = json.dumps(column, cls=ProfileEncoder)
+
+        expected_data = {
+            "name": "0",
+            "col_index": np.nan,
+            "sample_size": 1,
+            "metadata": {},
+            "times": defaultdict(float, {"datetime": 0.0}),
+            "thread_safe": True,
+            "match_count": 1,
+            "date_formats": expected_date_formats,
+            "min": raw_value,
+            "max": raw_value,
+            "_dt_obj_min": iso_value,
+            "_dt_obj_max": iso_value,
+            "_DateTimeColumn__calculations": {},
+        }
+        expected = json.dumps({"class": "DateTimeColumn", "data": expected_data})
+
+        self.assertEqual(serialized, expected)
+
     def test_json_decode(self):
         fake_profile_name = None
         expected_profile = DateTimeColumn(fake_profile_name)