Merge branch 'dev' into cramjam1

capitalone · Mar 7, 2024 · 20946e9 · 20946e9
2 parents 86cfe10 + f8b3e5d
commit 20946e9
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 3 deletions.
diff --git a/dataprofiler/profilers/json_encoder.py b/dataprofiler/profilers/json_encoder.py
@@ -1,6 +1,7 @@
 """Contains ProfilerEncoder class."""
 
 import json
+from datetime import datetime
 
 import numpy as np
 import pandas as pd
@@ -52,7 +53,7 @@ def default(self, to_serialize):
             return int(to_serialize)
         elif isinstance(to_serialize, np.ndarray):
             return to_serialize.tolist()
-        elif isinstance(to_serialize, pd.Timestamp):
+        elif isinstance(to_serialize, (pd.Timestamp, datetime)):
             return to_serialize.isoformat()
         elif isinstance(to_serialize, BaseDataLabeler):
             # TODO: This does not allow the user to serialize a model if it is loaded

diff --git a/dataprofiler/tests/labelers/test_labeler_utils.py b/dataprofiler/tests/labelers/test_labeler_utils.py
@@ -235,8 +235,9 @@ def test_verbose(self):
         self.assertIn("f1-score ", log_output)
         self.assertIn("F1 Score: ", log_output)
 
+    @mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
     @mock.patch("pandas.DataFrame")
-    def test_save_conf_mat(self, mock_dataframe):
+    def test_save_conf_mat(self, mock_dataframe, mock_report):
 
         # ideally mock out the actual contents written to file, but
         # would be difficult to get this completely worked out.

diff --git a/dataprofiler/tests/profilers/test_datetime_column_profile.py b/dataprofiler/tests/profilers/test_datetime_column_profile.py
@@ -501,6 +501,47 @@ def test_json_encode_after_update(self):
 
         self.assertEqual(serialized, expected)
 
+    def test_json_encode_datetime(self):  # checks ProfileEncoder JSON output for a DateTimeColumn after one update
+        data = ["1209214"]  # single sample; the expected min/max below are this same string
+        df = pd.Series(data)
+        profiler = DateTimeColumn("0")
+
+        expected_date_formats = [
+            "%Y-%m-%d %H:%M:%S",
+            "%b %d, %Y",
+            "%m/%d/%y %H:%M",
+        ]
+        with patch.object(  # stub _combine_unique_sets so it returns the fixed format list above
+            profiler, "_combine_unique_sets", return_value=expected_date_formats
+        ):
+            with patch("time.time", return_value=0.0):  # freeze time.time; expected "times" entry is 0.0
+                profiler.update(df)
+
+        serialized = json.dumps(profiler, cls=ProfileEncoder)
+
+        expected = json.dumps(  # expected side is dumped with the default encoder from plain Python values
+            {
+                "class": "DateTimeColumn",
+                "data": {
+                    "name": "0",
+                    "col_index": np.nan,
+                    "sample_size": 1,
+                    "metadata": {},
+                    "times": defaultdict(float, {"datetime": 0.0}),
+                    "thread_safe": True,
+                    "match_count": 1,
+                    "date_formats": expected_date_formats,
+                    "min": "1209214",
+                    "max": "1209214",
+                    "_dt_obj_min": "9214-01-20T00:00:00",  # NOTE(review): presumably the isoformat() of the parsed datetime - confirm
+                    "_dt_obj_max": "9214-01-20T00:00:00",  # same value as min: only one sample was profiled
+                    "_DateTimeColumn__calculations": dict(),
+                },
+            }
+        )
+
+        self.assertEqual(serialized, expected)  # exact comparison of the two JSON strings
+
+
     def test_json_decode(self):
         fake_profile_name = None
         expected_profile = DateTimeColumn(fake_profile_name)

diff --git a/dataprofiler/version.py b/dataprofiler/version.py
@@ -2,7 +2,7 @@
 
 MAJOR = 0
 MINOR = 10
-MICRO = 8
+MICRO = 9
 POST = None  # otherwise None
 
 VERSION = "%d.%d.%d" % (MAJOR, MINOR, MICRO)