Skip to content

Commit

Permalink
Merge branch 'dev' into cramjam1
Browse files (browse the repository at this point in the history)
  • Loading branch information
gliptak authored Mar 7, 2024
2 parents 86cfe10 + f8b3e5d commit 20946e9
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
3 changes: 2 additions & 1 deletion dataprofiler/profilers/json_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Contains ProfilerEncoder class."""

import json
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -52,7 +53,7 @@ def default(self, to_serialize):
return int(to_serialize)
elif isinstance(to_serialize, np.ndarray):
return to_serialize.tolist()
elif isinstance(to_serialize, pd.Timestamp):
elif isinstance(to_serialize, (pd.Timestamp, datetime)):
return to_serialize.isoformat()
elif isinstance(to_serialize, BaseDataLabeler):
# TODO: This does not allow the user to serialize a model if it is loaded
Expand Down
3 changes: 2 additions & 1 deletion dataprofiler/tests/labelers/test_labeler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,9 @@ def test_verbose(self):
self.assertIn("f1-score ", log_output)
self.assertIn("F1 Score: ", log_output)

@mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
@mock.patch("pandas.DataFrame")
def test_save_conf_mat(self, mock_dataframe):
def test_save_conf_mat(self, mock_dataframe, mock_report):

# ideally mock out the actual contents written to file, but
# would be difficult to get this completely worked out.
Expand Down
41 changes: 41 additions & 0 deletions dataprofiler/tests/profilers/test_datetime_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,47 @@ def test_json_encode_after_update(self):

self.assertEqual(serialized, expected)

def test_json_encode_datetime(self):
    """Check the JSON encoding of a DateTimeColumn after a single update.

    The profile is updated with one string sample while
    ``_combine_unique_sets`` is stubbed to return a fixed list of formats
    and ``time.time`` is pinned to 0.0. The expected payload lists
    ``_dt_obj_min`` / ``_dt_obj_max`` as ISO-8601 strings, while ``min`` and
    ``max`` stay as the raw input string.
    """
    series = pd.Series(["1209214"])
    profiler = DateTimeColumn("0")

    stub_formats = [
        "%Y-%m-%d %H:%M:%S",
        "%b %d, %Y",
        "%m/%d/%y %H:%M",
    ]

    # Build both patchers first, then enter them in the same order
    # (format stub outermost, clock stub innermost).
    formats_patch = patch.object(
        profiler, "_combine_unique_sets", return_value=stub_formats
    )
    clock_patch = patch("time.time", return_value=0.0)
    with formats_patch, clock_patch:
        profiler.update(series)

    actual_json = json.dumps(profiler, cls=ProfileEncoder)

    # Key order matters: the comparison below is on the serialized strings.
    expected_payload = {
        "class": "DateTimeColumn",
        "data": {
            "name": "0",
            "col_index": np.nan,
            "sample_size": 1,
            "metadata": {},
            "times": defaultdict(float, {"datetime": 0.0}),
            "thread_safe": True,
            "match_count": 1,
            "date_formats": stub_formats,
            "min": "1209214",
            "max": "1209214",
            "_dt_obj_min": "9214-01-20T00:00:00",
            "_dt_obj_max": "9214-01-20T00:00:00",
            "_DateTimeColumn__calculations": {},
        },
    }
    expected_json = json.dumps(expected_payload)

    self.assertEqual(actual_json, expected_json)

def test_json_decode(self):
fake_profile_name = None
expected_profile = DateTimeColumn(fake_profile_name)
Expand Down
2 changes: 1 addition & 1 deletion dataprofiler/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

MAJOR = 0
MINOR = 10
MICRO = 8
MICRO = 9
POST = None # otherwise None

VERSION = "%d.%d.%d" % (MAJOR, MINOR, MICRO)
Expand Down

0 comments on commit 20946e9

Please sign in to comment.