Skip to content

Commit

Permalink
Rebase post-deployment (#1111)
Browse files: browse the repository at this point in the history
* Staging into `main` from `dev` (#1106)

* add downloads tile (#1085)

* Hot fix json bug (#1105)

* update

* update

* update version (#1107)

* add polars to requirements (#1087)

* add polars to requirements

* Update requirements.txt

Co-authored-by: Taylor Turner <[email protected]>

---------

Co-authored-by: Taylor Turner <[email protected]>

* update precommit env (#1088)

* Numerical column stats update (#1089)

* partial update to numerical_column_stats

* update with full polars replacement

* reduce redundant if statement

* fix histogram warning

* remove unneeded casting

* Profiler utils update (#1092)

* update profiler utils

* finish updates

---------

Co-authored-by: Andrew <[email protected]>
  • Loading branch information
taylorfturner and atl1502 committed Mar 7, 2024
1 parent 19c7ebe commit 2869c13
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
3 changes: 2 additions & 1 deletion dataprofiler/profilers/json_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Contains ProfilerEncoder class."""

import json
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -52,7 +53,7 @@ def default(self, to_serialize):
return int(to_serialize)
elif isinstance(to_serialize, np.ndarray):
return to_serialize.tolist()
elif isinstance(to_serialize, pd.Timestamp):
elif isinstance(to_serialize, (pd.Timestamp, datetime)):
return to_serialize.isoformat()
elif isinstance(to_serialize, BaseDataLabeler):
# TODO: This does not allow the user to serialize a model if it is loaded
Expand Down
3 changes: 2 additions & 1 deletion dataprofiler/tests/labelers/test_labeler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,9 @@ def test_verbose(self):
self.assertIn("f1-score ", log_output)
self.assertIn("F1 Score: ", log_output)

@mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
@mock.patch("pandas.DataFrame")
def test_save_conf_mat(self, mock_dataframe):
def test_save_conf_mat(self, mock_dataframe, mock_report):

# ideally mock out the actual contents written to file, but
# would be difficult to get this completely worked out.
Expand Down
41 changes: 41 additions & 0 deletions dataprofiler/tests/profilers/test_datetime_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,47 @@ def test_json_encode_after_update(self):

self.assertEqual(serialized, expected)

def test_json_encode_datetime(self):
    """Verify the JSON encoding of a DateTimeColumn after one update.

    A single-value series is fed to the profiler while
    ``_combine_unique_sets`` is patched to return a fixed list of date
    formats and ``time.time`` is patched to return ``0.0`` (presumably so
    the recorded ``times`` entry is deterministic). The profiler is then
    serialized with ``ProfileEncoder`` and compared against a hand-built
    payload in which ``_dt_obj_min`` / ``_dt_obj_max`` are ISO-8601 strings.
    """
    column = DateTimeColumn("0")
    sample = pd.Series(["1209214"])

    expected_date_formats = [
        "%Y-%m-%d %H:%M:%S",
        "%b %d, %Y",
        "%m/%d/%y %H:%M",
    ]

    # Both patches are active only while the profiler consumes the sample.
    formats_patch = patch.object(
        column, "_combine_unique_sets", return_value=expected_date_formats
    )
    clock_patch = patch("time.time", return_value=0.0)
    with formats_patch, clock_patch:
        column.update(sample)

    # Key order matters: the comparison below is on the serialized strings.
    expected_data = {
        "name": "0",
        "col_index": np.nan,
        "sample_size": 1,
        "metadata": {},
        "times": defaultdict(float, {"datetime": 0.0}),
        "thread_safe": True,
        "match_count": 1,
        "date_formats": expected_date_formats,
        "min": "1209214",
        "max": "1209214",
        "_dt_obj_min": "9214-01-20T00:00:00",
        "_dt_obj_max": "9214-01-20T00:00:00",
        "_DateTimeColumn__calculations": {},
    }
    expected = json.dumps({"class": "DateTimeColumn", "data": expected_data})

    serialized = json.dumps(column, cls=ProfileEncoder)

    self.assertEqual(serialized, expected)

def test_json_decode(self):
fake_profile_name = None
expected_profile = DateTimeColumn(fake_profile_name)
Expand Down
2 changes: 1 addition & 1 deletion dataprofiler/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

MAJOR = 0
MINOR = 10
MICRO = 8
MICRO = 9
POST = None # otherwise None

VERSION = "%d.%d.%d" % (MAJOR, MINOR, MICRO)
Expand Down

0 comments on commit 2869c13

Please sign in to comment.