diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py index bc426a44..29417584 100644 --- a/dataprofiler/profilers/float_column_profile.py +++ b/dataprofiler/profilers/float_column_profile.py @@ -305,7 +305,10 @@ def _get_float_precision( # length of sampled cells after all punctuation removed len_per_float = ( - df_series_clean.sample(sample_size).replace(to_replace=r, value="").map(len) + df_series_clean.sample(sample_size) + .astype(object) + .replace(to_replace=r, value="") + .map(len) ).astype(float) # Determine statistics precision diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py index d79fdd64..06441dcb 100644 --- a/dataprofiler/tests/profilers/test_float_column_profile.py +++ b/dataprofiler/tests/profilers/test_float_column_profile.py @@ -211,6 +211,13 @@ def test_profiled_precision(self): msg=f"Errored for: {sample[0]}", ) + # Validate categorical series with trailing zeros supported + categorical_series = pd.Series( + [202209, 202210, 202211], dtype="category" + ).apply(str) + float_profiler = FloatColumn("Name") + float_profiler.update(categorical_series) + def test_profiled_min(self): # test with multiple values data = np.linspace(-5, 5, 11)