Skip to content

Commit

Permalink
Unalikeability Revisions (#341)
Browse files — browse the repository at this point in the history
* revised unalikeability functionality

* added test cases for revised unalikeability functionality

* updated version to 0.6.1
  • Loading branch information
az85252 committed Jul 16, 2021
1 parent 3aee648 commit 1303abe
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 1 deletion.
2 changes: 2 additions & 0 deletions dataprofiler/profilers/categorical_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ def unalikeability(self):

if self.sample_size == 0:
return None
elif self.sample_size == 1:
return 0
unalike_sum = 0
for category in self._categories:
unalike_sum += (self.sample_size - self._categories[category]) * \
Expand Down
10 changes: 10 additions & 0 deletions dataprofiler/tests/profilers/test_categorical_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,16 @@ def test_unalikeability(self):
profile.update(df_categorical)
self.assertEqual(profile.unalikeability, 2*(10 + 15 + 6)/90)

df_categorical = pd.Series(["a"])
profile = CategoricalColumn(df_categorical.name)
profile.update(df_categorical)
self.assertEqual(0, profile.unalikeability)

df_categorical = pd.Series([])
profile = CategoricalColumn(df_categorical.name)
profile.update(df_categorical)
self.assertEqual(None, profile.unalikeability)

def test_top_k_categories_change(self):
# Test if top_k_categories is None
options = CategoricalOptions()
Expand Down
2 changes: 1 addition & 1 deletion dataprofiler/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

MAJOR = 0
MINOR = 6
MICRO = 0
MICRO = 1

VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)

Expand Down

0 comments on commit 1303abe

Please sign in to comment.