Skip to content

Commit

Permalink
Add test for numpy removing trailing nulls from strings and Increment Version 0.7.4 (#429)
Browse files Browse the repository at this point in the history

* fix: add null string ending test

* fix: increment version - 0.7.4
  • Loading branch information
JGSweets authored Nov 19, 2021
1 parent a2caf66 commit 60da2c4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
38 changes: 38 additions & 0 deletions dataprofiler/tests/labelers/test_data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,44 @@ def test_process(self):
self.assertTrue((expected[0] == output[0]).all())
self.assertTrue((expected[1] == output[1]).all())

def test_process_ending_null_string(self):
    """Check that chunks ending in NUL characters keep their full length.

    A single sentence containing embedded and trailing NUL characters is
    processed without labels using ``max_length=5`` and ``batch_size=2``.
    Every produced chunk must still be 5 characters long (i.e. trailing
    NULs were not stripped) and each batch must equal the expected text.
    """
    preprocessor = CharPreprocessor(
        max_length=5, default_label='UNKNOWN', pad_label='PAD',
        flatten_split=1.0, is_separate_at_max_len=True,
    )

    label_mapping = {
        'PAD': 0,
        'UNKNOWN': 1,
        "TEST1": 2,
        "TEST2": 3,
        "TEST3": 4,
    }

    # a single 40-character sentence with embedded and trailing NULs
    test_sentences = np.array(['this\x00is\x00\x00\x00my test sentence. '
                               'How nice.\x00\x00\x00'], dtype=object)
    expected_output = [
        np.array([['this\x00'], ['is\x00\x00\x00']], dtype=object),
        np.array([['my te'], ['st se']], dtype=object),
        np.array([['ntenc'], ['e. Ho']], dtype=object),
        np.array([['w nic'], ['e.\x00\x00\x00']], dtype=object),
    ]

    # process without labels and collect every yielded batch
    process_output = list(preprocessor.process(
        test_sentences, label_mapping=label_mapping, batch_size=2))

    # zip() stops at the shorter input, so pin the batch count first;
    # otherwise missing batches would go unnoticed
    self.assertEqual(len(expected_output), len(process_output))

    # check to make sure string length is not stripped because ending in
    # \x00
    for expected, output_batch in zip(expected_output, process_output):
        for output in output_batch:
            self.assertEqual(5, len(output[0]))  # validates not trimmed
        np.testing.assert_equal(expected, output_batch)

def test_process_input_checks(self):
prep = CharPreprocessor()
multi_dim_msg = re.escape("Multidimensional data given to "
Expand Down
2 changes: 1 addition & 1 deletion dataprofiler/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Version components; the micro release is 4 (the superseded ``MICRO = 3``
# assignment was dead and has been dropped).
MAJOR = 0
MINOR = 7
MICRO = 4

# Dotted version string built from the components above.
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)

Expand Down

0 comments on commit 60da2c4

Please sign in to comment.