Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure that primary and sequence keys cannot be the same #2106

Merged
merged 4 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ select = [
"PD"
]
ignore = [
"E501",
# pydocstyle
"D107", # Missing docstring in __init__
"D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
Expand Down
6 changes: 4 additions & 2 deletions sdv/constraints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,17 @@ def _validate_inputs(cls, **kwargs):
if missing_values:
errors.append(
ValueError(
f'Missing required values {missing_values} in {article} {constraint} constraint.'
f'Missing required values {missing_values} '
f'in {article} {constraint} constraint.'
)
)

invalid_vals = set(kwargs) - set(args)
if invalid_vals:
errors.append(
ValueError(
f'Invalid values {invalid_vals} are present in {article} {constraint} constraint.'
f'Invalid values {invalid_vals} are present '
f'in {article} {constraint} constraint.'
)
)

Expand Down
8 changes: 8 additions & 0 deletions sdv/metadata/single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,14 @@ def _validate_key(self, column_name, key_type):
raise InvalidMetadataError(f"'{key_type}_key' must be a string.")

keys = {column_name} if isinstance(column_name, str) else set(column_name)
setting_sequence_as_primary = key_type == 'primary' and column_name == self.sequence_key
setting_primary_as_sequence = key_type == 'sequence' and column_name == self.primary_key
if setting_sequence_as_primary or setting_primary_as_sequence:
raise InvalidMetadataError(
f'The column ({column_name}) cannot be set as {key_type}_key as it is already '
f"set as the {'sequence' if key_type == 'primary' else 'primary'}_key."
)

invalid_ids = keys - set(self.columns)
if invalid_ids:
raise InvalidMetadataError(
Expand Down
64 changes: 64 additions & 0 deletions tests/integration/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,3 +487,67 @@ def test_column_relationship_validation():
# Run and Assert
with pytest.raises(InvalidMetadataError, match=expected_message):
metadata.validate()


def test_metadata_validate_same_sequence_primary():
"""Test metadata validation when both primary and sequence keys are the same."""
# Setup
metadata = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
'primary_key': 'A',
'sequence_key': 'A',
})

expected_message = re.escape(
'The following errors were found in the metadata:\n\n'
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.\n'
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)

# Run and Assert
with pytest.raises(InvalidMetadataError, match=expected_message):
metadata.validate()


def test_metadata_set_same_sequence_primary():
"""Test metadata throws error when setting the sequence and primary keys to be the same."""
# Setup
metadata_sequence = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
})

# Run and Assert
metadata_sequence.set_sequence_key('A')
error_msg_sequence = re.escape(
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.'
)
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
metadata_sequence.set_primary_key('A')

# Setup primary first
metadata_primary = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
})

# Run and Assert
metadata_primary.set_primary_key('A')
error_msg_sequence = re.escape(
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
metadata_primary.set_sequence_key('A')
21 changes: 21 additions & 0 deletions tests/unit/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,27 @@ def test__validate_key_dataype_invalid_tuple(self):
# Assert
assert out is False

def test__validate_key_sequence_and_primary_key_same(self):
"""Test ``_validate_key`` for a column used as both sequence and primary keys."""
# Setup
instance_primary = SingleTableMetadata()
instance_primary.primary_key = 'A'
error_msg_primary = re.escape(
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)

instance_sequence = SingleTableMetadata()
instance_sequence.sequence_key = 'A'
error_msg_sequence = re.escape(
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.'
)

# Run and Assert
with pytest.raises(InvalidMetadataError, match=error_msg_primary):
instance_primary._validate_key('A', 'sequence')
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
instance_sequence._validate_key('A', 'primary')

def test_set_primary_key_validation_dtype(self):
"""Test that ``set_primary_key`` crashes for invalid arguments.

Expand Down
Loading