Skip to content

Commit

Permalink
Allow union of {int,long}, {float,double}, etc (apache#1283)
Browse files Browse the repository at this point in the history
* Allow union of `{int,long}`, `{float,double}`, etc

* Thanks Kevin!

Co-authored-by: Kevin Liu <[email protected]>

* Thanks Kevin!

Co-authored-by: Kevin Liu <[email protected]>

* MOAR tests

* lint

* Make the tests happy

* Remove redundant test

---------

Co-authored-by: Kevin Liu <[email protected]>
  • Loading branch information
Fokko and kevinjqliu authored Nov 5, 2024
1 parent 9b8400a commit c3bf16c
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 4 deletions.
8 changes: 7 additions & 1 deletion pyiceberg/table/update/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,13 @@ def _update_column(self, field: NestedField, existing_field: NestedField) -> Non
self.update_schema.make_column_optional(full_name)

if field.field_type.is_primitive and field.field_type != existing_field.field_type:
self.update_schema.update_column(full_name, field_type=field.field_type)
try:
# If the current type is wider than the new type, then
# we perform a noop
_ = promote(field.field_type, existing_field.field_type)
except ResolveError:
# If this is not the case, perform the type evolution
self.update_schema.update_column(full_name, field_type=field.field_type)

if field.doc is not None and field.doc != existing_field.doc:
self.update_schema.update_column(full_name, doc=field.doc)
Expand Down
39 changes: 36 additions & 3 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,17 @@ def test_detect_invalid_top_level_maps() -> None:
_ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply() # type: ignore


def test_allow_double_to_float() -> None:
    """Union-by-name of a double column with a float column keeps the schema unchanged."""
    column = "aCol"
    existing = Schema(NestedField(field_id=1, name=column, field_type=DoubleType(), required=False))
    incoming = Schema(NestedField(field_id=1, name=column, field_type=FloatType(), required=False))

    update = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = update.union_by_name(incoming)._apply()

    # The existing (wider) double type must survive the union untouched.
    assert result.as_struct() == existing.as_struct()
    assert len(result.fields) == 1
    assert isinstance(result.fields[0].field_type, DoubleType)


def test_promote_float_to_double() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
Expand All @@ -1200,11 +1211,33 @@ def test_promote_float_to_double() -> None:
assert isinstance(applied.fields[0].field_type, DoubleType)


def test_detect_invalid_promotion_double_to_float() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
def test_allow_long_to_int() -> None:
    """Union-by-name of a long column with an int column keeps the schema unchanged."""
    column = "aCol"
    existing = Schema(NestedField(field_id=1, name=column, field_type=LongType(), required=False))
    incoming = Schema(NestedField(field_id=1, name=column, field_type=IntegerType(), required=False))

    update = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = update.union_by_name(incoming)._apply()

    # The existing (wider) long type must survive the union untouched.
    assert result.as_struct() == existing.as_struct()
    assert len(result.fields) == 1
    assert isinstance(result.fields[0].field_type, LongType)


def test_promote_int_to_long() -> None:
    """Union-by-name of an int column with a long column yields the long schema."""
    column = "aCol"
    existing = Schema(NestedField(field_id=1, name=column, field_type=IntegerType(), required=False))
    incoming = Schema(NestedField(field_id=1, name=column, field_type=LongType(), required=False))

    update = UpdateSchema(transaction=None, schema=existing)  # type: ignore
    result = update.union_by_name(incoming)._apply()

    # The result must match the incoming schema, i.e. the column is now long.
    assert result.as_struct() == incoming.as_struct()
    assert len(result.fields) == 1
    assert isinstance(result.fields[0].field_type, LongType)


def test_detect_invalid_promotion_string_to_float() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=StringType(), required=False))
new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))

with pytest.raises(ValidationError, match="Cannot change column type: aCol: double -> float"):
with pytest.raises(ValidationError, match="Cannot change column type: aCol: string -> float"):
_ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply() # type: ignore


Expand Down

0 comments on commit c3bf16c

Please sign in to comment.