From 45d611fe351f6f3847bf329aa053d890d810e2b6 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Thu, 31 Oct 2024 08:08:11 +0100
Subject: [PATCH] Allow for missing operation (#1263)

And then default to `overwrite`, see: https://lists.apache.org/thread/h9qmrmlgxh91ol0y2v8olt90b9q6p9xr
---
 pyiceberg/table/snapshots.py  | 6 +++++-
 tests/table/test_snapshots.py | 7 +++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index 829bd60290..c5cb57e691 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import time
+import warnings
 from collections import defaultdict
 from enum import Enum
 from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional
@@ -182,7 +183,10 @@ class Summary(IcebergBaseModel, Mapping[str, str]):
     operation: Operation = Field()
     _additional_properties: Dict[str, str] = PrivateAttr()
 
-    def __init__(self, operation: Operation, **data: Any) -> None:
+    def __init__(self, operation: Optional[Operation] = None, **data: Any) -> None:
+        if operation is None:
+            warnings.warn("Encountered invalid snapshot summary: operation is missing, defaulting to overwrite")
+            operation = Operation.OVERWRITE
         super().__init__(operation=operation, **data)
         self._additional_properties = data
 
diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py
index ff9d92cea3..b4dde217d4 100644
--- a/tests/table/test_snapshots.py
+++ b/tests/table/test_snapshots.py
@@ -112,6 +112,13 @@ def test_deserialize_snapshot(snapshot: Snapshot) -> None:
     assert actual == snapshot
 
 
+def test_deserialize_snapshot_without_operation(snapshot: Snapshot) -> None:
+    payload = """{"snapshot-id": 25, "parent-snapshot-id": 19, "sequence-number": 200, "timestamp-ms": 1602638573590, "manifest-list": "s3:/a/b/c.avro", "summary": {}, "schema-id": 3}"""
+    with pytest.warns(UserWarning, match="Encountered invalid snapshot summary: operation is missing, defaulting to overwrite"):
+        actual = Snapshot.model_validate_json(payload)
+    assert actual.summary.operation == Operation.OVERWRITE
+
+
 def test_deserialize_snapshot_with_properties(snapshot_with_properties: Snapshot) -> None:
     payload = """{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}"""
     snapshot = Snapshot.model_validate_json(payload)