diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 4c660d7..38fd28d 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -38,6 +38,7 @@ set(ICEBERG_SOURCES sort_field.cc sort_order.cc statistics_file.cc + table.cc table_metadata.cc transform.cc transform_function.cc diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc new file mode 100644 index 0000000..c79f378 --- /dev/null +++ b/src/iceberg/table.cc @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_metadata.h" + +namespace iceberg { + +const std::string& Table::uuid() const { return metadata_->table_uuid; } + +Result> Table::schema() const { return metadata_->Schema(); } + +const std::shared_ptr>>& +Table::schemas() const { + if (!schemas_map_) { + schemas_map_ = + std::make_shared>>(); + for (const auto& schema : metadata_->schemas) { + if (schema->schema_id()) { + schemas_map_->emplace(schema->schema_id().value(), schema); + } + } + } + return schemas_map_; +} + +Result> Table::spec() const { + return metadata_->PartitionSpec(); +} + +const std::shared_ptr>>& +Table::specs() const { + if (!partition_spec_map_) { + partition_spec_map_ = + std::make_shared>>(); + for (const auto& spec : metadata_->partition_specs) { + partition_spec_map_->emplace(spec->spec_id(), spec); + } + } + return partition_spec_map_; +} + +Result> Table::sort_order() const { + return metadata_->SortOrder(); +} + +const std::shared_ptr>>& +Table::sort_orders() const { + if (!sort_orders_map_) { + sort_orders_map_ = + std::make_shared>>(); + for (const auto& order : metadata_->sort_orders) { + sort_orders_map_->emplace(order->order_id(), order); + } + } + return sort_orders_map_; +} + +const std::unordered_map& Table::properties() const { + return metadata_->properties; +} + +const std::string& Table::location() const { return metadata_->location; } + +Result> Table::current_snapshot() const { + return metadata_->Snapshot(); +} + +Result> Table::SnapshotById(int64_t snapshot_id) const { + auto iter = std::ranges::find_if(metadata_->snapshots, + [this, &snapshot_id](const auto& snapshot) { + return snapshot->snapshot_id == snapshot_id; + }); + if (iter == metadata_->snapshots.end()) { + return NotFound("Snapshot with ID {} is not found", snapshot_id); + } + return *iter; +} + +const std::vector>& Table::snapshots() const { + return metadata_->snapshots; +} + +const std::vector& Table::history() const { + return metadata_->snapshot_log; +} + +const std::shared_ptr& Table::io() const { return io_; } + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 11a9fc9..062890e 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,13 +19,14 @@ #pragma once -#include +#include #include #include #include #include "iceberg/iceberg_export.h" -#include "iceberg/result.h" +#include "iceberg/snapshot.h" +#include "iceberg/table_identifier.h" #include "iceberg/type_fwd.h" namespace iceberg { @@ -35,77 +36,88 @@ class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - /// \brief Return the full name for this table - virtual const std::string& name() const = 0; + /// \brief Construct a table. + /// \param[in] identifier The identifier of the table. + /// \param[in] metadata The metadata for the table. + /// \param[in] metadata_location The location of the table metadata file. + /// \param[in] io The FileIO to read and write table data and metadata files. + /// \param[in] catalog The catalog that this table belongs to. If null, the table will + /// be read-only. + Table(TableIdentifier identifier, std::shared_ptr metadata, + std::string metadata_location, std::shared_ptr io, + std::shared_ptr catalog) + : identifier_(std::move(identifier)), + metadata_(std::move(metadata)), + metadata_location_(std::move(metadata_location)), + io_(std::move(io)), + catalog_(std::move(catalog)) {}; + + /// \brief Return the identifier of this table + const TableIdentifier& name() const { return identifier_; } /// \brief Returns the UUID of the table - virtual const std::string& uuid() const = 0; + const std::string& uuid() const; - /// \brief Refresh the current table metadata - virtual Status Refresh() = 0; - - /// \brief Return the schema for this table - virtual const std::shared_ptr& schema() const = 0; + /// \brief Return the schema for this table, return NotFoundError if not found + Result> schema() const; /// \brief Return a map of schema for this table - virtual const std::unordered_map>& schemas() const = 0; + const std::shared_ptr>>& schemas() + const; - /// \brief Return the partition spec for this table - virtual const std::shared_ptr& spec() const = 0; + /// \brief Return the partition spec for this table, return NotFoundError if not found + Result> spec() const; /// \brief Return a map of partition specs for this table - virtual const std::unordered_map>& specs() - const = 0; + const std::shared_ptr>>& + specs() const; - /// \brief Return the sort order for this table - virtual const std::shared_ptr& sort_order() const = 0; + /// \brief Return the sort order for this table, return NotFoundError if not found + Result> sort_order() const; /// \brief Return a map of sort order IDs to sort orders for this table - virtual const std::unordered_map>& sort_orders() - const = 0; + const std::shared_ptr>>& + sort_orders() const; /// \brief Return a map of string properties for this table - virtual const std::unordered_map& properties() const = 0; + const std::unordered_map& properties() const; /// \brief Return the table's base location - virtual const std::string& location() const = 0; + const std::string& location() const; - /// \brief Return the table's current snapshot - virtual const std::shared_ptr& current_snapshot() const = 0; + /// \brief Return the table's current snapshot, return NotFoundError if not found + Result> current_snapshot() const; - /// \brief Get the snapshot of this table with the given id, or null if there is no - /// matching snapshot + /// \brief Get the snapshot of this table with the given id /// /// \param snapshot_id the ID of the snapshot to get - /// \return the Snapshot with the given id - virtual Result> snapshot(int64_t snapshot_id) const = 0; + /// \return the Snapshot with the given id, return NotFoundError if not found + Result> SnapshotById(int64_t snapshot_id) const; /// \brief Get the snapshots of this table - virtual const std::vector>& snapshots() const = 0; + const std::vector>& snapshots() const; /// \brief Get the snapshot history of this table /// /// \return a vector of history entries - virtual const std::vector>& history() const = 0; - - /// \brief Create a new table scan for this table - /// - /// Once a table scan is created, it can be refined to project columns and filter data. - virtual std::unique_ptr NewScan() const = 0; - - /// \brief Create a new append API to add files to this table and commit - virtual std::shared_ptr NewAppend() = 0; - - /// \brief Create a new transaction API to commit multiple table operations at once - virtual std::unique_ptr NewTransaction() = 0; - - /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an - /// IO-less design in the core library. - // /// \brief Returns a FileIO to read and write table data and metadata files - // virtual std::shared_ptr io() const = 0; - - /// \brief Returns a LocationProvider to provide locations for new data files - virtual std::unique_ptr location_provider() const = 0; + const std::vector& history() const; + + /// \brief Returns a FileIO to read and write table data and metadata files + const std::shared_ptr& io() const; + + private: + const TableIdentifier identifier_; + const std::shared_ptr metadata_; + const std::string metadata_location_; + std::shared_ptr io_; + std::shared_ptr catalog_; + + mutable std::shared_ptr>> + schemas_map_; + mutable std::shared_ptr>> + partition_spec_map_; + mutable std::shared_ptr>> + sort_orders_map_; }; } // namespace iceberg diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc index 4e112fd..b820517 100644 --- a/src/iceberg/table_metadata.cc +++ b/src/iceberg/table_metadata.cc @@ -76,6 +76,16 @@ Result> TableMetadata::SortOrder() const { return *iter; } +Result> TableMetadata::Snapshot() const { + auto iter = std::ranges::find_if(snapshots, [this](const auto& snapshot) { + return snapshot->snapshot_id == current_snapshot_id; + }); + if (iter == snapshots.end()) { + return NotFound("Current snapshot with ID {} is not found", current_snapshot_id); + } + return *iter; +} + namespace { template diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h index 9c7f37d..e4c0463 100644 --- a/src/iceberg/table_metadata.h +++ b/src/iceberg/table_metadata.h @@ -23,12 +23,14 @@ /// Table metadata for Iceberg tables. #include +#include #include #include #include #include #include "iceberg/iceberg_export.h" +#include "iceberg/snapshot.h" #include "iceberg/type_fwd.h" #include "iceberg/util/timepoint.h" @@ -135,6 +137,8 @@ struct ICEBERG_EXPORT TableMetadata { Result> PartitionSpec() const; /// \brief Get the current sort order, return NotFoundError if not found Result> SortOrder() const; + /// \brief Get the current snapshot, return NotFoundError if not found + Result> Snapshot() const; friend bool operator==(const TableMetadata& lhs, const TableMetadata& rhs); diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index a5996c4..cc5f0a7 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -99,6 +99,10 @@ class TransformFunction; struct PartitionStatisticsFile; struct Snapshot; struct SnapshotRef; + +struct MetadataLogEntry; +struct SnapshotLogEntry; + struct StatisticsFile; struct TableMetadata; @@ -113,7 +117,6 @@ enum class TransformType; /// TODO: Forward declarations below are not added yet. /// ---------------------------------------------------------------------------- -class HistoryEntry; class StructLike; class MetadataUpdate; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 863bb09..125714f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -49,6 +49,13 @@ target_sources(catalog_test PRIVATE in_memory_catalog_test.cc) target_link_libraries(catalog_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME catalog_test COMMAND catalog_test) +add_executable(table_test) +target_include_directories(table_test PRIVATE "${CMAKE_BINARY_DIR}") +target_sources(table_test PRIVATE test_common.cc json_internal_test.cc table_test.cc + schema_json_test.cc) +target_link_libraries(table_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) +add_test(NAME table_test COMMAND table_test) + add_executable(expression_test) target_sources(expression_test PRIVATE expression_test.cc) target_link_libraries(expression_test PRIVATE iceberg_static GTest::gtest_main @@ -57,8 +64,8 @@ add_test(NAME expression_test COMMAND expression_test) add_executable(json_serde_test) target_include_directories(json_serde_test PRIVATE "${CMAKE_BINARY_DIR}") -target_sources(json_serde_test PRIVATE json_internal_test.cc metadata_serde_test.cc - schema_json_test.cc) +target_sources(json_serde_test PRIVATE test_common.cc json_internal_test.cc + metadata_serde_test.cc schema_json_test.cc) target_link_libraries(json_serde_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME json_serde_test COMMAND json_serde_test) diff --git a/test/metadata_io_test.cc b/test/metadata_io_test.cc index 7d987e2..432101b 100644 --- a/test/metadata_io_test.cc +++ b/test/metadata_io_test.cc @@ -50,27 +50,25 @@ class MetadataIOTest : public TempFileTestBase { /*optional=*/false); auto schema = std::make_shared(std::move(schema_fields), /*schema_id=*/1); - TableMetadata metadata{ - .format_version = 1, - .table_uuid = "1234567890", - .location = "s3://bucket/path", - .last_sequence_number = 0, - .schemas = {schema}, - .current_schema_id = 1, - .default_spec_id = 0, - .last_partition_id = 0, - .properties = {{"key", "value"}}, - .current_snapshot_id = 3051729675574597004, - .snapshots = {std::make_shared(Snapshot{ - .snapshot_id = 3051729675574597004, - .sequence_number = 0, - .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(), - .manifest_list = "s3://a/b/1.avro", - .summary = {{"operation", "append"}}, - })}, - .default_sort_order_id = 0, - .next_row_id = 0}; - return metadata; + return TableMetadata{.format_version = 1, + .table_uuid = "1234567890", + .location = "s3://bucket/path", + .last_sequence_number = 0, + .schemas = {schema}, + .current_schema_id = 1, + .default_spec_id = 0, + .last_partition_id = 0, + .properties = {{"key", "value"}}, + .current_snapshot_id = 3051729675574597004, + .snapshots = {std::make_shared(Snapshot{ + .snapshot_id = 3051729675574597004, + .sequence_number = 0, + .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(), + .manifest_list = "s3://a/b/1.avro", + .summary = {{"operation", "append"}}, + })}, + .default_sort_order_id = 0, + .next_row_id = 0}; } std::shared_ptr io_; diff --git a/test/metadata_serde_test.cc b/test/metadata_serde_test.cc index 4a78e8c..4c78f65 100644 --- a/test/metadata_serde_test.cc +++ b/test/metadata_serde_test.cc @@ -20,13 +20,11 @@ #include #include #include -#include #include #include #include -#include "iceberg/json_internal.h" #include "iceberg/partition_field.h" #include "iceberg/partition_spec.h" #include "iceberg/schema.h" @@ -35,9 +33,9 @@ #include "iceberg/sort_field.h" #include "iceberg/sort_order.h" #include "iceberg/table_metadata.h" -#include "iceberg/test/test_config.h" #include "iceberg/transform.h" #include "iceberg/type.h" +#include "test_common.h" namespace iceberg { @@ -46,33 +44,6 @@ namespace { class MetadataSerdeTest : public ::testing::Test { protected: void SetUp() override {} - - static std::string GetResourcePath(const std::string& file_name) { - return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; - } - - static void ReadJsonFile(const std::string& file_name, std::string* content) { - std::filesystem::path path{GetResourcePath(file_name)}; - ASSERT_TRUE(std::filesystem::exists(path)) - << "File does not exist: " << path.string(); - - std::ifstream file(path); - std::stringstream buffer; - buffer << file.rdbuf(); - *content = buffer.str(); - } - - static void ReadTableMetadata(const std::string& file_name, - std::unique_ptr* metadata) { - std::string json_content; - ReadJsonFile(file_name, &json_content); - - nlohmann::json json = nlohmann::json::parse(json_content); - auto result = TableMetadataFromJson(json); - ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name - << ": " << result.error().message; - *metadata = std::move(result.value()); - } }; } // namespace @@ -112,6 +83,8 @@ TEST_F(MetadataSerdeTest, DeserializeV1Valid) { auto partition_spec = metadata->PartitionSpec(); ASSERT_TRUE(partition_spec.has_value()); EXPECT_EQ(*(partition_spec.value().get()), *expected_spec); + auto snapshot = metadata->Snapshot(); + ASSERT_FALSE(snapshot.has_value()); } TEST_F(MetadataSerdeTest, DeserializeV2Valid) { @@ -163,7 +136,11 @@ TEST_F(MetadataSerdeTest, DeserializeV2Valid) { ASSERT_TRUE(sort_order.has_value()); EXPECT_EQ(*(sort_order.value().get()), *expected_sort_order); + // Compare snapshot EXPECT_EQ(metadata->current_snapshot_id, 3055729675574597004); + auto snapshot = metadata->Snapshot(); + ASSERT_TRUE(snapshot.has_value()); + EXPECT_EQ(snapshot.value()->snapshot_id, 3055729675574597004); // Compare snapshots std::vector expected_snapshots{ diff --git a/test/table_test.cc b/test/table_test.cc new file mode 100644 index 0000000..56f475d --- /dev/null +++ b/test/table_test.cc @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/table.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/snapshot.h" +#include "iceberg/table_metadata.h" +#include "test_common.h" + +namespace iceberg { + +TEST(Table, TableV1) { + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata)); + TableIdentifier tableIdent{.ns = {}, .name = "test_table_v1"}; + Table table(tableIdent, std::move(metadata), "s3://bucket/test/location/meta/", nullptr, + nullptr); + ASSERT_EQ(table.name().name, "test_table_v1"); + + // Check table schema + auto schema = table.schema(); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); + auto schemas = table.schemas(); + ASSERT_TRUE(schemas->empty()); + + // Check table spec + auto spec = table.spec(); + ASSERT_TRUE(spec.has_value()); + auto specs = table.specs(); + ASSERT_EQ(1UL, specs->size()); + + // Check table sort_order + auto sort_order = table.sort_order(); + ASSERT_TRUE(sort_order.has_value()); + auto sort_orders = table.sort_orders(); + ASSERT_EQ(1UL, sort_orders->size()); + + // Check table location + auto location = table.location(); + ASSERT_EQ(location, "s3://bucket/test/location"); + + // Check table snapshots + auto snapshots = table.snapshots(); + ASSERT_TRUE(snapshots.empty()); + + auto io = table.io(); + ASSERT_TRUE(io == nullptr); +} + +TEST(Table, TableV2) { + std::unique_ptr metadata; + ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata)); + TableIdentifier tableIdent{.ns = {}, .name = "test_table_v2"}; + + Table table(tableIdent, std::move(metadata), "s3://bucket/test/location/meta/", nullptr, + nullptr); + ASSERT_EQ(table.name().name, "test_table_v2"); + + // Check table schema + auto schema = table.schema(); + ASSERT_TRUE(schema.has_value()); + ASSERT_EQ(schema.value()->fields().size(), 3); + auto schemas = table.schemas(); + ASSERT_FALSE(schemas->empty()); + + // Check partition spec + auto spec = table.spec(); + ASSERT_TRUE(spec.has_value()); + auto specs = table.specs(); + ASSERT_EQ(1UL, specs->size()); + + // Check sort order + auto sort_order = table.sort_order(); + ASSERT_TRUE(sort_order.has_value()); + auto sort_orders = table.sort_orders(); + ASSERT_EQ(1UL, sort_orders->size()); + + // Check table location + auto location = table.location(); + ASSERT_EQ(location, "s3://bucket/test/location"); + + // Check snapshot + auto snapshots = table.snapshots(); + ASSERT_EQ(2UL, snapshots.size()); + auto snapshot = table.current_snapshot(); + ASSERT_TRUE(snapshot.has_value()); + snapshot = table.SnapshotById(snapshot.value()->snapshot_id); + ASSERT_TRUE(snapshot.has_value()); + auto invalid_snapshot_id = 9999; + snapshot = table.SnapshotById(invalid_snapshot_id); + ASSERT_FALSE(snapshot.has_value()); +} + +} // namespace iceberg diff --git a/test/test_common.cc b/test/test_common.cc new file mode 100644 index 0000000..25fa8f2 --- /dev/null +++ b/test/test_common.cc @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "test_common.h" + +#include +#include +#include +#include + +#include +#include + +#include "iceberg/json_internal.h" +#include "iceberg/test/test_config.h" + +namespace iceberg { + +std::string GetResourcePath(const std::string& file_name) { + return std::string(ICEBERG_TEST_RESOURCES) + "/" + file_name; +} + +void ReadJsonFile(const std::string& file_name, std::string* content) { + std::filesystem::path path{GetResourcePath(file_name)}; + ASSERT_TRUE(std::filesystem::exists(path)) << "File does not exist: " << path.string(); + + std::ifstream file(path); + std::stringstream buffer; + buffer << file.rdbuf(); + *content = buffer.str(); +} + +void ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata) { + std::string json_content; + ReadJsonFile(file_name, &json_content); + + nlohmann::json json = nlohmann::json::parse(json_content); + auto result = TableMetadataFromJson(json); + ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name + << ": " << result.error().message; + *metadata = std::move(result.value()); +} + +} // namespace iceberg diff --git a/test/test_common.h b/test/test_common.h new file mode 100644 index 0000000..a9dba8c --- /dev/null +++ b/test/test_common.h @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief Get the full path to a resource file in the test resources directory +std::string GetResourcePath(const std::string& file_name); + +/// \brief Read a JSON file from the test resources directory +void ReadJsonFile(const std::string& file_name, std::string* content); + +/// \brief Read table metadata from a JSON file in the test resources directory +void ReadTableMetadata(const std::string& file_name, + std::unique_ptr* metadata); + +} // namespace iceberg