19
19
20
20
#include " iceberg/manifest_reader.h"
21
21
22
+ #include < cstddef>
23
+
22
24
#include < arrow/filesystem/localfs.h>
23
25
#include < gtest/gtest.h>
24
26
25
27
#include " iceberg/arrow/arrow_fs_file_io_internal.h"
26
28
#include " iceberg/avro/avro_reader.h"
27
29
#include " iceberg/avro/avro_register.h"
28
- #include " iceberg/avro/avro_schema_util_internal.h"
29
30
#include " iceberg/manifest_entry.h"
30
31
#include " iceberg/schema.h"
31
32
#include " temp_file_test_base.h"
32
33
#include " test_common.h"
33
34
34
35
namespace iceberg {
35
36
36
- class ManifestReaderTest : public TempFileTestBase {
37
+ class ManifestReaderV1Test : public TempFileTestBase {
37
38
protected:
38
39
static void SetUpTestSuite () { avro::AvroReader::Register (); }
39
40
@@ -45,7 +46,7 @@ class ManifestReaderTest : public TempFileTestBase {
45
46
avro::RegisterLogicalTypes ();
46
47
}
47
48
48
- std::vector<ManifestEntry> prepare_manifest_entries () {
49
+ std::vector<ManifestEntry> PrepareV1ManifestEntries () {
49
50
std::vector<ManifestEntry> manifest_entries;
50
51
std::string test_dir_prefix = " /tmp/db/db/iceberg_test/data/" ;
51
52
std::vector<std::string> paths = {
@@ -102,7 +103,7 @@ class ManifestReaderTest : public TempFileTestBase {
102
103
std::shared_ptr<FileIO> file_io_;
103
104
};
104
105
105
- TEST_F (ManifestReaderTest, BasicTest ) {
106
+ TEST_F (ManifestReaderV1Test, V1PartitionedBasicTest ) {
106
107
iceberg::SchemaField partition_field (1000 , " order_ts_hour" , iceberg::int32 (), true );
107
108
auto partition_schema =
108
109
std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
@@ -115,7 +116,88 @@ TEST_F(ManifestReaderTest, BasicTest) {
115
116
auto read_result = manifest_reader->Entries ();
116
117
ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
117
118
118
- auto expected_entries = prepare_manifest_entries ();
119
+ auto expected_entries = PrepareV1ManifestEntries ();
120
+ ASSERT_EQ (read_result.value (), expected_entries);
121
+ }
122
+
123
+ class ManifestReaderV2Test : public TempFileTestBase {
124
+ protected:
125
+ static void SetUpTestSuite () { avro::AvroReader::Register (); }
126
+
127
+ void SetUp () override {
128
+ TempFileTestBase::SetUp ();
129
+ local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
130
+ file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
131
+
132
+ avro::RegisterLogicalTypes ();
133
+ }
134
+
135
+ std::vector<ManifestEntry> PrepareV2NonPartitionedManifestEntries () {
136
+ std::vector<ManifestEntry> manifest_entries;
137
+ std::string test_dir_prefix = " /tmp/db/db/v2_manifest_non_partitioned/data/" ;
138
+
139
+ std::vector<std::string> paths = {
140
+ " 00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet" };
141
+
142
+ std::vector<int64_t > file_sizes = {1344 };
143
+ std::vector<int64_t > record_counts = {4 };
144
+
145
+ std::vector<std::map<int32_t , std::vector<uint8_t >>> lower_bounds = {
146
+ {{1 , {0x01 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
147
+ {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' f' , ' o' , ' u' , ' r' }},
148
+ {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 1' }},
149
+ {4 , {0xcd , 0xcc , 0xcc , 0xcc , 0xcc , 0xdc , 0x5e , 0x40 }}}};
150
+
151
+ std::vector<std::map<int32_t , std::vector<uint8_t >>> upper_bounds = {
152
+ {{1 , {0x04 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
153
+ {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' t' , ' w' , ' o' }},
154
+ {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 4' }},
155
+ {4 , {0x14 , 0xae , 0x47 , 0xe1 , 0x7a , 0x8c , 0x7c , 0x40 }}}};
156
+
157
+ manifest_entries.emplace_back (
158
+ ManifestEntry{.status = ManifestStatus::kAdded ,
159
+ .snapshot_id = 679879563479918846LL ,
160
+ .sequence_number = std::nullopt ,
161
+ .file_sequence_number = std::nullopt ,
162
+ .data_file = std::make_shared<DataFile>(
163
+ DataFile{.file_path = test_dir_prefix + paths[0 ],
164
+ .file_format = FileFormatType::kParquet ,
165
+ .record_count = record_counts[0 ],
166
+ .file_size_in_bytes = file_sizes[0 ],
167
+ .column_sizes = {{1 , 56 }, {2 , 73 }, {3 , 66 }, {4 , 67 }},
168
+ .value_counts = {{1 , 4 }, {2 , 4 }, {3 , 4 }, {4 , 4 }},
169
+ .null_value_counts = {{1 , 0 }, {2 , 0 }, {3 , 0 }, {4 , 0 }},
170
+ .nan_value_counts = {{4 , 0 }},
171
+ .lower_bounds = lower_bounds[0 ],
172
+ .upper_bounds = upper_bounds[0 ],
173
+ .key_metadata = {},
174
+ .split_offsets = {4 },
175
+ .equality_ids = {},
176
+ .sort_order_id = 0 ,
177
+ .first_row_id = std::nullopt ,
178
+ .referenced_data_file = std::nullopt ,
179
+ .content_offset = std::nullopt ,
180
+ .content_size_in_bytes = std::nullopt })});
181
+ return manifest_entries;
182
+ }
183
+
184
+ std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
185
+ std::shared_ptr<FileIO> file_io_;
186
+ };
187
+
188
+ TEST_F (ManifestReaderV2Test, V2NonPartitionedBasicTest) {
189
+ std::string path = GetResourcePath (" 2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro" );
190
+
191
+ auto manifest_reader_result = ManifestReader::MakeReader (path, file_io_, nullptr );
192
+ ASSERT_EQ (manifest_reader_result.has_value (), true )
193
+ << manifest_reader_result.error ().message ;
194
+
195
+ auto manifest_reader = std::move (manifest_reader_result.value ());
196
+ auto read_result = manifest_reader->Entries ();
197
+ ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
198
+ ASSERT_EQ (read_result.value ().size (), 1 );
199
+
200
+ auto expected_entries = PrepareV2NonPartitionedManifestEntries ();
119
201
ASSERT_EQ (read_result.value (), expected_entries);
120
202
}
121
203
0 commit comments