27
27
#include " iceberg/manifest_list.h"
28
28
#include " iceberg/schema.h"
29
29
#include " iceberg/type.h"
30
+ #include " iceberg/util/checked_cast.h"
30
31
#include " iceberg/util/macros.h"
31
32
32
33
namespace iceberg {
@@ -37,7 +38,7 @@ namespace iceberg {
37
38
}
38
39
39
40
#define PARSE_PRIMITIVE_FIELD (item, array_view, type ) \
40
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
41
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
41
42
if (!ArrowArrayViewIsNull (array_view, row_idx)) { \
42
43
auto value = ArrowArrayViewGetIntUnsafe (array_view, row_idx); \
43
44
item = static_cast <type>(value); \
@@ -48,7 +49,7 @@ namespace iceberg {
48
49
}
49
50
50
51
#define PARSE_STRING_FIELD (item, array_view ) \
51
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
52
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
52
53
if (!ArrowArrayViewIsNull (array_view, row_idx)) { \
53
54
auto value = ArrowArrayViewGetStringUnsafe (array_view, row_idx); \
54
55
item = std::string (value.data , value.size_bytes ); \
@@ -59,7 +60,7 @@ namespace iceberg {
59
60
}
60
61
61
62
#define PARSE_BINARY_FIELD (item, array_view ) \
62
- for (size_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
63
+ for (int64_t row_idx = 0 ; row_idx < array_view->length; row_idx++) { \
63
64
if (!ArrowArrayViewIsNull (view_of_column, row_idx)) { \
64
65
item = ArrowArrayViewGetInt8Vector (array_view, row_idx); \
65
66
} else if (required) { \
@@ -225,66 +226,67 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
225
226
auto field_name = field.value ()->get ().name ();
226
227
bool required = !field.value ()->get ().optional ();
227
228
auto view_of_column = array_view.children [idx];
228
- switch (idx) {
229
- case 0 :
229
+ ICEBERG_ASSIGN_OR_RAISE (auto manifest_file_field, ManifestFileFieldFromIndex (idx));
230
+ switch (manifest_file_field) {
231
+ case ManifestFileField::kManifestPath :
230
232
PARSE_STRING_FIELD (manifest_files[row_idx].manifest_path , view_of_column);
231
233
break ;
232
- case 1 :
234
+ case ManifestFileField:: kManifestLength :
233
235
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].manifest_length , view_of_column,
234
236
int64_t );
235
237
break ;
236
- case 2 :
238
+ case ManifestFileField:: kPartitionSpecId :
237
239
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].partition_spec_id , view_of_column,
238
240
int32_t );
239
241
break ;
240
- case 3 :
242
+ case ManifestFileField:: kContent :
241
243
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].content , view_of_column,
242
244
ManifestFile::Content);
243
245
break ;
244
- case 4 :
246
+ case ManifestFileField:: kSequenceNumber :
245
247
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].sequence_number , view_of_column,
246
248
int64_t );
247
249
break ;
248
- case 5 :
250
+ case ManifestFileField:: kMinSequenceNumber :
249
251
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].min_sequence_number , view_of_column,
250
252
int64_t );
251
253
break ;
252
- case 6 :
254
+ case ManifestFileField:: kAddedSnapshotId :
253
255
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_snapshot_id , view_of_column,
254
256
int64_t );
255
257
break ;
256
- case 7 :
258
+ case ManifestFileField:: kAddedFilesCount :
257
259
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_files_count , view_of_column,
258
260
int32_t );
259
261
break ;
260
- case 8 :
262
+ case ManifestFileField:: kExistingFilesCount :
261
263
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].existing_files_count ,
262
264
view_of_column, int32_t );
263
265
break ;
264
- case 9 :
266
+ case ManifestFileField:: kDeletedFilesCount :
265
267
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].deleted_files_count , view_of_column,
266
268
int32_t );
267
269
break ;
268
- case 10 :
270
+ case ManifestFileField:: kAddedRowsCount :
269
271
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].added_rows_count , view_of_column,
270
272
int64_t );
271
273
break ;
272
- case 11 :
274
+ case ManifestFileField:: kExistingRowsCount :
273
275
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].existing_rows_count , view_of_column,
274
276
int64_t );
275
277
break ;
276
- case 12 :
278
+ case ManifestFileField:: kDeletedRowsCount :
277
279
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].deleted_rows_count , view_of_column,
278
280
int64_t );
279
281
break ;
280
- case 13 :
282
+ case ManifestFileField:: kPartitionFieldSummary :
281
283
ICEBERG_RETURN_UNEXPECTED (
282
284
ParsePartitionFieldSummaryList (view_of_column, manifest_files));
283
285
break ;
284
- case 14 :
286
+ case ManifestFileField:: kKeyMetadata :
285
287
PARSE_BINARY_FIELD (manifest_files[row_idx].key_metadata , view_of_column);
286
288
break ;
287
- case 15 :
289
+ case ManifestFileField:: kFirstRowId :
288
290
PARSE_PRIMITIVE_FIELD (manifest_files[row_idx].first_row_id , view_of_column,
289
291
int64_t );
290
292
break ;
@@ -295,7 +297,7 @@ Result<std::vector<ManifestFile>> ParseManifestList(ArrowSchema* schema,
295
297
return manifest_files;
296
298
}
297
299
298
- Status ParseLiteral (ArrowArrayView* view_of_partition, size_t row_idx,
300
+ Status ParseLiteral (ArrowArrayView* view_of_partition, int64_t row_idx,
299
301
std::vector<ManifestEntry>& manifest_entries) {
300
302
if (view_of_partition->storage_type == ArrowType::NANOARROW_TYPE_BOOL) {
301
303
auto value = ArrowArrayViewGetUIntUnsafe (view_of_partition, row_idx);
@@ -355,7 +357,7 @@ Status ParseDataFile(const std::shared_ptr<StructType>& data_file_schema,
355
357
view_of_file_field);
356
358
break ;
357
359
case 2 :
358
- for (size_t row_idx = 0 ; row_idx < view_of_file_field->length ; row_idx++) {
360
+ for (int64_t row_idx = 0 ; row_idx < view_of_file_field->length ; row_idx++) {
359
361
if (!ArrowArrayViewIsNull (view_of_file_field, row_idx)) {
360
362
auto value = ArrowArrayViewGetStringUnsafe (view_of_file_field, row_idx);
361
363
std::string_view path_str (value.data , value.size_bytes );
@@ -510,7 +512,7 @@ Result<std::vector<ManifestEntry>> ParseManifestEntry(ArrowSchema* schema,
510
512
break ;
511
513
case 4 : {
512
514
auto data_file_schema =
513
- dynamic_pointer_cast <StructType>(field.value ()->get ().type ());
515
+ internal::checked_pointer_cast <StructType>(field.value ()->get ().type ());
514
516
ICEBERG_RETURN_UNEXPECTED (
515
517
ParseDataFile (data_file_schema, view_of_column, manifest_entries));
516
518
break ;
@@ -571,4 +573,11 @@ Result<std::vector<ManifestFile>> ManifestListReaderImpl::Files() const {
571
573
return manifest_files;
572
574
}
573
575
576
+ Result<ManifestFileField> ManifestFileFieldFromIndex (int32_t index) {
577
+ if (index >= 0 && index < static_cast <int32_t >(ManifestFileField::kNextId )) {
578
+ return static_cast <ManifestFileField>(index);
579
+ }
580
+ return InvalidArgument (" Invalid manifest file field index: {}" , index);
581
+ }
582
+
574
583
} // namespace iceberg
0 commit comments