42
42
import static org .apache .parquet .schema .Type .Repetition .REQUIRED ;
43
43
44
44
import java .util .ArrayList ;
45
+ import java .util .Collections ;
45
46
import java .util .List ;
46
47
import java .util .Optional ;
47
48
64
65
import org .apache .arrow .vector .types .pojo .ArrowType .Union ;
65
66
import org .apache .arrow .vector .types .pojo .ArrowType .Utf8 ;
66
67
import org .apache .arrow .vector .types .pojo .Field ;
68
+ import org .apache .arrow .vector .types .pojo .FieldType ;
67
69
import org .apache .arrow .vector .types .pojo .Schema ;
68
70
import org .apache .parquet .arrow .schema .SchemaMapping .ListTypeMapping ;
69
71
import org .apache .parquet .arrow .schema .SchemaMapping .PrimitiveTypeMapping ;
@@ -158,6 +160,11 @@ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type)
158
160
return createListTypeMapping ();
159
161
}
160
162
163
+ @ Override
164
+ public TypeMapping visit (ArrowType .LargeList largeList ) {
165
+ return createListTypeMapping ();
166
+ }
167
+
161
168
@ Override
162
169
public TypeMapping visit (org .apache .arrow .vector .types .pojo .ArrowType .FixedSizeList type ) {
163
170
return createListTypeMapping ();
@@ -179,6 +186,17 @@ public TypeMapping visit(Union type) {
179
186
return new UnionTypeMapping (field , addToBuilder (parquetTypes , Types .buildGroup (OPTIONAL )).named (fieldName ), parquetTypes );
180
187
}
181
188
189
+ @ Override
190
+ public TypeMapping visit (ArrowType .Map map ) {
191
+ if (children .size () != 2 ) {
192
+ throw new IllegalArgumentException ("Map fields must have exactly two children: " + field );
193
+ }
194
+ TypeMapping keyChild = fromArrow (children .get (0 ), "key" );
195
+ TypeMapping valueChild = fromArrow (children .get (1 ), "value" );
196
+ GroupType groupType = Types .optionalMap ().key (keyChild .getParquetType ()).value (valueChild .getParquetType ()).named (fieldName );
197
+ return new SchemaMapping .MapTypeMapping (field , new Map3Levels (groupType ), keyChild , valueChild );
198
+ }
199
+
182
200
@ Override
183
201
public TypeMapping visit (Int type ) {
184
202
boolean signed = type .getIsSigned ();
@@ -214,11 +232,21 @@ public TypeMapping visit(Utf8 type) {
214
232
return primitive (BINARY , stringType ());
215
233
}
216
234
235
+ @ Override
236
+ public TypeMapping visit (ArrowType .LargeUtf8 largeUtf8 ) {
237
+ return primitive (BINARY , stringType ());
238
+ }
239
+
217
240
@ Override
218
241
public TypeMapping visit (Binary type ) {
219
242
return primitive (BINARY );
220
243
}
221
244
245
+ @ Override
246
+ public TypeMapping visit (ArrowType .LargeBinary largeBinary ) {
247
+ return primitive (BINARY );
248
+ }
249
+
222
250
@ Override
223
251
public TypeMapping visit (Bool type ) {
224
252
return primitive (BOOLEAN );
@@ -289,6 +317,16 @@ public TypeMapping visit(Interval type) {
289
317
return primitiveFLBA (12 , LogicalTypeAnnotation .IntervalLogicalTypeAnnotation .getInstance ());
290
318
}
291
319
320
+ @ Override
321
+ public TypeMapping visit (ArrowType .Duration duration ) {
322
+ return primitiveFLBA (12 , LogicalTypeAnnotation .IntervalLogicalTypeAnnotation .getInstance ());
323
+ }
324
+
325
+ @ Override
326
+ public TypeMapping visit (ArrowType .ExtensionType type ) {
327
+ return ArrowTypeVisitor .super .visit (type );
328
+ }
329
+
292
330
@ Override
293
331
public TypeMapping visit (ArrowType .FixedSizeBinary fixedSizeBinary ) {
294
332
return primitive (BINARY );
@@ -358,7 +396,7 @@ private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
358
396
if (repetition == REPEATED ) {
359
397
// case where we have a repeated field that is not in a List/Map
360
398
TypeMapping child = fromParquet (type , null , REQUIRED );
361
- Field arrowField = new Field (name , false , new ArrowType .List (), asList (child .getArrowField ()));
399
+ Field arrowField = new Field (name , FieldType . notNullable ( new ArrowType .List ()), Collections . singletonList (child .getArrowField ()));
362
400
return new RepeatedTypeMapping (arrowField , type , child );
363
401
}
364
402
if (type .isPrimitive ()) {
@@ -376,18 +414,32 @@ private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
376
414
private TypeMapping fromParquetGroup (GroupType type , String name ) {
377
415
LogicalTypeAnnotation logicalType = type .getLogicalTypeAnnotation ();
378
416
if (logicalType == null ) {
417
+ final FieldType field ;
418
+ if (type .isRepetition (OPTIONAL )) {
419
+ field = FieldType .nullable (new Struct ());
420
+ } else {
421
+ field = FieldType .notNullable (new Struct ());
422
+ }
379
423
List <TypeMapping > typeMappings = fromParquet (type .getFields ());
380
- Field arrowField = new Field (name , type . isRepetition ( OPTIONAL ), new Struct () , fields (typeMappings ));
424
+ Field arrowField = new Field (name , field , fields (typeMappings ));
381
425
return new StructTypeMapping (arrowField , type , typeMappings );
382
426
} else {
383
427
return logicalType .accept (new LogicalTypeAnnotation .LogicalTypeAnnotationVisitor <TypeMapping >() {
384
428
@ Override
385
429
public Optional <TypeMapping > visit (LogicalTypeAnnotation .ListLogicalTypeAnnotation listLogicalType ) {
386
430
List3Levels list3Levels = new List3Levels (type );
387
431
TypeMapping child = fromParquet (list3Levels .getElement (), null , list3Levels .getElement ().getRepetition ());
388
- Field arrowField = new Field (name , type . isRepetition ( OPTIONAL ), new ArrowType .List (), asList (child .getArrowField ()));
432
+ Field arrowField = new Field (name , FieldType . nullable ( new ArrowType .List ()), Collections . singletonList (child .getArrowField ()));
389
433
return of (new ListTypeMapping (arrowField , list3Levels , child ));
390
434
}
435
+ @ Override
436
+ public Optional <TypeMapping > visit (LogicalTypeAnnotation .MapLogicalTypeAnnotation mapLogicalType ) {
437
+ Map3Levels map3levels = new Map3Levels (type );
438
+ TypeMapping keyType = fromParquet (map3levels .getKey (), null , map3levels .getKey ().getRepetition ());
439
+ TypeMapping valueType = fromParquet (map3levels .getValue (), null , map3levels .getValue ().getRepetition ());
440
+ Field arrowField = new Field (name , FieldType .nullable (new ArrowType .Map (false )), asList (keyType .getArrowField (), valueType .getArrowField ()));
441
+ return of (new SchemaMapping .MapTypeMapping (arrowField , map3levels , keyType , valueType ));
442
+ }
391
443
}).orElseThrow (() -> new UnsupportedOperationException ("Unsupported type " + type ));
392
444
}
393
445
}
@@ -401,7 +453,12 @@ private TypeMapping fromParquetPrimitive(final PrimitiveType type, final String
401
453
return type .getPrimitiveTypeName ().convert (new PrimitiveType .PrimitiveTypeNameConverter <TypeMapping , RuntimeException >() {
402
454
403
455
private TypeMapping field (ArrowType arrowType ) {
404
- Field field = new Field (name , type .isRepetition (OPTIONAL ), arrowType , null );
456
+ final Field field ;
457
+ if (type .isRepetition (OPTIONAL )) {
458
+ field = Field .nullable (name , arrowType );
459
+ } else {
460
+ field = Field .notNullable (name , arrowType );
461
+ }
405
462
return new PrimitiveTypeMapping (field , type );
406
463
}
407
464
@@ -607,6 +664,11 @@ public TypeMapping visit(org.apache.arrow.vector.types.pojo.ArrowType.List type)
607
664
return createListTypeMapping (type );
608
665
}
609
666
667
+ @ Override
668
+ public TypeMapping visit (ArrowType .LargeList largeList ) {
669
+ return createListTypeMapping (largeList );
670
+ }
671
+
610
672
@ Override
611
673
public TypeMapping visit (org .apache .arrow .vector .types .pojo .ArrowType .FixedSizeList type ) {
612
674
return createListTypeMapping (type );
@@ -639,6 +701,26 @@ public TypeMapping visit(Union type) {
639
701
return new UnionTypeMapping (arrowField , groupType , map (arrowField .getChildren (), groupType .getFields ()));
640
702
}
641
703
704
+ @ Override
705
+ public TypeMapping visit (ArrowType .Map map ) {
706
+ if (arrowField .getChildren ().size () != 2 ) {
707
+ throw new IllegalArgumentException ("Invalid map type: " + map );
708
+ }
709
+ if (parquetField .isPrimitive ()) {
710
+ throw new IllegalArgumentException ("Parquet type not a group: " + parquetField );
711
+ }
712
+ Map3Levels map3levels = new Map3Levels (parquetField .asGroupType ());
713
+ if (arrowField .getChildren ().size () != 2 ) {
714
+ throw new IllegalArgumentException ("invalid arrow map: " + arrowField );
715
+ }
716
+ Field keyChild = arrowField .getChildren ().get (0 );
717
+ Field valueChild = arrowField .getChildren ().get (1 );
718
+ return new SchemaMapping .MapTypeMapping (arrowField , map3levels ,
719
+ map (keyChild , map3levels .getKey ()),
720
+ map (valueChild , map3levels .getValue ())
721
+ );
722
+ }
723
+
642
724
@ Override
643
725
public TypeMapping visit (Int type ) {
644
726
return primitive ();
@@ -654,11 +736,21 @@ public TypeMapping visit(Utf8 type) {
654
736
return primitive ();
655
737
}
656
738
739
+ @ Override
740
+ public TypeMapping visit (ArrowType .LargeUtf8 largeUtf8 ) {
741
+ return primitive ();
742
+ }
743
+
657
744
@ Override
658
745
public TypeMapping visit (Binary type ) {
659
746
return primitive ();
660
747
}
661
748
749
+ @ Override
750
+ public TypeMapping visit (ArrowType .LargeBinary largeBinary ) {
751
+ return primitive ();
752
+ }
753
+
662
754
@ Override
663
755
public TypeMapping visit (Bool type ) {
664
756
return primitive ();
@@ -689,6 +781,11 @@ public TypeMapping visit(Interval type) {
689
781
return primitive ();
690
782
}
691
783
784
+ @ Override
785
+ public TypeMapping visit (ArrowType .Duration duration ) {
786
+ return primitive ();
787
+ }
788
+
692
789
@ Override
693
790
public TypeMapping visit (ArrowType .FixedSizeBinary fixedSizeBinary ) {
694
791
return primitive ();
0 commit comments