@@ -47,8 +47,82 @@ void CheckFieldIdAt(const ::avro::NodePtr& node, size_t index, int32_t field_id,
47
47
ASSERT_EQ (attrs.getAttribute (key), std::make_optional (std::to_string (field_id)));
48
48
}
49
49
50
+ // Helper function to check if a custom attribute exists for a field name preservation
51
+ void CheckIcebergFieldName (const ::avro::NodePtr& node, size_t index,
52
+ const std::string& original_name) {
53
+ ASSERT_LT (index, node->customAttributes ());
54
+ const auto & attrs = node->customAttributesAt (index);
55
+ ASSERT_EQ (attrs.getAttribute (" iceberg-field-name" ), std::make_optional (original_name));
56
+ }
57
+
50
58
} // namespace
51
59
60
+ TEST (ValidAvroNameTest, ValidNames) {
61
+ // Valid field names should return true
62
+ EXPECT_TRUE (ValidAvroName (" valid_field" ));
63
+ EXPECT_TRUE (ValidAvroName (" field123" ));
64
+ EXPECT_TRUE (ValidAvroName (" _private" ));
65
+ EXPECT_TRUE (ValidAvroName (" CamelCase" ));
66
+ EXPECT_TRUE (ValidAvroName (" field_with_underscores" ));
67
+ }
68
+
69
+ TEST (ValidAvroNameTest, InvalidNames) {
70
+ // Names starting with numbers should return false
71
+ EXPECT_FALSE (ValidAvroName (" 123field" ));
72
+ EXPECT_FALSE (ValidAvroName (" 0value" ));
73
+
74
+ // Names with special characters should return false
75
+ EXPECT_FALSE (ValidAvroName (" field-name" ));
76
+ EXPECT_FALSE (ValidAvroName (" field.name" ));
77
+ EXPECT_FALSE (ValidAvroName (" field name" ));
78
+ EXPECT_FALSE (ValidAvroName (" field@name" ));
79
+ EXPECT_FALSE (ValidAvroName (" field#name" ));
80
+ }
81
+
82
+ TEST (ValidAvroNameTest, EmptyName) {
83
+ // Empty name should return false
84
+ EXPECT_FALSE (ValidAvroName (" " ));
85
+ }
86
+
87
+ TEST (SanitizeFieldNameTest, ValidFieldNames) {
88
+ // Valid field names should remain unchanged
89
+ EXPECT_EQ (SanitizeFieldName (" valid_field" ), " valid_field" );
90
+ EXPECT_EQ (SanitizeFieldName (" field123" ), " field123" );
91
+ EXPECT_EQ (SanitizeFieldName (" _private" ), " _private" );
92
+ EXPECT_EQ (SanitizeFieldName (" CamelCase" ), " CamelCase" );
93
+ EXPECT_EQ (SanitizeFieldName (" field_with_underscores" ), " field_with_underscores" );
94
+ }
95
+
96
+ TEST (SanitizeFieldNameTest, InvalidFieldNames) {
97
+ // Field names starting with numbers should be prefixed with underscore
98
+ EXPECT_EQ (SanitizeFieldName (" 123field" ), " _123field" );
99
+ EXPECT_EQ (SanitizeFieldName (" 0value" ), " _0value" );
100
+
101
+ // Field names with special characters should be encoded with hex values
102
+ EXPECT_EQ (SanitizeFieldName (" field-name" ), " field_x2Dname" );
103
+ EXPECT_EQ (SanitizeFieldName (" field.name" ), " field_x2Ename" );
104
+ EXPECT_EQ (SanitizeFieldName (" field name" ), " field_x20name" );
105
+ EXPECT_EQ (SanitizeFieldName (" field@name" ), " field_x40name" );
106
+ EXPECT_EQ (SanitizeFieldName (" field#name" ), " field_x23name" );
107
+
108
+ // Complex field names with multiple issues
109
+ EXPECT_EQ (SanitizeFieldName (" 1field-with.special@chars" ),
110
+ " _1field_x2Dwith_x2Especial_x40chars" );
111
+ EXPECT_EQ (SanitizeFieldName (" user-email" ), " user_x2Demail" );
112
+ }
113
+
114
+ TEST (SanitizeFieldNameTest, EdgeCases) {
115
+ // Empty field name
116
+ EXPECT_EQ (SanitizeFieldName (" " ), " " );
117
+
118
+ // Field name with only special characters
119
+ EXPECT_EQ (SanitizeFieldName (" @#$" ), " _x40_x23_x24" );
120
+
121
+ // Field name starting with special character
122
+ EXPECT_EQ (SanitizeFieldName (" -field" ), " _x2Dfield" );
123
+ EXPECT_EQ (SanitizeFieldName (" .field" ), " _x2Efield" );
124
+ }
125
+
52
126
TEST (ToAvroNodeVisitorTest, BooleanType) {
53
127
::avro::NodePtr node;
54
128
EXPECT_THAT (ToAvroNodeVisitor{}.Visit (BooleanType{}, &node), IsOk ());
@@ -181,6 +255,60 @@ TEST(ToAvroNodeVisitorTest, StructType) {
181
255
EXPECT_EQ (node->leafAt (1 )->leafAt (1 )->type (), ::avro::AVRO_INT);
182
256
}
183
257
258
+ TEST (ToAvroNodeVisitorTest, StructTypeWithFieldNames) {
259
+ StructType struct_type{
260
+ {SchemaField{/* field_id=*/ 1 , " user-name" , iceberg::string (),
261
+ /* optional=*/ false },
262
+ SchemaField{/* field_id=*/ 2 , " valid_field" , iceberg::string (),
263
+ /* optional=*/ false },
264
+ SchemaField{/* field_id=*/ 3 , " email.address" , iceberg::string (),
265
+ /* optional=*/ true },
266
+ SchemaField{/* field_id=*/ 4 , " AnotherField" , iceberg::int32 (),
267
+ /* optional=*/ true },
268
+ SchemaField{/* field_id=*/ 5 , " 123field" , iceberg::int32 (),
269
+ /* optional=*/ false },
270
+ SchemaField{/* field_id=*/ 6 , " field with spaces" , iceberg::boolean (),
271
+ /* optional=*/ true }}};
272
+ ::avro::NodePtr node;
273
+ EXPECT_THAT (ToAvroNodeVisitor{}.Visit (struct_type, &node), IsOk ());
274
+ EXPECT_EQ (node->type (), ::avro::AVRO_RECORD);
275
+
276
+ ASSERT_EQ (node->names (), 6 );
277
+
278
+ EXPECT_EQ (node->nameAt (0 ), " user_x2Dname" ); // "user-name" -> "user_x2Dname"
279
+ EXPECT_EQ (node->nameAt (2 ),
280
+ " email_x2Eaddress" ); // "email.address" -> "email_x2Eaddress"
281
+ EXPECT_EQ (node->nameAt (4 ), " _123field" ); // "123field" -> "_123field"
282
+ EXPECT_EQ (
283
+ node->nameAt (5 ),
284
+ " field_x20with_x20spaces" ); // "field with spaces" -> "field_x20with_x20spaces"
285
+
286
+ EXPECT_EQ (node->nameAt (1 ), " valid_field" );
287
+ EXPECT_EQ (node->nameAt (3 ), " AnotherField" );
288
+
289
+ ASSERT_EQ (node->customAttributes (), 6 );
290
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 0 , /* field_id=*/ 1 ));
291
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 1 , /* field_id=*/ 2 ));
292
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 2 , /* field_id=*/ 3 ));
293
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 3 , /* field_id=*/ 4 ));
294
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 4 , /* field_id=*/ 5 ));
295
+ ASSERT_NO_FATAL_FAILURE (CheckFieldIdAt (node, /* index=*/ 5 , /* field_id=*/ 6 ));
296
+
297
+ const auto & attrs1 = node->customAttributesAt (1 ); // valid_field
298
+ const auto & attrs3 = node->customAttributesAt (3 ); // AnotherField
299
+ EXPECT_FALSE (attrs1.getAttribute (" iceberg-field-name" ).has_value ());
300
+ EXPECT_FALSE (attrs3.getAttribute (" iceberg-field-name" ).has_value ());
301
+
302
+ ASSERT_NO_FATAL_FAILURE (
303
+ CheckIcebergFieldName (node, /* index=*/ 0 , /* original_name=*/ " user-name" ));
304
+ ASSERT_NO_FATAL_FAILURE (
305
+ CheckIcebergFieldName (node, /* index=*/ 2 , /* original_name=*/ " email.address" ));
306
+ ASSERT_NO_FATAL_FAILURE (
307
+ CheckIcebergFieldName (node, /* index=*/ 4 , /* original_name=*/ " 123field" ));
308
+ ASSERT_NO_FATAL_FAILURE (
309
+ CheckIcebergFieldName (node, /* index=*/ 5 , /* original_name=*/ " field with spaces" ));
310
+ }
311
+
184
312
TEST (ToAvroNodeVisitorTest, ListType) {
185
313
ListType list_type{SchemaField{/* field_id=*/ 5 , " element" , iceberg::string (),
186
314
/* optional=*/ true }};
@@ -1436,5 +1564,4 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
1436
1564
auto result = MakeAvroNodeWithFieldIds (avro_schema.root (), *name_mapping);
1437
1565
ASSERT_THAT (result, IsError (ErrorKind::kInvalidSchema ));
1438
1566
}
1439
-
1440
1567
} // namespace iceberg::avro
0 commit comments