Skip to content

Commit ca29e4c

Browse files
fix(QTDI-679): improve tests
1 parent acdc271 commit ca29e4c

File tree

5 files changed

+413
-194
lines changed

5 files changed

+413
-194
lines changed

component-api/src/main/java/org/talend/sdk/component/api/record/SchemaCompanionUtil.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,13 @@ public static String sanitizeConnectionName(final String name) {
6464
if (sanitizedBuilder.length() == 0 && Character.isDigit(enc.charAt(0))) {
6565
sanitizedBuilder.append('_');
6666
}
67+
6768
for (int iter = 0; iter < enc.length(); iter++) {
68-
if (Character.isLetterOrDigit(enc.charAt(iter))) {
69-
sanitizedBuilder.append(enc.charAt(iter));
70-
} else {
71-
sanitizedBuilder.append('_');
72-
}
69+
final char encodedCurrentChar = enc.charAt(iter);
70+
final char sanitizedLetter = Character.isLetterOrDigit(encodedCurrentChar)
71+
? encodedCurrentChar
72+
: '_';
73+
sanitizedBuilder.append(sanitizedLetter);
7374
}
7475
}
7576
}
@@ -139,9 +140,9 @@ public static Schema.Entry avoidCollision(final Schema.Entry newEntry,
139140
}
140141

141142
/**
 * Looks up the entry registered under the name of {@code newEntry} and reports it as a collision
 * only when it is not equal to {@code newEntry} itself.
 *
 * @param newEntry the entry about to be added; its name is the lookup key.
 * @param entryGetter lookup from an entry name to the entry registered under it; may return {@code null}.
 * @return the registered entry that differs from {@code newEntry}, or {@code null} when the lookup
 * returns nothing or returns an entry equal to {@code newEntry}.
 */
private static Entry findCollidedEntry(final Entry newEntry, final Function<String, Entry> entryGetter) {
142-
final Optional<Entry> collision = Optional.ofNullable(entryGetter.apply(newEntry.getName()))
143-
.filter((final Entry fieldEntry) -> !Objects.equals(fieldEntry, newEntry));
144-
return collision.orElse(null);
143+
return Optional.ofNullable(entryGetter.apply(newEntry.getName()))
144+
.filter(retrievedEntry -> !Objects.equals(retrievedEntry, newEntry))
145+
.orElse(null);
145146
}
146147

147148
private static String newNotCollidedName(final Function<String, Entry> entryGetter, final String rawName) {
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/**
2+
* Copyright (C) 2006-2025 Talend Inc. - www.talend.com
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.talend.sdk.component.api.record;
17+
18+
import java.nio.charset.StandardCharsets;
19+
import java.util.HashMap;
20+
import java.util.Map;
21+
import java.util.Random;
22+
import java.util.function.BiConsumer;
23+
import java.util.regex.Pattern;
24+
import java.util.stream.Stream;
25+
26+
import org.junit.jupiter.api.Assertions;
27+
import org.junit.jupiter.api.Test;
28+
import org.junit.jupiter.params.ParameterizedTest;
29+
import org.junit.jupiter.params.provider.Arguments;
30+
import org.junit.jupiter.params.provider.MethodSource;
31+
import org.talend.sdk.component.api.record.Schema.Entry;
32+
import org.talend.sdk.component.api.record.Schema.Type;
33+
import org.talend.sdk.component.api.test.MockEntry;
34+
35+
import lombok.RequiredArgsConstructor;
36+
37+
public class SchemaCompanionUtilTest {
38+
39+
@Test
40+
void sanitizationPatternBasedCheck() {
41+
final Pattern checkPattern = Pattern.compile("^[A-Za-z_][A-Za-z0-9_]*$");
42+
final String nonAscii1 = SchemaCompanionUtil.sanitizeConnectionName("30_39歳");
43+
Assertions.assertTrue(checkPattern.matcher(nonAscii1).matches(), "'" + nonAscii1 + "' don't match");
44+
45+
final String ch1 = SchemaCompanionUtil.sanitizeConnectionName("世帯数分布");
46+
final String ch2 = SchemaCompanionUtil.sanitizeConnectionName("抽出率調整");
47+
Assertions.assertTrue(checkPattern.matcher(ch1).matches(), "'" + ch1 + "' don't match");
48+
Assertions.assertTrue(checkPattern.matcher(ch2).matches(), "'" + ch2 + "' don't match");
49+
Assertions.assertNotEquals(ch1, ch2);
50+
51+
final Random rnd = new Random();
52+
final byte[] array = new byte[20]; // length is bounded by 7
53+
for (int i = 0; i < 150; i++) {
54+
rnd.nextBytes(array);
55+
final String randomString = new String(array, StandardCharsets.UTF_8);
56+
final String sanitize = SchemaCompanionUtil.sanitizeConnectionName(randomString);
57+
Assertions.assertTrue(checkPattern.matcher(sanitize).matches(), "'" + sanitize + "' don't match");
58+
59+
final String sanitize2 = SchemaCompanionUtil.sanitizeConnectionName(sanitize);
60+
Assertions.assertEquals(sanitize, sanitize2);
61+
}
62+
}
63+
64+
@Test
65+
void sanitizeNull() {
66+
Assertions.assertNull(SchemaCompanionUtil.sanitizeConnectionName(null));
67+
}
68+
69+
@MethodSource("sanitizeCasesSource")
70+
@ParameterizedTest
71+
void sanitizeCases(final String expected, final String rawName) {
72+
Assertions.assertEquals(expected, SchemaCompanionUtil.sanitizeConnectionName(rawName));
73+
}
74+
75+
public static Stream<Arguments> sanitizeCasesSource() {
76+
return Stream.of(
77+
Arguments.of("", ""),
78+
Arguments.of("_", "$"),
79+
Arguments.of("_", "1"),
80+
Arguments.of("_", "é"),
81+
Arguments.of("H", "éH"),
82+
Arguments.of("_1", "é1"),
83+
Arguments.of("H_lloWorld", "HélloWorld"),
84+
Arguments.of("oid", "$oid"),
85+
Arguments.of("Hello_World_", " Hello World "),
86+
Arguments.of("_23HelloWorld", "123HelloWorld"),
87+
88+
Arguments.of("Hello_World_", "Hello-World$"),
89+
Arguments.of("_656", "5656"),
90+
Arguments.of("_____", "Істина"));
91+
}
92+
93+
@Test
94+
void noCollisionDuplicatedEntry() {
95+
final String name = "name_b";
96+
97+
final Entry entry1 = newEntry(name, Type.STRING);
98+
final Entry entry2 = newEntry(name, Type.STRING);
99+
100+
final Map<String, Entry> entries = new HashMap<>();
101+
addNewEntry(entry1, entries);
102+
addNewEntry(entry2, entries);
103+
104+
// second entry with the same name was ignored (can't be two same raw names)
105+
Assertions.assertEquals(1, entries.size());
106+
107+
Assertions.assertNull(entries.get(name).getRawName());
108+
Assertions.assertEquals(name, entries.get("name_b").getName());
109+
}
110+
111+
@Test
112+
void avoidCollisionWithSanitization() {
113+
final String name = "name_b";
114+
115+
final Entry entry1 = newEntry(name, Type.STRING);
116+
final Entry entry2 = newEntry(name, Type.INT);
117+
118+
final Map<String, Entry> entries = new HashMap<>();
119+
addNewEntry(entry1, entries);
120+
addNewEntry(entry2, entries);
121+
122+
// second entry with the same name was ignored (can't be two same raw names)
123+
Assertions.assertEquals(1, entries.size());
124+
125+
Assertions.assertNull(entries.get(name).getRawName());
126+
Assertions.assertEquals(name, entries.get("name_b").getName());
127+
// we remain the first entry.
128+
Assertions.assertEquals(Type.STRING, entries.get("name_b").getType());
129+
}
130+
131+
@Test
132+
void avoidCollisionEqualLengthCyrillicNames() {
133+
final String firstRawName = "Світло";
134+
final String secondRawName = "Мріяти";
135+
final String thirdRawName = "Копати";
136+
137+
final Entry entry1 = newEntry(firstRawName, Type.STRING);
138+
final Entry entry2 = newEntry(secondRawName, Type.STRING);
139+
final Entry entry3 = newEntry(thirdRawName, Type.STRING);
140+
141+
final Map<String, Entry> entries = new HashMap<>();
142+
addNewEntry(entry1, entries);
143+
addNewEntry(entry2, entries);
144+
addNewEntry(entry3, entries);
145+
146+
Assertions.assertEquals(3, entries.size());
147+
148+
// Check that the sanitized names are different
149+
// it was a strange behavior when we replace the existed entry with the same name
150+
Assertions.assertEquals(thirdRawName, entries.get("_____").getRawName());
151+
Assertions.assertEquals(secondRawName, entries.get("______2").getRawName());
152+
Assertions.assertEquals(firstRawName, entries.get("______1").getRawName());
153+
}
154+
155+
@Test
156+
void avoidCollisionNormalNameFirst() {
157+
final String firstRawName = "name_b";
158+
final String secondRawName = "1name_b";
159+
160+
final Entry entry1 = newEntry(firstRawName, Type.STRING);
161+
final Entry entry2 = newEntry(secondRawName, Type.STRING);
162+
163+
final Map<String, Entry> entries = new HashMap<>();
164+
addNewEntry(entry1, entries);
165+
addNewEntry(entry2, entries);
166+
167+
Assertions.assertEquals(2, entries.size());
168+
169+
// Check that the sanitized names are different
170+
// it was a strange behavior when we replace the existed entry with the same name
171+
Assertions.assertNull(entries.get("name_b").getRawName());
172+
Assertions.assertEquals(firstRawName, entries.get("name_b").getName());
173+
Assertions.assertEquals(secondRawName, entries.get("name_b_1").getRawName());
174+
}
175+
176+
@Test
177+
void avoidCollisionNormalNameLast() {
178+
final String firstRawName = "1name_b";
179+
final String secondRawName = "name_b";
180+
181+
final Entry entry1 = newEntry(firstRawName, Type.STRING);
182+
final Entry entry2 = newEntry(secondRawName, Type.STRING);
183+
184+
final Map<String, Entry> entries = new HashMap<>();
185+
addNewEntry(entry1, entries);
186+
addNewEntry(entry2, entries);
187+
188+
Assertions.assertEquals(2, entries.size());
189+
190+
// Check that the sanitized names are different
191+
// it was a strange behavior when we replace the existed entry with the same name
192+
Assertions.assertEquals(firstRawName, entries.get("name_b_1").getRawName());
193+
Assertions.assertNull(entries.get("name_b").getRawName());
194+
Assertions.assertEquals(secondRawName, entries.get("name_b").getName());
195+
}
196+
197+
private static Schema.Entry newEntry(final String name, final Schema.Type type) {
198+
final String sanitizedName = SchemaCompanionUtil.sanitizeConnectionName(name);
199+
return MockEntry.internalBuilder()
200+
.withName(sanitizedName)
201+
.withRawName(name.equals(sanitizedName) ? null : name)
202+
.withType(type)
203+
.build();
204+
}
205+
206+
private static void addNewEntry(final Entry entry, final Map<String, Entry> entries) {
207+
final ReplaceFunction replaceFunction = new ReplaceFunction(entries);
208+
final Entry sanitized = SchemaCompanionUtil.avoidCollision(entry, entries::get, replaceFunction);
209+
if (sanitized != null) {
210+
entries.put(sanitized.getName(), sanitized);
211+
}
212+
}
213+
214+
@RequiredArgsConstructor
215+
private static final class ReplaceFunction implements BiConsumer<String, Entry> {
216+
217+
private final Map<String, Entry> entries;
218+
219+
@Override
220+
public void accept(final String s, final Entry entry) {
221+
entries.remove(s);
222+
entries.put(entry.getName(), entry);
223+
}
224+
}
225+
}

0 commit comments

Comments
 (0)