Skip to content

Commit 8f3244d

Browse files
Fix Dissect with leading non-ascii characters (elastic#111184)
1 parent c1dcc6e commit 8f3244d

File tree

3 files changed, with 18 additions and 1 deletion.

docs/changelog/111184.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 111184
2+
summary: Fix Dissect with leading non-ascii characters
3+
area: Ingest Node
4+
type: bug
5+
issues: []

libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -203,7 +203,7 @@ public Map<String, String> parse(String inputString) {
203203
DissectKey key = dissectPair.key();
204204
byte[] delimiter = dissectPair.delimiter().getBytes(StandardCharsets.UTF_8);
205205
// start dissection after the first delimiter
206-
int i = leadingDelimiter.length();
206+
int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length;
207207
int valueStart = i;
208208
int lookAheadMatches;
209209
// start walking the input string byte by byte, look ahead for matches where needed

libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,18 @@ public void testMatchUnicode() {
211211
assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
212212
assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲"));
213213
assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲"));
214+
assertMatch(
215+
"Zürich, the %{adjective} city in Switzerland",
216+
"Zürich, the largest city in Switzerland",
217+
Arrays.asList("adjective"),
218+
Arrays.asList("largest")
219+
);
220+
assertMatch(
221+
"Zürich, the %{one} city in Switzerland; Zürich, the %{two} city in Switzerland",
222+
"Zürich, the largest city in Switzerland; Zürich, the LARGEST city in Switzerland",
223+
Arrays.asList("one", "two"),
224+
Arrays.asList("largest", "LARGEST")
225+
);
214226
}
215227

216228
public void testMatchRemainder() {

0 commit comments

Comments (0)