Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Java] Improve cucumber expression creation performance #202

Merged
merged 15 commits into from
Jan 15, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Fixed
- [Java] Improve cucumber expression creation performance ([#202](https://github.com/cucumber/cucumber-expressions/pull/202))

## [16.1.1] - 2022-12-08
### Fixed
- [Java] Improve expression creation performance ([#187](https://github.com/cucumber/cucumber-expressions/pull/187), [#189](https://github.com/cucumber/cucumber-expressions/pull/189))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

@API(status = API.Status.STABLE)
public final class CucumberExpression implements Expression {
private static final Pattern ESCAPE_PATTERN = Pattern.compile("[\\\\^\\[({$.|?*+})\\]]");
private final List<ParameterType<?>> parameterTypes = new ArrayList<>();
private final String source;
private final TreeRegexp treeRegexp;
Expand All @@ -43,7 +42,7 @@ public final class CucumberExpression implements Expression {
private String rewriteToRegex(Node node) {
switch (node.type()) {
case TEXT_NODE:
return escapeRegex(node.text());
return RegexpUtils.escapeRegex(node.text());
jkronegg marked this conversation as resolved.
Show resolved Hide resolved
case OPTIONAL_NODE:
return rewriteOptional(node);
case ALTERNATION_NODE:
Expand All @@ -60,11 +59,6 @@ private String rewriteToRegex(Node node) {
}
}

private static String escapeRegex(String text) {
return ESCAPE_PATTERN.matcher(text).replaceAll("\\\\$0");
}


private String rewriteOptional(Node node) {
assertNoParameters(node, astNode -> createParameterIsNotAllowedInOptional(astNode, source));
assertNoOptionals(node, astNode -> createOptionalIsNotAllowedInOptional(astNode, source));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package io.cucumber.cucumberexpressions;

class RegexpUtils {
/**
* List of characters to be escaped.
* The last char is '}' with index 125, so we need only 126 characters.
*/
private static final boolean[] CHAR_TO_ESCAPE = new boolean[126];
static {
CHAR_TO_ESCAPE['^'] = true;
CHAR_TO_ESCAPE['$'] = true;
CHAR_TO_ESCAPE['['] = true;
CHAR_TO_ESCAPE[']'] = true;
CHAR_TO_ESCAPE['('] = true;
CHAR_TO_ESCAPE[')'] = true;
CHAR_TO_ESCAPE['{'] = true;
CHAR_TO_ESCAPE['}'] = true;
CHAR_TO_ESCAPE['.'] = true;
CHAR_TO_ESCAPE['|'] = true;
CHAR_TO_ESCAPE['?'] = true;
CHAR_TO_ESCAPE['*'] = true;
CHAR_TO_ESCAPE['+'] = true;
CHAR_TO_ESCAPE['\\'] = true;
}

/**
* Escapes the regexp characters (the ones from "^$(){}[].+*?\")
* from the given text, so that they are not considered as regexp
* characters.
* @param text the non-null input text
* @return the input text with escaped regexp characters
*/
public static String escapeRegex(String text) {
/*
Note on performance: this code has been benchmarked for
escaping frequencies of 100%, 50%, 20%, 10%, 1%, 0.1%.
Amongst 4 other variants (including Pattern matching),
this variant is the faster on all escaping frequencies.
*/
int length = text.length();
StringBuilder sb = null; // lazy initialization
int blocStart=0;
jkronegg marked this conversation as resolved.
Show resolved Hide resolved
int maxChar = CHAR_TO_ESCAPE.length;
for (int i = 0; i < length; i++) {
char currentChar = text.charAt(i);
if (currentChar < maxChar && CHAR_TO_ESCAPE[currentChar]) {
if (sb == null) {
sb = new StringBuilder(length * 2);
}
if (i > blocStart) {
// flush previous block
sb.append(text, blocStart, i);
}
sb.append('\\');
sb.append(currentChar);
blocStart=i+1;
}
}
if (sb != null) {
// finalizing character escaping
if (length > blocStart) {
// flush remaining characters
sb.append(text, blocStart, length);
}
return sb.toString();
}
return text;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package io.cucumber.cucumberexpressions;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

class RegexpUtilsTest {

@Test
void escape_all_regexp_characters() {
assertEquals("\\^\\$\\[\\]\\(\\)\\{\\}\\.\\|\\?\\*\\+\\\\", RegexpUtils.escapeRegex("^$[](){}.|?*+\\"));
}

@Test
void escape_escaped_regexp_characters() {
assertEquals("\\^\\$\\[\\]\\\\\\(\\\\\\)\\{\\}\\\\\\\\\\.\\|\\?\\*\\+", RegexpUtils.escapeRegex("^$[]\\(\\){}\\\\.|?*+"));
}


@Test
void do_not_escape_when_there_is_nothing_to_escape() {
assertEquals("dummy", RegexpUtils.escapeRegex("dummy"));
}

@Test
void escapeRegex_gives_no_error_for_unicode_characters() {
assertEquals("🥒", RegexpUtils.escapeRegex("🥒"));
}

}