Skip to content

Commit

Permalink
Merge branch 'integration' into tighter-query-options
Browse files Browse the repository at this point in the history
  • Loading branch information
SethSmucker committed Jun 25, 2024
2 parents 7da6358 + 10ff243 commit f142bed
Show file tree
Hide file tree
Showing 8 changed files with 303 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting
logicQueryStringBuilder.append(config.getQueryString());
QueryLogicHolder holder = new QueryLogicHolder(logicName, logic);
holder.setSettings(settingsCopy);
holder.setMaxResults(logic.getMaxResults());
holder.setMaxResults(logic.getResultLimit(settingsCopy));
configs.put(logicName, config);
logicState.put(logicName, holder);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,14 @@ public boolean apply(Tuple3<Key,Document,DatawaveJexlContext> input) {
}

// now evaluate
if (log.isTraceEnabled()) {
log.trace("Evaluating " + query + " against document " + input.second().getMetadata() + " with context " + input.third());
}

Object o = script.execute(input.third());

if (log.isTraceEnabled()) {
log.trace("Evaluation of " + query + " against " + input.third() + " returned " + o);
log.trace("Evaluation of " + query + " against document " + input.second().getMetadata() + " returned " + o);
}

boolean matched = isMatched(o);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1649,7 +1649,7 @@ public static ValueTuple getHitTerm(Object valueTuple) {
* Given the string "FIRST.SECOND.THIRD.FOURTH"
* - A value of 0 for pos will result in the substring 'SECOND.THIRD'
* - A value of 1 for pos will result in the substring 'SECOND'
* - A value of 2 for pos will result in an exception being thrown
* - A value of 2 for pos will result in null being returned
* </pre>
*
* @param input
Expand All @@ -1663,7 +1663,10 @@ public static String getMatchToLeftOfPeriod(String input, int pos) {
input = input.substring(input.indexOf('.') + 1);
int[] indices = getIndicesOfPeriods(input);
if (indices.length < pos + 1) {
throw new IllegalArgumentException("Input " + input + " does not have a '.' at position " + pos + " from the left.");
if (log.isTraceEnabled()) {
log.trace("Not enough grouping info to extract group " + pos + " from the left for input " + input);
}
return null;
}
return input.substring(0, indices[indices.length - pos - 1]);
}
Expand All @@ -1677,7 +1680,7 @@ public static String getMatchToLeftOfPeriod(String input, int pos) {
* - A value of 0 for pos will result in the substring 'FOURTH'
* - A value of 1 for pos will result in the substring 'THIRD.FOURTH'
* - A value of 2 for pos will result in the substring 'SECOND.THIRD.FOURTH'
* - A value of 3 for pos will result in an exception being thrown
* - A value of 3 for pos will result in null being returned
* </pre>
*
* @param input
Expand All @@ -1689,7 +1692,10 @@ public static String getMatchToLeftOfPeriod(String input, int pos) {
public static String getMatchToRightOfPeriod(String input, int pos) {
    // Returns the part of input that follows the '.' selected by pos, where pos counts periods
    // from the right starting at 0. When input holds fewer than pos + 1 periods there is no
    // such group, so null is returned instead of raising an exception.
    // NOTE(review): relies on getIndicesOfPeriods returning the period offsets in ascending
    // order - confirm against that helper.
    int[] indices = getIndicesOfPeriods(input);
    if (indices.length < pos + 1) {
        // not enough periods for the requested group: note it at trace level and signal "no match"
        if (log.isTraceEnabled()) {
            log.trace("Not enough grouping info to extract group " + pos + " from the right for input " + input);
        }
        return null;
    }
    // skip past the selected period itself, hence the + 1
    return input.substring(indices[indices.length - pos - 1] + 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import java.util.List;
import java.util.Set;

import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.log4j.Logger;
Expand All @@ -19,6 +21,8 @@
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.JexlNodeFactory;
import datawave.query.jexl.functions.FunctionJexlNodeVisitor;
import datawave.query.language.functions.jexl.Jexl;
import datawave.query.language.tree.FunctionNode;
import datawave.query.parser.JavaRegexAnalyzer;
import datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException;
import datawave.query.util.MetadataHelper;
Expand Down Expand Up @@ -62,42 +66,68 @@ public Object visit(ASTFunctionNode node, Object data) {
List<JexlNode> arguments = functionMetadata.args();

List<ASTIdentifier> identifiers = JexlASTHelper.getIdentifiers(arguments.get(0));
List<JexlNode> children = new ArrayList<>(identifiers.size());
List<JexlNode> extractChildren = new ArrayList<>(identifiers.size());
List<JexlNode> keepChildren = new ArrayList<>(identifiers.size());

for (ASTIdentifier identifier : identifiers) {
JexlNode regexNode = buildRegexNode(identifier, functionMetadata.name(), JexlNodes.getIdentifierOrLiteralAsString(arguments.get(1)));
if (regexNode != null) {
children.add(regexNode);
extractChildren.add(regexNode);
} else {
keepChildren.add(JexlNodeFactory.buildIdentifier(identifier.getName()));
}

}

// only re-parent if the same number of regex nodes were built
if (identifiers.size() == children.size()) {
switch (identifiers.size()) {
case 0:
return returnNode;
case 1:
if (log.isTraceEnabled()) {
log.trace("Rewrote \"" + JexlStringBuildingVisitor.buildQueryWithoutParse(node) + "\" into \""
+ JexlStringBuildingVisitor.buildQueryWithoutParse(children.get(0)) + "\"");
}
return children.get(0);
default:
if (functionMetadata.name().equals(INCLUDE_REGEX)) {
returnNode = JexlNodeFactory.createOrNode(children);
} else {
// build an AND node because of how DeMorgan's law works with expanding negations
returnNode = JexlNodeFactory.createAndNode(children);
}

if (log.isTraceEnabled()) {
log.trace("Rewrote \"" + JexlStringBuildingVisitor.buildQueryWithoutParse(node) + "\" into \""
+ JexlStringBuildingVisitor.buildQueryWithoutParse(returnNode) + "\"");
}
return returnNode;
if (extractChildren.size() == 0) {
// nothing to rewrite
return returnNode;
} else if (keepChildren.size() == 0) {
// rewrite all nodes
if (identifiers.size() == 1) {
// we've already rewritten our one node
returnNode = extractChildren.get(0);
} else {
if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) {
returnNode = JexlNodeFactory.createOrNode(extractChildren);
} else {
// build an AND node because of how DeMorgan's law works with expanding negations
returnNode = JexlNodeFactory.createAndNode(extractChildren);
}
}
} else {
// construct each and put it all together
JexlNode extractNode;
List<JexlNode> joint = new ArrayList<>();
List<JexlNode> newArgs = new ArrayList<>();

if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) {
newArgs.add(JexlNodeFactory.createOrNode(keepChildren));
extractNode = JexlNodeFactory.createOrNode(extractChildren);
} else {
newArgs.add(JexlNodeFactory.createAndNode(keepChildren));
extractNode = JexlNodeFactory.createAndNode(extractChildren);
}

newArgs.add(arguments.get(1));
JexlNode newFunc = FunctionJexlNodeVisitor.makeFunctionFrom(functionMetadata.namespace(), functionMetadata.name(),
newArgs.toArray(new JexlNode[0]));

joint.add(extractNode);
joint.add(newFunc);

if (functionMetadata.name().equals(INCLUDE_REGEX) && arguments.get(0) instanceof ASTOrNode) {
returnNode = JexlNodeFactory.createOrNode(joint);
} else {
returnNode = JexlNodeFactory.createAndNode(joint);
}
}
}

if (log.isTraceEnabled()) {
log.trace("Rewrote \"" + JexlStringBuildingVisitor.buildQueryWithoutParse(node) + "\" into \""
+ JexlStringBuildingVisitor.buildQueryWithoutParse(returnNode) + "\"");
}
return returnNode;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1055,13 +1055,12 @@ public void testValidPositions() {
assertResult("second");
}

// Verify that null is returned for an invalid position.
@Test
public void testInvalidPosition() {
    // position 2 is the case this test treats as invalid for its input
    givenPosition(2);

    // no exception is expected any more; the lookup reports the missing group as null
    assertResult(null);
}

private void givenPosition(int position) {
Expand Down Expand Up @@ -1094,13 +1093,12 @@ public void testValidPositions() {
assertResult("second.third.fourth");
}

// Verify that null is returned for an invalid position.
@Test
public void testInvalidPosition() {
    // position 3 is the case this test treats as invalid for its input
    givenPosition(3);

    // no exception is expected any more; the lookup reports the missing group as null
    assertResult(null);
}

private void givenPosition(int position) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,13 @@ public void functionWithMethod() throws ParseException {
assertResult(original, expected);
}

// An excludeRegex filter function over FOO is expected to come back with FOO replaced by BAR1||BAR2.
@Test
public void excludeFunction() throws ParseException {
    assertResult("filter:excludeRegex(FOO, '1')", "filter:excludeRegex(BAR1||BAR2, '1')");
}

@Test
public void functionWithMethodInExpression() throws ParseException {
String original = "filter:includeRegex(FOO, '1').size() > 0";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,66 @@ public void testRewriteMultiFieldedIncludeRegex() throws ParseException {
assertVisitorResult(query, expected, indexOnlyFields);
}

// includeRegex over an OR of three fields where only FIELDB is index-only: FIELDB is expected to be
// pulled out as a regex term while FIELDA and FIELDC stay inside the filter function.
@Test
public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDB");

    // function joined to the anchor term with AND
    assertVisitorResult("FOO == 'bar' && filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')",
                    "FOO == 'bar' && (FIELDB =~ 'ba.*' || filter:includeRegex(FIELDA || FIELDC, 'ba.*'))", indexOnly);

    // function joined to the anchor term with OR
    assertVisitorResult("FOO == 'bar' || filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')",
                    "FOO == 'bar' || (FIELDB =~ 'ba.*' || filter:includeRegex(FIELDA || FIELDC, 'ba.*'))", indexOnly);
}

// includeRegex over an OR of three fields where FIELDB and FIELDC are index-only: both are expected
// to become regex terms, leaving only FIELDA inside the filter function.
@Test
public void testRewriteMultiHeteroIndexOnlyFieldsIncludeRegex2() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDB", "FIELDC");

    // each row is { query, expected rewrite }; first under AND, then under OR
    final String[][] cases = {
                    {"FOO == 'bar' && filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')",
                            "FOO == 'bar' && (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || filter:includeRegex(FIELDA, 'ba.*'))"},
                    {"FOO == 'bar' || filter:includeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')",
                            "FOO == 'bar' || (FIELDB =~ 'ba.*' || FIELDC =~ 'ba.*' || filter:includeRegex(FIELDA, 'ba.*'))"}};

    for (String[] queryAndExpected : cases) {
        assertVisitorResult(queryAndExpected[0], queryAndExpected[1], indexOnly);
    }
}

// excludeRegex over an OR of three fields where FIELDA and FIELDB are index-only: both are expected
// to become negated regex terms ANDed with an excludeRegex that keeps only FIELDC.
@Test
public void testRewriteHeteroIndexOnlyFieldsExcludeRegex() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDA", "FIELDB");

    // function joined to the anchor term with AND
    final String andQuery = "FOO == 'bar' && filter:excludeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')";
    final String andExpected = "FOO == 'bar' && FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*')";
    assertVisitorResult(andQuery, andExpected, indexOnly);

    // function joined to the anchor term with OR
    final String orQuery = "FOO == 'bar' || filter:excludeRegex(FIELDA||FIELDB||FIELDC, 'ba.*')";
    final String orExpected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*'))";
    assertVisitorResult(orQuery, orExpected, indexOnly);
}

// includeRegex whose field argument is an AND of three fields, with FIELDA and FIELDB index-only:
// the expected rewrite ANDs the two regex terms with an includeRegex that keeps only FIELDC.
@Test
public void testANDIncludeRegex() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDA", "FIELDB");

    // function joined to the anchor term with AND
    assertVisitorResult("FOO == 'bar' && filter:includeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')",
                    "FOO == 'bar' && FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && filter:includeRegex(FIELDC, 'ba.*')", indexOnly);

    // function joined to the anchor term with OR
    assertVisitorResult("FOO == 'bar' || filter:includeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')",
                    "FOO == 'bar' || (FIELDA =~ 'ba.*' && FIELDB =~ 'ba.*' && filter:includeRegex(FIELDC, 'ba.*'))", indexOnly);
}

// excludeRegex whose field argument is an AND of three fields, with FIELDA and FIELDB index-only:
// the expected rewrite ANDs the two negated regex terms with an excludeRegex that keeps only FIELDC.
@Test
public void testANDExcludeRegex() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDA", "FIELDB");

    // each row is { query, expected rewrite }; first under AND, then under OR
    final String[][] cases = {
                    {"FOO == 'bar' && filter:excludeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')",
                            "FOO == 'bar' && FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*')"},
                    {"FOO == 'bar' || filter:excludeRegex(FIELDA&&FIELDB&&FIELDC, 'ba.*')",
                            "FOO == 'bar' || (FIELDA !~ 'ba.*' && FIELDB !~ 'ba.*' && filter:excludeRegex(FIELDC, 'ba.*'))"}};

    for (String[] queryAndExpected : cases) {
        assertVisitorResult(queryAndExpected[0], queryAndExpected[1], indexOnly);
    }
}

@Test
public void testRewriteMultiFieldedExcludeRegex() throws ParseException {
Set<String> indexOnlyFields = Sets.newHashSet("FIELDA", "FIELDB");
Expand All @@ -78,6 +138,18 @@ public void testRewriteMultiFieldedExcludeRegex() throws ParseException {
assertVisitorResult(query, expected, indexOnlyFields);
}

// excludeRegex over FIELDA||FIELDB where only FIELDA is index-only: FIELDA is expected to become a
// negated regex term ANDed with an excludeRegex that keeps only FIELDB.
@Test
public void testRewriteMultiHeteroFieldedExcludeRegex() throws ParseException {
    final Set<String> indexOnly = Sets.newHashSet("FIELDA");

    // function joined to the anchor term with AND
    final String andQuery = "FOO == 'bar' && filter:excludeRegex(FIELDA||FIELDB, 'ba.*')";
    final String andExpected = "FOO == 'bar' && (FIELDA !~ 'ba.*' && filter:excludeRegex(FIELDB, 'ba.*'))";
    assertVisitorResult(andQuery, andExpected, indexOnly);

    // function joined to the anchor term with OR
    final String orQuery = "FOO == 'bar' || filter:excludeRegex(FIELDA||FIELDB, 'ba.*')";
    final String orExpected = "FOO == 'bar' || (FIELDA !~ 'ba.*' && filter:excludeRegex(FIELDB, 'ba.*'))";
    assertVisitorResult(orQuery, orExpected, indexOnly);
}

// non index-only fields should not be expanded

@Test
Expand All @@ -90,13 +162,12 @@ public void testEndWildCardNotIndexOnly() throws ParseException {
assertVisitorResult(query, query, indexOnlyFields);
}

// mixture of index-only and non index-only fields: only the index-only field is pulled out of the function

@Test
public void testMixedEventNonEvent() throws ParseException {
    Set<String> indexOnlyFields = Sets.newHashSet("NON_EVENT_FIELD");
    String query = "filter:includeRegex(EVENT_FIELD || NON_EVENT_FIELD,'all_.*?')";
    // NON_EVENT_FIELD becomes a regex term; EVENT_FIELD stays inside the filter function
    String expected = " NON_EVENT_FIELD =~ 'all_.*?' || filter:includeRegex(EVENT_FIELD, 'all_.*?')";
    assertVisitorResult(query, expected, indexOnlyFields);
}

// bad regex cases
Expand Down
Loading

0 comments on commit f142bed

Please sign in to comment.