Skip to content

Commit

Permalink
Add QueryFunctionsDescriptorTest, fields method will now return corre…
Browse files Browse the repository at this point in the history
…ct values for functions with more than one argument
  • Loading branch information
apmoriarty committed Jun 27, 2024
1 parent af8844c commit 23d975e
Show file tree
Hide file tree
Showing 4 changed files with 343 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -32,7 +30,7 @@ public class UniqueFields implements Serializable, Cloneable {

private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
private static final String MOST_RECENT_UNIQUE = "_MOST_RECENT_";

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -14,11 +15,13 @@
import org.apache.commons.jexl3.parser.ASTGENode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTLENode;
import org.apache.commons.jexl3.parser.ASTStringLiteral;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParserTreeConstants;

import datawave.query.attributes.AttributeFactory;
import datawave.query.attributes.UniqueFields;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.jexl.ArithmeticJexlEngines;
import datawave.query.jexl.JexlASTHelper;
Expand All @@ -41,7 +44,8 @@ public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorF
*/
public static class QueryJexlArgumentDescriptor implements JexlArgumentDescriptor {
private final ASTFunctionNode node;
private final String namespace, name;
private final String namespace;
private final String name;
private final List<JexlNode> args;

public QueryJexlArgumentDescriptor(ASTFunctionNode node, String namespace, String name, List<JexlNode> args) {
Expand Down Expand Up @@ -126,24 +130,74 @@ public void addFilters(AttributeFactory attributeFactory, Map<String,EventDataQu

@Override
public Set<String> fieldsForNormalization(MetadataHelper helper, Set<String> datatypeFilter, int arg) {
// Do not normalize fields for the includeText function.
if (!name.equalsIgnoreCase(INCLUDE_TEXT)) {
// All other functions use the fields in the first argument for normalization.
if (arg > 0) {
return fields(helper, datatypeFilter);
}
if (name.equalsIgnoreCase(QueryFunctions.INCLUDE_TEXT)) {
// do not normalize fields for the includeText function
return Collections.emptySet();
}
return Collections.emptySet();

// otherwise delegate to the fields method
return fields(helper, datatypeFilter);
}

@Override
public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
return JexlASTHelper.getIdentifierNames(args.get(0));
Set<String> fields = new HashSet<>();
switch (name) {
case QueryFunctions.COUNT:
case QueryFunctions.SUM:
case QueryFunctions.MIN:
case QueryFunctions.MAX:
case QueryFunctions.AVERAGE:
case QueryFunctions.GROUPBY_FUNCTION:
case QueryFunctions.NO_EXPANSION:
case QueryFunctions.LENIENT_FIELDS_FUNCTION:
case QueryFunctions.STRICT_FIELDS_FUNCTION:
// In practice each of these functions should be parsed out of the query
// almost immediately. This implementation is added for consistency.
for (JexlNode arg : args) {
fields.addAll(JexlASTHelper.getIdentifierNames(arg));
}
break;
case QueryFunctions.INCLUDE_TEXT:
if (args.size() == 2) {
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0)));
} else {
for (int i = 1; i < args.size(); i += 2) {
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(i)));
}
}
break;
case QueryFunctions.UNIQUE_FUNCTION:
for (JexlNode arg : args) {
if (arg instanceof ASTStringLiteral) {
// FIELD[GRANULARITY] is represented by an ASTStringLiteral
String literal = ((ASTStringLiteral) arg).getLiteral();
fields.addAll(UniqueFields.from(literal).getFields());
} else {
// otherwise it's just an ASTIdentifier
for (String identifier : JexlASTHelper.getIdentifierNames(arg)) {
fields.addAll(UniqueFields.from(identifier).getFields());
}
}
}
break;
case QueryFunctions.MATCH_REGEX:
case BETWEEN:
case LENGTH:
default:
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0)));
}
return fields;
}

@Override
public Set<Set<String>> fieldSets(MetadataHelper helper, Set<String> datatypeFilter) {
return JexlArgumentDescriptor.Fields.product(args.get(0));
Set<Set<String>> fieldSet = new HashSet<>();
Set<String> fields = fields(helper, datatypeFilter);
for (String field : fields) {
fieldSet.add(Set.of(field));
}
return fieldSet;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package datawave.query.jexl.functions;

import static datawave.query.jexl.functions.QueryFunctionsDescriptor.QueryJexlArgumentDescriptor;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.Set;

import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.ParseException;
import org.junit.jupiter.api.Test;

import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;

/**
 * Although most query functions are removed from the query by the {@link QueryOptionsFromQueryVisitor}, several functions will persist. These functions may
 * contribute contextual information to the query planner, namely what fields are present in the query. When a field only exists in one of these non-removable
 * functions it is important to verify that all fields are actually parsed by the {@link QueryFunctionsDescriptor}.
 * <p>
 * Each query string below is parsed, its first child is handed to the descriptor, and the resulting {@code fields} / {@code fieldSets} are compared against
 * the expected values.
 */
class QueryFunctionsDescriptorTest {

    // count
    private final String singleFieldCount = "f:count(FIELD)";
    private final String multiFieldedCount = "f:count(FIELD_A, FIELD_B)";

    // between
    private final String betweenDecimal = "f:between(FIELD, 50.0, 60.0)";
    private final String betweenValue = "f:between(FIELD, 'm', 'm~')";

    // length
    private final String length = "f:length(FIELD, '2', '3')";

    // includeText: two-argument form, and the AND/OR forms where fields sit at the odd argument positions
    private final String include = "f:includeText(FIELD, 'baz')";
    private final String includeAnd = "f:includeText(AND, FIELD_A, 'bar', FIELD_B, 'baz')";
    private final String includeOr = "f:includeText(OR, FIELD_A, 'bar', FIELD_B, 'baz')";

    // matchRegex
    private final String regex = "f:matchRegex(FIELD, 'ba.*')";

    // sum
    private final String singleFieldSum = "f:sum(FIELD)";
    private final String multiFieldSum = "f:sum(FIELD_A, FIELD_B)";

    // min
    private final String singleFieldMin = "f:min(FIELD)";
    private final String multiFieldMin = "f:min(FIELD_A, FIELD_B)";

    // max
    private final String singleFieldMax = "f:max(FIELD)";
    private final String multiFieldMax = "f:max(FIELD_A, FIELD_B)";

    // average
    private final String singleFieldAvg = "f:average(FIELD)";
    private final String multiFieldAvg = "f:average(FIELD_A, FIELD_B)";

    // groupby
    private final String singleFieldGroupBy = "f:groupby(FIELD)";
    private final String multiFieldGroupBy = "f:groupby(FIELD_A, FIELD_B)";

    // unique with plain identifiers
    private final String singleFieldUnique = "f:unique(FIELD)";
    private final String multiFieldUnique = "f:unique(FIELD_A, FIELD_B)";

    // unique with a FIELD[GRANULARITY] string literal
    private final String singleFieldUniqueDay = "f:unique('FIELD[DAY]')";
    private final String multiFieldUniqueDay = "f:unique('FIELD_A[DAY]', 'FIELD_B[DAY]')";

    // noExpansion
    private final String singleFieldNoExpansion = "f:noExpansion(FIELD)";
    private final String multiFieldNoExpansion = "f:noExpansion(FIELD_A, FIELD_B)";

    // lenient
    private final String singleFieldLenient = "f:lenient(FIELD)";
    private final String multiFieldLenient = "f:lenient(FIELD_A, FIELD_B)";

    // strict
    private final String singleFieldStrict = "f:strict(FIELD)";
    private final String multiFieldStrict = "f:strict(FIELD_A, FIELD_B)";

    private final QueryFunctionsDescriptor descriptor = new QueryFunctionsDescriptor();

    /**
     * Verifies that {@code fields} reports every field referenced by each supported function.
     */
    @Test
    void testFields() {
        assertFields(singleFieldCount, Set.of("FIELD"));
        assertFields(multiFieldedCount, Set.of("FIELD_A", "FIELD_B"));

        assertFields(betweenDecimal, Set.of("FIELD"));
        assertFields(betweenValue, Set.of("FIELD"));

        assertFields(length, Set.of("FIELD"));

        assertFields(include, Set.of("FIELD"));
        assertFields(includeAnd, Set.of("FIELD_A", "FIELD_B"));
        assertFields(includeOr, Set.of("FIELD_A", "FIELD_B"));

        assertFields(regex, Set.of("FIELD"));

        assertFields(singleFieldSum, Set.of("FIELD"));
        assertFields(multiFieldSum, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldMin, Set.of("FIELD"));
        assertFields(multiFieldMin, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldMax, Set.of("FIELD"));
        assertFields(multiFieldMax, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldAvg, Set.of("FIELD"));
        assertFields(multiFieldAvg, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldGroupBy, Set.of("FIELD"));
        assertFields(multiFieldGroupBy, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldUnique, Set.of("FIELD"));
        assertFields(multiFieldUnique, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldUniqueDay, Set.of("FIELD"));
        assertFields(multiFieldUniqueDay, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldNoExpansion, Set.of("FIELD"));
        assertFields(multiFieldNoExpansion, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldLenient, Set.of("FIELD"));
        assertFields(multiFieldLenient, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldStrict, Set.of("FIELD"));
        assertFields(multiFieldStrict, Set.of("FIELD_A", "FIELD_B"));
    }

    /**
     * Asserts that the descriptor built for {@code query} reports exactly the {@code expected} fields.
     *
     * @param query
     *            a query consisting of a single function call
     * @param expected
     *            the expected field names
     */
    private void assertFields(String query, Set<String> expected) {
        QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query);
        Set<String> fields = jexlDescriptor.fields(null, Set.of());
        assertEquals(expected, fields);
    }

    /**
     * Verifies that {@code fieldSets} reports each referenced field as its own singleton set for each supported function.
     */
    @Test
    void testFieldSets() {
        assertFieldSets(singleFieldCount, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldedCount, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(betweenDecimal, Set.of(Set.of("FIELD")));
        assertFieldSets(betweenValue, Set.of(Set.of("FIELD")));

        assertFieldSets(length, Set.of(Set.of("FIELD")));

        assertFieldSets(include, Set.of(Set.of("FIELD")));
        assertFieldSets(includeAnd, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));
        assertFieldSets(includeOr, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(regex, Set.of(Set.of("FIELD")));

        assertFieldSets(singleFieldSum, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldSum, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldMin, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldMin, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldMax, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldMax, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldAvg, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldAvg, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldGroupBy, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldGroupBy, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldUnique, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldUnique, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        // These two previously called assertFields, so fieldSets was never exercised for FIELD[GRANULARITY] literals.
        assertFieldSets(singleFieldUniqueDay, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldUniqueDay, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldNoExpansion, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldNoExpansion, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldLenient, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldLenient, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldStrict, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldStrict, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));
    }

    /**
     * Asserts that the descriptor built for {@code query} reports exactly the {@code expected} field sets.
     *
     * @param query
     *            a query consisting of a single function call
     * @param expected
     *            the expected sets of field names
     */
    private void assertFieldSets(String query, Set<Set<String>> expected) {
        QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query);
        Set<Set<String>> fields = jexlDescriptor.fieldSets(null, Set.of());
        assertEquals(expected, fields);
    }

    /**
     * Parses the query and builds an argument descriptor from the first child of the parsed script.
     *
     * @param query
     *            a query whose first child is expected to be a function node
     * @return the argument descriptor for that function node
     * @throws IllegalArgumentException
     *             if the first child of the parsed script is not an {@link ASTFunctionNode}
     */
    private QueryJexlArgumentDescriptor getDescriptor(String query) {
        ASTJexlScript script = getQuery(query);
        JexlNode child = script.jjtGetChild(0);
        if (child instanceof ASTFunctionNode) {
            return (QueryJexlArgumentDescriptor) descriptor.getArgumentDescriptor((ASTFunctionNode) child);
        }
        throw new IllegalArgumentException("Could not get descriptor for query: " + query);
    }

    /**
     * Parses and flattens the query, failing the current test if the query cannot be parsed.
     *
     * @param query
     *            the query string
     * @return the parsed script
     */
    private ASTJexlScript getQuery(String query) {
        try {
            return JexlASTHelper.parseAndFlattenJexlQuery(query);
        } catch (ParseException e) {
            // fail(message, cause) always throws and keeps the ParseException as the cause;
            // returning its generic result satisfies the compiler without a dead throw statement.
            return fail("Could not parse query: " + query, e);
        }
    }
}
Loading

0 comments on commit 23d975e

Please sign in to comment.