Skip to content

Commit

Permalink
Added most recent equivalent unique functions (#2339)
Browse files Browse the repository at this point in the history
* added MOST_RECENT_ in front of the function name
  • Loading branch information
ivakegg committed Apr 18, 2024
1 parent 3210069 commit bb8d353
Show file tree
Hide file tree
Showing 19 changed files with 376 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,23 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,23 @@
<bean class="datawave.query.language.functions.jexl.AtomValuesMatchFunction"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.Rename"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.NoExpansion"/>
<bean class="datawave.query.language.functions.jexl.Compare"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,23 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class QueryFunctions {

public static final String QUERY_FUNCTION_NAMESPACE = "f";
public static final String OPTIONS_FUNCTION = "options";

public static final String MOST_RECENT_PREFIX = "most_recent_";
public static final String UNIQUE_FUNCTION = "unique";
public static final String GROUPBY_FUNCTION = "groupby";
public static final String EXCERPT_FIELDS_FUNCTION = "excerpt_fields";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
* {@code unique_by_minute('field1','field2')}</li>
* <li>{@code f:unique_by_second()}: Expects a comma-delimited list of fields to be unique with a granularity level of by SECOND, e.g.
* {@code unique_by_second('field1','field2')}</li>
* <li>{@code f:most_recent_unique...}: Prefixing any unique function name with {@code most_recent_} sets the most.recent.unique flag to true, e.g.
* {@code most_recent_unique_by_day('field1','field2')}</li>
* <li>{@code f:rename}: Expects a comma-delimited list field/field mappings e.g. {@code f:rename('field1=field2','field3=field4')}</li>
* </ul>
*/
Expand All @@ -63,9 +65,18 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor {
QueryFunctions.UNIQUE_FUNCTION, UniqueFunction.UNIQUE_BY_DAY_FUNCTION, UniqueFunction.UNIQUE_BY_HOUR_FUNCTION,
UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MONTH_FUNCTION,
UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueFunction.UNIQUE_BY_YEAR_FUNCTION,
QueryFunctions.GROUPBY_FUNCTION, QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION,
QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX,
QueryFunctions.AVERAGE, QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION);
QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_DAY_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_HOUR_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MONTH_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_SECOND_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION,
QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, QueryFunctions.GROUPBY_FUNCTION,
QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION, QueryFunctions.LENIENT_FIELDS_FUNCTION,
QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX, QueryFunctions.AVERAGE,
QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION);

@SuppressWarnings("unchecked")
public static <T extends JexlNode> T collect(T node, Object data) {
Expand Down Expand Up @@ -192,7 +203,16 @@ private Object visit(ASTFunctionNode node, Map<String,String> optionsMap) {
ASTNamespaceIdentifier nsIdentifier = (ASTNamespaceIdentifier) node.jjtGetChild(0);
// if this is the f:options function, create a List for the userData to be passed to the child nodes
if (nsIdentifier.getNamespace().equals(QueryFunctions.QUERY_FUNCTION_NAMESPACE)) {
switch (String.valueOf(nsIdentifier.getName())) {
String function = String.valueOf(nsIdentifier.getName());

// check for the most recent flag for the unique functions only
boolean mostRecent = function.startsWith(QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION);
if (mostRecent) {
function = function.substring(QueryFunctions.MOST_RECENT_PREFIX.length());
optionsMap.put(QueryParameters.MOST_RECENT_UNIQUE, "true");
}

switch (function) {
case QueryFunctions.OPTIONS_FUNCTION: {
List<String> optionsList = new ArrayList<>();
this.visit(node, optionsList);
Expand Down Expand Up @@ -238,7 +258,7 @@ private Object visit(ASTFunctionNode node, Map<String,String> optionsMap) {
case UniqueFunction.UNIQUE_BY_SECOND_FUNCTION:
case UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION: {
UniqueFields uniqueFields = new UniqueFields();
updateUniqueFields(node, uniqueFields, optionsMap, UniqueFunction.findByName(nsIdentifier.getName()));
updateUniqueFields(node, uniqueFields, optionsMap, UniqueFunction.findByName(function));
return null;
}
case QueryFunctions.GROUPBY_FUNCTION: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package datawave.query.language.functions.jexl;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.stream.Collectors;

import datawave.query.attributes.UniqueFields;
import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.language.functions.QueryFunction;
import datawave.webservice.query.exception.BadRequestQueryException;
import datawave.webservice.query.exception.DatawaveErrorCode;

/**
 * Function to determine most recent uniqueness among documents given a set of fields and the levels of granularity that should be used for each field. This
 * function accepts a list of fields with specified granularity levels in the format {@code field[ALL],dateField[DAY,HOUR,MINUTE]}. See {@link UniqueFields} for
 * additional documentation on supported formatting.
 */
public class MostRecentUnique extends JexlQueryFunction {

    /**
     * Creates the function under the name {@code most_recent_unique} with an empty parameter list.
     */
    public MostRecentUnique() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION, new ArrayList<>());
    }

    /**
     * query options contain a list of fields. Cannot be the empty list, and the comma-joined parameters must be parseable by
     * {@link UniqueFields#from(String)}.
     *
     * @throws IllegalArgumentException
     *             for illegal arguments; the cause is a {@link BadRequestQueryException} describing the problem
     */
    @Override
    public void validate() throws IllegalArgumentException {
        if (this.parameterList.isEmpty()) {
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS,
                            MessageFormat.format("{0} requires at least one argument", this.name));
            throw new IllegalArgumentException(qe);
        } else {
            String parameters = String.join(",", parameterList);
            try {
                // parsing is done purely for validation; the parsed result is discarded
                UniqueFields.from(parameters);
            } catch (Exception e) {
                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS,
                                MessageFormat.format("Unable to parse unique fields from arguments for function {0}", this.name));
                throw new IllegalArgumentException(qe);
            }
        }
    }

    /**
     * Renders this function in JEXL form, i.e. {@code f:most_recent_unique(arg1,arg2,...)}, with each argument passed through {@code escapeString}.
     *
     * @return the JEXL string for this function
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.MOST_RECENT_PREFIX).append(QueryFunctions.UNIQUE_FUNCTION);
        if (parameterList.isEmpty()) {
            sb.append("()");
        } else {
            // the first argument is preceded by the opening parenthesis, every following one by a comma
            char separator = '(';
            for (String parm : parameterList) {
                sb.append(separator).append(escapeString(parm));
                separator = ',';
            }
            sb.append(')');
        }

        return sb.toString();
    }

    /**
     * Creates a fresh instance of this same function type. Returning a plain {@code Unique} here would drop the {@code most_recent_} prefix from the
     * duplicated function.
     *
     * @return a new {@link MostRecentUnique}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUnique();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
 * Function to return a most recent unique result for every day for a given list of fields. This function is equivalent to
 * {@code #MOST_RECENT_UNIQUE(field[DAY])}.
 */
public class MostRecentUniqueByDay extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-day function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByDay() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByDay}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByDay();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
 * Function to return a most recent unique result for every hour of the day for a given list of fields. This function is equivalent to
 * {@code #MOST_RECENT_UNIQUE(field[HOUR])}.
 */
public class MostRecentUniqueByHour extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-hour function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByHour() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByHour}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByHour();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
* Function to return a most recent unique result for every minute of the hour for a given list of fields. This function is equivalent to
* {@code #MOST_RECENT_UNIQUE(field[MINUTE])}.
*/
public class MostRecentUniqueByMinute extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-minute function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByMinute() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByMinute}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByMinute();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
* Function to return a most recent unique result for every month of the year for a given list of fields. This function is equivalent to
* {@code #MOST_RECENT_UNIQUE(field[MONTH])}.
*/
public class MostRecentUniqueByMonth extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-month function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByMonth() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByMonth}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByMonth();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
* Function to return a most recent unique result for every second for a given list of fields. This function is equivalent to
* {@code #MOST_RECENT_UNIQUE(field[SECOND])}.
*/
public class MostRecentUniqueBySecond extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-second function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueBySecond() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueBySecond}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueBySecond();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
 * Function to return a most recent unique result for every tenth of an hour for a given list of fields. This function is equivalent to
 * {@code #MOST_RECENT_UNIQUE(field[TENTH_OF_HOUR])}.
 */
public class MostRecentUniqueByTenthOfHour extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-tenth-of-hour function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByTenthOfHour() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByTenthOfHour}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByTenthOfHour();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package datawave.query.language.functions.jexl;

import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;

/**
* Function to return a most recent unique result for the year for a given list of fields. This function is equivalent to
* {@code #MOST_RECENT_UNIQUE(field[YEAR])}.
*/
public class MostRecentUniqueByYear extends UniqueByFunction {

    /**
     * Creates the function under the unique-by-year function name prefixed with {@code most_recent_}, with an empty parameter list.
     */
    public MostRecentUniqueByYear() {
        super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, new ArrayList<>());
    }

    /**
     * Creates a fresh instance of this same function type so that the {@code most_recent_} prefix is retained by the duplicate.
     *
     * @return a new {@link MostRecentUniqueByYear}
     */
    @Override
    public QueryFunction duplicate() {
        return new MostRecentUniqueByYear();
    }
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
package datawave.query.language.functions.jexl;

import java.text.MessageFormat;
import java.util.ArrayList;

import datawave.query.jexl.functions.QueryFunctions;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.language.functions.QueryFunction;
import datawave.webservice.query.exception.BadRequestQueryException;
import datawave.webservice.query.exception.DatawaveErrorCode;

/**
* Function to return a unique result for every day for a given list of fields. This function is equivalent to {@code #unique(field[DAY])}.
Expand Down
Loading

0 comments on commit bb8d353

Please sign in to comment.