From 1e4deba4b28b712876fb34cc9e4a5940c101780f Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Fri, 31 May 2024 20:06:11 +0000 Subject: [PATCH] Updated to ensure the most recent unique functions parse correctly. --- .../query/attributes/UniqueFields.java | 39 +++++++++++++------ .../query/config/ShardQueryConfiguration.java | 2 +- .../functions/QueryFunctionsDescriptor.java | 9 +++++ .../functions/jexl/MostRecentUnique.java | 2 +- .../functions/jexl/MostRecentUniqueByDay.java | 2 +- .../jexl/MostRecentUniqueByHour.java | 2 +- .../jexl/MostRecentUniqueByMinute.java | 2 +- .../jexl/MostRecentUniqueByMonth.java | 2 +- .../jexl/MostRecentUniqueBySecond.java | 2 +- .../jexl/MostRecentUniqueByTenthOfHour.java | 2 +- .../jexl/MostRecentUniqueByYear.java | 2 +- .../query/planner/DefaultQueryPlanner.java | 1 + .../query/planner/QueryOptionsSwitch.java | 1 + .../test/java/datawave/query/UniqueTest.java | 15 +++++++ 14 files changed, 63 insertions(+), 20 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java index 32cc9e67f57..0c861f54e6b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java @@ -32,6 +32,7 @@ public class UniqueFields implements Serializable, Cloneable { private final TreeMultimap fieldMap = TreeMultimap.create(); private boolean mostRecent = false; + private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; /** * Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by @@ -76,8 +77,12 @@ public static UniqueFields from(String string) { if (nextComma == -1 && nextStartBracket == -1) { String field = string.substring(currentIndex); if (!field.isEmpty()) { - // Add the field only if its not blank. 
Ignore cases with consecutive trailing commas like field1[ALL],, - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); + } else { + // Add the field only if its not blank. Ignore cases with consecutive trailing commas like field1[ALL],, + uniqueFields.put(field, UniqueGranularity.ALL); + } } break; // There are no more fields to be parsed. } else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) { @@ -91,8 +96,12 @@ public static UniqueFields from(String string) { // Add the field with the ALL granularity. String field = string.substring(currentIndex, nextComma); if (!field.isEmpty()) { - // Add the field only if its not blank. Ignore cases with consecutive commas like field1,,field2[DAY] - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); + } else { + // Add the field only if its not blank. Ignore cases with consecutive commas like field1,,field2[DAY] + uniqueFields.put(field, UniqueGranularity.ALL); + } } currentIndex = nextComma + 1; // Advance to the start of the next field. } else { @@ -104,14 +113,18 @@ public static UniqueFields from(String string) { String field = string.substring(currentIndex, nextStartBracket); int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex); if (!field.isEmpty()) { - String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); - // An empty granularity list, e.g. field[] is equivalent to field[ALL]. 
- if (granularityList.isEmpty()) { - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); } else { - String[] granularities = StringUtils.split(granularityList, Constants.COMMA); - for (String granularity : granularities) { - uniqueFields.put(field, parseGranularity(granularity)); + String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); + // An empty granularity list, e.g. field[] is equivalent to field[ALL]. + if (granularityList.isEmpty()) { + uniqueFields.put(field, UniqueGranularity.ALL); + } else { + String[] granularities = StringUtils.split(granularityList, Constants.COMMA); + for (String granularity : granularities) { + uniqueFields.put(field, parseGranularity(granularity)); + } } } } @@ -308,6 +321,10 @@ public String transformValue(String field, String value) { @Override public String toString() { StringBuilder sb = new StringBuilder(); + if (mostRecent) { + sb.append(MOST_RECENT_UNIQUE); + sb.append(Constants.COMMA); + } Iterator<String> fieldIterator = fieldMap.keySet().iterator(); while (fieldIterator.hasNext()) { // Write the field. 
diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index a1636d60664..0ca55e72532 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -388,7 +388,6 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private int groupFieldsBatchSize; private boolean accrueStats = false; private UniqueFields uniqueFields = new UniqueFields(); - private boolean mostRecentUnique = false; private boolean cacheModel = false; /** * should the sizes of documents be tracked for this query @@ -677,6 +676,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) { this.setGroupFieldsBatchSize(other.getGroupFieldsBatchSize()); this.setAccrueStats(other.getAccrueStats()); this.setUniqueFields(other.getUniqueFields()); + log.info("Checkpointing with " + getUniqueFields()); this.setUniqueCacheBufferSize(other.getUniqueCacheBufferSize()); this.setCacheModel(other.getCacheModel()); this.setTrackSizes(other.isTrackSizes()); diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 2aa3849546c..0dbcc3de4f1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -205,6 +205,15 @@ private static void verify(String name, int numArgs) { case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION: case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION: case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION: + case 
QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION: + case QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION: case QueryFunctions.GROUPBY_FUNCTION: case QueryFunctions.EXCERPT_FIELDS_FUNCTION: case QueryFunctions.MATCH_REGEX: diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java index 1e0de067959..ed94abec1ae 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java @@ -66,7 +66,7 @@ public String toString() { @Override public QueryFunction duplicate() { - return new Unique(); + return new MostRecentUnique(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java index 4b0449246e8..845bd863ae3 100644 --- 
a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java @@ -17,6 +17,6 @@ public MostRecentUniqueByDay() { @Override public QueryFunction duplicate() { - return new UniqueByDay(); + return new MostRecentUniqueByDay(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java index fcfec8ddcc4..c831dac8aec 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java @@ -18,6 +18,6 @@ public MostRecentUniqueByHour() { @Override public QueryFunction duplicate() { - return new UniqueByHour(); + return new MostRecentUniqueByHour(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java index fec744f7a94..f8b04bc4050 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java @@ -18,6 +18,6 @@ public MostRecentUniqueByMinute() { @Override public QueryFunction duplicate() { - return new UniqueByMinute(); + return new MostRecentUniqueByMinute(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java index 79a5a494746..3c611479dd5 100644 --- 
a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java @@ -18,6 +18,6 @@ public MostRecentUniqueByMonth() { @Override public QueryFunction duplicate() { - return new UniqueByMonth(); + return new MostRecentUniqueByMonth(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java index 369091107a6..8ff9eedbb45 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java @@ -18,6 +18,6 @@ public MostRecentUniqueBySecond() { @Override public QueryFunction duplicate() { - return new UniqueBySecond(); + return new MostRecentUniqueBySecond(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java index a635eec6256..81948a62cb3 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java @@ -18,6 +18,6 @@ public MostRecentUniqueByTenthOfHour() { @Override public QueryFunction duplicate() { - return new UniqueByTenthOfHour(); + return new MostRecentUniqueByTenthOfHour(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java index 68b6397cf5d..24d8c8c4471 100644 --- 
a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java @@ -18,6 +18,6 @@ public MostRecentUniqueByYear() { @Override public QueryFunction duplicate() { - return new UniqueByYear(); + return new MostRecentUniqueByYear(); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 8cfad16d723..427567fe16f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -533,6 +533,7 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), true); addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), true); if (config.getUniqueFields().isMostRecent()) { + // this may be redundant with the uniqueFields.toString(), but other code relies on this explicitly being set addOption(cfg, QueryOptions.MOST_RECENT_UNIQUE, Boolean.toString(true), false); addOption(cfg, QueryOptions.UNIQUE_CACHE_BUFFER_SIZE, Integer.toString(config.getUniqueCacheBufferSize()), false); } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java index c0d99bd2731..2a4438cfb72 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -67,6 +67,7 @@ public static void apply(Map optionsMap, ShardQueryConfiguration config.setUniqueFields(uniqueFields); break; case QueryParameters.MOST_RECENT_UNIQUE: + 
log.info("Setting unique fields to be most recent"); config.getUniqueFields().setMostRecent(Boolean.parseBoolean(value)); break; case QueryParameters.EXCERPT_FIELDS: diff --git a/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java b/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java index af4f4de4de5..289b281ad74 100644 --- a/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java @@ -373,6 +373,21 @@ public void testUniquenessWithModelAliases() throws Exception { runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); } + @Test + public void testRecentUniquenessWithModelAliases() throws Exception { + Map<String,String> extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("query.syntax", "LUCENE"); + + Set<Set<String>> expected = new HashSet<>(); + expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID, WiseGuysIngest.corleoneUID, WiseGuysIngest.caponeUID)); + Date startDate = format.parse("20091231"); + Date endDate = format.parse("20150101"); + + String queryString = "UUID:/^[CS].*/ AND #MOST_RECENT_UNIQUE(BOTH_NULL)"; + runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); + } + @Test public void testMostRecentUniqueness() throws Exception { Map<String,String> extraParameters = new HashMap<>();