Commit 2c2de3c

Task/revert most recent unique (#2464)
* Revert "Updated to ensure the most recent unique functions parse correctly."

This reverts commit d0aa266.

* Revert "Fixes to the most-recent-unique functionality (#2392)"

This reverts commit 5b4eb71.

* Revert "Fixed bug where unique results of null values failed to return (#2354)"

This reverts commit 15f08a3.

* Revert "Added most recent equivalent unique functions (#2339)"

This reverts commit bb8d353.

* Revert "Most recent unique feature (#1991)"

This reverts commit a4587af.

* Updated centos dockerfiles to use yum repo mirror

* Update quickstart docker image to Rocky Linux 8 (#2454) (#2455)

* Update quickstart docker image to Rocky Linux 8 (#2454)

* Revert "Updated centos dockerfiles to use yum repo mirror"

This reverts commit d41e886.

* Added the hdfs sorted set builder mechanism back in

---------

Co-authored-by: Whitney O'Meara <[email protected]>
Co-authored-by: Keith Ratcliffe <[email protected]>

Conflicts:
	warehouse/query-core/src/main/java/datawave/query/util/sortedset/RewritableSortedSetImpl.java
ivakegg authored and hgklohr committed Jul 10, 2024
1 parent d367399 commit 2c2de3c
Showing 75 changed files with 1,201 additions and 3,788 deletions.

@@ -82,7 +82,6 @@ protected void setup(org.apache.hadoop.mapreduce.Mapper<Key,Value,Key,Value>.Con

QueryLogic<?> logic = (QueryLogic<?>) super.applicationContext.getBean(logicName);
t = logic.getEnrichedTransformer(query);

Assert.notNull(logic.getMarkingFunctions());
Assert.notNull(logic.getResponseObjectFactory());
this.format = SerializationFormat.valueOf(context.getConfiguration().get(RESULT_SERIALIZATION_FORMAT));

@@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>

@@ -38,23 +38,15 @@
<bean class="datawave.query.language.functions.jexl.AtomValuesMatchFunction"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.Rename"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.NoExpansion"/>
<bean class="datawave.query.language.functions.jexl.Compare"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>

@@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>

@@ -1296,11 +1296,8 @@ protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
this.createdRowDir = false;
}

- // noinspection unchecked
- this.set = (HdfsBackedSortedSet<Key>) HdfsBackedSortedSet.builder().withBufferPersistThreshold(hdfsBackedSetBufferSize)
-                 .withIvaratorCacheDirs(ivaratorCacheDirs).withUniqueSubPath(row).withMaxOpenFiles(maxOpenFiles).withNumRetries(numRetries)
-                 .withPersistOptions(persistOptions).withSetFactory(new FileKeySortedSet.Factory()).build();
+ this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions,
+                 new FileKeySortedSet.Factory());
this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
this.currentRow = row;
this.setControl.takeOwnership(row, this);

@@ -24,10 +24,8 @@ public class FileSystemCache {

public FileSystemCache(String hdfsSiteConfigs) throws MalformedURLException {
conf = new Configuration();
- if (hdfsSiteConfigs != null) {
-     for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
-         conf.addResource(new URL(url));
-     }
+ for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
+     conf.addResource(new URL(url));
}
}

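As a small usage sketch of the FileSystemCache constructor shown in this hunk (the config file paths below are invented for illustration, and the example assumes the class is on the classpath):

    import java.net.MalformedURLException;

    // Sketch: the constructor takes a comma-delimited list of Hadoop config URLs and
    // adds each one as a resource to the underlying org.apache.hadoop.conf.Configuration.
    public class FileSystemCacheExample {
        public static void main(String[] args) throws MalformedURLException {
            // Hypothetical paths; any URL resolvable by java.net.URL works here.
            FileSystemCache cache = new FileSystemCache(
                            "file:///etc/hadoop/conf/hdfs-site.xml,file:///etc/hadoop/conf/core-site.xml");
        }
    }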

@@ -179,7 +179,6 @@ public class QueryParameters {

public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
public static final String UNIQUE_FIELDS = "unique.fields";
- public static final String MOST_RECENT_UNIQUE = "most.recent.unique";

/**
* Used to specify fields which are excluded from QueryModel expansion

@@ -4,17 +4,14 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
- import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
- import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
@@ -28,11 +25,9 @@
* captured as a parameter string using {@link UniqueFields#toString()}, and transformed back into a {@link UniqueFields} instance via
* {@link UniqueFields#from(String)}.
*/
- public class UniqueFields implements Serializable, Cloneable {
+ public class UniqueFields implements Serializable {

- private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
- private boolean mostRecent = false;
- private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
+ private Multimap<String,UniqueGranularity> fieldMap;

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
@@ -77,12 +72,8 @@ public static UniqueFields from(String string) {
if (nextComma == -1 && nextStartBracket == -1) {
String field = string.substring(currentIndex);
if (!field.isEmpty()) {
- if (field.equals(MOST_RECENT_UNIQUE)) {
-     uniqueFields.setMostRecent(true);
- } else {
-     // Add the field only if its not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
-     uniqueFields.put(field, UniqueGranularity.ALL);
- }
+ // Add the field only if its not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
+ uniqueFields.put(field, UniqueGranularity.ALL);
}
break; // There are no more fields to be parsed.
} else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) {
@@ -96,12 +87,8 @@ public static UniqueFields from(String string) {
// Add the field with the ALL granularity.
String field = string.substring(currentIndex, nextComma);
if (!field.isEmpty()) {
- if (field.equals(MOST_RECENT_UNIQUE)) {
-     uniqueFields.setMostRecent(true);
- } else {
-     // Add the field only if its not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
-     uniqueFields.put(field, UniqueGranularity.ALL);
- }
+ // Add the field only if its not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
+ uniqueFields.put(field, UniqueGranularity.ALL);
}
currentIndex = nextComma + 1; // Advance to the start of the next field.
} else {
@@ -113,18 +100,14 @@ public static UniqueFields from(String string) {
String field = string.substring(currentIndex, nextStartBracket);
int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex);
if (!field.isEmpty()) {
- if (field.equals(MOST_RECENT_UNIQUE)) {
-     uniqueFields.setMostRecent(true);
- } else {
-     String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
-     // An empty granularity list, e.g. field[] is equivalent to field[ALL].
-     if (granularityList.isEmpty()) {
-         uniqueFields.put(field, UniqueGranularity.ALL);
-     } else {
-         String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
-         for (String granularity : granularities) {
-             uniqueFields.put(field, parseGranularity(granularity));
-         }
-     }
- }
+ String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
+ // An empty granularity list, e.g. field[] is equivalent to field[ALL].
+ if (granularityList.isEmpty()) {
+     uniqueFields.put(field, UniqueGranularity.ALL);
+ } else {
+     String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
+     for (String granularity : granularities) {
+         uniqueFields.put(field, parseGranularity(granularity));
+     }
+ }
}
@@ -145,19 +128,24 @@ private static UniqueGranularity parseGranularity(String granularity) {
}

/**
- * Return a clone of this class
+ * Return a copy of the given {@link UniqueFields}.
+ *
+ * @param other
+ * the other instance to copy
+ * @return the copy
*/
- @Override
- public UniqueFields clone() {
-     UniqueFields newFields = new UniqueFields();
-     newFields.fieldMap.putAll(this.fieldMap);
-     newFields.mostRecent = this.mostRecent;
-     return newFields;
+ public static UniqueFields copyOf(UniqueFields other) {
+     if (other == null) {
+         return null;
+     }
+     UniqueFields uniqueFields = new UniqueFields();
+     uniqueFields.fieldMap = TreeMultimap.create(other.fieldMap);
+     return uniqueFields;
}

- public UniqueFields() {}
+ public UniqueFields() {
+     fieldMap = TreeMultimap.create();
+ }

/**
* Create a new {@link UniqueFields} with the provided map as the underlying field map.
Expand All @@ -166,24 +154,7 @@ public UniqueFields() {}
* the field map to use
*/
public UniqueFields(SortedSetMultimap<String,UniqueGranularity> fieldMap) {
- putAll(fieldMap);
- }
-
- /**
- * Clear out the field map
- */
- public UniqueFields clear() {
-     this.fieldMap.clear();
-     return this;
- }
-
- /**
- * Set the field map
- *
- * @param fields
- */
- public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
-     return clear().putAll(fields);
+ this.fieldMap = fieldMap;
}

/**
@@ -194,9 +165,8 @@ public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
* @param uniqueGranularity
* the granularity
*/
- public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
-     fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity);
-     return this;
+ public void put(String field, UniqueGranularity uniqueGranularity) {
+     fieldMap.put(field, uniqueGranularity);
}

/**
@@ -205,13 +175,10 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
* @param fieldMap
* the field map to add entries from
*/
- public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
+ public void putAll(Multimap<String,UniqueGranularity> fieldMap) {
if (fieldMap != null) {
-     for (String field : fieldMap.keySet()) {
-         this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field));
-     }
+     this.fieldMap.putAll(fieldMap);
}
- return this;
}

/**
@@ -228,23 +195,39 @@ public void replace(String field, String replacement) {
}

/**
- * Return the fields within this {@link UniqueFields}. Modifications to this set will modify the fields in this {@link UniqueFields}.
+ * Return a copy of the fields within this {@link UniqueFields}. Modifications to this set will not modify the fields in this {@link UniqueFields}.
*
* @return a copy of the fields
*/
- public NavigableSet<String> getFields() {
-     return fieldMap.keySet();
+ public Set<String> getFields() {
+     return Sets.newHashSet(fieldMap.keySet());
}

/**
* Return the underlying field-granularity map from this {@link UniqueFields}.
*
* @return the field map
*/
- public TreeMultimap<String,UniqueGranularity> getFieldMap() {
+ public Multimap<String,UniqueGranularity> getFieldMap() {
return fieldMap;
}

+ /**
+ * Replace any identifier fields with their deconstructed version.
+ */
+ public void deconstructIdentifierFields() {
+     Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create();
+     for (String field : fieldMap.keySet()) {
+         String newField = JexlASTHelper.deconstructIdentifier(field);
+         if (newField.equals(field)) {
+             newFieldMap.putAll(field, fieldMap.get(field));
+         } else {
+             newFieldMap.putAll(newField, fieldMap.get(field));
+         }
+     }
+     this.fieldMap = newFieldMap;
+ }

/**
* Remap all fields to include any matches from the provided model. The original field entries will be retained.
*
Expand All @@ -255,11 +238,12 @@ public void remapFields(Multimap<String,String> model) {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create(fieldMap);
for (String field : fieldMap.keySet()) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
+ field = field.toUpperCase();
if (model.containsKey(field)) {
model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities));
}
}
- set(newFieldMap);
+ this.fieldMap = newFieldMap;
}

/**
@@ -321,10 +305,6 @@ public String transformValue(String field, String value) {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- if (mostRecent) {
-     sb.append(MOST_RECENT_UNIQUE);
-     sb.append(Constants.COMMA);
- }
Iterator<String> fieldIterator = fieldMap.keySet().iterator();
while (fieldIterator.hasNext()) {
// Write the field.
@@ -346,15 +326,6 @@ public String toString() {
return sb.toString();
}

- public boolean isMostRecent() {
-     return mostRecent;
- }
-
- public UniqueFields setMostRecent(boolean mostRecent) {
-     this.mostRecent = mostRecent;
-     return this;
- }

@Override
public boolean equals(Object o) {
if (this == o) {
@@ -364,12 +335,12 @@ public boolean equals(Object o) {
return false;
}
UniqueFields that = (UniqueFields) o;
- return Objects.equals(fieldMap, that.fieldMap) && mostRecent == that.mostRecent;
+ return Objects.equals(fieldMap, that.fieldMap);
}

@Override
public int hashCode() {
- return Objects.hash(fieldMap, mostRecent);
+ return Objects.hash(fieldMap);
}

}
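
The class javadoc above describes a round trip between UniqueFields#toString() and UniqueFields#from(String). The sketch below exercises the restored (post-revert) API as it appears in this diff; the method names come from the diff itself, while the package imports and the field names are illustrative assumptions:

    import datawave.query.attributes.UniqueFields;      // assumed package; not shown in this diff
    import datawave.query.attributes.UniqueGranularity; // assumed package; not shown in this diff

    public class UniqueFieldsRoundTrip {
        public static void main(String[] args) {
            // Parse the parameter format handled by from(): field[granularity,...],field,...
            // A bare field (or field[]) gets the ALL granularity; DAY matches the
            // field2[DAY] example in the parsing comments above.
            UniqueFields uniqueFields = UniqueFields.from("DEATH_DATE[DAY],BIRTH_DATE");

            // After the revert, put() returns void and stores the field as given;
            // identifier cleanup is a separate, explicit step.
            uniqueFields.put("$EVENT_DATE", UniqueGranularity.ALL);
            uniqueFields.deconstructIdentifierFields();

            // toString() serializes back into the same format accepted by from().
            UniqueFields roundTripped = UniqueFields.from(uniqueFields.toString());

            // getFields() now returns a defensive copy, so clearing it leaves
            // roundTripped's underlying field map untouched.
            roundTripped.getFields().clear();
        }
    }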