Skip to content

Commit

Permalink
Task/revert most recent unique (#2464)
Browse files Browse the repository at this point in the history
* Revert "Updated to ensure the most recent unique functions parse correctly."

This reverts commit d0aa266.

* Revert "Fixes to the most-recent-unique functionality (#2392)"

This reverts commit 5b4eb71.

* Revert "Fixed bug where unique results of null values failed to return (#2354)"

This reverts commit 15f08a3.

* Revert "Added most recent equivalent unique functions (#2339)"

This reverts commit bb8d353.

* Revert "Most recent unique feature (#1991)"

This reverts commit a4587af.

* Updated centos dockerfiles to use yum repo mirror

* Update quickstart docker image to Rocky Linux 8 (#2454) (#2455)

* Update quickstart docker image to Rocky Linux 8 (#2454)

* Revert "Updated centos dockerfiles to use yum repo mirror"

This reverts commit d41e886.

* Added the hdfs sorted set builder mechanism back in

---------

Co-authored-by: Whitney O'Meara <[email protected]>
Co-authored-by: Keith Ratcliffe <[email protected]>
  • Loading branch information
3 people committed Jul 10, 2024
1 parent 1e4deba commit aa631c2
Show file tree
Hide file tree
Showing 81 changed files with 1,202 additions and 3,792 deletions.
2 changes: 1 addition & 1 deletion contrib/datawave-quickstart/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM centos:centos7
FROM rockylinux/rockylinux:8

ARG DATAWAVE_COMMIT_ID
ARG DATAWAVE_BRANCH_NAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,15 @@
<bean class="datawave.query.language.functions.jexl.AtomValuesMatchFunction"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.Rename"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.NoExpansion"/>
<bean class="datawave.query.language.functions.jexl.Compare"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1297,11 +1297,8 @@ protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
this.createdRowDir = false;
}

// noinspection unchecked
this.set = (HdfsBackedSortedSet<Key>) HdfsBackedSortedSet.builder().withBufferPersistThreshold(hdfsBackedSetBufferSize)
.withIvaratorCacheDirs(ivaratorCacheDirs).withUniqueSubPath(row).withMaxOpenFiles(maxOpenFiles).withNumRetries(numRetries)
.withPersistOptions(persistOptions).withSetFactory(new FileKeySortedSet.Factory()).build();

this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions,
new FileKeySortedSet.Factory());
this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
this.currentRow = row;
this.setControl.takeOwnership(row, this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ public class FileSystemCache {

public FileSystemCache(String hdfsSiteConfigs) throws MalformedURLException {
conf = new Configuration();
if (hdfsSiteConfigs != null) {
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ public class QueryParameters {

public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
public static final String UNIQUE_FIELDS = "unique.fields";
public static final String MOST_RECENT_UNIQUE = "most.recent.unique";

/**
* Used to specify fields which are excluded from QueryModel expansion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -28,11 +25,9 @@
* captured as a parameter string using {@link UniqueFields#toString()}, and transformed back into a {@link UniqueFields} instance via
* {@link UniqueFields#from(String)}.
*/
public class UniqueFields implements Serializable, Cloneable {
public class UniqueFields implements Serializable {

private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
private Multimap<String,UniqueGranularity> fieldMap;

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
Expand Down Expand Up @@ -77,12 +72,8 @@ public static UniqueFields from(String string) {
if (nextComma == -1 && nextStartBracket == -1) {
String field = string.substring(currentIndex);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
uniqueFields.put(field, UniqueGranularity.ALL);
}
// Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
uniqueFields.put(field, UniqueGranularity.ALL);
}
break; // There are no more fields to be parsed.
} else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) {
Expand All @@ -96,12 +87,8 @@ public static UniqueFields from(String string) {
// Add the field with the ALL granularity.
String field = string.substring(currentIndex, nextComma);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
uniqueFields.put(field, UniqueGranularity.ALL);
}
// Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
uniqueFields.put(field, UniqueGranularity.ALL);
}
currentIndex = nextComma + 1; // Advance to the start of the next field.
} else {
Expand All @@ -113,18 +100,14 @@ public static UniqueFields from(String string) {
String field = string.substring(currentIndex, nextStartBracket);
int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
// An empty granularity list, e.g. field[] is equivalent to field[ALL].
if (granularityList.isEmpty()) {
uniqueFields.put(field, UniqueGranularity.ALL);
} else {
String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
// An empty granularity list, e.g. field[] is equivalent to field[ALL].
if (granularityList.isEmpty()) {
uniqueFields.put(field, UniqueGranularity.ALL);
} else {
String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
for (String granularity : granularities) {
uniqueFields.put(field, parseGranularity(granularity));
}
String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
for (String granularity : granularities) {
uniqueFields.put(field, parseGranularity(granularity));
}
}
}
Expand All @@ -145,19 +128,24 @@ private static UniqueGranularity parseGranularity(String granularity) {
}

/**
* Return a clone of this class
* Return a copy of the given {@link UniqueFields}.
*
* @param other
* the other instance to copy
* @return the copy
*/
@Override
public UniqueFields clone() {
UniqueFields newFields = new UniqueFields();
newFields.fieldMap.putAll(this.fieldMap);
newFields.mostRecent = this.mostRecent;
return newFields;
public static UniqueFields copyOf(UniqueFields other) {
// A null source yields a null copy rather than an exception.
if (other == null) {
return null;
}
UniqueFields uniqueFields = new UniqueFields();
// Build a new TreeMultimap from the source's entries so the copy gets its own map instance instead of sharing the reference.
uniqueFields.fieldMap = TreeMultimap.create(other.fieldMap);
return uniqueFields;
}

public UniqueFields() {}
public UniqueFields() {
fieldMap = TreeMultimap.create();
}

/**
* Create a new {@link UniqueFields} with the provided map as the underlying field map.
Expand All @@ -166,24 +154,7 @@ public UniqueFields() {}
* the field map to use
*/
public UniqueFields(SortedSetMultimap<String,UniqueGranularity> fieldMap) {
putAll(fieldMap);
}

/**
* Clear out the field map
*/
public UniqueFields clear() {
this.fieldMap.clear();
return this;
}

/**
* Set the field map
*
* @param fields
*/
public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
return clear().putAll(fields);
this.fieldMap = fieldMap;
}

/**
Expand All @@ -194,9 +165,8 @@ public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
* @param uniqueGranularity
* the granularity
*/
public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity);
return this;
public void put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(field, uniqueGranularity);
}

/**
Expand All @@ -205,13 +175,10 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
* @param fieldMap
* the field map to add entries from
*/
public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
public void putAll(Multimap<String,UniqueGranularity> fieldMap) {
if (fieldMap != null) {
for (String field : fieldMap.keySet()) {
this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field));
}
this.fieldMap.putAll(fieldMap);
}
return this;
}

/**
Expand All @@ -228,23 +195,39 @@ public void replace(String field, String replacement) {
}

/**
* Return the fields within this {@link UniqueFields}. Modifications to this set will modify the fields in this {@link UniqueFields}.
* Return a copy of the fields within this {@link UniqueFields}. Modifications to this set will not modify the fields in this {@link UniqueFields}.
*
* @return a copy of the fields
*/
public NavigableSet<String> getFields() {
return fieldMap.keySet();
public Set<String> getFields() {
return Sets.newHashSet(fieldMap.keySet());
}

/**
* Return the underlying field-granularity map from this {@link UniqueFields}.
*
* @return the field map
*/
public TreeMultimap<String,UniqueGranularity> getFieldMap() {
public Multimap<String,UniqueGranularity> getFieldMap() {
return fieldMap;
}

/**
 * Replace any identifier fields with their deconstructed version.
 * <p>
 * Every field name is passed through {@link JexlASTHelper#deconstructIdentifier(String)} and its granularities are stored under the resulting name in a
 * newly created map, which then replaces the current field map. Granularities of distinct fields that deconstruct to the same name accumulate under that
 * single name.
 */
public void deconstructIdentifierFields() {
    Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create();
    for (String field : fieldMap.keySet()) {
        // Whether or not deconstruction changes the name, the entries belong under the deconstructed name,
        // so no comparison against the original field is needed.
        newFieldMap.putAll(JexlASTHelper.deconstructIdentifier(field), fieldMap.get(field));
    }
    this.fieldMap = newFieldMap;
}

/**
* Remap all fields to include any matches from the provided model. The original field entries will be retained.
*
Expand All @@ -255,11 +238,12 @@ public void remapFields(Multimap<String,String> model) {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create(fieldMap);
for (String field : fieldMap.keySet()) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
field = field.toUpperCase();
if (model.containsKey(field)) {
model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities));
}
}
set(newFieldMap);
this.fieldMap = newFieldMap;
}

/**
Expand Down Expand Up @@ -321,10 +305,6 @@ public String transformValue(String field, String value) {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
if (mostRecent) {
sb.append(MOST_RECENT_UNIQUE);
sb.append(Constants.COMMA);
}
Iterator<String> fieldIterator = fieldMap.keySet().iterator();
while (fieldIterator.hasNext()) {
// Write the field.
Expand All @@ -346,15 +326,6 @@ public String toString() {
return sb.toString();
}

public boolean isMostRecent() {
return mostRecent;
}

public UniqueFields setMostRecent(boolean mostRecent) {
this.mostRecent = mostRecent;
return this;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -364,12 +335,12 @@ public boolean equals(Object o) {
return false;
}
UniqueFields that = (UniqueFields) o;
return Objects.equals(fieldMap, that.fieldMap) && mostRecent == that.mostRecent;
return Objects.equals(fieldMap, that.fieldMap);
}

@Override
public int hashCode() {
return Objects.hash(fieldMap, mostRecent);
return Objects.hash(fieldMap);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ public void deconstructIdentifiers() {

// Return a copy of the given set with all identifiers deconstructed.
private Set<String> deconstructIdentifiers(Set<String> set) {
return set.stream().map(JexlASTHelper::deconstructIdentifier).map(String::toUpperCase).collect(Collectors.toSet());
return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet());
}

/**
Expand Down
Loading

0 comments on commit aa631c2

Please sign in to comment.