Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task/revert most recent unique #2464

Merged
merged 8 commits into from
Jul 9, 2024
2 changes: 1 addition & 1 deletion contrib/datawave-quickstart/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM centos:centos7
FROM rockylinux/rockylinux:8

ARG DATAWAVE_COMMIT_ID
ARG DATAWAVE_BRANCH_NAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,15 @@
<bean class="datawave.query.language.functions.jexl.AtomValuesMatchFunction"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.Rename"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.NoExpansion"/>
<bean class="datawave.query.language.functions.jexl.Compare"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,7 @@
<bean class="datawave.query.language.functions.jexl.TimeFunction"/>
<bean class="datawave.query.language.functions.jexl.Jexl"/>
<bean class="datawave.query.language.functions.jexl.Options"/>
<bean class="datawave.query.language.functions.jexl.GroupBy"/>
<bean class="datawave.query.language.functions.jexl.Unique"/>
<bean class="datawave.query.language.functions.jexl.UniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.UniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.UniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.UniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.UniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.UniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUnique"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByDay"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMinute"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByMonth"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueBySecond"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByTenthOfHour"/>
<bean class="datawave.query.language.functions.jexl.MostRecentUniqueByYear"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Contains"/>
<bean class="datawave.query.language.functions.jexl.Geowave.CoveredBy"/>
<bean class="datawave.query.language.functions.jexl.Geowave.Covers"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1297,11 +1297,8 @@ protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
this.createdRowDir = false;
}

// noinspection unchecked
this.set = (HdfsBackedSortedSet<Key>) HdfsBackedSortedSet.builder().withBufferPersistThreshold(hdfsBackedSetBufferSize)
.withIvaratorCacheDirs(ivaratorCacheDirs).withUniqueSubPath(row).withMaxOpenFiles(maxOpenFiles).withNumRetries(numRetries)
.withPersistOptions(persistOptions).withSetFactory(new FileKeySortedSet.Factory()).build();

this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions,
new FileKeySortedSet.Factory());
this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
this.currentRow = row;
this.setControl.takeOwnership(row, this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ public class FileSystemCache {

public FileSystemCache(String hdfsSiteConfigs) throws MalformedURLException {
conf = new Configuration();
if (hdfsSiteConfigs != null) {
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ public class QueryParameters {

public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
public static final String UNIQUE_FIELDS = "unique.fields";
public static final String MOST_RECENT_UNIQUE = "most.recent.unique";

/**
* Used to specify fields which are excluded from QueryModel expansion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -28,11 +25,9 @@
* captured as a parameter string using {@link UniqueFields#toString()}, and transformed back into a {@link UniqueFields} instance via
* {@link UniqueFields#from(String)}.
*/
public class UniqueFields implements Serializable, Cloneable {
public class UniqueFields implements Serializable {

private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
private Multimap<String,UniqueGranularity> fieldMap;

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
Expand Down Expand Up @@ -77,12 +72,8 @@ public static UniqueFields from(String string) {
if (nextComma == -1 && nextStartBracket == -1) {
String field = string.substring(currentIndex);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
uniqueFields.put(field, UniqueGranularity.ALL);
}
// Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],,
uniqueFields.put(field, UniqueGranularity.ALL);
}
break; // There are no more fields to be parsed.
} else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) {
Expand All @@ -96,12 +87,8 @@ public static UniqueFields from(String string) {
// Add the field with the ALL granularity.
String field = string.substring(currentIndex, nextComma);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
} else {
// Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
uniqueFields.put(field, UniqueGranularity.ALL);
}
// Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY]
uniqueFields.put(field, UniqueGranularity.ALL);
}
currentIndex = nextComma + 1; // Advance to the start of the next field.
} else {
Expand All @@ -113,18 +100,14 @@ public static UniqueFields from(String string) {
String field = string.substring(currentIndex, nextStartBracket);
int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex);
if (!field.isEmpty()) {
if (field.equals(MOST_RECENT_UNIQUE)) {
uniqueFields.setMostRecent(true);
String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
// An empty granularity list, e.g. field[] is equivalent to field[ALL].
if (granularityList.isEmpty()) {
uniqueFields.put(field, UniqueGranularity.ALL);
} else {
String granularityList = string.substring((nextStartBracket + 1), nextEndBracket);
// An empty granularity list, e.g. field[] is equivalent to field[ALL].
if (granularityList.isEmpty()) {
uniqueFields.put(field, UniqueGranularity.ALL);
} else {
String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
for (String granularity : granularities) {
uniqueFields.put(field, parseGranularity(granularity));
}
String[] granularities = StringUtils.split(granularityList, Constants.COMMA);
for (String granularity : granularities) {
uniqueFields.put(field, parseGranularity(granularity));
}
}
}
Expand All @@ -145,19 +128,24 @@ private static UniqueGranularity parseGranularity(String granularity) {
}

/**
* Return a clone of this class
* Return a copy of the given {@link UniqueFields}.
*
* @param other
* the other instance to copy
* @return the copy
*/
@Override
public UniqueFields clone() {
UniqueFields newFields = new UniqueFields();
newFields.fieldMap.putAll(this.fieldMap);
newFields.mostRecent = this.mostRecent;
return newFields;
public static UniqueFields copyOf(UniqueFields other) {
if (other == null) {
return null;
}
UniqueFields uniqueFields = new UniqueFields();
uniqueFields.fieldMap = TreeMultimap.create(other.fieldMap);
return uniqueFields;
}

public UniqueFields() {}
public UniqueFields() {
fieldMap = TreeMultimap.create();
}

/**
* Create a new {@link UniqueFields} with the provided map as the underlying field map.
Expand All @@ -166,24 +154,7 @@ public UniqueFields() {}
* the field map to use
*/
public UniqueFields(SortedSetMultimap<String,UniqueGranularity> fieldMap) {
putAll(fieldMap);
}

/**
* Clear out the field map
*/
public UniqueFields clear() {
this.fieldMap.clear();
return this;
}

/**
* Set the field map
*
* @param fields
*/
public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
return clear().putAll(fields);
this.fieldMap = fieldMap;
}

/**
Expand All @@ -194,9 +165,8 @@ public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
* @param uniqueGranularity
* the granularity
*/
public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity);
return this;
public void put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(field, uniqueGranularity);
}

/**
Expand All @@ -205,13 +175,10 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
* @param fieldMap
* the field map to add entries from
*/
public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
public void putAll(Multimap<String,UniqueGranularity> fieldMap) {
if (fieldMap != null) {
for (String field : fieldMap.keySet()) {
this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field));
}
this.fieldMap.putAll(fieldMap);
}
return this;
}

/**
Expand All @@ -228,23 +195,39 @@ public void replace(String field, String replacement) {
}

/**
* Return the fields within this {@link UniqueFields}. Modifications to this set will modify the fields in this {@link UniqueFields}.
* Return a copy of the fields within this {@link UniqueFields}. Modifications to this set will not modify the fields in this {@link UniqueFields}.
*
* @return a copy of the fields
*/
public NavigableSet<String> getFields() {
return fieldMap.keySet();
public Set<String> getFields() {
return Sets.newHashSet(fieldMap.keySet());
}

/**
* Return the underlying field-granularity map from this {@link UniqueFields}.
*
* @return the field map
*/
public TreeMultimap<String,UniqueGranularity> getFieldMap() {
public Multimap<String,UniqueGranularity> getFieldMap() {
return fieldMap;
}

/**
* Replace any identifier fields with their deconstructed version.
*/
public void deconstructIdentifierFields() {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create();
for (String field : fieldMap.keySet()) {
String newField = JexlASTHelper.deconstructIdentifier(field);
if (newField.equals(field)) {
newFieldMap.putAll(field, fieldMap.get(field));
} else {
newFieldMap.putAll(newField, fieldMap.get(field));
}
}
this.fieldMap = newFieldMap;
}

/**
* Remap all fields to include any matches from the provided model. The original field entries will be retained.
*
Expand All @@ -255,11 +238,12 @@ public void remapFields(Multimap<String,String> model) {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create(fieldMap);
for (String field : fieldMap.keySet()) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
field = field.toUpperCase();
if (model.containsKey(field)) {
model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities));
}
}
set(newFieldMap);
this.fieldMap = newFieldMap;
}

/**
Expand Down Expand Up @@ -321,10 +305,6 @@ public String transformValue(String field, String value) {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
if (mostRecent) {
sb.append(MOST_RECENT_UNIQUE);
sb.append(Constants.COMMA);
}
Iterator<String> fieldIterator = fieldMap.keySet().iterator();
while (fieldIterator.hasNext()) {
// Write the field.
Expand All @@ -346,15 +326,6 @@ public String toString() {
return sb.toString();
}

public boolean isMostRecent() {
return mostRecent;
}

public UniqueFields setMostRecent(boolean mostRecent) {
this.mostRecent = mostRecent;
return this;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -364,12 +335,12 @@ public boolean equals(Object o) {
return false;
}
UniqueFields that = (UniqueFields) o;
return Objects.equals(fieldMap, that.fieldMap) && mostRecent == that.mostRecent;
return Objects.equals(fieldMap, that.fieldMap);
}

@Override
public int hashCode() {
return Objects.hash(fieldMap, mostRecent);
return Objects.hash(fieldMap);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ public void deconstructIdentifiers() {

// Return a copy of the given set with all identifiers deconstructed.
private Set<String> deconstructIdentifiers(Set<String> set) {
return set.stream().map(JexlASTHelper::deconstructIdentifier).map(String::toUpperCase).collect(Collectors.toSet());
return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet());
}

/**
Expand Down
Loading
Loading