Skip to content

Commit

Permalink
Revert "Most recent unique feature (#1991)"
Browse files Browse the repository at this point in the history
This reverts commit a4587af.
  • Loading branch information
ivakegg committed Jul 9, 2024
1 parent f81825d commit a74ba0e
Show file tree
Hide file tree
Showing 59 changed files with 1,094 additions and 3,411 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1297,11 +1297,8 @@ protected void setupRowBasedHdfsBackedSet(String row) throws IOException {
this.createdRowDir = false;
}

// noinspection unchecked
this.set = (HdfsBackedSortedSet<Key>) HdfsBackedSortedSet.builder().withBufferPersistThreshold(hdfsBackedSetBufferSize)
.withIvaratorCacheDirs(ivaratorCacheDirs).withUniqueSubPath(row).withMaxOpenFiles(maxOpenFiles).withNumRetries(numRetries)
.withPersistOptions(persistOptions).withSetFactory(new FileKeySortedSet.Factory()).build();

this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions,
new FileKeySortedSet.Factory());
this.threadSafeSet = Collections.synchronizedSortedSet(this.set);
this.currentRow = row;
this.setControl.takeOwnership(row, this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ public class FileSystemCache {

public FileSystemCache(String hdfsSiteConfigs) throws MalformedURLException {
conf = new Configuration();
if (hdfsSiteConfigs != null) {
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) {
conf.addResource(new URL(url));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ public class QueryParameters {

public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
public static final String UNIQUE_FIELDS = "unique.fields";
public static final String MOST_RECENT_UNIQUE = "most.recent.unique";

/**
* Used to specify fields which are excluded from QueryModel expansion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -26,10 +25,9 @@
* captured as a parameter string using {@link UniqueFields#toString()}, and transformed back into a {@link UniqueFields} instance via
* {@link UniqueFields#from(String)}.
*/
public class UniqueFields implements Serializable, Cloneable {
public class UniqueFields implements Serializable {

private final SortedSetMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private Multimap<String,UniqueGranularity> fieldMap;

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
Expand Down Expand Up @@ -130,19 +128,24 @@ private static UniqueGranularity parseGranularity(String granularity) {
}

/**
* Return a clone of this class
* Return a copy of the given {@link UniqueFields}.
*
* @param other
* the other instance to copy
* @return the copy
*/
@Override
public UniqueFields clone() {
UniqueFields newFields = new UniqueFields();
newFields.fieldMap.putAll(this.fieldMap);
newFields.mostRecent = this.mostRecent;
return newFields;
public static UniqueFields copyOf(UniqueFields other) {
if (other == null) {
return null;
}
UniqueFields uniqueFields = new UniqueFields();
uniqueFields.fieldMap = TreeMultimap.create(other.fieldMap);
return uniqueFields;
}

public UniqueFields() {}
public UniqueFields() {
fieldMap = TreeMultimap.create();
}

/**
* Create a new {@link UniqueFields} with the provided map as the underlying field map.
Expand All @@ -151,24 +154,7 @@ public UniqueFields() {}
* the field map to use
*/
public UniqueFields(SortedSetMultimap<String,UniqueGranularity> fieldMap) {
putAll(fieldMap);
}

/**
* Clear out the field map
*/
public UniqueFields clear() {
this.fieldMap.clear();
return this;
}

/**
* Set the field map
*
* @param fields
*/
public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
return clear().putAll(fields);
this.fieldMap = fieldMap;
}

/**
Expand All @@ -179,9 +165,8 @@ public UniqueFields set(Multimap<String,UniqueGranularity> fields) {
* @param uniqueGranularity
* the granularity
*/
public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity);
return this;
public void put(String field, UniqueGranularity uniqueGranularity) {
fieldMap.put(field, uniqueGranularity);
}

/**
Expand All @@ -190,13 +175,10 @@ public UniqueFields put(String field, UniqueGranularity uniqueGranularity) {
* @param fieldMap
* the field map to add entries from
*/
public UniqueFields putAll(Multimap<String,UniqueGranularity> fieldMap) {
public void putAll(Multimap<String,UniqueGranularity> fieldMap) {
if (fieldMap != null) {
for (String field : fieldMap.keySet()) {
this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field));
}
this.fieldMap.putAll(fieldMap);
}
return this;
}

/**
Expand Down Expand Up @@ -227,7 +209,23 @@ public Set<String> getFields() {
* @return the field map
*/
public Multimap<String,UniqueGranularity> getFieldMap() {
return TreeMultimap.create(fieldMap);
return fieldMap;
}

/**
* Replace any identifier fields with their deconstructed version.
*/
public void deconstructIdentifierFields() {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create();
for (String field : fieldMap.keySet()) {
String newField = JexlASTHelper.deconstructIdentifier(field);
if (newField.equals(field)) {
newFieldMap.putAll(field, fieldMap.get(field));
} else {
newFieldMap.putAll(newField, fieldMap.get(field));
}
}
this.fieldMap = newFieldMap;
}

/**
Expand All @@ -240,11 +238,12 @@ public void remapFields(Multimap<String,String> model) {
Multimap<String,UniqueGranularity> newFieldMap = TreeMultimap.create(fieldMap);
for (String field : fieldMap.keySet()) {
Collection<UniqueGranularity> granularities = fieldMap.get(field);
field = field.toUpperCase();
if (model.containsKey(field)) {
model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities));
}
}
set(newFieldMap);
this.fieldMap = newFieldMap;
}

/**
Expand Down Expand Up @@ -327,15 +326,6 @@ public String toString() {
return sb.toString();
}

public boolean isMostRecent() {
return mostRecent;
}

public UniqueFields setMostRecent(boolean mostRecent) {
this.mostRecent = mostRecent;
return this;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -345,12 +335,12 @@ public boolean equals(Object o) {
return false;
}
UniqueFields that = (UniqueFields) o;
return Objects.equals(fieldMap, that.fieldMap) && mostRecent == that.mostRecent;
return Objects.equals(fieldMap, that.fieldMap);
}

@Override
public int hashCode() {
return Objects.hash(fieldMap, mostRecent);
return Objects.hash(fieldMap);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ public void deconstructIdentifiers() {

// Return a copy of the given set with all identifiers deconstructed.
private Set<String> deconstructIdentifiers(Set<String> set) {
return set.stream().map(JexlASTHelper::deconstructIdentifier).map(String::toUpperCase).collect(Collectors.toSet());
return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,6 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private List<IvaratorCacheDirConfig> ivaratorCacheDirConfigs = Collections.emptyList();
private String ivaratorFstHdfsBaseURIs = null;
private int ivaratorCacheBufferSize = 10000;

private int uniqueCacheBufferSize = 100;
private long ivaratorCacheScanPersistThreshold = 100000L;
private long ivaratorCacheScanTimeout = 1000L * 60 * 60;
private int maxFieldIndexRangeSplit = 11;
Expand Down Expand Up @@ -388,7 +386,6 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private int groupFieldsBatchSize;
private boolean accrueStats = false;
private UniqueFields uniqueFields = new UniqueFields();
private boolean mostRecentUnique = false;
private boolean cacheModel = false;
/**
* should the sizes of documents be tracked for this query
Expand Down Expand Up @@ -676,8 +673,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) {
this.setCompositeFilterFunctionsEnabled(other.isCompositeFilterFunctionsEnabled());
this.setGroupFieldsBatchSize(other.getGroupFieldsBatchSize());
this.setAccrueStats(other.getAccrueStats());
this.setUniqueFields(other.getUniqueFields());
this.setUniqueCacheBufferSize(other.getUniqueCacheBufferSize());
this.setUniqueFields(UniqueFields.copyOf(other.getUniqueFields()));
this.setCacheModel(other.getCacheModel());
this.setTrackSizes(other.isTrackSizes());
this.setContentFieldNames(null == other.getContentFieldNames() ? null : Lists.newArrayList(other.getContentFieldNames()));
Expand Down Expand Up @@ -1426,14 +1422,6 @@ public void setIvaratorFstHdfsBaseURIs(String ivaratorFstHdfsBaseURIs) {
this.ivaratorFstHdfsBaseURIs = ivaratorFstHdfsBaseURIs;
}

public int getUniqueCacheBufferSize() {
return uniqueCacheBufferSize;
}

public void setUniqueCacheBufferSize(int uniqueCacheBufferSize) {
this.uniqueCacheBufferSize = uniqueCacheBufferSize;
}

public int getIvaratorCacheBufferSize() {
return ivaratorCacheBufferSize;
}
Expand Down Expand Up @@ -1794,7 +1782,11 @@ public UniqueFields getUniqueFields() {
}

public void setUniqueFields(UniqueFields uniqueFields) {
this.uniqueFields = uniqueFields.clone();
this.uniqueFields = uniqueFields;
// If unique fields are present, make sure they are deconstructed by this point.
if (uniqueFields != null) {
uniqueFields.deconstructIdentifierFields();
}
}

public boolean isHitList() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import datawave.webservice.query.cache.ResultsPage;
import datawave.webservice.query.dashboard.DashboardFields;
import datawave.webservice.query.dashboard.DashboardSummary;
import datawave.webservice.query.exception.QueryException;
import datawave.webservice.query.logic.QueryLogicTransformer;
import datawave.webservice.query.logic.ResponseEnricher;
import datawave.webservice.query.result.event.EventBase;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,8 @@ else if (documentRange != null && (!this.isContainsIndexOnlyTerms() && this.getT
// now apply the unique iterator if requested
UniqueTransform uniquify = getUniqueTransform();
if (uniquify != null) {
pipelineDocuments = uniquify.getIterator(pipelineDocuments);
// pipelineDocuments = uniquify;
pipelineDocuments = Iterators.filter(pipelineDocuments, uniquify.getUniquePredicate());
}

// apply the grouping iterator if requested and if the batch size is greater than zero
Expand Down Expand Up @@ -1540,23 +1541,11 @@ public Comparator<Object> getValueComparator(Tuple3<Key,Document,Map<String,Obje
return new ValueComparator(from.second().getMetadata());
}

protected UniqueTransform getUniqueTransform() throws IOException {
protected UniqueTransform getUniqueTransform() {
if (uniqueTransform == null && getUniqueFields() != null && !getUniqueFields().isEmpty()) {
synchronized (getUniqueFields()) {
if (uniqueTransform == null) {
// @formatter:off
uniqueTransform = new UniqueTransform.Builder()
.withUniqueFields(getUniqueFields())
.withQueryExecutionForPageTimeout(getResultTimeout())
.withBufferPersistThreshold(getUniqueCacheBufferSize())
.withIvaratorCacheDirConfigs(getIvaratorCacheDirConfigs())
.withHdfsSiteConfigURLs(getHdfsSiteConfigURLs())
.withSubDirectory(getQueryId() + "-" + getScanId())
.withMaxOpenFiles(getIvaratorMaxOpenFiles())
.withNumRetries(getIvaratorNumRetries())
.withPersistOptions(getIvaratorPersistOptions())
.build();
// @formatter:on
uniqueTransform = new UniqueTransform(getUniqueFields(), getResultTimeout());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,6 @@ public class QueryOptions implements OptionDescriber {
public static final String GROUP_FIELDS = "group.fields";
public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size";
public static final String UNIQUE_FIELDS = "unique.fields";
public static final String MOST_RECENT_UNIQUE = "most.recent.unique";
public static final String UNIQUE_CACHE_BUFFER_SIZE = "unique.cache.buffer.size";

public static final String HITS_ONLY = "hits.only";
public static final String HIT_LIST = "hit.list";
public static final String START_TIME = "start.time";
Expand Down Expand Up @@ -316,7 +313,6 @@ public class QueryOptions implements OptionDescriber {
protected GroupFields groupFields = new GroupFields();
protected int groupFieldsBatchSize = Integer.MAX_VALUE;
protected UniqueFields uniqueFields = new UniqueFields();
protected int uniqueCacheBufferSize = 100;

protected Set<String> hitsOnlySet = new HashSet<>();

Expand Down Expand Up @@ -510,7 +506,6 @@ public void deepCopy(QueryOptions other) {
this.ivaratorCacheDirConfigs = (other.ivaratorCacheDirConfigs == null) ? null : new ArrayList<>(other.ivaratorCacheDirConfigs);
this.hdfsSiteConfigURLs = other.hdfsSiteConfigURLs;
this.ivaratorCacheBufferSize = other.ivaratorCacheBufferSize;
this.uniqueCacheBufferSize = other.uniqueCacheBufferSize;
this.ivaratorCacheScanPersistThreshold = other.ivaratorCacheScanPersistThreshold;
this.ivaratorCacheScanTimeout = other.ivaratorCacheScanTimeout;
this.hdfsFileCompressionCodec = other.hdfsFileCompressionCodec;
Expand Down Expand Up @@ -963,14 +958,6 @@ public void setIvaratorCacheBufferSize(int ivaratorCacheBufferSize) {
this.ivaratorCacheBufferSize = ivaratorCacheBufferSize;
}

public int getUniqueCacheBufferSize() {
return uniqueCacheBufferSize;
}

public void setUniqueCacheBufferSize(int uniqueCacheBufferSize) {
this.uniqueCacheBufferSize = uniqueCacheBufferSize;
}

public long getIvaratorCacheScanPersistThreshold() {
return ivaratorCacheScanPersistThreshold;
}
Expand Down Expand Up @@ -1116,7 +1103,7 @@ public UniqueFields getUniqueFields() {
}

public void setUniqueFields(UniqueFields uniqueFields) {
this.uniqueFields = uniqueFields.clone();
this.uniqueFields = uniqueFields;
}

public Set<String> getHitsOnlySet() {
Expand Down Expand Up @@ -1595,12 +1582,6 @@ public boolean validateOptions(Map<String,String> options) {

if (options.containsKey(UNIQUE_FIELDS)) {
this.setUniqueFields(UniqueFields.from(options.get(UNIQUE_FIELDS)));
if (options.containsKey(MOST_RECENT_UNIQUE)) {
this.getUniqueFields().setMostRecent(Boolean.valueOf(options.get(MOST_RECENT_UNIQUE)));
if (options.containsKey(UNIQUE_CACHE_BUFFER_SIZE)) {
this.setUniqueCacheBufferSize(Integer.parseInt(options.get(UNIQUE_CACHE_BUFFER_SIZE)));
}
}
}

if (options.containsKey(HIT_LIST)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public class FinalDocumentTrackingIterator implements Iterator<Entry<Key,Documen
private boolean statsEntryReturned;
private Key lastKey = null;

public static final Text MARKER_TEXT = new Text("\u2735FinalDocument\u2735");
public static final ByteSequence MARKER_SEQUENCE = new ArrayByteSequence(MARKER_TEXT.getBytes(), 0, MARKER_TEXT.getLength());
private static final Text MARKER_TEXT = new Text("\u2735FinalDocument\u2735");
private static final ByteSequence MARKER_SEQUENCE = new ArrayByteSequence(MARKER_TEXT.getBytes(), 0, MARKER_TEXT.getLength());

private final Range seekRange;
private final QuerySpanCollector querySpanCollector;
Expand Down
Loading

0 comments on commit a74ba0e

Please sign in to comment.