Skip to content

Commit

Permalink
Update the sortQueryBeforeGlobalIndex option to get field cardinality…
Browse files Browse the repository at this point in the history
… from the DatawaveMetadata table
  • Loading branch information
apmoriarty committed Jun 20, 2024
1 parent 0f34220 commit 9dac86c
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 14 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<version.hamcrest>1.3</version.hamcrest>
<version.httpcomponents-httpclient>4.5.13</version.httpcomponents-httpclient>
<version.httpcomponents-httpcore>4.4.8</version.httpcomponents-httpcore>
<version.in-memory-accumulo>4.0.0</version.in-memory-accumulo>
<version.in-memory-accumulo>4.0.1</version.in-memory-accumulo>
<version.infinispan>9.4.21.Final</version.infinispan>
<version.jackson>2.10.0.pr1</version.jackson>
<version.jackson-mapper-asl>1.9.13</version.jackson-mapper-asl>
Expand Down Expand Up @@ -107,7 +107,7 @@
<version.microservice.common-utils>3.0.0</version.microservice.common-utils>
<version.microservice.dictionary-api>4.0.0</version.microservice.dictionary-api>
<version.microservice.mapreduce-query-api>1.0.0</version.microservice.mapreduce-query-api>
<version.microservice.metadata-utils>4.0.0</version.microservice.metadata-utils>
<version.microservice.metadata-utils>4.0.2</version.microservice.metadata-utils>
<version.microservice.metrics-reporter>3.0.0</version.microservice.metrics-reporter>
<version.microservice.query-api>1.0.0</version.microservice.query-api>
<version.microservice.query-metric-api>4.0.0</version.microservice.query-metric-api>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ public QueryFieldsVisitor(MetadataHelper helper) {

/**
 * Records the identifier of a single node in the accumulated set of query fields.
 *
 * @param node
 *            the node whose identifier is looked up via {@code JexlASTHelper.getIdentifier}
 * @param data
 *            the accumulator, cast to a {@code Set<String>} of field names
 * @return the same {@code data} object that was passed in
 */
private Object parseSingleField(JexlNode node, Object data) {
    String field = JexlASTHelper.getIdentifier(node);
    // Only record a field when an identifier was actually resolved, so that null never
    // ends up in the field set (the previous unconditional add defeated this guard).
    if (field != null) {
        ((Set<String>) data).add(field);
    }
    return data;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
import datawave.query.jexl.visitors.PushdownLowSelectivityNodesVisitor;
import datawave.query.jexl.visitors.PushdownMissingIndexRangeNodesVisitor;
import datawave.query.jexl.visitors.PushdownUnexecutableNodesVisitor;
import datawave.query.jexl.visitors.QueryFieldsVisitor;
import datawave.query.jexl.visitors.QueryModelVisitor;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;
import datawave.query.jexl.visitors.QueryPropertyMarkerSourceConsolidator;
Expand Down Expand Up @@ -2709,7 +2710,7 @@ public Tuple2<CloseableIterable<QueryPlan>,Boolean> getQueryRanges(ScannerFactor
}

if (config.isSortQueryBeforeGlobalIndex()) {
queryTree = OrderByCostVisitor.order((ASTJexlScript) queryTree);
config.setQueryTree(timedSortQueryBeforeGlobalIndex(config, getMetadataHelper()));
}

// if a simple examination of the query has not forced a full table
Expand Down Expand Up @@ -2796,6 +2797,20 @@ public Tuple2<CloseableIterable<QueryPlan>,Boolean> getQueryRanges(ScannerFactor
return new Tuple2<>(ranges, needsFullTable);
}

/**
 * Reorders the configured query tree by field counts, timing the work under the
 * "SortQueryBeforeGlobalIndex" label.
 * <p>
 * The fields referenced by the query are collected first. If any are found, their counts for the
 * configured datatype filter and begin/end dates are requested from the supplied metadata helper and
 * handed to {@code OrderByCostVisitor.orderByFieldCount}. When the query has no fields, or no counts
 * come back, the configured query tree is returned as-is.
 *
 * @param config
 *            the query configuration supplying the query tree, timers, datatype filter and date range
 * @param metadataHelper
 *            the helper used both to parse the query fields and to look up field counts
 * @return the reordered query tree, or the configured query tree when no counts are available
 * @throws DatawaveQueryException
 *             declared by the signature; presumably surfaced by the timed visit (confirm against visitorManager)
 */
protected ASTJexlScript timedSortQueryBeforeGlobalIndex(ShardQueryConfiguration config, MetadataHelper metadataHelper) throws DatawaveQueryException {
    return visitorManager.timedVisit(config.getTimers(), "SortQueryBeforeGlobalIndex", () -> {
        // use the helper passed in by the caller for both steps, rather than mixing it with getMetadataHelper()
        Set<String> fields = QueryFieldsVisitor.parseQueryFields(config.getQueryTree(), metadataHelper);
        if (!fields.isEmpty()) {
            Set<String> datatypes = config.getDatatypeFilter();
            Map<String,Long> counts = metadataHelper.getCountsForFieldsInDateRange(fields, datatypes, config.getBeginDate(), config.getEndDate());
            if (!counts.isEmpty()) {
                return OrderByCostVisitor.orderByFieldCount(config.getQueryTree(), counts);
            }
        }
        // nothing to sort by: leave the tree untouched
        return config.getQueryTree();
    });
}

private TypeMetadata getTypeMetadata() {
try {
return metadataHelper.getTypeMetadata();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ public void testValueExceededMarker() throws ParseException {
test(query, Collections.singleton("FOO"));
}

/**
 * A query that only invokes a method on QUOTE is run through the shared {@code test} helper
 * with an empty set of expected fields.
 */
@Test
public void testMethod() throws ParseException {
    final Set<String> expectedFields = Collections.emptySet();
    test("QUOTE.size() == 1", expectedFields);
}

private void test(String query, Set<String> fields) throws ParseException {

// query as string entrance point
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.hadoop.io.Text;

Expand Down Expand Up @@ -99,6 +100,8 @@ public enum RangeType {
private static final NumberType number = new NumberType();
private static final LcNoDiacriticsListType list = new LcNoDiacriticsListType();

// shared encoder used by createValue(long) to turn a count into the bytes of a Value
private static final LongCombiner.VarLenEncoder encoder = new LongCombiner.VarLenEncoder();

protected static String normalizerForField(String field) {
switch (field) {
case "SHAPE":
Expand Down Expand Up @@ -485,11 +488,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep
m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value);

m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(12L));
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(13L));
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L));
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(10L));
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(14L));

m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value);
Expand Down Expand Up @@ -518,11 +521,11 @@ public static void writeData(AccumuloClient client, RangeType type) throws Excep
m.put(ColumnFamilyConstants.COLF_E, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_E, new Text(octagon), value);

m.put(ColumnFamilyConstants.COLF_F, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon), value);
m.put(ColumnFamilyConstants.COLF_F, new Text(triangle + '\u0000' + shard), createValue(10L));
m.put(ColumnFamilyConstants.COLF_F, new Text(quadrilateral + '\u0000' + shard), createValue(14L));
m.put(ColumnFamilyConstants.COLF_F, new Text(pentagon + '\u0000' + shard), createValue(11L));
m.put(ColumnFamilyConstants.COLF_F, new Text(hexagon + '\u0000' + shard), createValue(13L));
m.put(ColumnFamilyConstants.COLF_F, new Text(octagon + '\u0000' + shard), createValue(12L));

m.put(ColumnFamilyConstants.COLF_I, new Text(triangle), value);
m.put(ColumnFamilyConstants.COLF_I, new Text(quadrilateral), value);
Expand Down Expand Up @@ -640,4 +643,8 @@ private static Value getValue(RangeType type, String uid) {
}
return new Value(builder.build().toByteArray());
}

/**
 * Wraps a count in a {@link Value} using the shared variable-length long encoder.
 *
 * @param count
 *            the count to encode
 * @return a Value whose bytes are the encoder's output for {@code count}
 */
private static Value createValue(long count) {
    final byte[] encodedCount = encoder.encode(count);
    return new Value(encodedCount);
}
}

0 comments on commit 9dac86c

Please sign in to comment.