Skip to content

Commit

Permalink
Table filter scalar, uuid/timeuuid/inet (#1538)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuqi-Du authored Oct 15, 2024
1 parent 74d7b4e commit ebc1f85
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public WarningException(ErrorInstance errorInstance) {
public enum Code implements ErrorCode<WarningException> {
MISSING_INDEX,
NOT_EQUALS_UNSUPPORTED_BY_INDEXING,
COMPARISON_FILTER_UNSUPPORTED_BY_INDEXING,
ZERO_FILTER_OPERATIONS,
INCOMPLETE_PRIMARY_KEY_FILTER;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ public class WhereCQLClauseAnalyzer {
// used.
private static final Set<DataType> ALLOW_FILTERING_NEEDED_FOR_$NE =
Set.of(
DataTypes.TEXT, DataTypes.ASCII, DataTypes.BOOLEAN, DataTypes.DURATION, DataTypes.BLOB);
DataTypes.TEXT,
DataTypes.ASCII,
DataTypes.BOOLEAN,
DataTypes.DURATION,
DataTypes.BLOB,
DataTypes.UUID);

private final TableSchemaObject tableSchemaObject;
private final TableMetadata tableMetadata;
Expand Down Expand Up @@ -74,6 +79,7 @@ public WhereClauseAnalysis analyse(WhereCQLClause<?> whereCQLClause) {

warnMissingIndexOnScalar(identifierToFilter).ifPresent(warnings::add);
warnNotEqUnsupportedByIndexing(identifierToFilter).ifPresent(warnings::add);
warnComparisonUnsupportedByIndexing(identifierToFilter).ifPresent(warnings::add);
warnNoFilters(identifierToFilter).ifPresent(warnings::add);
warnPkNotFullySpecified(identifierToFilter).ifPresent(warnings::add);

Expand Down Expand Up @@ -196,7 +202,7 @@ private Optional<WarningException> warnMissingIndexOnScalar(
}

/**
* Wrn if a filter is on a column that, while it has an index still needs ALLOW FILTERING because
* Warn if a filter is on a column that, while it has an index still needs ALLOW FILTERING because
* not equals is used.
*
* <p>E.G. [perform $ne against a text column 'name' that has SAI index on it] <br>
Expand Down Expand Up @@ -248,6 +254,54 @@ && isIndexOnColumn(entry.getKey())
})));
}

/**
* Warn if a filter is on a column that, while it has an index still needs ALLOW FILTERING because
* comparison API operator $lt, $gt, $lte, $gte is used.
*
* <p>E.G. [perform $lt against a UUID column 'user_id' that has SAI index on it] <br>
* Error from Driver: "Column 'user_id' has an index but does not support the operators specified
* in the query. If you want to execute this query despite the performance unpredictability, use
* ALLOW FILTERING" <br>
* NOTE, TIMEUUID column does not have above constraint.
*/
private Optional<WarningException> warnComparisonUnsupportedByIndexing(
Map<CqlIdentifier, TableFilter> identifierToFilter) {

var inefficientFilters =
identifierToFilter.entrySet().stream()
.filter(
entry -> {
TableFilter tableFilter = entry.getValue();
return (tableFilter instanceof NativeTypeTableFilter<?> nativeTypeTableFilter
&& isIndexOnColumn(entry.getKey())
&& nativeTypeTableFilter.operator.isComparisonOperator());
})
.map(Map.Entry::getKey)
.filter(column -> DataTypes.UUID == tableMetadata.getColumns().get(column).getType())
.sorted(CQL_IDENTIFIER_COMPARATOR)
.toList();

if (inefficientFilters.isEmpty()) {
return Optional.empty();
}

var inefficientColumns =
tableMetadata.getColumns().values().stream()
.filter(column -> DataTypes.UUID == column.getType())
.sorted(COLUMN_METADATA_COMPARATOR)
.toList();

return Optional.of(
WarningException.Code.COMPARISON_FILTER_UNSUPPORTED_BY_INDEXING.get(
errVars(
tableSchemaObject,
map -> {
map.put("inefficientDataTypes", DataTypes.UUID.toString());
map.put("inefficientColumns", errFmtColumnMetadata(inefficientColumns));
map.put("inefficientFilterColumns", errFmtCqlIdentifier(inefficientFilters));
})));
}

private Optional<WarningException> warnNoFilters(
Map<CqlIdentifier, TableFilter> identifierToFilter) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,17 @@ public abstract class ApiDataTypeDefs {
public static final ApiDataTypeDef UUID =
new ApiDataTypeDef(PrimitiveApiDataType.UUID, DataTypes.UUID);

public static final ApiDataTypeDef TIMEUUID =
new ApiDataTypeDef(PrimitiveApiDataType.TIMEUUID, DataTypes.TIMEUUID);

public static final ApiDataTypeDef INET =
new ApiDataTypeDef(PrimitiveApiDataType.INET, DataTypes.INET);

// Primitive Types
public static final List<ApiDataTypeDef> PRIMITIVE_TYPES =
List.of(
ASCII, BIGINT, BOOLEAN, BINARY, DATE, DECIMAL, DOUBLE, DURATION, FLOAT, INT, SMALLINT,
TEXT, TIME, TIMESTAMP, TINYINT, VARINT, INET, UUID);
TEXT, TIME, TIMESTAMP, TINYINT, VARINT, INET, UUID, TIMEUUID);

public static final Map<DataType, ApiDataTypeDef> PRIMITIVE_TYPES_BY_CQL_TYPE =
PRIMITIVE_TYPES.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public enum PrimitiveApiDataType implements ApiDataType {
TIMESTAMP("timestamp"),
TINYINT("tinyint"),
UUID("uuid"),
TIMEUUID("timeuuid"),
VARINT("varint");

private final String apiName;
Expand Down
14 changes: 14 additions & 0 deletions src/main/resources/errors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,20 @@ request-errors:
${SNIPPET.INEFFICIENT_QUERY}
- scope: WARNING
code: COMPARISON_FILTER_UNSUPPORTED_BY_INDEXING
title: Use of $lt, $gt, $lte, $gte (comparison filter) on indexed columns
body: |-
The filter uses $lt, $gt, $lte, $gte (comparison filters) on columns that, while indexed, are still inefficient to filter on.
Filtering using $lt, $gt, $lte, $gte on columns of type ${inefficientDataTypes} is inefficient, even when the columns are indexed.
The table ${keyspace}.${table} uses these data types for the columns: ${inefficientColumns}.
The request applied $lt, $gt, $lte, $gte to the indexed columns: ${inefficientFilterColumns}.
${SNIPPET.INEFFICIENT_QUERY}
- scope: WARNING
code: ZERO_FILTER_OPERATIONS
title: Zero operations provided in query filter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ public static TableFilter filter(
return new TextTableFilter(column.asInternal(), operator, (String) value);
}
if (type.equals(DataTypes.DURATION)) {
// we pass a string to the codec for a duration
return new TextTableFilter(column.asInternal(), operator, (String) value);
}
if (type.equals(DataTypes.INT)) {
Expand Down Expand Up @@ -215,6 +214,15 @@ public static TableFilter filter(
if (type.equals(DataTypes.TIMESTAMP)) {
return new TextTableFilter(column.asInternal(), operator, (String) value);
}
if (type.equals(DataTypes.INET)) {
return new TextTableFilter(column.asInternal(), operator, (String) value);
}
if (type.equals(DataTypes.UUID)) {
return new TextTableFilter(column.asInternal(), operator, (String) value);
}
if (type.equals(DataTypes.TIMEUUID)) {
return new TextTableFilter(column.asInternal(), operator, (String) value);
}

throw new IllegalArgumentException("Unsupported type");
}
Expand Down Expand Up @@ -268,6 +276,15 @@ public static Object value(DataType type) {
if (type.equals(DataTypes.TIMESTAMP)) {
return "2024-09-24T14:06:59Z"; // Sample timestamp
}
if (type.equals(DataTypes.INET)) {
return "127.0.0.1"; // Sample internet address
}
if (type.equals(DataTypes.UUID)) {
return "123e4567-e89b-12d3-a456-426614174000"; // Sample UUID
}
if (type.equals(DataTypes.TIMEUUID)) {
return "123e4567-e89b-12d3-a456-426655440000"; // Sample TIMEUUID
}

throw new IllegalArgumentException("Unsupported type");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,10 @@ public TableMetadata tableAllDatatypesIndexed() {
Map.entry(names.CQL_VARINT_COLUMN, DataTypes.VARINT),
Map.entry(names.CQL_DATE_COLUMN, DataTypes.DATE),
Map.entry(names.CQL_TIMESTAMP_COLUMN, DataTypes.TIMESTAMP),
Map.entry(names.CQL_TIME_COLUMN, DataTypes.TIME)),
Map.entry(names.CQL_TIME_COLUMN, DataTypes.TIME),
Map.entry(names.CQL_INET_COLUMN, DataTypes.INET),
Map.entry(names.CQL_UUID_COLUMN, DataTypes.UUID),
Map.entry(names.CQL_TIMEUUID_COLUMN, DataTypes.TIMEUUID)),
ImmutableMap.of(),
ImmutableMap.ofEntries(
// index all datatypes column in the table,(Blob,Duration can NOT be indexed)
Expand Down Expand Up @@ -184,7 +187,16 @@ public TableMetadata tableAllDatatypesIndexed() {
indexMetadata(names.CQL_TIMESTAMP_COLUMN_INDEX, names.CQL_TIMESTAMP_COLUMN)),
Map.entry(
names.CQL_TIME_COLUMN_INDEX,
indexMetadata(names.CQL_TIME_COLUMN_INDEX, names.CQL_TIME_COLUMN))));
indexMetadata(names.CQL_TIME_COLUMN_INDEX, names.CQL_TIME_COLUMN)),
Map.entry(
names.CQL_INET_COLUMN_INDEX,
indexMetadata(names.CQL_INET_COLUMN_INDEX, names.CQL_INET_COLUMN)),
Map.entry(
names.CQL_UUID_COLUMN_INDEX,
indexMetadata(names.CQL_UUID_COLUMN_INDEX, names.CQL_UUID_COLUMN)),
Map.entry(
names.CQL_TIMEUUID_COLUMN_INDEX,
indexMetadata(names.CQL_TIMEUUID_COLUMN_INDEX, names.CQL_TIMEUUID_COLUMN))));
}

public TableMetadata tableAllDatatypesNotIndexed() {
Expand Down Expand Up @@ -226,7 +238,10 @@ public TableMetadata tableAllDatatypesNotIndexed() {
Map.entry(names.CQL_VARINT_COLUMN, DataTypes.VARINT),
Map.entry(names.CQL_DATE_COLUMN, DataTypes.DATE),
Map.entry(names.CQL_TIMESTAMP_COLUMN, DataTypes.TIMESTAMP),
Map.entry(names.CQL_TIME_COLUMN, DataTypes.TIME)),
Map.entry(names.CQL_TIME_COLUMN, DataTypes.TIME),
Map.entry(names.CQL_INET_COLUMN, DataTypes.INET),
Map.entry(names.CQL_UUID_COLUMN, DataTypes.UUID),
Map.entry(names.CQL_TIMEUUID_COLUMN, DataTypes.TIMEUUID)),
ImmutableMap.of(),
ImmutableMap.of());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,16 @@ public class TestDataNames {
CqlIdentifier.fromInternal("date-column-" + System.currentTimeMillis());
public final CqlIdentifier CQL_TIME_COLUMN =
CqlIdentifier.fromInternal("time-column-" + System.currentTimeMillis());
public final CqlIdentifier CQL_INET_COLUMN =
CqlIdentifier.fromInternal("inet-column-" + System.currentTimeMillis());
public final CqlIdentifier CQL_UUID_COLUMN =
CqlIdentifier.fromInternal("uuid-column-" + System.currentTimeMillis());
public final CqlIdentifier CQL_TIMEUUID_COLUMN =
CqlIdentifier.fromInternal("timeuuid-column-" + System.currentTimeMillis());

public final List<CqlIdentifier> ALL_CQL_DATATYPE_COLUMNS =
List.of(
CQL_TEXT_COLUMN,
// CQL_UUID_COLUMN,
CQL_ASCII_COLUMN,
CQL_BLOB_COLUMN,
CQL_DURATION_COLUMN,
Expand All @@ -90,7 +96,10 @@ public class TestDataNames {
CQL_TINYINT_COLUMN,
CQL_TIMESTAMP_COLUMN,
CQL_DATE_COLUMN,
CQL_TIME_COLUMN);
CQL_TIME_COLUMN,
CQL_INET_COLUMN,
CQL_UUID_COLUMN,
CQL_TIMEUUID_COLUMN);

// All column datatypes index name, (Blob,Duration can NOT be indexed)
public final CqlIdentifier CQL_TEXT_COLUMN_INDEX =
Expand Down Expand Up @@ -123,4 +132,10 @@ public class TestDataNames {
CqlIdentifier.fromInternal("date-column-index-" + System.currentTimeMillis());
public final CqlIdentifier CQL_TIME_COLUMN_INDEX =
CqlIdentifier.fromInternal("time-column-index-" + System.currentTimeMillis());
public final CqlIdentifier CQL_INET_COLUMN_INDEX =
CqlIdentifier.fromInternal("inet-column-index-" + System.currentTimeMillis());
public final CqlIdentifier CQL_UUID_COLUMN_INDEX =
CqlIdentifier.fromInternal("uuid-column-index-" + System.currentTimeMillis());
public final CqlIdentifier CQL_TIMEUUID_COLUMN_INDEX =
CqlIdentifier.fromInternal("timeuuid-column-index-" + System.currentTimeMillis());
}
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ public WhereAnalyzerFixture assertWarnOnNotEqColumns(CqlIdentifier... columns) {
return assertWarningContains(warning);
}

public WhereAnalyzerFixture assertWarnOnComparisonFilterColumns(CqlIdentifier... columns) {
var identifiers = Arrays.stream(columns).sorted(CQL_IDENTIFIER_COMPARATOR).toList();
var warning =
"The request applied $lt, $gt, $lte, $gte to the indexed columns: %s."
.formatted(errFmtCqlIdentifier(identifiers));
return assertWarningContains(warning);
}

public WhereAnalyzerFixture assertWarnOnUnindexedColumns(CqlIdentifier... columns) {

var identifiers = Arrays.stream(columns).sorted(CQL_IDENTIFIER_COMPARATOR).toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,8 @@ public void ne_indexed_column() {
}
if (cqlDatatypeColumn.equals(names().CQL_TEXT_COLUMN)
|| cqlDatatypeColumn.equals(names().CQL_BOOLEAN_COLUMN)
|| cqlDatatypeColumn.equals(names().CQL_ASCII_COLUMN)) {
|| cqlDatatypeColumn.equals(names().CQL_ASCII_COLUMN)
|| cqlDatatypeColumn.equals(names().CQL_UUID_COLUMN)) {
var fixture =
TEST_DATA
.whereAnalyzer()
Expand Down Expand Up @@ -551,6 +552,7 @@ public void in_not_indexed_column() {
// ==================================================================================================================
// Api comparison operator on scalar column, take $gt as example.
// 1.Can not apply $gt/$lte/$gte/$lte to Duration column
// 2.Can not apply to indexed UUID column, index does not support operator
// ==================================================================================================================

@Test
Expand All @@ -572,6 +574,21 @@ public void comparison_api_filter_indexed_column() {
.assertExceptionOnDurationColumns(cqlDatatypeColumn);
continue;
}

if (cqlDatatypeColumn.equals(names().CQL_UUID_COLUMN)) {
var fixture =
TEST_DATA
.whereAnalyzer()
.tableAllColumnDatatypesIndexed("$gt_on_" + cqlDatatypeColumn.asInternal());
fixture
.expression()
.gtOn(cqlDatatypeColumn)
.analyze()
.assertAllowFilteringEnabled()
.assertOneWarning(WarningException.Code.COMPARISON_FILTER_UNSUPPORTED_BY_INDEXING)
.assertWarnOnComparisonFilterColumns(cqlDatatypeColumn);
continue;
}
var fixture =
TEST_DATA
.whereAnalyzer()
Expand Down

0 comments on commit ebc1f85

Please sign in to comment.