diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index 5c9f8930f..302c5e269 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -919,6 +919,98 @@ public void testAggregateWithMultipleGroupingLevels(String dataStoreName) throws testCountApi(dataStoreName, query, "query/multi_level_grouping_response.json"); } + @ParameterizedTest + @ArgumentsSource(AllProvider.class) + public void testSortByListSizeWithMissingField(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + String collectionName = "list_size_sort_collection"; + datastore.deleteCollection(collectionName); + datastore.createCollection(collectionName, null); + Collection collection = datastore.getCollection(collectionName); + + try { + collection.upsert( + new SingleValueKey(TENANT_ID, "three"), + new JSONDocument("{\"item\":\"three\",\"tags\":[\"a\",\"b\",\"c\"]}")); + collection.upsert( + new SingleValueKey(TENANT_ID, "one"), + new JSONDocument("{\"item\":\"one\",\"tags\":[\"x\"]}")); + // Document intentionally missing the "tags" field; LENGTH must resolve to 0 instead of + // failing + collection.upsert( + new SingleValueKey(TENANT_ID, "none"), new JSONDocument("{\"item\":\"none\"}")); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + FunctionExpression.builder() + .operator(LENGTH) + .operand(IdentifierExpression.of("tags")) + .build(), + "tag_count") + .addSort(IdentifierExpression.of("tag_count"), ASC) + .build(); + + Iterator resultDocs = collection.aggregate(query); + List> results = new ArrayList<>(); + while (resultDocs.hasNext()) { + results.add(Utils.convertDocumentToMap(resultDocs.next())); + } + + assertEquals(3, results.size()); + // Document without the "tags" field counts as 0 and sorts first in ascending order + assertEquals("none", results.get(0).get("item")); + assertEquals(0, ((Number) results.get(0).get("tag_count")).intValue()); + assertEquals("one", results.get(1).get("item")); + assertEquals(1, ((Number) results.get(1).get("tag_count")).intValue()); + assertEquals("three", results.get(2).get("item")); + assertEquals(3, ((Number) results.get(2).get("tag_count")).intValue()); + } finally { + datastore.deleteCollection(collectionName); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + public void testSortByJsonbArrayLengthInFlatCollection(String dataStoreName) throws IOException { + Collection flatCollection = getFlatCollection(dataStoreName); + + // props.colors is a jsonb array inside a JSONB column in the flat collection. + // Test data: id=1 ["Blue","Green"], id=3 ["Black"], id=5 ["Orange","Blue"], + // id=7 [] (empty), id=2,4,6,8,9,10 props=NULL. + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("id")) + .addSelection( + FunctionExpression.builder() + .operator(LENGTH) + .operand(JsonIdentifierExpression.of("props", "colors")) + .build(), + "color_count") + .addSort(IdentifierExpression.of("color_count"), DESC) + .addSort(IdentifierExpression.of("id"), ASC) + .build(); + + Iterator resultDocs = flatCollection.aggregate(query); + List> results = new ArrayList<>(); + while (resultDocs.hasNext()) { + results.add(Utils.convertDocumentToMap(resultDocs.next())); + } + + assertEquals(10, results.size()); + // id=1 and id=5 have 2 colors + assertEquals("1", results.get(0).get("id")); + assertEquals(2, ((Number) results.get(0).get("color_count")).intValue()); + assertEquals("5", results.get(1).get("id")); + assertEquals(2, ((Number) results.get(1).get("color_count")).intValue()); + // id=3 has 1 color + assertEquals("3", results.get(2).get("id")); + assertEquals(1, ((Number) results.get(2).get("color_count")).intValue()); + // Remaining have 0 (empty array or NULL props) + assertEquals(0, ((Number) results.get(3).get("color_count")).intValue()); + } + @ParameterizedTest @ArgumentsSource(AllProvider.class) public void testAggregateWithFunctionalLeftHandSideFilter(final String dataStoreName) diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoFunctionExpressionParser.java b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoFunctionExpressionParser.java index a9c27f96f..99e587a84 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoFunctionExpressionParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/mongo/query/parser/MongoFunctionExpressionParser.java @@ -69,6 +69,12 @@ Map parse(final FunctionExpression expression) { if (numArgs == 1) { Object value = expression.getOperands().get(0).accept(parser); + // $size fails when the operand resolves to a missing/absent (or null) field. Default such a + // value to an empty array so that LENGTH of an absent field is 0 instead of throwing an + // error. + if (operator == LENGTH) { + value = Map.of("$ifNull", List.of(value, List.of())); + } return Map.of(key, value); } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java index 9c6a4dfe2..0cf619e5a 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/FlatPostgresFieldTransformer.java @@ -103,6 +103,16 @@ public String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn) { .toString(); } + @Override + public String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn) { + String fieldAccessor = buildFieldAccessorWithoutCast(fieldToPgColumn); + // A direct column (no JSON path) is a native PG array; otherwise it is a JSONB array nested in + // a JSONB column. + return fieldToPgColumn.getTransformedField() == null + ? PostgresUtils.prepareArrayLength(fieldAccessor) + : PostgresUtils.prepareJsonbArrayLength(fieldAccessor); + } + @Override public DocumentType getDocumentType() { return DocumentType.FLAT; diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/NestedPostgresColTransformer.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/NestedPostgresColTransformer.java index 3eee307c5..f37d53475 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/NestedPostgresColTransformer.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/NestedPostgresColTransformer.java @@ -103,6 +103,12 @@ public String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn) { .toString(); } + @Override + public String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn) { + // Nested collections store every field inside the JSONB document column. + return PostgresUtils.prepareJsonbArrayLength(buildFieldAccessorWithoutCast(fieldToPgColumn)); + } + @Override public DocumentType getDocumentType() { return DocumentType.NESTED; diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/PostgresColTransformer.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/PostgresColTransformer.java index 4d842ff35..5fa76192d 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/PostgresColTransformer.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/transformer/PostgresColTransformer.java @@ -38,6 +38,16 @@ public interface PostgresColTransformer { */ String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn); + /** + * Builds a SQL expression that computes the length of an array field, returning 0 for + * NULL/missing/empty arrays. Implementations choose between ARRAY_LENGTH (native PG arrays) and + * jsonb_array_length (JSONB arrays) based on the field's storage type. + * + * @param fieldToPgColumn The result of field transformation + * @return SQL expression computing the array length with NULL-safe handling + */ + String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn); + /** * Returns the kind of document this transformer is handling - Flat vs nested * diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFunctionExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFunctionExpressionVisitor.java index f1b0f2ee4..b20821c70 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFunctionExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFunctionExpressionVisitor.java @@ -7,14 +7,15 @@ import lombok.NoArgsConstructor; import org.apache.commons.lang3.StringUtils; import org.hypertrace.core.documentstore.expression.impl.FunctionExpression; +import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; import org.hypertrace.core.documentstore.expression.operators.FunctionOperator; import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression; import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser; +import org.hypertrace.core.documentstore.postgres.utils.PostgresUtils; @NoArgsConstructor public class PostgresFunctionExpressionVisitor extends PostgresSelectTypeExpressionVisitor { - private static final int ARRAY_DIMENSION = 1; private PostgresIdentifierExpressionVisitor identifierExpressionVisitor; private PostgresSelectTypeExpressionVisitor selectTypeExpressionVisitor; @@ -49,10 +50,11 @@ public String visit(final FunctionExpression expression) { } if (numArgs == 1) { + if (expression.getOperator().equals(FunctionOperator.LENGTH)) { + return buildArrayLengthExpression(expression.getOperands().get(0)); + } String parsedExpression = getParsedExpression(expression.getOperands().get(0)); - return expression.getOperator().equals(FunctionOperator.LENGTH) - ? String.format("ARRAY_LENGTH( %s, %s )", parsedExpression, ARRAY_DIMENSION) - : String.format("%s( %s )", expression.getOperator(), parsedExpression); + return String.format("%s( %s )", expression.getOperator(), parsedExpression); } Collector collector = @@ -83,6 +85,24 @@ private Collector getCollectorForFunctionOperator(FunctionOperator operator) { String.format("Query operation:%s not supported", operator)); } + private String buildArrayLengthExpression(final SelectTypeExpression operand) { + PostgresQueryParser parser = getPostgresQueryParser(); + + // The operand is either a user-defined alias from a prior selection (e.g. ARRAY_AGG, which + // produces a native PG array) or a field identifier. Aliases are resolved against pgSelections. + String identifier = operand.accept(identifierExpressionVisitor); + String resolvedSelection = identifier != null ? parser.getPgSelections().get(identifier) : null; + if (resolvedSelection != null) { + return PostgresUtils.prepareArrayLength(resolvedSelection); + } + + // A plain field identifier — let the transformer decide between ARRAY_LENGTH (native array) and + // jsonb_array_length (JSONB array) based on the field's storage layout. + return parser + .getPgColTransformer() + .buildArrayLengthExpression(parser.transformField((IdentifierExpression) operand)); + } + private String getParsedExpression(final SelectTypeExpression expression) { Optional identifier = Optional.ofNullable(expression.accept(identifierExpressionVisitor)); diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/utils/PostgresUtils.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/utils/PostgresUtils.java index 5e8e64cf7..2d6860d03 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/utils/PostgresUtils.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/utils/PostgresUtils.java @@ -131,6 +131,25 @@ public static String prepareCast(String field, Type type) { } } + /** + * Builds a NULL-safe length expression for a native PG array (e.g. {@code TEXT[]}). Returns 0 for + * a NULL or empty array. + */ + public static String prepareArrayLength(final String arrayAccessor) { + return String.format("COALESCE( ARRAY_LENGTH( %s, 1 ), 0 )", arrayAccessor); + } + + /** + * Builds a length expression for a JSONB value. The {@code jsonb_typeof} guard makes absent, JSON + * {@code null}, and non-array values resolve to 0 instead of erroring inside {@code + * jsonb_array_length}. + */ + public static String prepareJsonbArrayLength(final String jsonbAccessor) { + return String.format( + "jsonb_array_length( CASE WHEN jsonb_typeof( %s ) = 'array' THEN %s ELSE '[]'::jsonb END )", + jsonbAccessor, jsonbAccessor); + } + public static String prepareCastForFieldAccessor(String field, Object value) { String fmt = "CAST (%s AS %s)"; Type type = getType(value); diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 5a42e1cd7..c78337aee 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -395,7 +395,7 @@ void testAggregationExpressionDistinctCount() { assertEquals( "SELECT ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"qty_distinct\", " - + "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"qty_distinct_length\" " + + "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"qty_distinct_length\" " + "FROM \"testCollection\" " + "WHERE CAST (document->>'price' AS NUMERIC) = ? " + "GROUP BY document->'item'", @@ -435,10 +435,10 @@ void testAggregateWithMultipleGroupingLevels() { assertEquals( "SELECT document->'item' AS \"item\", document->'price' AS \"price\", " + "ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"quantities\", " - + "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"num_quantities\" " + + "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"num_quantities\" " + "FROM \"testCollection\" " + "GROUP BY document->'item',document->'price' " - + "HAVING ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) = ? " + + "HAVING COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) = ? " + "ORDER BY document->'item' DESC NULLS LAST", sql); diff --git a/document-store/src/test/resources/mongo/pipeline/distinct_count.json b/document-store/src/test/resources/mongo/pipeline/distinct_count.json index 27c256b93..bdc99d184 100644 --- a/document-store/src/test/resources/mongo/pipeline/distinct_count.json +++ b/document-store/src/test/resources/mongo/pipeline/distinct_count.json @@ -19,7 +19,12 @@ { "$project": { "section_count": { - "$size": "$section_count" + "$size": { + "$ifNull": [ + "$section_count", + [] + ] + } } } } diff --git a/document-store/src/test/resources/mongo/pipeline/field_count.json b/document-store/src/test/resources/mongo/pipeline/field_count.json index bd7def235..3c8c9c5f9 100644 --- a/document-store/src/test/resources/mongo/pipeline/field_count.json +++ b/document-store/src/test/resources/mongo/pipeline/field_count.json @@ -10,7 +10,12 @@ { "$project": { "total": { - "$size": "$total" + "$size": { + "$ifNull": [ + "$total", + [] + ] + } } } } diff --git a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json index ce2253af5..ee42beb4f 100644 --- a/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json +++ b/document-store/src/test/resources/mongo/pipeline/optimize_sorts_simple_sort_with_aggregation_selection.json @@ -10,7 +10,12 @@ { "$project": { "total": { - "$size": "$total" + "$size": { + "$ifNull": [ + "$total", + [] + ] + } } } }, diff --git a/document-store/src/test/resources/mongo/pipeline/simple.json b/document-store/src/test/resources/mongo/pipeline/simple.json index f07ec7a03..97394ffe8 100644 --- a/document-store/src/test/resources/mongo/pipeline/simple.json +++ b/document-store/src/test/resources/mongo/pipeline/simple.json @@ -10,7 +10,12 @@ { "$project": { "total": { - "$size": "$total" + "$size": { + "$ifNull": [ + "$total", + [] + ] + } } } } diff --git a/document-store/src/test/resources/mongo/pipeline/with_projections.json b/document-store/src/test/resources/mongo/pipeline/with_projections.json index cbc11066d..dec1105d9 100644 --- a/document-store/src/test/resources/mongo/pipeline/with_projections.json +++ b/document-store/src/test/resources/mongo/pipeline/with_projections.json @@ -11,7 +11,12 @@ "$project": { "name": 1, "total": { - "$size": "$total" + "$size": { + "$ifNull": [ + "$total", + [] + ] + } } } }