Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,98 @@ public void testAggregateWithMultipleGroupingLevels(String dataStoreName) throws
testCountApi(dataStoreName, query, "query/multi_level_grouping_response.json");
}

@ParameterizedTest
@ArgumentsSource(AllProvider.class)
public void testSortByListSizeWithMissingField(String dataStoreName) throws IOException {
Datastore datastore = datastoreMap.get(dataStoreName);
String collectionName = "list_size_sort_collection";
Comment on lines +922 to +926

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The integration test runs against nested collections (the standard document store pattern) which exercises the jsonb_array_length path. The flat collection path (ARRAY_LENGTH) is covered by the existing unit tests in PostgresQueryParserTest which validate SQL generation for LENGTH on aggregation aliases. The flat collection LENGTH on a direct column field follows the same ARRAY_LENGTH codepath and is validated by the existing tests that assert correct SQL output.

datastore.deleteCollection(collectionName);
datastore.createCollection(collectionName, null);
Collection collection = datastore.getCollection(collectionName);
Comment on lines +924 to +929

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Wrapped the test body in try-finally in be9514a so deleteCollection runs regardless of assertion failures.


try {
collection.upsert(
new SingleValueKey(TENANT_ID, "three"),
new JSONDocument("{\"item\":\"three\",\"tags\":[\"a\",\"b\",\"c\"]}"));
collection.upsert(
new SingleValueKey(TENANT_ID, "one"),
new JSONDocument("{\"item\":\"one\",\"tags\":[\"x\"]}"));
// Document intentionally missing the "tags" field; LENGTH must resolve to 0 instead of
// failing
collection.upsert(
new SingleValueKey(TENANT_ID, "none"), new JSONDocument("{\"item\":\"none\"}"));

Query query =
Query.builder()
.addSelection(IdentifierExpression.of("item"))
.addSelection(
FunctionExpression.builder()
.operator(LENGTH)
.operand(IdentifierExpression.of("tags"))
.build(),
"tag_count")
.addSort(IdentifierExpression.of("tag_count"), ASC)
.build();

Iterator<Document> resultDocs = collection.aggregate(query);
List<Map<String, Object>> results = new ArrayList<>();
while (resultDocs.hasNext()) {
results.add(Utils.convertDocumentToMap(resultDocs.next()));
}

assertEquals(3, results.size());
// Document without the "tags" field counts as 0 and sorts first in ascending order
assertEquals("none", results.get(0).get("item"));
assertEquals(0, ((Number) results.get(0).get("tag_count")).intValue());
assertEquals("one", results.get(1).get("item"));
assertEquals(1, ((Number) results.get(1).get("tag_count")).intValue());
assertEquals("three", results.get(2).get("item"));
assertEquals(3, ((Number) results.get(2).get("tag_count")).intValue());
} finally {
datastore.deleteCollection(collectionName);
}
}

@ParameterizedTest
@ArgumentsSource(PostgresProvider.class)
public void testSortByJsonbArrayLengthInFlatCollection(String dataStoreName) throws IOException {
Collection flatCollection = getFlatCollection(dataStoreName);

// props.colors is a jsonb array inside a JSONB column in the flat collection.
// Test data: id=1 ["Blue","Green"], id=3 ["Black"], id=5 ["Orange","Blue"],
// id=7 [] (empty), id=2,4,6,8,9,10 props=NULL.
Query query =
Query.builder()
.addSelection(IdentifierExpression.of("id"))
.addSelection(
FunctionExpression.builder()
.operator(LENGTH)
.operand(JsonIdentifierExpression.of("props", "colors"))
.build(),
"color_count")
.addSort(IdentifierExpression.of("color_count"), DESC)
.addSort(IdentifierExpression.of("id"), ASC)
.build();

Iterator<Document> resultDocs = flatCollection.aggregate(query);
List<Map<String, Object>> results = new ArrayList<>();
while (resultDocs.hasNext()) {
results.add(Utils.convertDocumentToMap(resultDocs.next()));
}

assertEquals(10, results.size());
// id=1 and id=5 have 2 colors
assertEquals("1", results.get(0).get("id"));
assertEquals(2, ((Number) results.get(0).get("color_count")).intValue());
assertEquals("5", results.get(1).get("id"));
assertEquals(2, ((Number) results.get(1).get("color_count")).intValue());
// id=3 has 1 color
assertEquals("3", results.get(2).get("id"));
assertEquals(1, ((Number) results.get(2).get("color_count")).intValue());
// Remaining have 0 (empty array or NULL props)
assertEquals(0, ((Number) results.get(3).get("color_count")).intValue());
}

@ParameterizedTest
@ArgumentsSource(AllProvider.class)
public void testAggregateWithFunctionalLeftHandSideFilter(final String dataStoreName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ Map<String, Object> parse(final FunctionExpression expression) {

if (numArgs == 1) {
Object value = expression.getOperands().get(0).accept(parser);
// $size fails when the operand resolves to a missing/absent (or null) field. Default such a
// value to an empty array so that LENGTH of an absent field is 0 instead of throwing an
// error.
if (operator == LENGTH) {
value = Map.of("$ifNull", List.of(value, List.of()));
}
return Map.of(key, value);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ public String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn) {
.toString();
}

@Override
public String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn) {
String fieldAccessor = buildFieldAccessorWithoutCast(fieldToPgColumn);
// A direct column (no JSON path) is a native PG array; otherwise it is a JSONB array nested in
// a JSONB column.
return fieldToPgColumn.getTransformedField() == null
? PostgresUtils.prepareArrayLength(fieldAccessor)
: PostgresUtils.prepareJsonbArrayLength(fieldAccessor);
}

@Override
public DocumentType getDocumentType() {
return DocumentType.FLAT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ public String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn) {
.toString();
}

@Override
public String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn) {
// Nested collections store every field inside the JSONB document column.
return PostgresUtils.prepareJsonbArrayLength(buildFieldAccessorWithoutCast(fieldToPgColumn));
}

@Override
public DocumentType getDocumentType() {
return DocumentType.NESTED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ public interface PostgresColTransformer {
*/
String buildFieldAccessorWithoutCast(FieldToPgColumn fieldToPgColumn);

/**
* Builds a SQL expression that computes the length of an array field, returning 0 for
* NULL/missing/empty arrays. Implementations choose between ARRAY_LENGTH (native PG arrays) and
* jsonb_array_length (JSONB arrays) based on the field's storage type.
*
* @param fieldToPgColumn The result of field transformation
* @return SQL expression computing the array length with NULL-safe handling
*/
String buildArrayLengthExpression(FieldToPgColumn fieldToPgColumn);

/**
* Returns the kind of document this transformer is handling - Flat vs nested
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
import lombok.NoArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.hypertrace.core.documentstore.expression.impl.FunctionExpression;
import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression;
import org.hypertrace.core.documentstore.expression.operators.FunctionOperator;
import org.hypertrace.core.documentstore.expression.type.SelectTypeExpression;
import org.hypertrace.core.documentstore.postgres.query.v1.PostgresQueryParser;
import org.hypertrace.core.documentstore.postgres.utils.PostgresUtils;

@NoArgsConstructor
public class PostgresFunctionExpressionVisitor extends PostgresSelectTypeExpressionVisitor {

private static final int ARRAY_DIMENSION = 1;
private PostgresIdentifierExpressionVisitor identifierExpressionVisitor;
private PostgresSelectTypeExpressionVisitor selectTypeExpressionVisitor;

Expand Down Expand Up @@ -49,10 +50,11 @@ public String visit(final FunctionExpression expression) {
}

if (numArgs == 1) {
if (expression.getOperator().equals(FunctionOperator.LENGTH)) {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did we have same problem here with length function?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests were failing for postgres without the changes.

@puneet-traceable puneet-traceable Jun 23, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are the tests failing when there is no change to postgres path?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new integration test testSortByListSizeWithMissingField runs on both Mongo and Postgres (via @ArgumentsSource(AllProvider.class)). The test applies LENGTH on a raw jsonb field (tags) where one document is missing that field entirely.

The original Postgres LENGTH implementation used ARRAY_LENGTH(parsedExpression, 1) — but parsedExpression here resolved via getParsedExpression() which uses PostgresDataAccessorIdentifierExpressionVisitor. That visitor casts the jsonb field to NUMERIC (via ->> + CAST), resulting in ARRAY_LENGTH(CAST(document->>'tags' AS NUMERIC), 1) — which fails with function array_length(numeric, integer) does not exist.

Even before this PR, calling LENGTH on a raw jsonb field in Postgres would have failed the same way — it just wasn't tested. The fix distinguishes between native PG arrays (from aggregations) and raw jsonb fields, using jsonb_array_length with a jsonb_typeof guard for the latter.

return buildArrayLengthExpression(expression.getOperands().get(0));
}
String parsedExpression = getParsedExpression(expression.getOperands().get(0));
return expression.getOperator().equals(FunctionOperator.LENGTH)
? String.format("ARRAY_LENGTH( %s, %s )", parsedExpression, ARRAY_DIMENSION)
: String.format("%s( %s )", expression.getOperator(), parsedExpression);
return String.format("%s( %s )", expression.getOperator(), parsedExpression);
}

Collector<String, ?, String> collector =
Expand Down Expand Up @@ -83,6 +85,24 @@ private Collector getCollectorForFunctionOperator(FunctionOperator operator) {
String.format("Query operation:%s not supported", operator));
}

private String buildArrayLengthExpression(final SelectTypeExpression operand) {
PostgresQueryParser parser = getPostgresQueryParser();

// The operand is either a user-defined alias from a prior selection (e.g. ARRAY_AGG, which
// produces a native PG array) or a field identifier. Aliases are resolved against pgSelections.
String identifier = operand.accept(identifierExpressionVisitor);
String resolvedSelection = identifier != null ? parser.getPgSelections().get(identifier) : null;
if (resolvedSelection != null) {
return PostgresUtils.prepareArrayLength(resolvedSelection);
}

// A plain field identifier — let the transformer decide between ARRAY_LENGTH (native array) and
// jsonb_array_length (JSONB array) based on the field's storage layout.
return parser
.getPgColTransformer()
.buildArrayLengthExpression(parser.transformField((IdentifierExpression) operand));
}

private String getParsedExpression(final SelectTypeExpression expression) {
Optional<String> identifier =
Optional.ofNullable(expression.accept(identifierExpressionVisitor));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,25 @@ public static String prepareCast(String field, Type type) {
}
}

/**
* Builds a NULL-safe length expression for a native PG array (e.g. {@code TEXT[]}). Returns 0 for
* a NULL or empty array.
*/
public static String prepareArrayLength(final String arrayAccessor) {
return String.format("COALESCE( ARRAY_LENGTH( %s, 1 ), 0 )", arrayAccessor);
}

/**
* Builds a length expression for a JSONB value. The {@code jsonb_typeof} guard makes absent, JSON
* {@code null}, and non-array values resolve to 0 instead of erroring inside {@code
* jsonb_array_length}.
*/
public static String prepareJsonbArrayLength(final String jsonbAccessor) {
return String.format(
"jsonb_array_length( CASE WHEN jsonb_typeof( %s ) = 'array' THEN %s ELSE '[]'::jsonb END )",
jsonbAccessor, jsonbAccessor);
}

public static String prepareCastForFieldAccessor(String field, Object value) {
String fmt = "CAST (%s AS %s)";
Type type = getType(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ void testAggregationExpressionDistinctCount() {

assertEquals(
"SELECT ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"qty_distinct\", "
+ "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"qty_distinct_length\" "
+ "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"qty_distinct_length\" "
+ "FROM \"testCollection\" "
+ "WHERE CAST (document->>'price' AS NUMERIC) = ? "
+ "GROUP BY document->'item'",
Expand Down Expand Up @@ -435,10 +435,10 @@ void testAggregateWithMultipleGroupingLevels() {
assertEquals(
"SELECT document->'item' AS \"item\", document->'price' AS \"price\", "
+ "ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)) AS \"quantities\", "
+ "ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) AS \"num_quantities\" "
+ "COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) AS \"num_quantities\" "
+ "FROM \"testCollection\" "
+ "GROUP BY document->'item',document->'price' "
+ "HAVING ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ) = ? "
+ "HAVING COALESCE( ARRAY_LENGTH( ARRAY_AGG(DISTINCT CAST (document->>'quantity' AS NUMERIC)), 1 ), 0 ) = ? "
+ "ORDER BY document->'item' DESC NULLS LAST",
sql);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
{
"$project": {
"section_count": {
"$size": "$section_count"
"$size": {
"$ifNull": [
"$section_count",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
{
"$project": {
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
"$project": {
"name": 1,
"total": {
"$size": "$total"
"$size": {
"$ifNull": [
"$total",
[]
]
}
}
}
}
Expand Down
Loading