From b3355670a6b435a601a4fb28ddd850120a9716a9 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Wed, 10 Jun 2026 13:07:12 +0800 Subject: [PATCH] [fix](nereids) clamp the merged limit of MERGE_TOP_N by the parent offset (#64306) `MergeTopNs` (the `MERGE_TOP_N` rewrite rule) merges a parent `TopN` into its child `TopN` when their order keys are compatible. When the parent `TopN` carries a non-zero `OFFSET`, the merged limit was computed as `min(parent.limit, child.limit)`, which ignores that the parent offset consumes rows from the child's output. The merged `TopN` therefore keeps too many rows and the query returns a wrong result. Example: ```sql SELECT * FROM (SELECT k, v FROM t ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 4; ``` The inner `ORDER BY k LIMIT 5` yields 5 rows; the outer `LIMIT 3 OFFSET 4` skips 4 of them, so only 1 row should remain. Before this PR the rule merged the two `TopN` into `OFFSET 4 LIMIT 3` (instead of `OFFSET 4 LIMIT 1`), so it returned 3 rows. Fix: clamp the merged limit by `max(child.limit - parent.offset, 0)`, the same semantics already used by `MergeLimits.mergeLimit` for consecutive limits. The bug only triggers when the outer `TopN` has a non-zero offset (offset = 0 makes both formulas equal). The existing unit test `MergeTopNsTest.testOffset` asserted the buggy value (`limit == 10`, while the correct value is `9`); this PR corrects that assertion as well. ### Release note Fix the wrong result produced by the `MERGE_TOP_N` optimization when an outer `ORDER BY ... LIMIT` carries a non-zero `OFFSET` over an inner `ORDER BY ... LIMIT`. --- .../nereids/rules/rewrite/MergeTopNs.java | 11 ++-- .../nereids/rules/rewrite/MergeTopNsTest.java | 20 +++++++- .../correctness_p0/test_merge_topn_offset.out | 10 ++++ .../test_merge_topn_offset.groovy | 50 +++++++++++++++++++ 4 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 regression-test/data/correctness_p0/test_merge_topn_offset.out create mode 100644 regression-test/suites/correctness_p0/test_merge_topn_offset.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java index 39ea7a384df443..3614434459d9c2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java @@ -30,7 +30,8 @@ *

* topN - child topN * If child topN orderby list is prefix subList of topN => - * topN with limit = min(topN.limit, childTopN.limit), offset = topN.offset + childTopN.offset + * topN with limit = min(topN.limit, max(childTopN.limit - topN.offset, 0)), + * offset = topN.offset + childTopN.offset */ public class MergeTopNs extends OneRewriteRuleFactory { @Override @@ -48,8 +49,12 @@ public Rule build() { long childOffset = childTopN.getOffset(); long childLimit = childTopN.getLimit(); long newOffset = offset + childOffset; - // choose min limit - long newLimit = Math.min(limit, childLimit); + // The parent's offset is applied on top of the child's output, so only + // (childLimit - offset) of the child's rows survive. Clamp the merged limit + // by that remaining count; otherwise rows beyond the child's limit leak through + // when the parent has a non-zero offset (DORIS-26301). Same semantics as + // MergeLimits.mergeLimit for consecutive limits. + long newLimit = Math.min(limit, Math.max(childLimit - offset, 0)); return topN.withLimitChild(newLimit, newOffset, childTopN.child()); }).toRule(RuleType.MERGE_TOP_N); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java index e16a38be6ba5c2..63b2c97ac31f08 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java @@ -70,7 +70,25 @@ void testOffset() { PlanChecker.from(MemoTestUtils.createConnectContext(), plan) .applyTopDown(new MergeTopNs()) .matches( - logicalTopN(logicalOlapScan()).when(topN -> topN.getLimit() == 10 && topN.getOffset() == 4) + logicalTopN(logicalOlapScan()).when(topN -> topN.getLimit() == 9 && topN.getOffset() == 4) + ); + } + + @Test + void testParentOffsetReducesChildLimit() { + // DORIS-26301: when the parent (upper) TopN has a non-zero offset, the merged limit must be + // clamped by (childLimit - parentOffset); otherwise rows beyond the child's limit leak through. + // child : ORDER BY k LIMIT 5 (limit=5, offset=0) -> yields 5 rows + // parent: ORDER BY k LIMIT 3 OFFSET 4 (limit=3, offset=4) -> skips 4, only 1 row remains + // merged: offset = 4, limit = min(3, max(5 - 4, 0)) = 1 + LogicalPlan plan = new LogicalPlanBuilder(score) + .topN(5, 0, ImmutableList.of(0)) + .topN(3, 4, ImmutableList.of(0)) + .build(); + PlanChecker.from(MemoTestUtils.createConnectContext(), plan) + .applyTopDown(new MergeTopNs()) + .matches( + logicalTopN(logicalOlapScan()).when(topN -> topN.getLimit() == 1 && topN.getOffset() == 4) ); } diff --git a/regression-test/data/correctness_p0/test_merge_topn_offset.out b/regression-test/data/correctness_p0/test_merge_topn_offset.out new file mode 100644 index 00000000000000..bf103b2fed265e --- /dev/null +++ b/regression-test/data/correctness_p0/test_merge_topn_offset.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !merge_topn_off4lim3 -- +5 50 + +-- !merge_topn_off2lim3 -- +3 30 +4 40 +5 50 + +-- !merge_topn_off6lim3 -- diff --git a/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy new file mode 100644 index 00000000000000..fec6162d1d6b5e --- /dev/null +++ b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_merge_topn_offset") { + // DORIS-26301: when MERGE_TOP_N merges a parent TopN that carries a non-zero OFFSET into its + // child TopN, the merged limit must be clamped by (childLimit - parentOffset). Otherwise rows + // beyond the child's limit leak through. Before the fix, an outer "LIMIT 3 OFFSET 4" over an + // inner "LIMIT 5" wrongly returned 3 rows (k=5,6,7) instead of the single correct row (k=5). + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + def tableName = "test_merge_topn_offset" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE ${tableName} ( + k INT NOT NULL, + v INT NOT NULL + ) + DUPLICATE KEY(k) + DISTRIBUTED BY HASH(k) BUCKETS 1 + PROPERTIES ("replication_num" = "1") + """ + sql """ INSERT INTO ${tableName} VALUES + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50), + (6, 60), (7, 70), (8, 80), (9, 90), (10, 100) """ + + // MERGE_TOP_N is enabled by default; stacking an outer TopN with an OFFSET over an inner TopN + // triggers it. The inner "ORDER BY k LIMIT 5" yields k = 1..5. + + // outer offset (4) consumes 4 of the inner's 5 rows -> only k=5 remains + qt_merge_topn_off4lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 4 """ + // outer offset (2) is within the inner limit -> k = 3,4,5 + qt_merge_topn_off2lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 2 """ + // outer offset (6) exceeds the inner limit (5) -> empty + qt_merge_topn_off6lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 6 """ +}