From 218444ad821dffd6bd50216ab2f0e426f6f17dfa Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 08:47:33 +0800 Subject: [PATCH 01/11] [improvement](fe) Bootstrap table stats after insert into select --- .../commands/insert/OlapInsertExecutor.java | 5 + .../org/apache/doris/qe/SessionVariable.java | 18 +++ .../doris/statistics/AnalysisManager.java | 31 +++++ .../doris/statistics/TableStatsMeta.java | 19 ++++ .../insert/OlapInsertExecutorTest.java | 78 +++++++++++++ .../doris/statistics/TableStatsMetaTest.java | 26 +++++ ...insert_select_table_stats_bootstrap.groovy | 107 ++++++++++++++++++ 7 files changed, 284 insertions(+) create mode 100644 regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java index c4ae68acb98e26..d7589d8a3c8261 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java @@ -261,6 +261,11 @@ protected void onComplete() throws UserException { StmtExecutor.syncLoadForTablets(backendsList, allTabletIds); } } + // Bootstrap table-level stats only after the target data is visible to keep row-count fallback aligned. + if (txnStatus == TransactionStatus.VISIBLE + && ctx.getSessionVariable().isEnableInsertSelectTableStatsBootstrap()) { + Env.getCurrentEnv().getAnalysisManager().bootstrapTableStatsIfAbsent(olapTable, loadedRows); + } } private void setTxnCallbackId() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 28613a2c564e2f..e3643677587a03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -446,6 +446,9 @@ public String toString() { public static final String ENABLE_SINGLE_REPLICA_INSERT = "enable_single_replica_insert"; + public static final String ENABLE_INSERT_SELECT_TABLE_STATS_BOOTSTRAP = + "enable_insert_select_table_stats_bootstrap"; + public static final String SHUFFLED_AGG_NODE_IDS = "shuffled_agg_node_ids"; public static final String ENABLE_FAST_ANALYZE_INSERT_INTO_VALUES = "enable_fast_analyze_into_values"; @@ -2084,6 +2087,13 @@ public boolean isEnableHboNonStrictMatchingMode() { needForward = true, varType = VariableAnnotation.EXPERIMENTAL) public boolean enableSingleReplicaInsert = false; + @VarAttrDef.VarAttr(name = ENABLE_INSERT_SELECT_TABLE_STATS_BOOTSTRAP, + needForward = true, varType = VariableAnnotation.EXPERIMENTAL, description = { + "是否为 CTAS 和 INSERT INTO SELECT 在写入可见后补建最小表级统计基线。", + "Whether to bootstrap minimal table-level stats after CTAS and INSERT INTO SELECT become visible." + }) + private boolean enableInsertSelectTableStatsBootstrap = false; + @VarAttrDef.VarAttr(name = SHUFFLED_AGG_NODE_IDS, needForward = true, varType = VariableAnnotation.EXPERIMENTAL) public String shuffledAggNodeIds = ""; @@ -4885,6 +4895,14 @@ public boolean isInsertVisibleTimeoutReturnError() { return getInsertVisibleTimeoutReturnModeEnum() == InsertVisibleTimeoutReturnMode.ERROR; } + public boolean isEnableInsertSelectTableStatsBootstrap() { + return enableInsertSelectTableStatsBootstrap; + } + + public void setEnableInsertSelectTableStatsBootstrap(boolean enableInsertSelectTableStatsBootstrap) { + this.enableInsertSelectTableStatsBootstrap = enableInsertSelectTableStatsBootstrap; + } + public void setInsertVisibleTimeoutReturnMode(String insertVisibleTimeoutReturnMode) { this.insertVisibleTimeoutReturnMode = parseInsertVisibleTimeoutReturnMode(insertVisibleTimeoutReturnMode) .getOption(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index b2b4c0d57f63a1..b622a802aec4f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -597,6 +597,24 @@ public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) { } } + // Bootstrap table-level row count immediately after write so the optimizer can consume + // a usable base row count before column statistics are available. + public void bootstrapTableStatsIfAbsent(OlapTable table, long loadedRows) { + if (loadedRows <= 0) { + return; + } + if (findTableStatsStatus(table.getId()) != null) { + return; + } + synchronized (idToTblStats) { + if (idToTblStats.containsKey(table.getId())) { + return; + } + long bootstrapRowCount = resolveBootstrapRowCount(table, loadedRows); + updateTableStatsStatus(TableStatsMeta.newBootstrapStats(table, bootstrapRowCount, loadedRows)); + } + } + public List showAutoPendingJobs(TableNameInfo tblName, String priority) { List result = Lists.newArrayList(); if (priority == null || priority.isEmpty()) { @@ -616,6 +634,19 @@ public List showAutoPendingJobs(TableNameInfo tblName, S return result; } + private long resolveBootstrapRowCount(OlapTable table, long loadedRows) { + long bootstrapRowCount = loadedRows; + long baseIndexRowCount = table.getRowCountForIndex(table.getBaseIndexId(), true); + if (baseIndexRowCount > 0) { + bootstrapRowCount = Math.max(bootstrapRowCount, baseIndexRowCount); + } + long tableRowCount = table.getRowCount(); + if (tableRowCount > 0) { + bootstrapRowCount = Math.max(bootstrapRowCount, tableRowCount); + } + return bootstrapRowCount; + } + protected List getPendingJobs(Map>> jobMap, JobPriority priority, TableNameInfo tableNameInfo) { List result = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 0b77aba91e30d8..0b2176ef56429d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -116,6 +116,16 @@ public TableStatsMeta() { idxId = 0; } + private TableStatsMeta(TableIf table) { + this.ctlId = table.getDatabase().getCatalog().getId(); + this.ctlName = table.getDatabase().getCatalog().getName(); + this.dbId = table.getDatabase().getId(); + this.dbName = table.getDatabase().getFullName(); + this.tblId = table.getId(); + this.tblName = table.getName(); + this.idxId = -1; + } + // It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo // and TableStats is quite different. public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { @@ -130,6 +140,15 @@ public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { update(analyzedJob, table); } + // Bootstrap metadata only seeds table-level row count so the optimizer can avoid the unknown-row fallback. + public static TableStatsMeta newBootstrapStats(OlapTable table, long rowCount, long updatedRows) { + TableStatsMeta tableStats = new TableStatsMeta(table); + tableStats.rowCount = rowCount; + tableStats.updatedRows.set(updatedRows); + tableStats.indexesRowCount.put(table.getBaseIndexId(), rowCount); + return tableStats; + } + @Override public void write(DataOutput out) throws IOException { String json = GsonUtils.GSON.toJson(this); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java index ea5318eedcb54f..3b7287f33e2255 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java @@ -39,6 +39,8 @@ import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TQueryOptions; import org.apache.doris.thrift.TStatusCode; import org.apache.doris.thrift.TUniqueId; @@ -182,6 +184,70 @@ void testOnFailAbortsUncommittedTransaction() throws Exception { } } + @Test + void testExecuteSingleInsertVisibleBootstrapsTableStatsWhenAbsent() throws Exception { + ConnectContext ctx = createExecutorContext(); + ctx.getSessionVariable().setEnableInsertSelectTableStatsBootstrap(true); + Coordinator coordinator = createCoordinator(); + GlobalTransactionMgrIface txnMgr = Mockito.mock(GlobalTransactionMgrIface.class); + TransactionState txnState = Mockito.mock(TransactionState.class); + LoadManager loadManager = Mockito.mock(LoadManager.class); + AnalysisManager analysisManager = Mockito.spy(new AnalysisManager()); + Env currentEnv = createCurrentEnv(loadManager, analysisManager); + StmtExecutor stmtExecutor = createStmtExecutor(); + + Mockito.doNothing().when(analysisManager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + try (MockedStatic envFactoryMock = Mockito.mockStatic(EnvFactory.class); + MockedStatic envMock = Mockito.mockStatic(Env.class)) { + prepareFactoryMocks(envFactoryMock, envMock, coordinator, txnMgr, txnState, currentEnv); + ctx.setEnv(currentEnv); + + Mockito.when(txnMgr.commitAndPublishTransaction( + Mockito.any(), Mockito.anyList(), Mockito.anyLong(), Mockito.anyList(), Mockito.anyLong(), + Mockito.isNull())).thenReturn(true); + + OlapInsertExecutor executor = createExecutor(ctx); + executor.txnId = 10004L; + executor.executeSingleInsert(stmtExecutor); + + TableStatsMeta tableStats = analysisManager.findTableStatsStatus(2L); + Assertions.assertNotNull(tableStats); + Assertions.assertEquals(12L, tableStats.rowCount); + Assertions.assertEquals(12L, tableStats.updatedRows.get()); + Assertions.assertEquals(12L, tableStats.getRowCount(101L)); + Assertions.assertTrue(tableStats.isColumnsStatsEmpty()); + } + } + + @Test + void testExecuteSingleInsertVisibleDoesNotBootstrapTableStatsWhenDisabled() throws Exception { + ConnectContext ctx = createExecutorContext(); + Coordinator coordinator = createCoordinator(); + GlobalTransactionMgrIface txnMgr = Mockito.mock(GlobalTransactionMgrIface.class); + TransactionState txnState = Mockito.mock(TransactionState.class); + LoadManager loadManager = Mockito.mock(LoadManager.class); + AnalysisManager analysisManager = Mockito.spy(new AnalysisManager()); + Env currentEnv = createCurrentEnv(loadManager, analysisManager); + StmtExecutor stmtExecutor = createStmtExecutor(); + + Mockito.doNothing().when(analysisManager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + try (MockedStatic envFactoryMock = Mockito.mockStatic(EnvFactory.class); + MockedStatic envMock = Mockito.mockStatic(Env.class)) { + prepareFactoryMocks(envFactoryMock, envMock, coordinator, txnMgr, txnState, currentEnv); + ctx.setEnv(currentEnv); + + Mockito.when(txnMgr.commitAndPublishTransaction( + Mockito.any(), Mockito.anyList(), Mockito.anyLong(), Mockito.anyList(), Mockito.anyLong(), + Mockito.isNull())).thenReturn(true); + + OlapInsertExecutor executor = createExecutor(ctx); + executor.txnId = 10005L; + executor.executeSingleInsert(stmtExecutor); + + Assertions.assertNull(analysisManager.findTableStatsStatus(2L)); + } + } + // Build a fresh context per case so insertResult and QueryState do not leak between tests. private ConnectContext createExecutorContext() { ConnectContext ctx = new ConnectContext(); @@ -227,6 +293,10 @@ private StmtExecutor createStmtExecutor() { // Provide the job-manager chain needed by master-side setTxnCallbackId(). private Env createCurrentEnv(LoadManager loadManager) { + return createCurrentEnv(loadManager, Mockito.mock(AnalysisManager.class)); + } + + private Env createCurrentEnv(LoadManager loadManager, AnalysisManager analysisManager) { Env currentEnv = Mockito.mock(Env.class); // Mock the internal catalog because ConnectContext.setEnv() resolves the default catalog on master. InternalCatalog internalCatalog = Mockito.mock(InternalCatalog.class); @@ -236,6 +306,7 @@ private Env createCurrentEnv(LoadManager loadManager) { Mockito.when(internalCatalog.getName()).thenReturn("internal"); Mockito.when(currentEnv.getLoadManager()).thenReturn(loadManager); Mockito.when(currentEnv.getJobManager()).thenReturn(jobManager); + Mockito.when(currentEnv.getAnalysisManager()).thenReturn(analysisManager); Mockito.when(jobManager.getStreamingTaskManager()).thenReturn(streamingTaskManager); Mockito.when(streamingTaskManager.getStreamingInsertTaskById(Mockito.anyLong())).thenReturn(null); return currentEnv; @@ -244,14 +315,21 @@ private Env createCurrentEnv(LoadManager loadManager) { // Create an executor with mocked table metadata because this test only validates timeout result handling. private OlapInsertExecutor createExecutor(ConnectContext ctx) { Database database = Mockito.mock(Database.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); Mockito.when(database.getFullName()).thenReturn("test_db"); Mockito.when(database.getId()).thenReturn(1L); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(catalog.getId()).thenReturn(3L); + Mockito.when(catalog.getName()).thenReturn("internal"); // Mock OlapTable because the master-side executor now casts the target table to OlapTable. OlapTable table = Mockito.mock(OlapTable.class); Mockito.when(table.getDatabase()).thenReturn(database); Mockito.when(table.getName()).thenReturn("test_tbl"); Mockito.when(table.getId()).thenReturn(2L); + Mockito.when(table.getBaseIndexId()).thenReturn(101L); + Mockito.when(table.getRowCountForIndex(101L, true)).thenReturn(-1L); + Mockito.when(table.getRowCount()).thenReturn(0L); return new OlapInsertExecutor(ctx, table, "label_test", Mockito.mock(NereidsPlanner.class), Optional.empty(), false, 0L); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java index 12d3931ed7bd65..5390ea1f422e1f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -17,7 +17,9 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Database; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.datasource.InternalCatalog; import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; @@ -58,4 +60,28 @@ void testClearStaleIndexRowCount() { Assertions.assertEquals(-1, meta.getRowCount(3)); Assertions.assertEquals(-1, meta.getRowCount(4)); } + + @Test + void testNewBootstrapStatsSeedsBaseIndexRowCount() { + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database database = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getId()).thenReturn(10L); + Mockito.when(table.getName()).thenReturn("t1"); + Mockito.when(table.getBaseIndexId()).thenReturn(100L); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(database.getId()).thenReturn(20L); + Mockito.when(database.getFullName()).thenReturn("db1"); + Mockito.when(catalog.getId()).thenReturn(30L); + Mockito.when(catalog.getName()).thenReturn("internal"); + + TableStatsMeta meta = TableStatsMeta.newBootstrapStats(table, 123L, 123L); + + Assertions.assertEquals(123L, meta.rowCount); + Assertions.assertEquals(123L, meta.updatedRows.get()); + Assertions.assertEquals(123L, meta.getRowCount(100L)); + Assertions.assertTrue(meta.isColumnsStatsEmpty()); + Assertions.assertFalse(meta.userInjected); + } } diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy new file mode 100644 index 00000000000000..78974086b84d3c --- /dev/null +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("insert_select_table_stats_bootstrap", "nonConcurrent") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + + sql "set enable_nereids_planner = true" + sql "set enable_fallback_to_original_planner = false" + sql "set runtime_filter_mode = OFF" + sql "set enable_auto_analyze_internal_catalog = false" + sql "set broadcast_row_count_limit = 100" + sql "set broadcast_hashtable_mem_limit_percentage = 1" + + sql "drop table if exists smallb" + sql """ + create table smallb ( + k int, + v int + ) + distributed by hash(k) buckets 1 + properties("replication_num" = "1") + """ + sql """ + insert into smallb + select number, number + from numbers("number" = "10") + """ + // Analyze the known small table first so the join-side change mainly depends on biga bootstrap stats. + sql "analyze table smallb with sync" + + def createBigATable = { boolean enableBootstrap -> + sql "drop table if exists biga" + sql "set enable_insert_select_table_stats_bootstrap = ${enableBootstrap}" + sql """ + create table biga + distributed by hash(k) buckets 1 + properties("replication_num" = "1") + as + select number % 10 as k, repeat('x', 64) as pad + from numbers("number" = "4096") + """ + } + + createBigATable(false) + + def tableStatsWithoutBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithoutBootstrap.size()) + assertEquals("", tableStatsWithoutBootstrap[0][0]) + assertEquals("", tableStatsWithoutBootstrap[0][3]) + assertEquals("", tableStatsWithoutBootstrap[0][5]) + + explain { + sql """ + physical plan + select a.k, b.v + from biga a + join smallb b + on a.k = b.k + """ + contains("PhysicalOlapScan[biga]") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=1(?![,\d.])/).find()) + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[biga\]/).find()) + } + } + + createBigATable(true) + + def tableStatsWithBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithBootstrap.size()) + assertEquals("4096", tableStatsWithBootstrap[0][0]) + assertEquals("4096", tableStatsWithBootstrap[0][2]) + assertEquals("false", tableStatsWithBootstrap[0][7]) + + explain { + sql """ + physical plan + select a.k, b.v + from biga a + join smallb b + on a.k = b.k + """ + contains("PhysicalOlapScan[biga]") + contains("PhysicalOlapScan[smallb]") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=4,096/).find()) + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[smallb\]/).find()) + } + } +} From 6f73ec319a4aebcd39a6f893db1776ada04d138f Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 15:07:00 +0800 Subject: [PATCH 02/11] [improvement](fe) Stabilize bootstrap stats regression coverage --- .../plans/commands/ShowTableStatsCommand.java | 3 +- .../commands/ShowTableStatsCommandTest.java | 33 +++++++++++++++++++ ...insert_select_table_stats_bootstrap.groovy | 32 ++++++++++-------- 3 files changed, 53 insertions(+), 15 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java index cd586216f244a8..5d3031893f96e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java @@ -255,7 +255,8 @@ public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic, Tabl newColumnsSet.add(Pair.of(Util.getTempTableDisplayName(pair.first), pair.second)); } row.add(newColumnsSet.toString()); - row.add(tableStatistic.jobType.toString()); + // Bootstrap table stats may only seed row count, so job type can be absent before the first analyze task. + row.add(tableStatistic.jobType == null ? "" : tableStatistic.jobType.toString()); row.add(String.valueOf(tableStatistic.partitionChanged.get())); row.add(String.valueOf(tableStatistic.userInjected)); row.add(table == null ? "N/A" : String.valueOf(table.autoAnalyzeEnabled())); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java index 38f204a6c280d8..f53ebc937df5a8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java @@ -21,16 +21,20 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.info.PartitionNamesInfo; import org.apache.doris.catalog.info.TableNameInfo; import org.apache.doris.common.AnalysisException; import org.apache.doris.datasource.CatalogMgr; +import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mysql.privilege.AccessControllerManager; +import org.apache.doris.qe.ShowResultSet; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.QueryState; +import org.apache.doris.statistics.TableStatsMeta; import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.AfterEach; @@ -166,4 +170,33 @@ void testValidateNoPrivilege() { Assertions.assertThrows(AnalysisException.class, () -> command2.validate(connectContext), "Permission denied command denied to user 'null'@'null' for table 'test_db: test_tbl2'"); } + + @Test + void testConstructTableResultSetForBootstrapStats() throws Exception { + runBefore(); + CatalogIf catalogIf = Mockito.mock(CatalogIf.class); + Database database = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalogIf.getId()).thenReturn(1L); + Mockito.when(catalogIf.getName()).thenReturn(internalCtl); + Mockito.when(database.getCatalog()).thenReturn(catalogIf); + Mockito.when(database.getId()).thenReturn(2L); + Mockito.when(database.getFullName()).thenReturn(CatalogMocker.TEST_DB_NAME); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getId()).thenReturn(CatalogMocker.TEST_TBL_ID); + Mockito.when(table.getName()).thenReturn(CatalogMocker.TEST_TBL_NAME); + Mockito.when(table.getBaseIndexId()).thenReturn(CatalogMocker.TEST_TBL_ID); + Mockito.when(table.autoAnalyzeEnabled()).thenReturn(false); + // Bootstrap stats only seed row count, so show table stats should still render without a job type. + TableStatsMeta bootstrapStats = TableStatsMeta.newBootstrapStats(table, 128L, 128L); + ShowTableStatsCommand command = new ShowTableStatsCommand(CatalogMocker.TEST_TBL_ID); + ShowResultSet resultSet = command.constructTableResultSet(bootstrapStats, table); + + Assertions.assertEquals(1, resultSet.getResultRows().size()); + List row = resultSet.getResultRows().get(0); + Assertions.assertEquals("128", row.get(0)); + Assertions.assertEquals("128", row.get(2)); + Assertions.assertEquals("", row.get(5)); + Assertions.assertEquals("false", row.get(7)); + } } diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy index 78974086b84d3c..62cd9a88b073eb 100644 --- a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -21,23 +21,27 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { sql "set enable_nereids_planner = true" sql "set enable_fallback_to_original_planner = false" + // Disable the distributed planner path so this case falls back to regular join-side selection. + sql "set enable_nereids_distribute_planner = false" + // Disable bucket shuffle join so the optimizer has to choose between broadcast and regular shuffle. + sql "set enable_bucket_shuffle_join = false" sql "set runtime_filter_mode = OFF" - sql "set enable_auto_analyze_internal_catalog = false" sql "set broadcast_row_count_limit = 100" sql "set broadcast_hashtable_mem_limit_percentage = 1" sql "drop table if exists smallb" sql """ create table smallb ( - k int, + k1 int, + k2 int, v int ) - distributed by hash(k) buckets 1 + distributed by hash(v) buckets 1 properties("replication_num" = "1") """ sql """ insert into smallb - select number, number + select number, 0, number from numbers("number" = "10") """ // Analyze the known small table first so the join-side change mainly depends on biga bootstrap stats. @@ -52,7 +56,7 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { properties("replication_num" = "1") as select number % 10 as k, repeat('x', 64) as pad - from numbers("number" = "4096") + from numbers("number" = "262144") """ } @@ -68,9 +72,9 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { sql """ physical plan select a.k, b.v - from biga a - join smallb b - on a.k = b.k + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) """ contains("PhysicalOlapScan[biga]") contains("distributionSpec=DistributionSpecReplicated") @@ -84,23 +88,23 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { def tableStatsWithBootstrap = sql "show table stats biga" assertEquals(1, tableStatsWithBootstrap.size()) - assertEquals("4096", tableStatsWithBootstrap[0][0]) - assertEquals("4096", tableStatsWithBootstrap[0][2]) + assertEquals("262144", tableStatsWithBootstrap[0][0]) + assertEquals("262144", tableStatsWithBootstrap[0][2]) assertEquals("false", tableStatsWithBootstrap[0][7]) explain { sql """ physical plan select a.k, b.v - from biga a - join smallb b - on a.k = b.k + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) """ contains("PhysicalOlapScan[biga]") contains("PhysicalOlapScan[smallb]") contains("distributionSpec=DistributionSpecReplicated") check { explainStr -> - assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=4,096/).find()) + assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=262,144/).find()) assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[smallb\]/).find()) } } From d5f478c3ca528d40f57a70d19b2b08e5f439fae9 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 15:37:01 +0800 Subject: [PATCH 03/11] [test](fe) Fix import order in show table stats test --- .../trees/plans/commands/ShowTableStatsCommandTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java index f53ebc937df5a8..31b2070c9f4d16 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java @@ -26,14 +26,14 @@ import org.apache.doris.catalog.info.PartitionNamesInfo; import org.apache.doris.catalog.info.TableNameInfo; import org.apache.doris.common.AnalysisException; -import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mysql.privilege.AccessControllerManager; -import org.apache.doris.qe.ShowResultSet; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.QueryState; +import org.apache.doris.qe.ShowResultSet; import org.apache.doris.statistics.TableStatsMeta; import com.google.common.collect.ImmutableList; From 69f16dd15a7a2971de48a6de17b2d47525e790d1 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 16:49:54 +0800 Subject: [PATCH 04/11] [test](fe) Add debug point for delayed row count report --- .../apache/doris/catalog/TabletStatMgr.java | 32 +++++++++++++ ...insert_select_table_stats_bootstrap.groovy | 46 +++++++++++-------- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index ec99c8d45a991a..26c1e905d148d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -25,6 +25,7 @@ import org.apache.doris.common.Pair; import org.apache.doris.common.Status; import org.apache.doris.common.ThreadPoolManager; +import org.apache.doris.common.util.DebugPointUtil; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.metric.MetricRepo; import org.apache.doris.system.Backend; @@ -62,6 +63,35 @@ public TabletStatMgr() { super("tablet stat mgr", Config.tablet_stat_update_interval_second * 1000); } + private void delayRowCountReportIfNeeded(Database db, OlapTable olapTable, boolean indexReported) { + // Delay only when the current refresh is about to publish a usable row count. + // If indexReported is still false, strict row-count readers already see UNKNOWN_ROW_COUNT, + // so sleeping there would only slow down tablet stat refresh without creating a more precise test window. + if (!indexReported) { + return; + } + DebugPointUtil.DebugPoint debugPoint = DebugPointUtil.getDebugPoint("TabletStatMgr.delay_row_count_report"); + if (debugPoint == null) { + return; + } + String dbName = debugPoint.param("db", ""); + String tableName = debugPoint.param("table", ""); + long sleepMs = debugPoint.param("sleep_ms", -1L); + if (dbName.isEmpty() || tableName.isEmpty() || sleepMs <= 0) { + return; + } + if (!db.getFullName().equals(dbName) || !olapTable.getName().equals(tableName)) { + return; + } + // Delay FE row count publication for the target table so tests can reproduce the unknown-row window. + try { + Thread.sleep(sleepMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("Interrupted while delaying row count report for {}.{}", dbName, tableName, e); + } + } + @Override protected void runAfterCatalogReady() { ImmutableMap backends; @@ -269,6 +299,8 @@ protected void runAfterCatalogReady() { tableBinlogSize += tabletBinlogSize; } // end for tablets + // Delay FE row count publication for a specific table when the debug point is enabled. + delayRowCountReportIfNeeded(db, olapTable, indexReported); index.setRowCountReported(indexReported); index.setRowCount(indexRowCount); LOG.debug("Table {} index {} all tablets reported[{}], row count {}", diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy index 62cd9a88b073eb..6345c7e1138881 100644 --- a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -18,6 +18,7 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { String db = context.config.getDbNameByFile(context.file) sql "use ${db}" + GetDebugPoint().clearDebugPointsForAllFEs() sql "set enable_nereids_planner = true" sql "set enable_fallback_to_original_planner = false" @@ -60,28 +61,35 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { """ } - createBigATable(false) + // Delay FE row count publication so the non-bootstrap branch still sees unknown scan row count. + GetDebugPoint().enableDebugPointForAllFEs("TabletStatMgr.delay_row_count_report", + [db: db, table: "biga", sleep_ms: "5000"]) + try { + createBigATable(false) - def tableStatsWithoutBootstrap = sql "show table stats biga" - assertEquals(1, tableStatsWithoutBootstrap.size()) - assertEquals("", tableStatsWithoutBootstrap[0][0]) - assertEquals("", tableStatsWithoutBootstrap[0][3]) - assertEquals("", tableStatsWithoutBootstrap[0][5]) + def tableStatsWithoutBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithoutBootstrap.size()) + assertEquals("", tableStatsWithoutBootstrap[0][0]) + assertEquals("", tableStatsWithoutBootstrap[0][3]) + assertEquals("", tableStatsWithoutBootstrap[0][5]) - explain { - sql """ - physical plan - select a.k, b.v - from smallb b - join biga a - on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) - """ - contains("PhysicalOlapScan[biga]") - contains("distributionSpec=DistributionSpecReplicated") - check { explainStr -> - assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=1(?![,\d.])/).find()) - assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[biga\]/).find()) + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("PhysicalOlapScan[biga]") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=1(?![,\d.])/).find()) + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[biga\]/).find()) + } } + } finally { + GetDebugPoint().disableDebugPointForAllFEs("TabletStatMgr.delay_row_count_report") } createBigATable(true) From 8cca08a79034411a2e79bda6c8e417e249a587ad Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 18:03:10 +0800 Subject: [PATCH 05/11] [test](fe) Make row count delay debug point easier to target --- .../apache/doris/catalog/TabletStatMgr.java | 7 ++- ...insert_select_table_stats_bootstrap.groovy | 52 +++++++++++++++++-- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index 26c1e905d148d5..628c3fedb42c01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -80,11 +80,16 @@ private void delayRowCountReportIfNeeded(Database db, OlapTable olapTable, boole if (dbName.isEmpty() || tableName.isEmpty() || sleepMs <= 0) { return; } - if (!db.getFullName().equals(dbName) || !olapTable.getName().equals(tableName)) { + // Accept both the display db name and the fully qualified db name so regression cases + // can target the same table across different FE naming conventions. + if ((!db.getFullName().equals(dbName) && !db.getName().equals(dbName)) + || !olapTable.getName().equals(tableName)) { return; } // Delay FE row count publication for the target table so tests can reproduce the unknown-row window. try { + LOG.info("Delay row count report for {}.{} (full db name: {}) by {} ms", + db.getName(), tableName, db.getFullName(), sleepMs); Thread.sleep(sleepMs); } catch (InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy index 6345c7e1138881..d0d91ff043817e 100644 --- a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -15,10 +15,54 @@ // specific language governing permissions and limitations // under the License. +import java.net.HttpURLConnection +import java.net.URLEncoder +import java.net.URL +import java.util.Base64 + suite("insert_select_table_stats_bootstrap", "nonConcurrent") { String db = context.config.getDbNameByFile(context.file) + // TabletStatMgr currently matches Database.getFullName(), which uses the legacy cluster-qualified name. + String debugPointDb = "default_cluster:${db}" sql "use ${db}" - GetDebugPoint().clearDebugPointsForAllFEs() + String feDebugPointEndpoint = "http://${context.config.feHttpAddress}" + String feDebugPointAuth = Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" + + (context.config.feHttpPassword == null ? "" : context.config.feHttpPassword)).getBytes("UTF-8")) + + // Access the FE debug point endpoint from the regression config so remote tests do not depend on show frontends. + def postToFeDebugPoint = { String path -> + HttpURLConnection conn = (HttpURLConnection) new URL(feDebugPointEndpoint + path).openConnection() + conn.setRequestMethod("POST") + conn.setRequestProperty("Authorization", "Basic ${feDebugPointAuth}") + conn.setDoOutput(true) + int responseCode = conn.getResponseCode() + assertTrue(responseCode >= 200 && responseCode < 300) + conn.getInputStream().close() + } + + // Build the query string locally so this case can target the deployed FE address from regression-conf. + def encodeDebugPointParams = { Map params -> + params.collect { key, value -> + URLEncoder.encode(key, "UTF-8") + "=" + URLEncoder.encode(value, "UTF-8") + }.join("&") + } + + // Use the configured FE HTTP endpoint directly because the remote FE may advertise a loopback host in show frontends. + def enableFeDebugPoint = { String name, Map params -> + postToFeDebugPoint("/api/debug_point/add/${name}?${encodeDebugPointParams(params)}") + } + + // Remove the case-specific debug point explicitly so later suites are not affected by the injected delay. + def disableFeDebugPoint = { String name -> + postToFeDebugPoint("/api/debug_point/remove/${name}") + } + + // Clear residual FE debug points before the case starts because previous runs may fail before the cleanup path. + def clearFeDebugPoints = { + postToFeDebugPoint("/api/debug_point/clear") + } + + clearFeDebugPoints() sql "set enable_nereids_planner = true" sql "set enable_fallback_to_original_planner = false" @@ -62,8 +106,8 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { } // Delay FE row count publication so the non-bootstrap branch still sees unknown scan row count. - GetDebugPoint().enableDebugPointForAllFEs("TabletStatMgr.delay_row_count_report", - [db: db, table: "biga", sleep_ms: "5000"]) + enableFeDebugPoint("TabletStatMgr.delay_row_count_report", + [db: debugPointDb, table: "biga", sleep_ms: "15000"]) try { createBigATable(false) @@ -89,7 +133,7 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { } } } finally { - GetDebugPoint().disableDebugPointForAllFEs("TabletStatMgr.delay_row_count_report") + disableFeDebugPoint("TabletStatMgr.delay_row_count_report") } createBigATable(true) From 17d59c5930799d3e3e5a958ff2d663fffed32659 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 22:17:30 +0800 Subject: [PATCH 06/11] [test](fe) Remove unused debug point code and harden regression test --- .../apache/doris/catalog/TabletStatMgr.java | 37 ------ ...insert_select_table_stats_bootstrap.groovy | 118 +++++++----------- 2 files changed, 42 insertions(+), 113 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java index 628c3fedb42c01..ec99c8d45a991a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java @@ -25,7 +25,6 @@ import org.apache.doris.common.Pair; import org.apache.doris.common.Status; import org.apache.doris.common.ThreadPoolManager; -import org.apache.doris.common.util.DebugPointUtil; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.metric.MetricRepo; import org.apache.doris.system.Backend; @@ -63,40 +62,6 @@ public TabletStatMgr() { super("tablet stat mgr", Config.tablet_stat_update_interval_second * 1000); } - private void delayRowCountReportIfNeeded(Database db, OlapTable olapTable, boolean indexReported) { - // Delay only when the current refresh is about to publish a usable row count. - // If indexReported is still false, strict row-count readers already see UNKNOWN_ROW_COUNT, - // so sleeping there would only slow down tablet stat refresh without creating a more precise test window. - if (!indexReported) { - return; - } - DebugPointUtil.DebugPoint debugPoint = DebugPointUtil.getDebugPoint("TabletStatMgr.delay_row_count_report"); - if (debugPoint == null) { - return; - } - String dbName = debugPoint.param("db", ""); - String tableName = debugPoint.param("table", ""); - long sleepMs = debugPoint.param("sleep_ms", -1L); - if (dbName.isEmpty() || tableName.isEmpty() || sleepMs <= 0) { - return; - } - // Accept both the display db name and the fully qualified db name so regression cases - // can target the same table across different FE naming conventions. - if ((!db.getFullName().equals(dbName) && !db.getName().equals(dbName)) - || !olapTable.getName().equals(tableName)) { - return; - } - // Delay FE row count publication for the target table so tests can reproduce the unknown-row window. - try { - LOG.info("Delay row count report for {}.{} (full db name: {}) by {} ms", - db.getName(), tableName, db.getFullName(), sleepMs); - Thread.sleep(sleepMs); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - LOG.warn("Interrupted while delaying row count report for {}.{}", dbName, tableName, e); - } - } - @Override protected void runAfterCatalogReady() { ImmutableMap backends; @@ -304,8 +269,6 @@ protected void runAfterCatalogReady() { tableBinlogSize += tabletBinlogSize; } // end for tablets - // Delay FE row count publication for a specific table when the debug point is enabled. - delayRowCountReportIfNeeded(db, olapTable, indexReported); index.setRowCountReported(indexReported); index.setRowCount(indexRowCount); LOG.debug("Table {} index {} all tablets reported[{}], row count {}", diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy index d0d91ff043817e..5a7bc779200ab6 100644 --- a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -15,54 +15,9 @@ // specific language governing permissions and limitations // under the License. -import java.net.HttpURLConnection -import java.net.URLEncoder -import java.net.URL -import java.util.Base64 - suite("insert_select_table_stats_bootstrap", "nonConcurrent") { String db = context.config.getDbNameByFile(context.file) - // TabletStatMgr currently matches Database.getFullName(), which uses the legacy cluster-qualified name. - String debugPointDb = "default_cluster:${db}" sql "use ${db}" - String feDebugPointEndpoint = "http://${context.config.feHttpAddress}" - String feDebugPointAuth = Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" - + (context.config.feHttpPassword == null ? "" : context.config.feHttpPassword)).getBytes("UTF-8")) - - // Access the FE debug point endpoint from the regression config so remote tests do not depend on show frontends. - def postToFeDebugPoint = { String path -> - HttpURLConnection conn = (HttpURLConnection) new URL(feDebugPointEndpoint + path).openConnection() - conn.setRequestMethod("POST") - conn.setRequestProperty("Authorization", "Basic ${feDebugPointAuth}") - conn.setDoOutput(true) - int responseCode = conn.getResponseCode() - assertTrue(responseCode >= 200 && responseCode < 300) - conn.getInputStream().close() - } - - // Build the query string locally so this case can target the deployed FE address from regression-conf. - def encodeDebugPointParams = { Map params -> - params.collect { key, value -> - URLEncoder.encode(key, "UTF-8") + "=" + URLEncoder.encode(value, "UTF-8") - }.join("&") - } - - // Use the configured FE HTTP endpoint directly because the remote FE may advertise a loopback host in show frontends. - def enableFeDebugPoint = { String name, Map params -> - postToFeDebugPoint("/api/debug_point/add/${name}?${encodeDebugPointParams(params)}") - } - - // Remove the case-specific debug point explicitly so later suites are not affected by the injected delay. - def disableFeDebugPoint = { String name -> - postToFeDebugPoint("/api/debug_point/remove/${name}") - } - - // Clear residual FE debug points before the case starts because previous runs may fail before the cleanup path. - def clearFeDebugPoints = { - postToFeDebugPoint("/api/debug_point/clear") - } - - clearFeDebugPoints() sql "set enable_nereids_planner = true" sql "set enable_fallback_to_original_planner = false" @@ -89,7 +44,7 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { select number, 0, number from numbers("number" = "10") """ - // Analyze the known small table first so the join-side change mainly depends on biga bootstrap stats. + // Analyze the known small table so the join-side decision can depend on biga bootstrap stats. sql "analyze table smallb with sync" def createBigATable = { boolean enableBootstrap -> @@ -105,37 +60,46 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { """ } - // Delay FE row count publication so the non-bootstrap branch still sees unknown scan row count. - enableFeDebugPoint("TabletStatMgr.delay_row_count_report", - [db: debugPointDb, table: "biga", sleep_ms: "15000"]) - try { + // Bootstrap disabled: show table stats should be empty and scan row count should be unknown. + // Retry up to 10 times to tolerate rare cases where TabletStatMgr publishes BE row counts. + boolean bootstrapOffPassed = false + for (int retry = 0; retry < 10 && !bootstrapOffPassed; retry++) { createBigATable(false) - - def tableStatsWithoutBootstrap = sql "show table stats biga" - assertEquals(1, tableStatsWithoutBootstrap.size()) - assertEquals("", tableStatsWithoutBootstrap[0][0]) - assertEquals("", tableStatsWithoutBootstrap[0][3]) - assertEquals("", tableStatsWithoutBootstrap[0][5]) - - explain { - sql """ - physical plan - select a.k, b.v - from smallb b - join biga a - on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) - """ - contains("PhysicalOlapScan[biga]") - contains("distributionSpec=DistributionSpecReplicated") - check { explainStr -> - assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=1(?![,\d.])/).find()) - assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[biga\]/).find()) + try { + def tableStatsWithoutBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithoutBootstrap.size()) + assertEquals("", tableStatsWithoutBootstrap[0][0]) + assertEquals("", tableStatsWithoutBootstrap[0][3]) + assertEquals("", tableStatsWithoutBootstrap[0][5]) + + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("table=biga") + // Without bootstrap, the optimizer should see the scan row count as 1 (unknown). + contains("stats=1") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + // biga (stats=1) should be the broadcast side because 1 < smallb's 10 rows. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=biga/).find()) + } + } + bootstrapOffPassed = true + } catch (Throwable t) { + if (retry == 9) { + throw t } + logger.info("Bootstrap-off check attempt ${retry + 1} failed, retrying...", t) } - } finally { - disableFeDebugPoint("TabletStatMgr.delay_row_count_report") } + // Bootstrap enabled: show table stats should reflect the inserted row count, + // and the optimizer should broadcast the known small table (smallb). createBigATable(true) def tableStatsWithBootstrap = sql "show table stats biga" @@ -152,12 +116,14 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { join biga a on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) """ - contains("PhysicalOlapScan[biga]") - contains("PhysicalOlapScan[smallb]") + contains("table=biga") + contains("table=smallb") + // With bootstrap stats, the scan row count for biga should reflect the inserted row count. + contains("stats=262,144") contains("distributionSpec=DistributionSpecReplicated") check { explainStr -> - assertTrue((explainStr =~ /PhysicalOlapScan\[biga\][^\n]*stats=262,144/).find()) - assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*PhysicalOlapScan\[smallb\]/).find()) + // smallb should be the broadcast (build) side because it has fewer rows than biga. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=smallb/).find()) } } } From 3c009e61152a8ea27e54d7685ec14a31c0a1bacf Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 23:23:44 +0800 Subject: [PATCH 07/11] [fix](fe) Catch bootstrap exception so it does not fail insert statement --- .../trees/plans/commands/insert/OlapInsertExecutor.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java index d7589d8a3c8261..4923cf195c0add 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java @@ -264,7 +264,13 @@ protected void onComplete() throws UserException { // Bootstrap table-level stats only after the target data is visible to keep row-count fallback aligned. if (txnStatus == TransactionStatus.VISIBLE && ctx.getSessionVariable().isEnableInsertSelectTableStatsBootstrap()) { - Env.getCurrentEnv().getAnalysisManager().bootstrapTableStatsIfAbsent(olapTable, loadedRows); + try { + Env.getCurrentEnv().getAnalysisManager().bootstrapTableStatsIfAbsent(olapTable, loadedRows); + } catch (Exception e) { + // Bootstrap is best-effort; failure should not fail the insert statement because the data + // has already been committed and is visible. + LOG.warn("Failed to bootstrap table stats for {} after insert", olapTable.getName(), e); + } } } From 421203aa55bc40dfd2dcc1ace5fa0714b1e271a9 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Tue, 9 Jun 2026 23:35:47 +0800 Subject: [PATCH 08/11] [fix](fe) Narrow lock scope in bootstrapTableStatsIfAbsent to avoid potential deadlock --- .../java/org/apache/doris/statistics/AnalysisManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index b622a802aec4f5..4d9150b2b44f96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -606,13 +606,18 @@ public void bootstrapTableStatsIfAbsent(OlapTable table, long loadedRows) { if (findTableStatsStatus(table.getId()) != null) { return; } + TableStatsMeta newStats; synchronized (idToTblStats) { if (idToTblStats.containsKey(table.getId())) { return; } long bootstrapRowCount = resolveBootstrapRowCount(table, loadedRows); - updateTableStatsStatus(TableStatsMeta.newBootstrapStats(table, bootstrapRowCount, loadedRows)); + newStats = TableStatsMeta.newBootstrapStats(table, bootstrapRowCount, loadedRows); + idToTblStats.put(newStats.tblId, newStats); } + // Write edit log outside the lock to avoid potential deadlocks with internal locks + // held by the edit log subsystem. + logCreateTableStats(newStats); } public List showAutoPendingJobs(TableNameInfo tblName, String priority) { From c97c2f5ed00b7499de41623fa56cbe0203506a61 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Wed, 10 Jun 2026 14:40:31 +0800 Subject: [PATCH 09/11] [test](fe) Add unit tests for bootstrapTableStatsIfAbsent --- .../doris/statistics/AnalysisManagerTest.java | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 30b7bd55ed5010..99b356d6960c17 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -473,4 +473,76 @@ private AnalyzeTableCommand mockAnalyzeCommand(AnalysisMethod analysisMethod, Sc Mockito.when(command.getAnalyzeProperties()).thenReturn(analyzeProperties); return command; } + + @Test + void testBootstrapTableStatsIfAbsentWithZeroLoadedRows() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getId()).thenReturn(1000L); + + // loadedRows <= 0 → should return immediately without creating stats. + manager.bootstrapTableStatsIfAbsent(table, 0); + Assertions.assertNull(manager.findTableStatsStatus(1000L)); + + manager.bootstrapTableStatsIfAbsent(table, -1); + Assertions.assertNull(manager.findTableStatsStatus(1000L)); + } + + @Test + void testBootstrapTableStatsIfAbsentWhenStatsAlreadyExist() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + OlapTable table = mockTable(1000L, "test_tbl"); + + // Seed existing TableStatsMeta so bootstrap should short-circuit. + // Use replayUpdateTableStatsStatus to avoid touching edit log. + TableStatsMeta existing = TableStatsMeta.newBootstrapStats(table, 100L, 100L); + manager.replayUpdateTableStatsStatus(existing); + + manager.bootstrapTableStatsIfAbsent(table, 200L); + TableStatsMeta result = manager.findTableStatsStatus(1000L); + Assertions.assertNotNull(result); + // Existing row count should be unchanged. + Assertions.assertEquals(100L, result.rowCount); + } + + @Test + void testBootstrapTableStatsIfAbsentCreatesStats() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + // Avoid touching edit log in unit tests. + Mockito.doNothing().when(manager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + + OlapTable table = mockTable(1000L, "test_tbl"); + + manager.bootstrapTableStatsIfAbsent(table, 128L); + + TableStatsMeta result = manager.findTableStatsStatus(1000L); + Assertions.assertNotNull(result); + Assertions.assertEquals(128L, result.rowCount); + Assertions.assertEquals(128L, result.updatedRows.get()); + // Bootstrap stats should not be marked as user-injected. + Assertions.assertFalse(result.userInjected); + // Bootstrap stats should not have a job type. + Assertions.assertNull(result.jobType); + } + + private static OlapTable mockTable(long tableId, String tableName) { + org.apache.doris.datasource.CatalogIf catalog = Mockito.mock(org.apache.doris.datasource.CatalogIf.class); + Mockito.when(catalog.getId()).thenReturn(1L); + Mockito.when(catalog.getName()).thenReturn("internal"); + + org.apache.doris.catalog.Database database = Mockito.mock(org.apache.doris.catalog.Database.class); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(database.getId()).thenReturn(100L); + Mockito.when(database.getFullName()).thenReturn("default_cluster:test_db"); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getId()).thenReturn(tableId); + Mockito.when(table.getName()).thenReturn(tableName); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getBaseIndexId()).thenReturn(200L); + Mockito.when(table.getRowCountForIndex(Mockito.anyLong(), Mockito.anyBoolean())).thenReturn(-1L); + Mockito.when(table.getRowCount()).thenReturn(-1L); + return table; + } } + From 56d1b716a4bd9fd66d56e6709c529a4f37277b26 Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Wed, 10 Jun 2026 14:41:07 +0800 Subject: [PATCH 10/11] [test](fe) Add analyze stage to bootstrap regression test --- ...insert_select_table_stats_bootstrap.groovy | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy index 5a7bc779200ab6..f09eee0ee91584 100644 --- a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -126,4 +126,36 @@ suite("insert_select_table_stats_bootstrap", "nonConcurrent") { assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=smallb/).find()) } } + + // Verify that bootstrap stats do not interfere with subsequent manual analyze. + // After analyze completes, column-level stats should be available and the plan should remain correct. + sql "analyze table biga with sync" + + def tableStatsAfterAnalyze = sql "show table stats biga" + assertEquals(1, tableStatsAfterAnalyze.size()) + // Analyze should produce column-level stats for the table; the columns field (index 4) + // should list column names such as 'biga.k'. + assertTrue(tableStatsAfterAnalyze[0][4].toString().contains("biga")) + // trigger (index 5) should be populated (e.g. MANUAL). + assertTrue(!tableStatsAfterAnalyze[0][5].toString().isEmpty()) + assertEquals("false", tableStatsAfterAnalyze[0][7]) // user_injected (index 7) + + // Ensure the optimizer still correctly uses the row count from full stats. + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("table=biga") + contains("table=smallb") + contains("stats=262,144") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + // smallb should still be the broadcast side. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=smallb/).find()) + } + } } From a3c135d6e5982f6df7aecf61637dccc8a123e7bb Mon Sep 17 00:00:00 2001 From: wenzhenghu Date: Wed, 10 Jun 2026 22:29:05 +0800 Subject: [PATCH 11/11] [fix](fe) Set updatedTime for bootstrap stats and hide zero lastAnalyzeTime in show table stats --- .../nereids/trees/plans/commands/ShowTableStatsCommand.java | 2 +- .../main/java/org/apache/doris/statistics/TableStatsMeta.java | 3 +++ .../trees/plans/commands/ShowTableStatsCommandTest.java | 2 ++ .../java/org/apache/doris/statistics/AnalysisManagerTest.java | 3 +++ .../java/org/apache/doris/statistics/TableStatsMetaTest.java | 3 +++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java index 5d3031893f96e9..a22df198525802 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java @@ -260,7 +260,7 @@ public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic, Tabl row.add(String.valueOf(tableStatistic.partitionChanged.get())); row.add(String.valueOf(tableStatistic.userInjected)); row.add(table == null ? "N/A" : String.valueOf(table.autoAnalyzeEnabled())); - row.add(lastAnalyzeTime.format(formatter)); + row.add(tableStatistic.lastAnalyzeTime == 0 ? "" : lastAnalyzeTime.format(formatter)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 0b2176ef56429d..aac86dcc0c6bcd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -146,6 +146,9 @@ public static TableStatsMeta newBootstrapStats(OlapTable table, long rowCount, l tableStats.rowCount = rowCount; tableStats.updatedRows.set(updatedRows); tableStats.indexesRowCount.put(table.getBaseIndexId(), rowCount); + // Record the time when row count was bootstrapped so show table stats displays a reasonable update time, + // but leave lastAnalyzeTime as 0 since bootstrap is not an analyze operation. + tableStats.updatedTime = System.currentTimeMillis(); return tableStats; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java index 31b2070c9f4d16..4b4f522d261ddb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java @@ -198,5 +198,7 @@ void testConstructTableResultSetForBootstrapStats() throws Exception { Assertions.assertEquals("128", row.get(2)); Assertions.assertEquals("", row.get(5)); Assertions.assertEquals("false", row.get(7)); + // last_analyze_time (index 9) should be empty for bootstrap stats. + Assertions.assertEquals("", row.get(9)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 99b356d6960c17..ab0e41843357c5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -523,6 +523,9 @@ void testBootstrapTableStatsIfAbsentCreatesStats() { Assertions.assertFalse(result.userInjected); // Bootstrap stats should not have a job type. Assertions.assertNull(result.jobType); + // Bootstrap should set updatedTime but leave lastAnalyzeTime as 0. + Assertions.assertTrue(result.updatedTime > 0); + Assertions.assertEquals(0L, result.lastAnalyzeTime); } private static OlapTable mockTable(long tableId, String tableName) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java index 5390ea1f422e1f..ae15d7556d6b61 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -83,5 +83,8 @@ void testNewBootstrapStatsSeedsBaseIndexRowCount() { Assertions.assertEquals(123L, meta.getRowCount(100L)); Assertions.assertTrue(meta.isColumnsStatsEmpty()); Assertions.assertFalse(meta.userInjected); + // Bootstrap should record the current time as updatedTime but leave lastAnalyzeTime as 0. + Assertions.assertTrue(meta.updatedTime > 0); + Assertions.assertEquals(0L, meta.lastAnalyzeTime); } }