diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java index cd586216f244a8..a22df198525802 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommand.java @@ -255,11 +255,12 @@ public ShowResultSet constructTableResultSet(TableStatsMeta tableStatistic, Tabl newColumnsSet.add(Pair.of(Util.getTempTableDisplayName(pair.first), pair.second)); } row.add(newColumnsSet.toString()); - row.add(tableStatistic.jobType.toString()); + // Bootstrap table stats may only seed row count, so job type can be absent before the first analyze task. + row.add(tableStatistic.jobType == null ? "" : tableStatistic.jobType.toString()); row.add(String.valueOf(tableStatistic.partitionChanged.get())); row.add(String.valueOf(tableStatistic.userInjected)); row.add(table == null ? "N/A" : String.valueOf(table.autoAnalyzeEnabled())); - row.add(lastAnalyzeTime.format(formatter)); + row.add(tableStatistic.lastAnalyzeTime == 0 ? "" : lastAnalyzeTime.format(formatter)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java index c4ae68acb98e26..4923cf195c0add 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java @@ -261,6 +261,17 @@ protected void onComplete() throws UserException { StmtExecutor.syncLoadForTablets(backendsList, allTabletIds); } } + // Bootstrap table-level stats only after the target data is visible to keep row-count fallback aligned. + if (txnStatus == TransactionStatus.VISIBLE + && ctx.getSessionVariable().isEnableInsertSelectTableStatsBootstrap()) { + try { + Env.getCurrentEnv().getAnalysisManager().bootstrapTableStatsIfAbsent(olapTable, loadedRows); + } catch (Exception e) { + // Bootstrap is best-effort; failure should not fail the insert statement because the data + // has already been committed and is visible. + LOG.warn("Failed to bootstrap table stats for {} after insert", olapTable.getName(), e); + } + } } private void setTxnCallbackId() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index febfe56721b3ab..f2449d946cd92c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -446,6 +446,9 @@ public String toString() { public static final String ENABLE_SINGLE_REPLICA_INSERT = "enable_single_replica_insert"; + public static final String ENABLE_INSERT_SELECT_TABLE_STATS_BOOTSTRAP = + "enable_insert_select_table_stats_bootstrap"; + public static final String SHUFFLED_AGG_NODE_IDS = "shuffled_agg_node_ids"; public static final String ENABLE_FAST_ANALYZE_INSERT_INTO_VALUES = "enable_fast_analyze_into_values"; @@ -2084,6 +2087,13 @@ public boolean isEnableHboNonStrictMatchingMode() { needForward = true, varType = VariableAnnotation.EXPERIMENTAL) public boolean enableSingleReplicaInsert = false; + @VarAttrDef.VarAttr(name = ENABLE_INSERT_SELECT_TABLE_STATS_BOOTSTRAP, + needForward = true, varType = VariableAnnotation.EXPERIMENTAL, description = { + "是否为 CTAS 和 INSERT INTO SELECT 在写入可见后补建最小表级统计基线。", + "Whether to bootstrap minimal table-level stats after CTAS and INSERT INTO SELECT become visible." + }) + private boolean enableInsertSelectTableStatsBootstrap = false; + @VarAttrDef.VarAttr(name = SHUFFLED_AGG_NODE_IDS, needForward = true, varType = VariableAnnotation.EXPERIMENTAL) public String shuffledAggNodeIds = ""; @@ -4887,6 +4897,14 @@ public boolean isInsertVisibleTimeoutReturnError() { return getInsertVisibleTimeoutReturnModeEnum() == InsertVisibleTimeoutReturnMode.ERROR; } + public boolean isEnableInsertSelectTableStatsBootstrap() { + return enableInsertSelectTableStatsBootstrap; + } + + public void setEnableInsertSelectTableStatsBootstrap(boolean enableInsertSelectTableStatsBootstrap) { + this.enableInsertSelectTableStatsBootstrap = enableInsertSelectTableStatsBootstrap; + } + public void setInsertVisibleTimeoutReturnMode(String insertVisibleTimeoutReturnMode) { this.insertVisibleTimeoutReturnMode = parseInsertVisibleTimeoutReturnMode(insertVisibleTimeoutReturnMode) .getOption(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index b2b4c0d57f63a1..4d9150b2b44f96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -597,6 +597,29 @@ public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) { } } + // Bootstrap table-level row count immediately after write so the optimizer can consume + // a usable base row count before column statistics are available. + public void bootstrapTableStatsIfAbsent(OlapTable table, long loadedRows) { + if (loadedRows <= 0) { + return; + } + if (findTableStatsStatus(table.getId()) != null) { + return; + } + TableStatsMeta newStats; + synchronized (idToTblStats) { + if (idToTblStats.containsKey(table.getId())) { + return; + } + long bootstrapRowCount = resolveBootstrapRowCount(table, loadedRows); + newStats = TableStatsMeta.newBootstrapStats(table, bootstrapRowCount, loadedRows); + idToTblStats.put(newStats.tblId, newStats); + } + // Write edit log outside the lock to avoid potential deadlocks with internal locks + // held by the edit log subsystem. + logCreateTableStats(newStats); + } + public List showAutoPendingJobs(TableNameInfo tblName, String priority) { List result = Lists.newArrayList(); if (priority == null || priority.isEmpty()) { @@ -616,6 +639,19 @@ public List showAutoPendingJobs(TableNameInfo tblName, S return result; } + private long resolveBootstrapRowCount(OlapTable table, long loadedRows) { + long bootstrapRowCount = loadedRows; + long baseIndexRowCount = table.getRowCountForIndex(table.getBaseIndexId(), true); + if (baseIndexRowCount > 0) { + bootstrapRowCount = Math.max(bootstrapRowCount, baseIndexRowCount); + } + long tableRowCount = table.getRowCount(); + if (tableRowCount > 0) { + bootstrapRowCount = Math.max(bootstrapRowCount, tableRowCount); + } + return bootstrapRowCount; + } + protected List getPendingJobs(Map>> jobMap, JobPriority priority, TableNameInfo tableNameInfo) { List result = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 0b77aba91e30d8..aac86dcc0c6bcd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -116,6 +116,16 @@ public TableStatsMeta() { idxId = 0; } + private TableStatsMeta(TableIf table) { + this.ctlId = table.getDatabase().getCatalog().getId(); + this.ctlName = table.getDatabase().getCatalog().getName(); + this.dbId = table.getDatabase().getId(); + this.dbName = table.getDatabase().getFullName(); + this.tblId = table.getId(); + this.tblName = table.getName(); + this.idxId = -1; + } + // It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo // and TableStats is quite different. public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { @@ -130,6 +140,18 @@ public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { update(analyzedJob, table); } + // Bootstrap metadata only seeds table-level row count so the optimizer can avoid the unknown-row fallback. + public static TableStatsMeta newBootstrapStats(OlapTable table, long rowCount, long updatedRows) { + TableStatsMeta tableStats = new TableStatsMeta(table); + tableStats.rowCount = rowCount; + tableStats.updatedRows.set(updatedRows); + tableStats.indexesRowCount.put(table.getBaseIndexId(), rowCount); + // Record the time when row count was bootstrapped so show table stats displays a reasonable update time, + // but leave lastAnalyzeTime as 0 since bootstrap is not an analyze operation. + tableStats.updatedTime = System.currentTimeMillis(); + return tableStats; + } + @Override public void write(DataOutput out) throws IOException { String json = GsonUtils.GSON.toJson(this); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java index 38f204a6c280d8..4b4f522d261ddb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/ShowTableStatsCommandTest.java @@ -21,16 +21,20 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.info.PartitionNamesInfo; import org.apache.doris.catalog.info.TableNameInfo; import org.apache.doris.common.AnalysisException; +import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mysql.privilege.AccessControllerManager; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.QueryState; +import org.apache.doris.qe.ShowResultSet; +import org.apache.doris.statistics.TableStatsMeta; import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.AfterEach; @@ -166,4 +170,35 @@ void testValidateNoPrivilege() { Assertions.assertThrows(AnalysisException.class, () -> command2.validate(connectContext), "Permission denied command denied to user 'null'@'null' for table 'test_db: test_tbl2'"); } + + @Test + void testConstructTableResultSetForBootstrapStats() throws Exception { + runBefore(); + CatalogIf catalogIf = Mockito.mock(CatalogIf.class); + Database database = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalogIf.getId()).thenReturn(1L); + Mockito.when(catalogIf.getName()).thenReturn(internalCtl); + Mockito.when(database.getCatalog()).thenReturn(catalogIf); + Mockito.when(database.getId()).thenReturn(2L); + Mockito.when(database.getFullName()).thenReturn(CatalogMocker.TEST_DB_NAME); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getId()).thenReturn(CatalogMocker.TEST_TBL_ID); + Mockito.when(table.getName()).thenReturn(CatalogMocker.TEST_TBL_NAME); + Mockito.when(table.getBaseIndexId()).thenReturn(CatalogMocker.TEST_TBL_ID); + Mockito.when(table.autoAnalyzeEnabled()).thenReturn(false); + // Bootstrap stats only seed row count, so show table stats should still render without a job type. + TableStatsMeta bootstrapStats = TableStatsMeta.newBootstrapStats(table, 128L, 128L); + ShowTableStatsCommand command = new ShowTableStatsCommand(CatalogMocker.TEST_TBL_ID); + ShowResultSet resultSet = command.constructTableResultSet(bootstrapStats, table); + + Assertions.assertEquals(1, resultSet.getResultRows().size()); + List row = resultSet.getResultRows().get(0); + Assertions.assertEquals("128", row.get(0)); + Assertions.assertEquals("128", row.get(2)); + Assertions.assertEquals("", row.get(5)); + Assertions.assertEquals("false", row.get(7)); + // last_analyze_time (index 9) should be empty for bootstrap stats. + Assertions.assertEquals("", row.get(9)); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java index ea5318eedcb54f..3b7287f33e2255 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutorTest.java @@ -39,6 +39,8 @@ import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.StmtExecutor; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TQueryOptions; import org.apache.doris.thrift.TStatusCode; import org.apache.doris.thrift.TUniqueId; @@ -182,6 +184,70 @@ void testOnFailAbortsUncommittedTransaction() throws Exception { } } + @Test + void testExecuteSingleInsertVisibleBootstrapsTableStatsWhenAbsent() throws Exception { + ConnectContext ctx = createExecutorContext(); + ctx.getSessionVariable().setEnableInsertSelectTableStatsBootstrap(true); + Coordinator coordinator = createCoordinator(); + GlobalTransactionMgrIface txnMgr = Mockito.mock(GlobalTransactionMgrIface.class); + TransactionState txnState = Mockito.mock(TransactionState.class); + LoadManager loadManager = Mockito.mock(LoadManager.class); + AnalysisManager analysisManager = Mockito.spy(new AnalysisManager()); + Env currentEnv = createCurrentEnv(loadManager, analysisManager); + StmtExecutor stmtExecutor = createStmtExecutor(); + + Mockito.doNothing().when(analysisManager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + try (MockedStatic envFactoryMock = Mockito.mockStatic(EnvFactory.class); + MockedStatic envMock = Mockito.mockStatic(Env.class)) { + prepareFactoryMocks(envFactoryMock, envMock, coordinator, txnMgr, txnState, currentEnv); + ctx.setEnv(currentEnv); + + Mockito.when(txnMgr.commitAndPublishTransaction( + Mockito.any(), Mockito.anyList(), Mockito.anyLong(), Mockito.anyList(), Mockito.anyLong(), + Mockito.isNull())).thenReturn(true); + + OlapInsertExecutor executor = createExecutor(ctx); + executor.txnId = 10004L; + executor.executeSingleInsert(stmtExecutor); + + TableStatsMeta tableStats = analysisManager.findTableStatsStatus(2L); + Assertions.assertNotNull(tableStats); + Assertions.assertEquals(12L, tableStats.rowCount); + Assertions.assertEquals(12L, tableStats.updatedRows.get()); + Assertions.assertEquals(12L, tableStats.getRowCount(101L)); + Assertions.assertTrue(tableStats.isColumnsStatsEmpty()); + } + } + + @Test + void testExecuteSingleInsertVisibleDoesNotBootstrapTableStatsWhenDisabled() throws Exception { + ConnectContext ctx = createExecutorContext(); + Coordinator coordinator = createCoordinator(); + GlobalTransactionMgrIface txnMgr = Mockito.mock(GlobalTransactionMgrIface.class); + TransactionState txnState = Mockito.mock(TransactionState.class); + LoadManager loadManager = Mockito.mock(LoadManager.class); + AnalysisManager analysisManager = Mockito.spy(new AnalysisManager()); + Env currentEnv = createCurrentEnv(loadManager, analysisManager); + StmtExecutor stmtExecutor = createStmtExecutor(); + + Mockito.doNothing().when(analysisManager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + try (MockedStatic envFactoryMock = Mockito.mockStatic(EnvFactory.class); + MockedStatic envMock = Mockito.mockStatic(Env.class)) { + prepareFactoryMocks(envFactoryMock, envMock, coordinator, txnMgr, txnState, currentEnv); + ctx.setEnv(currentEnv); + + Mockito.when(txnMgr.commitAndPublishTransaction( + Mockito.any(), Mockito.anyList(), Mockito.anyLong(), Mockito.anyList(), Mockito.anyLong(), + Mockito.isNull())).thenReturn(true); + + OlapInsertExecutor executor = createExecutor(ctx); + executor.txnId = 10005L; + executor.executeSingleInsert(stmtExecutor); + + Assertions.assertNull(analysisManager.findTableStatsStatus(2L)); + } + } + // Build a fresh context per case so insertResult and QueryState do not leak between tests. private ConnectContext createExecutorContext() { ConnectContext ctx = new ConnectContext(); @@ -227,6 +293,10 @@ private StmtExecutor createStmtExecutor() { // Provide the job-manager chain needed by master-side setTxnCallbackId(). private Env createCurrentEnv(LoadManager loadManager) { + return createCurrentEnv(loadManager, Mockito.mock(AnalysisManager.class)); + } + + private Env createCurrentEnv(LoadManager loadManager, AnalysisManager analysisManager) { Env currentEnv = Mockito.mock(Env.class); // Mock the internal catalog because ConnectContext.setEnv() resolves the default catalog on master. InternalCatalog internalCatalog = Mockito.mock(InternalCatalog.class); @@ -236,6 +306,7 @@ private Env createCurrentEnv(LoadManager loadManager) { Mockito.when(internalCatalog.getName()).thenReturn("internal"); Mockito.when(currentEnv.getLoadManager()).thenReturn(loadManager); Mockito.when(currentEnv.getJobManager()).thenReturn(jobManager); + Mockito.when(currentEnv.getAnalysisManager()).thenReturn(analysisManager); Mockito.when(jobManager.getStreamingTaskManager()).thenReturn(streamingTaskManager); Mockito.when(streamingTaskManager.getStreamingInsertTaskById(Mockito.anyLong())).thenReturn(null); return currentEnv; @@ -244,14 +315,21 @@ private Env createCurrentEnv(LoadManager loadManager) { // Create an executor with mocked table metadata because this test only validates timeout result handling. private OlapInsertExecutor createExecutor(ConnectContext ctx) { Database database = Mockito.mock(Database.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); Mockito.when(database.getFullName()).thenReturn("test_db"); Mockito.when(database.getId()).thenReturn(1L); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(catalog.getId()).thenReturn(3L); + Mockito.when(catalog.getName()).thenReturn("internal"); // Mock OlapTable because the master-side executor now casts the target table to OlapTable. OlapTable table = Mockito.mock(OlapTable.class); Mockito.when(table.getDatabase()).thenReturn(database); Mockito.when(table.getName()).thenReturn("test_tbl"); Mockito.when(table.getId()).thenReturn(2L); + Mockito.when(table.getBaseIndexId()).thenReturn(101L); + Mockito.when(table.getRowCountForIndex(101L, true)).thenReturn(-1L); + Mockito.when(table.getRowCount()).thenReturn(0L); return new OlapInsertExecutor(ctx, table, "label_test", Mockito.mock(NereidsPlanner.class), Optional.empty(), false, 0L); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 30b7bd55ed5010..ab0e41843357c5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -473,4 +473,79 @@ private AnalyzeTableCommand mockAnalyzeCommand(AnalysisMethod analysisMethod, Sc Mockito.when(command.getAnalyzeProperties()).thenReturn(analyzeProperties); return command; } + + @Test + void testBootstrapTableStatsIfAbsentWithZeroLoadedRows() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getId()).thenReturn(1000L); + + // loadedRows <= 0 → should return immediately without creating stats. + manager.bootstrapTableStatsIfAbsent(table, 0); + Assertions.assertNull(manager.findTableStatsStatus(1000L)); + + manager.bootstrapTableStatsIfAbsent(table, -1); + Assertions.assertNull(manager.findTableStatsStatus(1000L)); + } + + @Test + void testBootstrapTableStatsIfAbsentWhenStatsAlreadyExist() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + OlapTable table = mockTable(1000L, "test_tbl"); + + // Seed existing TableStatsMeta so bootstrap should short-circuit. + // Use replayUpdateTableStatsStatus to avoid touching edit log. + TableStatsMeta existing = TableStatsMeta.newBootstrapStats(table, 100L, 100L); + manager.replayUpdateTableStatsStatus(existing); + + manager.bootstrapTableStatsIfAbsent(table, 200L); + TableStatsMeta result = manager.findTableStatsStatus(1000L); + Assertions.assertNotNull(result); + // Existing row count should be unchanged. + Assertions.assertEquals(100L, result.rowCount); + } + + @Test + void testBootstrapTableStatsIfAbsentCreatesStats() { + AnalysisManager manager = Mockito.spy(new AnalysisManager()); + // Avoid touching edit log in unit tests. + Mockito.doNothing().when(manager).logCreateTableStats(Mockito.any(TableStatsMeta.class)); + + OlapTable table = mockTable(1000L, "test_tbl"); + + manager.bootstrapTableStatsIfAbsent(table, 128L); + + TableStatsMeta result = manager.findTableStatsStatus(1000L); + Assertions.assertNotNull(result); + Assertions.assertEquals(128L, result.rowCount); + Assertions.assertEquals(128L, result.updatedRows.get()); + // Bootstrap stats should not be marked as user-injected. + Assertions.assertFalse(result.userInjected); + // Bootstrap stats should not have a job type. + Assertions.assertNull(result.jobType); + // Bootstrap should set updatedTime but leave lastAnalyzeTime as 0. + Assertions.assertTrue(result.updatedTime > 0); + Assertions.assertEquals(0L, result.lastAnalyzeTime); + } + + private static OlapTable mockTable(long tableId, String tableName) { + org.apache.doris.datasource.CatalogIf catalog = Mockito.mock(org.apache.doris.datasource.CatalogIf.class); + Mockito.when(catalog.getId()).thenReturn(1L); + Mockito.when(catalog.getName()).thenReturn("internal"); + + org.apache.doris.catalog.Database database = Mockito.mock(org.apache.doris.catalog.Database.class); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(database.getId()).thenReturn(100L); + Mockito.when(database.getFullName()).thenReturn("default_cluster:test_db"); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getId()).thenReturn(tableId); + Mockito.when(table.getName()).thenReturn(tableName); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getBaseIndexId()).thenReturn(200L); + Mockito.when(table.getRowCountForIndex(Mockito.anyLong(), Mockito.anyBoolean())).thenReturn(-1L); + Mockito.when(table.getRowCount()).thenReturn(-1L); + return table; + } } + diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java index 12d3931ed7bd65..ae15d7556d6b61 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -17,7 +17,9 @@ package org.apache.doris.statistics; +import org.apache.doris.catalog.Database; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.datasource.InternalCatalog; import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; @@ -58,4 +60,31 @@ void testClearStaleIndexRowCount() { Assertions.assertEquals(-1, meta.getRowCount(3)); Assertions.assertEquals(-1, meta.getRowCount(4)); } + + @Test + void testNewBootstrapStatsSeedsBaseIndexRowCount() { + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database database = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getDatabase()).thenReturn(database); + Mockito.when(table.getId()).thenReturn(10L); + Mockito.when(table.getName()).thenReturn("t1"); + Mockito.when(table.getBaseIndexId()).thenReturn(100L); + Mockito.when(database.getCatalog()).thenReturn(catalog); + Mockito.when(database.getId()).thenReturn(20L); + Mockito.when(database.getFullName()).thenReturn("db1"); + Mockito.when(catalog.getId()).thenReturn(30L); + Mockito.when(catalog.getName()).thenReturn("internal"); + + TableStatsMeta meta = TableStatsMeta.newBootstrapStats(table, 123L, 123L); + + Assertions.assertEquals(123L, meta.rowCount); + Assertions.assertEquals(123L, meta.updatedRows.get()); + Assertions.assertEquals(123L, meta.getRowCount(100L)); + Assertions.assertTrue(meta.isColumnsStatsEmpty()); + Assertions.assertFalse(meta.userInjected); + // Bootstrap should record the current time as updatedTime but leave lastAnalyzeTime as 0. + Assertions.assertTrue(meta.updatedTime > 0); + Assertions.assertEquals(0L, meta.lastAnalyzeTime); + } } diff --git a/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy new file mode 100644 index 00000000000000..f09eee0ee91584 --- /dev/null +++ b/regression-test/suites/query_p0/stats/insert_select_table_stats_bootstrap.groovy @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("insert_select_table_stats_bootstrap", "nonConcurrent") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + + sql "set enable_nereids_planner = true" + sql "set enable_fallback_to_original_planner = false" + // Disable the distributed planner path so this case falls back to regular join-side selection. + sql "set enable_nereids_distribute_planner = false" + // Disable bucket shuffle join so the optimizer has to choose between broadcast and regular shuffle. + sql "set enable_bucket_shuffle_join = false" + sql "set runtime_filter_mode = OFF" + sql "set broadcast_row_count_limit = 100" + sql "set broadcast_hashtable_mem_limit_percentage = 1" + + sql "drop table if exists smallb" + sql """ + create table smallb ( + k1 int, + k2 int, + v int + ) + distributed by hash(v) buckets 1 + properties("replication_num" = "1") + """ + sql """ + insert into smallb + select number, 0, number + from numbers("number" = "10") + """ + // Analyze the known small table so the join-side decision can depend on biga bootstrap stats. + sql "analyze table smallb with sync" + + def createBigATable = { boolean enableBootstrap -> + sql "drop table if exists biga" + sql "set enable_insert_select_table_stats_bootstrap = ${enableBootstrap}" + sql """ + create table biga + distributed by hash(k) buckets 1 + properties("replication_num" = "1") + as + select number % 10 as k, repeat('x', 64) as pad + from numbers("number" = "262144") + """ + } + + // Bootstrap disabled: show table stats should be empty and scan row count should be unknown. + // Retry up to 10 times to tolerate rare cases where TabletStatMgr publishes BE row counts. + boolean bootstrapOffPassed = false + for (int retry = 0; retry < 10 && !bootstrapOffPassed; retry++) { + createBigATable(false) + try { + def tableStatsWithoutBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithoutBootstrap.size()) + assertEquals("", tableStatsWithoutBootstrap[0][0]) + assertEquals("", tableStatsWithoutBootstrap[0][3]) + assertEquals("", tableStatsWithoutBootstrap[0][5]) + + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("table=biga") + // Without bootstrap, the optimizer should see the scan row count as 1 (unknown). + contains("stats=1") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + // biga (stats=1) should be the broadcast side because 1 < smallb's 10 rows. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=biga/).find()) + } + } + bootstrapOffPassed = true + } catch (Throwable t) { + if (retry == 9) { + throw t + } + logger.info("Bootstrap-off check attempt ${retry + 1} failed, retrying...", t) + } + } + + // Bootstrap enabled: show table stats should reflect the inserted row count, + // and the optimizer should broadcast the known small table (smallb). + createBigATable(true) + + def tableStatsWithBootstrap = sql "show table stats biga" + assertEquals(1, tableStatsWithBootstrap.size()) + assertEquals("262144", tableStatsWithBootstrap[0][0]) + assertEquals("262144", tableStatsWithBootstrap[0][2]) + assertEquals("false", tableStatsWithBootstrap[0][7]) + + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("table=biga") + contains("table=smallb") + // With bootstrap stats, the scan row count for biga should reflect the inserted row count. + contains("stats=262,144") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + // smallb should be the broadcast (build) side because it has fewer rows than biga. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=smallb/).find()) + } + } + + // Verify that bootstrap stats do not interfere with subsequent manual analyze. + // After analyze completes, column-level stats should be available and the plan should remain correct. + sql "analyze table biga with sync" + + def tableStatsAfterAnalyze = sql "show table stats biga" + assertEquals(1, tableStatsAfterAnalyze.size()) + // Analyze should produce column-level stats for the table; the columns field (index 4) + // should list column names such as 'biga.k'. + assertTrue(tableStatsAfterAnalyze[0][4].toString().contains("biga")) + // trigger (index 5) should be populated (e.g. MANUAL). + assertTrue(!tableStatsAfterAnalyze[0][5].toString().isEmpty()) + assertEquals("false", tableStatsAfterAnalyze[0][7]) // user_injected (index 7) + + // Ensure the optimizer still correctly uses the row count from full stats. + explain { + sql """ + physical plan + select a.k, b.v + from smallb b + join biga a + on cast(a.k as bigint) = cast(b.k1 + b.k2 as bigint) + """ + contains("table=biga") + contains("table=smallb") + contains("stats=262,144") + contains("distributionSpec=DistributionSpecReplicated") + check { explainStr -> + // smallb should still be the broadcast side. + assertTrue((explainStr =~ /DistributionSpecReplicated[\s\S]*table=smallb/).find()) + } + } +}