diff --git a/cpp/velox/benchmarks/GenericBenchmark.cc b/cpp/velox/benchmarks/GenericBenchmark.cc index 4a96bbdf29917..244e0b885a3eb 100644 --- a/cpp/velox/benchmarks/GenericBenchmark.cc +++ b/cpp/velox/benchmarks/GenericBenchmark.cc @@ -90,8 +90,6 @@ DEFINE_string( "", "The regexp of traced task id. We only enable trace on a task if its id matches."); -DEFINE_string(query_trace_query_id, "", "The user defined query id"); - struct WriterMetrics { int64_t splitTime{0}; int64_t evictTime{0}; @@ -369,8 +367,6 @@ void setQueryTraceConfig(std::unordered_map& configs) if (FLAGS_query_trace_task_reg_exp != "") { configs[kQueryTraceTaskRegExp] = FLAGS_query_trace_task_reg_exp; } - GLUTEN_CHECK(FLAGS_query_trace_query_id != "", "query is should be set"); - configs[kQueryTraceQueryId] = FLAGS_query_trace_query_id; } } // namespace diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 6267b3b611543..20f54856649b6 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -51,14 +51,6 @@ const std::string kWriteIOTime = "writeIOTime"; // others const std::string kHiveDefaultPartition = "__HIVE_DEFAULT_PARTITION__"; -std::string getQueryId(const std::unordered_map& confMap) { - auto it = confMap.find(kQueryTraceQueryId); - if (it != confMap.end()) { - return it->second; - } - return ""; -} - } // namespace WholeStageResultIterator::WholeStageResultIterator( @@ -75,7 +67,6 @@ WholeStageResultIterator::WholeStageResultIterator( std::make_shared(std::unordered_map(confMap))), taskInfo_(taskInfo), veloxPlan_(planNode), - queryId_(getQueryId(confMap)), scanNodeIds_(scanNodeIds), scanInfos_(scanInfos), streamIds_(streamIds) { @@ -187,7 +178,7 @@ WholeStageResultIterator::WholeStageResultIterator( std::shared_ptr WholeStageResultIterator::createNewVeloxQueryCtx() { std::unordered_map> connectorConfigs; connectorConfigs[kHiveConnectorId] = createConnectorConfig(); - + static std::atomic vqId{0}; // Velox query ID, same with taskId. std::shared_ptr ctx = velox::core::QueryCtx::create( nullptr, facebook::velox::core::QueryConfig{getQueryContextConf()}, @@ -195,7 +186,11 @@ std::shared_ptr WholeStageResultIterator::createNewVeloxQ gluten::VeloxBackend::get()->getAsyncDataCache(), memoryManager_->getAggregateMemoryPool(), spillExecutor_.get(), - queryId_); + fmt::format( + "Gluten_Stage_{}_TID_{}_VTID_{}", + std::to_string(taskInfo_.stageId), + std::to_string(taskInfo_.taskId), + std::to_string(vqId++))); return ctx; } diff --git a/cpp/velox/compute/WholeStageResultIterator.h b/cpp/velox/compute/WholeStageResultIterator.h index 06e4d5d703691..d0dd47bfd1e39 100644 --- a/cpp/velox/compute/WholeStageResultIterator.h +++ b/cpp/velox/compute/WholeStageResultIterator.h @@ -109,7 +109,6 @@ class WholeStageResultIterator : public ColumnarBatchIterator { const SparkTaskInfo taskInfo_; std::shared_ptr task_; std::shared_ptr veloxPlan_; - const std::string queryId_; /// Spill. std::string spillStrategy_; diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h index c226e3f958929..84493cc469198 100644 --- a/cpp/velox/config/VeloxConfig.h +++ b/cpp/velox/config/VeloxConfig.h @@ -154,7 +154,4 @@ const std::string kQueryTraceTaskRegExp = "spark.gluten.sql.columnar.backend.vel /// defined by the underlying file system. const std::string kOpTraceDirectoryCreateConfig = "spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig"; -// Internal config for query benchmark and enable query trace. The default query id for Velox QueryCtx is "", set it if -// this config is set. -const std::string kQueryTraceQueryId = "spark.gluten.sql.columnar.backend.velox.queryTraceQueryId"; } // namespace gluten diff --git a/docs/developers/QueryTrace.md b/docs/developers/QueryTrace.md index a27b9ba442cb6..71c66215110e0 100644 --- a/docs/developers/QueryTrace.md +++ b/docs/developers/QueryTrace.md @@ -1,7 +1,7 @@ --- layout: page -title: How To Use Gluten -nav_order: 1 +title: QueryTrace +nav_order: 14 parent: Developer Overview --- @@ -62,7 +62,7 @@ Now we can see the data in query trace directory `/tmp/query_trace`. ```shell /tmp/query_trace/ -└── query_1 +└── Gluten_Stage_0_TID_0_VTID_0 └── Gluten_Stage_0_TID_0_VTID_0 ├── 7 │   └── 0 @@ -75,7 +75,7 @@ Fourthly, replay the query. Show the query trace summary by following command. ```shell -/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=query_1 --node_id=7 --summary +/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=Gluten_Stage_0_TID_0_VTID_0 --node_id=7 --summary WARNING: Logging before InitGoogleLogging() is written to STDERR I0115 20:27:25.821105 2684048 HiveConnector.cpp:56] Hive connector test-hive created with maximum of 20000 cached file handles. I0115 20:27:25.823112 2684048 TraceReplayRunner.cpp:223] @@ -149,7 +149,7 @@ driver 0: opType PartialAggregation, inputRows 293762, inputBytes 5.69MB, rawIn Then you can use following command to re-execute the query plan. ```shell -/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=query_1 --node_id=7 +/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=Gluten_Stage_0_TID_0_VTID_0 --node_id=7 WARNING: Logging before InitGoogleLogging() is written to STDERR I0115 20:30:17.665169 2685397 HiveConnector.cpp:56] Hive connector test-hive created with maximum of 20000 cached file handles. I0115 20:30:17.676046 2685397 Cursor.cpp:192] Task spill directory[/tmp/velox_test_H163pi/test_cursor 1] created @@ -172,4 +172,3 @@ Here is the full list of query trace flags in MicroBenchmark. - query_trace_node_ids: A comma-separated list of plan node ids whose input data will be traced. Empty string if only want to trace the query metadata. - query_trace_max_bytes: The max trace bytes limit. Tracing is disabled if zero. - query_trace_task_reg_exp: The regexp of traced task id. We only enable trace on a task if its id matches. -- query_trace_query_id: The user defined query id.