Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Jan 20, 2025
1 parent c89f8fd commit 4ac6780
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 25 deletions.
4 changes: 0 additions & 4 deletions cpp/velox/benchmarks/GenericBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ DEFINE_string(
"",
"The regexp of traced task id. We only enable trace on a task if its id matches.");

DEFINE_string(query_trace_query_id, "", "The user defined query id");

struct WriterMetrics {
int64_t splitTime{0};
int64_t evictTime{0};
Expand Down Expand Up @@ -369,8 +367,6 @@ void setQueryTraceConfig(std::unordered_map<std::string, std::string>& configs)
if (FLAGS_query_trace_task_reg_exp != "") {
configs[kQueryTraceTaskRegExp] = FLAGS_query_trace_task_reg_exp;
}
GLUTEN_CHECK(FLAGS_query_trace_query_id != "", "query is should be set");
configs[kQueryTraceQueryId] = FLAGS_query_trace_query_id;
}
} // namespace

Expand Down
17 changes: 6 additions & 11 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ const std::string kWriteIOTime = "writeIOTime";
// others
const std::string kHiveDefaultPartition = "__HIVE_DEFAULT_PARTITION__";

/// Looks up the query id stored in `confMap` under kQueryTraceQueryId.
/// Returns the configured value, or an empty string when the key is not present.
std::string getQueryId(const std::unordered_map<std::string, std::string>& confMap) {
  const auto entry = confMap.find(kQueryTraceQueryId);
  return entry == confMap.end() ? std::string{} : entry->second;
}

} // namespace

WholeStageResultIterator::WholeStageResultIterator(
Expand All @@ -75,7 +67,6 @@ WholeStageResultIterator::WholeStageResultIterator(
std::make_shared<facebook::velox::config::ConfigBase>(std::unordered_map<std::string, std::string>(confMap))),
taskInfo_(taskInfo),
veloxPlan_(planNode),
queryId_(getQueryId(confMap)),
scanNodeIds_(scanNodeIds),
scanInfos_(scanInfos),
streamIds_(streamIds) {
Expand Down Expand Up @@ -187,15 +178,19 @@ WholeStageResultIterator::WholeStageResultIterator(
/// Creates a new Velox QueryCtx for this iterator.
///
/// The context is built from: the query config returned by getQueryContextConf(), a connector
/// config map holding a single entry for kHiveConnectorId, the async data cache obtained from
/// VeloxBackend, the aggregate memory pool of memoryManager_, the spill executor, and a query id
/// string of the form "Gluten_Stage_<stageId>_TID_<taskId>_VTID_<counter>".
///
/// @return The newly created QueryCtx.
std::shared_ptr<velox::core::QueryCtx> WholeStageResultIterator::createNewVeloxQueryCtx() {
// Connector configs keyed by connector id; only the Hive connector is registered here.
std::unordered_map<std::string, std::shared_ptr<velox::config::ConfigBase>> connectorConfigs;
connectorConfigs[kHiveConnectorId] = createConnectorConfig();

// Function-local static, so the counter is shared by every call of this function; it is
// post-incremented each time a query id is formatted below.
static std::atomic<uint32_t> vqId{0}; // Velox query ID counter; supplies the VTID part of the query id.
std::shared_ptr<velox::core::QueryCtx> ctx = velox::core::QueryCtx::create(
nullptr,
facebook::velox::core::QueryConfig{getQueryContextConf()},
connectorConfigs,
gluten::VeloxBackend::get()->getAsyncDataCache(),
memoryManager_->getAggregateMemoryPool(),
spillExecutor_.get(),
// NOTE(review): this listing is a rendered diff without +/- markers. The `queryId_);` line below
// appears to be the pre-change final argument, superseded by the fmt::format(...) call that
// follows it — confirm against the actual file; only one of the two can be present.
queryId_);
// Query id built from the Spark stage id, the Spark task id, and the counter above.
fmt::format(
"Gluten_Stage_{}_TID_{}_VTID_{}",
std::to_string(taskInfo_.stageId),
std::to_string(taskInfo_.taskId),
std::to_string(vqId++)));
return ctx;
}

Expand Down
1 change: 0 additions & 1 deletion cpp/velox/compute/WholeStageResultIterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ class WholeStageResultIterator : public ColumnarBatchIterator {
const SparkTaskInfo taskInfo_;
std::shared_ptr<facebook::velox::exec::Task> task_;
std::shared_ptr<const facebook::velox::core::PlanNode> veloxPlan_;
const std::string queryId_;

/// Spill.
std::string spillStrategy_;
Expand Down
3 changes: 0 additions & 3 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,4 @@ const std::string kQueryTraceTaskRegExp = "spark.gluten.sql.columnar.backend.vel
/// defined by the underlying file system.
const std::string kOpTraceDirectoryCreateConfig =
"spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig";
// Internal config used by the query benchmark to enable query trace. The default query id of a Velox QueryCtx is "";
// when this config is set, its value is used as the query id instead.
const std::string kQueryTraceQueryId = "spark.gluten.sql.columnar.backend.velox.queryTraceQueryId";
} // namespace gluten
11 changes: 5 additions & 6 deletions docs/developers/QueryTrace.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
layout: page
title: How To Use Gluten
nav_order: 1
title: QueryTrace
nav_order: 14
parent: Developer Overview
---

Expand Down Expand Up @@ -62,7 +62,7 @@ Now we can see the data in query trace directory `/tmp/query_trace`.

```shell
/tmp/query_trace/
└── query_1
└── Gluten_Stage_0_TID_0_VTID_0
└── Gluten_Stage_0_TID_0_VTID_0
├── 7
│   └── 0
Expand All @@ -75,7 +75,7 @@ Fourthly, replay the query. Show the query trace summary with the following command.

```shell

/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=query_1 --node_id=7 --summary
/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=Gluten_Stage_0_TID_0_VTID_0 --node_id=7 --summary
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0115 20:27:25.821105 2684048 HiveConnector.cpp:56] Hive connector test-hive created with maximum of 20000 cached file handles.
I0115 20:27:25.823112 2684048 TraceReplayRunner.cpp:223]
Expand Down Expand Up @@ -149,7 +149,7 @@ driver 0: opType PartialAggregation, inputRows 293762, inputBytes 5.69MB, rawIn
Then you can use the following command to re-execute the query plan.

```shell
/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=query_1 --node_id=7
/mnt/DP_disk1/code/velox/build/velox/tool/trace# ./velox_query_replayer --root_dir /tmp/query_trace --task_id Gluten_Stage_0_TID_0_VTID_0 --query_id=Gluten_Stage_0_TID_0_VTID_0 --node_id=7
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0115 20:30:17.665169 2685397 HiveConnector.cpp:56] Hive connector test-hive created with maximum of 20000 cached file handles.
I0115 20:30:17.676046 2685397 Cursor.cpp:192] Task spill directory[/tmp/velox_test_H163pi/test_cursor 1] created
Expand All @@ -172,4 +172,3 @@ Here is the full list of query trace flags in MicroBenchmark.
- query_trace_node_ids: A comma-separated list of plan node ids whose input data will be traced. Use an empty string if you only want to trace the query metadata.
- query_trace_max_bytes: The max trace bytes limit. Tracing is disabled if zero.
- query_trace_task_reg_exp: The regexp of traced task id. We only enable trace on a task if its id matches.
- query_trace_query_id: The user defined query id.

0 comments on commit 4ac6780

Please sign in to comment.