Skip to content

Commit

Permalink
support query trace
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Dec 31, 2024
1 parent 6c97e56 commit c3e87eb
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
12 changes: 12 additions & 0 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,18 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false";
}

// Copies the value stored under `glutenKey` in veloxCfg_ into `configs` under
// `veloxKey`. When the Gluten key is not set, `configs` is left untouched.
const auto setIfExists = [&](const std::string& glutenKey, const std::string& veloxKey) {
  const auto configured = veloxCfg_->get<std::string>(glutenKey);
  if (!configured.hasValue()) {
    return;
  }
  configs[veloxKey] = configured.value();
};

// Query trace settings: each Gluten key maps to the Velox QueryConfig key of the same name.
setIfExists(kQueryTraceEnabled, velox::core::QueryConfig::kQueryTraceEnabled);
setIfExists(kQueryTraceDir, velox::core::QueryConfig::kQueryTraceDir);
setIfExists(kQueryTraceNodeIds, velox::core::QueryConfig::kQueryTraceNodeIds);
setIfExists(kQueryTraceMaxBytes, velox::core::QueryConfig::kQueryTraceMaxBytes);
setIfExists(kQueryTraceTaskRegExp, velox::core::QueryConfig::kQueryTraceTaskRegExp);
setIfExists(kOpTraceDirectoryCreateConfig, velox::core::QueryConfig::kOpTraceDirectoryCreateConfig);
} catch (const std::invalid_argument& err) {
std::string errDetails = err.what();
throw std::runtime_error("Invalid conf arg: " + errDetails);
Expand Down
19 changes: 19 additions & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,23 @@ const uint32_t kGlogVerboseLevelDefault = 0;
const uint32_t kGlogVerboseLevelMaximum = 99;
const std::string kGlogSeverityLevel = "spark.gluten.sql.columnar.backend.velox.glogSeverityLevel";
const uint32_t kGlogSeverityLevelDefault = 1;

// Query trace.
// Gluten-side configuration keys for query tracing. The same key strings are
// declared as config entries in GlutenConfig on the Scala side, so the two
// definitions must be kept in sync.

/// Flag that enables query tracing.
const std::string kQueryTraceEnabled = "spark.gluten.sql.columnar.backend.velox.queryTraceEnabled";
/// Base directory under which a query's tracing data is stored.
const std::string kQueryTraceDir = "spark.gluten.sql.columnar.backend.velox.queryTraceDir";
/// A comma-separated list of plan node ids whose input data will be traced.
/// Leave as an empty string to trace only the query metadata.
const std::string kQueryTraceNodeIds = "spark.gluten.sql.columnar.backend.velox.queryTraceNodeIds";
/// The maximum number of bytes to trace. Tracing is disabled if zero.
const std::string kQueryTraceMaxBytes = "spark.gluten.sql.columnar.backend.velox.queryTraceMaxBytes";
/// Regular expression matched against task ids. Tracing is enabled only for
/// tasks whose id matches.
const std::string kQueryTraceTaskRegExp = "spark.gluten.sql.columnar.backend.velox.queryTraceTaskRegExp";
/// Config used to create the operator trace directory. It is passed to the
/// underlying file system as a free-form value; the expected format is defined
/// by that file system.
const std::string kOpTraceDirectoryCreateConfig =
"spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig";
} // namespace gluten
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,13 @@ object GlutenConfig {
SPARK_GCS_STORAGE_ROOT_URL,
SPARK_GCS_AUTH_TYPE,
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
SPARK_REDACTION_REGEX
SPARK_REDACTION_REGEX,
// `keys` is compared with the keys of `conf` (see `keys.contains(e._1)` below),
// so register each entry's key string. The config entry object itself never
// equals a conf key, which would silently drop these settings from nativeConfMap.
QUERY_TRACE_ENABLED.key,
QUERY_TRACE_DIR.key,
QUERY_TRACE_NODE_IDS.key,
QUERY_TRACE_MAX_BYTES.key,
QUERY_TRACE_TASK_REG_EXP.key,
OP_TRACE_DIRECTORY_CREATE_CONFIG.key
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down Expand Up @@ -2255,4 +2261,43 @@ object GlutenConfig {
.doc("If enabled, gluten will convert the viewfs path to hdfs path in scala side")
.booleanConf
.createWithDefault(false)

/**
 * Boolean switch for query tracing; defaults to false.
 *
 * The key string matches kQueryTraceEnabled in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val QUERY_TRACE_ENABLED = buildConf("spark.gluten.sql.columnar.backend.velox.queryTraceEnabled")
.doc("Enable query tracing flag.")
.booleanConf
.createWithDefault(false)

/**
 * Base directory in which a query's tracing data is stored; defaults to the empty string.
 *
 * The key string matches kQueryTraceDir in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val QUERY_TRACE_DIR = buildConf("spark.gluten.sql.columnar.backend.velox.queryTraceDir")
.doc("Base dir of a query to store tracing data.")
.stringConf
.createWithDefault("")

/**
 * Comma-separated plan node ids whose input data should be traced; defaults to the
 * empty string, which per the doc text traces only the query metadata.
 *
 * The key string matches kQueryTraceNodeIds in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val QUERY_TRACE_NODE_IDS = buildConf("spark.gluten.sql.columnar.backend.velox.queryTraceNodeIds")
.doc(
"A comma-separated list of plan node ids whose input data will be traced. " +
"Empty string if only want to trace the query metadata.")
.stringConf
.createWithDefault("")

/**
 * Upper bound, in bytes, on traced data. Long-valued; defaults to 0, which per the
 * doc text disables tracing.
 *
 * The key string matches kQueryTraceMaxBytes in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val QUERY_TRACE_MAX_BYTES =
buildConf("spark.gluten.sql.columnar.backend.velox.queryTraceMaxBytes")
.doc("The max trace bytes limit. Tracing is disabled if zero.")
.longConf
.createWithDefault(0)

/**
 * Regular expression selecting the task ids to trace; defaults to the empty string.
 *
 * The key string matches kQueryTraceTaskRegExp in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val QUERY_TRACE_TASK_REG_EXP =
buildConf("spark.gluten.sql.columnar.backend.velox.queryTraceTaskRegExp")
.doc("The regexp of traced task id. We only enable trace on a task if its id matches.")
.stringConf
.createWithDefault("")

/**
 * Free-form string used when creating the operator trace directory; defaults to the
 * empty string. Per the doc text, its format is defined by the underlying file system.
 *
 * The key string matches kOpTraceDirectoryCreateConfig in cpp/velox/config/VeloxConfig.h;
 * change both together.
 */
val OP_TRACE_DIRECTORY_CREATE_CONFIG =
buildConf("spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig")
.doc(
"Config used to create operator trace directory. This config is provided to" +
" underlying file system and the config is free form. The form should be " +
"defined by the underlying file system.")
.stringConf
.createWithDefault("")

}

0 comments on commit c3e87eb

Please sign in to comment.