Skip to content

Commit 2cea82f

Browse files
authored
support more emit modes in window (#517)
1 parent 73e1a82 commit 2cea82f

File tree

68 files changed

+1590
-780
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+1590
-780
lines changed

src/Core/Streaming/Watermark.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <base/types.h>
4+
35
namespace DB
46
{
57
namespace Streaming
@@ -9,5 +11,25 @@ namespace Streaming
911
/// The 1970-01-01 00:00:00 is Int64 value 0, earlier times are negative, so we need to use signed Int64 as watermark
1012
constexpr Int64 INVALID_WATERMARK = std::numeric_limits<Int64>::min();
1113
constexpr Int64 TIMEOUT_WATERMARK = std::numeric_limits<Int64>::max();
14+
15+
/// TODO: Separate EmitMode into two parts:
16+
/// 1) EmitStrategy - how to do finalization (what to emit) for all aggregates processing
17+
/// 2) WatermarkStragegy - how / when to stamp watermark
18+
enum class EmitMode : uint8_t
19+
{
20+
None = 0,
21+
Tail,
22+
23+
Periodic, /// Emit a processing time watermark at periodic interval
24+
25+
Watermark, /// Allow time skew in same window, emit the watermark when a window closed
26+
PeriodicWatermark, /// Same as WATERMARK, but emit the watermark at periodic interval
27+
28+
/* emit only keyed and changed states for aggregating */
29+
OnUpdate = 1u << 7, /// Emit a processing time watermark per batch of events
30+
PeriodicOnUpdate, /// Same as Periodic
31+
WatermarkOnUpdate, /// Same as Watermark, but emit the watermark per batch of events
32+
PeriodicWatermarkOnUpdate, /// Same as PeriodicWatermark
33+
};
1234
}
1335
}

src/Interpreters/ExpressionAnalyzer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1997,7 +1997,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
19971997
if (const auto * proxy = storage->as<Streaming::ProxyStream>())
19981998
{
19991999
if (auto window_desc = proxy->getStreamingWindowFunctionDescription();
2000-
window_desc && window_desc->type == Streaming::WindowType::SESSION)
2000+
window_desc && window_desc->type == Streaming::WindowType::Session)
20012001
{
20022002
auto & step = chain.getLastStep();
20032003
step.addRequiredOutput(window_desc->argument_names[0]);

src/Interpreters/InterpreterSelectQuery.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
#include <Processors/QueryPlan/Streaming/WatermarkStep.h>
112112
#include <Processors/QueryPlan/Streaming/WatermarkStepWithSubstream.h>
113113
#include <Processors/QueryPlan/Streaming/WindowStep.h>
114+
#include <Processors/Transforms/Streaming/AggregatingHelper.h>
114115
#include <Processors/Transforms/Streaming/WatermarkStamper.h>
115116
#include <Storages/Streaming/ProxyStream.h>
116117
#include <Storages/Streaming/StorageStream.h>
@@ -3219,7 +3220,7 @@ void InterpreterSelectQuery::executeStreamingAggregation(
32193220
ErrorCodes::NOT_IMPLEMENTED,
32203221
"User defined aggregation function with emit strategy shouldn't be used together with other aggregation function");
32213222

3222-
if (windowType() != Streaming::WindowType::NONE)
3223+
if (windowType() != Streaming::WindowType::None)
32233224
throw Exception(
32243225
ErrorCodes::NOT_IMPLEMENTED,
32253226
"User defined aggregation function with emit strategy shouldn't be used together with streaming window");
@@ -3240,6 +3241,9 @@ void InterpreterSelectQuery::executeStreamingAggregation(
32403241
auto tracking_updates_type = Streaming::TrackingUpdatesType::None;
32413242
if (data_stream_semantic_pair.isChangelogOutput())
32423243
tracking_updates_type = Streaming::TrackingUpdatesType::UpdatesWithRetract;
3244+
/// TODO: A optimization for `emit on update`, we don't need to track updates and just directly convert each input (fast in and fast out)
3245+
else if (Streaming::AggregatingHelper::onlyEmitUpdates(emit_mode))
3246+
tracking_updates_type = Streaming::TrackingUpdatesType::Updates;
32433247

32443248
Streaming::Aggregator::Params params(
32453249
header_before_aggregation,
@@ -3278,10 +3282,10 @@ void InterpreterSelectQuery::executeStreamingAggregation(
32783282
/// 2) `shuffle by`: calculating light substream without substream ID (The data have been shuffled by `LightShufflingTransform`)
32793283
if (query_info.hasPartitionByKeys() || light_shuffled)
32803284
query_plan.addStep(std::make_unique<Streaming::AggregatingStepWithSubstream>(
3281-
query_plan.getCurrentDataStream(), std::move(params), final, emit_version, data_stream_semantic_pair.isChangelogOutput()));
3285+
query_plan.getCurrentDataStream(), std::move(params), final, emit_version, data_stream_semantic_pair.isChangelogOutput(), emit_mode));
32823286
else
32833287
query_plan.addStep(std::make_unique<Streaming::AggregatingStep>(
3284-
query_plan.getCurrentDataStream(), std::move(params), final, merge_threads, temporary_data_merge_threads, emit_version, data_stream_semantic_pair.isChangelogOutput()));
3288+
query_plan.getCurrentDataStream(), std::move(params), final, merge_threads, temporary_data_merge_threads, emit_version, data_stream_semantic_pair.isChangelogOutput(), emit_mode));
32853289
}
32863290

32873291
/// Resolve input / output data stream semantic.
@@ -3370,7 +3374,7 @@ Streaming::WindowType InterpreterSelectQuery::windowType() const
33703374
}
33713375
}
33723376

3373-
return Streaming::WindowType::NONE;
3377+
return Streaming::WindowType::None;
33743378
}
33753379

33763380
bool InterpreterSelectQuery::hasStreamingGlobalAggregation() const
@@ -3498,12 +3502,14 @@ void InterpreterSelectQuery::buildShufflingQueryPlan(QueryPlan & query_plan)
34983502
query_plan.getCurrentDataStream(), std::move(substream_key_positions), shuffle_output_streams));
34993503
}
35003504

3501-
void InterpreterSelectQuery::buildWatermarkQueryPlan(QueryPlan & query_plan) const
3505+
void InterpreterSelectQuery::buildWatermarkQueryPlan(QueryPlan & query_plan)
35023506
{
35033507
assert(isStreamingQuery());
35043508
auto params = std::make_shared<Streaming::WatermarkStamperParams>(
35053509
query_info.query, query_info.syntax_analyzer_result, query_info.streaming_window_params);
35063510

3511+
emit_mode = params->mode; /// saved it to be used for streaming aggregating step
3512+
35073513
bool skip_stamping_for_backfill_data = !context->getSettingsRef().emit_during_backfill.value;
35083514

35093515
if (query_info.hasPartitionByKeys())

src/Interpreters/InterpreterSelectQuery.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery
198198
void finalCheckAndOptimizeForStreamingQuery();
199199
bool shouldKeepAggregateState() const;
200200
void buildShufflingQueryPlan(QueryPlan & query_plan);
201-
void buildWatermarkQueryPlan(QueryPlan & query_plan) const;
201+
void buildWatermarkQueryPlan(QueryPlan & query_plan);
202202
void buildStreamingProcessingQueryPlanBeforeJoin(QueryPlan & query_plan);
203203
void buildStreamingProcessingQueryPlanAfterJoin(QueryPlan & query_plan);
204204
void checkEmitVersion();
@@ -257,6 +257,9 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery
257257
mutable std::optional<bool> is_streaming_query;
258258
bool shuffled_before_join = false;
259259
bool light_shuffled = false;
260+
261+
Streaming::EmitMode emit_mode = Streaming::EmitMode::None;
262+
260263
/// Overall data stream semantic defines the output semantic of the current layer of SELECT
261264
Streaming::DataStreamSemanticPair data_stream_semantic_pair;
262265
/// proton: ends

0 commit comments

Comments
 (0)