From fc809148f28487f3053bfc8317e3f564bacba39d Mon Sep 17 00:00:00 2001 From: Jaromir Hamala Date: Wed, 4 Jun 2025 17:08:42 +0200 Subject: [PATCH 1/2] Reference doc for ASOF JOIN TOLERANCE --- documentation/reference/sql/asof-join.md | 82 ++++++- documentation/reference/sql/join.md | 3 +- static/images/docs/diagrams/.railroad | 9 +- static/images/docs/diagrams/AsofJoin.svg | 81 +++++++ .../images/docs/diagrams/AsofLtSpliceJoin.svg | 203 +++++++++++------- 5 files changed, 291 insertions(+), 87 deletions(-) create mode 100644 static/images/docs/diagrams/AsofJoin.svg diff --git a/documentation/reference/sql/asof-join.md b/documentation/reference/sql/asof-join.md index e7ddade9..78cacd18 100644 --- a/documentation/reference/sql/asof-join.md +++ b/documentation/reference/sql/asof-join.md @@ -34,14 +34,14 @@ Visualized, a JOIN operation looks like this: for more information. - `joinClause` `ASOF JOIN` with an optional `ON` clause which allows only the - `=` predicate: + `=` predicate and an optional `TOLERANCE` clause: - ![Flow chart showing the syntax of the ASOF, LT, and SPLICE JOIN keyword](/images/docs/diagrams/AsofLtSpliceJoin.svg) + ![Flow chart showing the syntax of the ASOF, LT, and SPLICE JOIN keyword](/images/docs/diagrams/AsofJoin.svg) - `whereClause` - see the [WHERE](/docs/reference/sql/where/) reference docs for more information. -In addition, the following are items of import: +In addition, the following are items of importance: - Columns from joined tables are combined in a single row. @@ -67,6 +67,9 @@ logic: for each row in the first time-series, 1. consider all timestamps in the second time-series **earlier or equal to** the first one 2. choose **the latest** such timestamp +3. If the optional `TOLERANCE` clause is specified, an additional condition applies: + the chosen record from t2 must satisfy `t1.ts - t2.ts <= tolerance_value`. If no record + from t2 meets this condition (along with `t2.ts <= t1.ts`), then the row from t1 will not have a match. ### Example @@ -151,7 +154,7 @@ Let's use an example with two tables: We want to join each trade event to the relevant order book snapshot. All we have to write is -```questdb-sql title="A basic ASOF JOIN example" demo +```questdb-sql title="A basic ASOF JOIN example" trades ASOF JOIN order_book ``` @@ -327,6 +330,77 @@ Result: +### TOLERANCE clause + +The `TOLERANCE` clause enhances ASOF JOIN by limiting how far back in time the join should look for a match in the right +table. The `TOLERANCE` parameter accepts a time interval value (e.g., 2s, 100ms, 1d). + +When specified, a record from the left table t1 at t1.ts will only be joined with a record from the right table t2 at +t2.ts if both conditions are met: `t2.ts <= t1.ts` and `t1.ts - t2.ts <= tolerance_value` + +This ensures that the matched record from the right table is not only the latest one on or before t1.ts, but also within +the specified time window. + +```questdb-sql title="ASOF JOIN with a TOLERANCE parameter" +SELECT ... +FROM table1 +ASOF JOIN table2 TOLERANCE 10s +[WHERE ...] +``` + +TOLERANCE also works together with the ON clause: +```questdb-sql title="ASOF JOIN with keys and a TOLERANCE parameter" +SELECT ... +FROM table1 +ASOF JOIN table2 ON (key_column) TOLERANCE 1m +[WHERE ...] +``` + +The interval_literal must be a valid QuestDB interval string, like '5s' (5 seconds), '100ms' (100 milliseconds), '2m' ( +2 minutes), '3h' (3 hours), or '1d' (1 day). + + +#### Example using TOLERANCE: + +Consider the `trades` and `order_book` tables from the previous examples. If we want to join trades to order book snapshots +that occurred no more than 1 second before the trade: + +```questdb-sql title="TOLERANCE example" +SELECT t.timestamp, t.price, t.size, ob.timestamp AS ob_ts, ob.bid_price, ob.bid_size +FROM trades t +ASOF JOIN order_book ob TOLERANCE 1s; +``` + +Let's analyze a specific trade: trades at `08:00:01.146931`. +Without `TOLERANCE`, it joins with `order_book` at `08:00:01`. The time difference is 0.146931s. +If we set `TOLERANCE` '100ms', this trade would not find a match, because 0.146931s (146.931ms) is greater than 100ms. The +previous `order_book` entry at `08:00:00` would be even further away (1.146931s). + +Another trade: trades at `08:00:00.007140`. +Without `TOLERANCE`, it joins with order_book at `08:00:00`. The time difference is 0.007140s (7.14ms). +If we set `TOLERANCE` '5ms', this trade would not find a match because 7.14ms > 5ms. + +#### Supported Units for interval_literal +The `TOLERANCE` interval literal supports the following time unit qualifiers: +- U: Microseconds +- T: Milliseconds +- s: Seconds +- m: Minutes +- h: Hours +- d: Days +- w: Weeks + +For example, '100U' is 100 microseconds, '50T' is 50 milliseconds, '2s' is 2 seconds, '30m' is 30 minutes, +'1h' is 1 hour, '7d' is 7 days, and '2w' is 2 weeks. Please note that months (M) and years (Y) are not supported as +units for the `TOLERANCE` clause. + +#### Performance impact of TOLERANCE + +Specifying `TOLERANCE` can also improve performance. `ASOF JOIN` execution plans often scan backward in time on the right +table to find a matching entry for each left-table row. `TOLERANCE` allows these scans to terminate early - once a +right-table record is older than the left-table record by more than the specified tolerance - thus avoiding unnecessary +processing of more distant records. + ### Timestamp considerations `ASOF` join can be performed only on tables or result sets that are ordered by diff --git a/documentation/reference/sql/join.md b/documentation/reference/sql/join.md index 4edf188b..7f023301 100644 --- a/documentation/reference/sql/join.md +++ b/documentation/reference/sql/join.md @@ -33,7 +33,8 @@ High-level overview: ![Flow chart showing the syntax of the INNER, LEFT JOIN keyword](/images/docs/diagrams/InnerLeftJoin.svg) - `ASOF`, `LT`, and `SPLICE` `JOIN` has optional `ON` clause allowing only the - `=` predicate: + `=` predicate. + - `ASOF` additionally allows an optional `TOLERANCE` clause: ![Flow chart showing the syntax of the ASOF, LT, and SPLICE JOIN keyword](/images/docs/diagrams/AsofLtSpliceJoin.svg) diff --git a/static/images/docs/diagrams/.railroad b/static/images/docs/diagrams/.railroad index e88bae5b..51eb5e09 100644 --- a/static/images/docs/diagrams/.railroad +++ b/static/images/docs/diagrams/.railroad @@ -238,8 +238,13 @@ joinOverview innerLeftJoin ::= ( 'INNER' | 'LEFT' )? 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( ( column operator anotherColumn ) ( 'AND' ( column operator anotherColumn ) )* | '(' column ( ',' column )* ')' ) )? -aAsofLtSpliceJoin - ::= ( 'ASOF' | 'LT' | 'SPLICE' ) 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? +AsofLtSpliceJoin + ::= 'ASOF' 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? ( 'TOLERANCE' interval_literal )? + | ( 'LT' | 'SPLICE' ) 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? + +AsofJoin + ::= 'ASOF' 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? ( 'TOLERANCE' interval_literal )? + crossJoin ::= 'CROSS' 'JOIN' ( table | '(' sub-query ')' ) diff --git a/static/images/docs/diagrams/AsofJoin.svg b/static/images/docs/diagrams/AsofJoin.svg new file mode 100644 index 00000000..279969d3 --- /dev/null +++ b/static/images/docs/diagrams/AsofJoin.svg @@ -0,0 +1,81 @@ + + + + + + + + + ASOF + + + JOIN + + + table + + ( + + + sub-query + + ) + + + ON + + + column + + = + + + anotherColumn + + AND + + + ( + + + column + + , + + + ) + + + TOLERANCE + + + interval_literal + + + \ No newline at end of file diff --git a/static/images/docs/diagrams/AsofLtSpliceJoin.svg b/static/images/docs/diagrams/AsofLtSpliceJoin.svg index 84ca809e..6ada7afc 100644 --- a/static/images/docs/diagrams/AsofLtSpliceJoin.svg +++ b/static/images/docs/diagrams/AsofLtSpliceJoin.svg @@ -1,81 +1,124 @@ - - - - - - - - + + + + + + + + ASOF - - - LT - - - SPLICE - - - JOIN - - - table - - ( - - - sub-query - - ) - - - ON - - - column - - = - - - anotherColumn - - AND - - - ( - - - column - - , - - - ) - - - \ No newline at end of file + + + JOIN + + + table + + ( + + + sub-query + + ) + + + ON + + + column + + = + + + anotherColumn + + AND + + + ( + + + column + + , + + + ) + + + TOLERANCE + + + interval_literal + + LT + + + SPLICE + + + JOIN + + + table + + ( + + + sub-query + + ) + + + ON + + + column + + = + + + anotherColumn + + AND + + + ( + + + column + + , + + + ) + + + + \ No newline at end of file From fa533f1418611cb9800cc3305cfbcbcab4394cd0 Mon Sep 17 00:00:00 2001 From: Jaromir Hamala Date: Fri, 6 Jun 2025 13:22:22 +0200 Subject: [PATCH 2/2] LT JOIN supports TOLERANCE too --- documentation/reference/sql/asof-join.md | 2 +- documentation/reference/sql/join.md | 40 +++- static/images/docs/diagrams/.railroad | 4 +- .../images/docs/diagrams/AsofLtSpliceJoin.svg | 184 +++++++++--------- 4 files changed, 133 insertions(+), 97 deletions(-) diff --git a/documentation/reference/sql/asof-join.md b/documentation/reference/sql/asof-join.md index 78cacd18..01b3111d 100644 --- a/documentation/reference/sql/asof-join.md +++ b/documentation/reference/sql/asof-join.md @@ -332,7 +332,7 @@ Result: ### TOLERANCE clause -The `TOLERANCE` clause enhances ASOF JOIN by limiting how far back in time the join should look for a match in the right +The `TOLERANCE` clause enhances ASOF and LT JOINs by limiting how far back in time the join should look for a match in the right table. The `TOLERANCE` parameter accepts a time interval value (e.g., 2s, 100ms, 1d). When specified, a record from the left table t1 at t1.ts will only be joined with a record from the right table t2 at diff --git a/documentation/reference/sql/join.md b/documentation/reference/sql/join.md index 7f023301..e63d89e9 100644 --- a/documentation/reference/sql/join.md +++ b/documentation/reference/sql/join.md @@ -34,7 +34,7 @@ High-level overview: - `ASOF`, `LT`, and `SPLICE` `JOIN` has optional `ON` clause allowing only the `=` predicate. - - `ASOF` additionally allows an optional `TOLERANCE` clause: + - `ASOF` and `LT` join additionally allows an optional `TOLERANCE` clause: ![Flow chart showing the syntax of the ASOF, LT, and SPLICE JOIN keyword](/images/docs/diagrams/AsofLtSpliceJoin.svg) @@ -312,7 +312,7 @@ WHERE t.timestamp < t2.timestamp ## LT JOIN -Similar to `ASOF JOIN`, `LT JOIN` joins two different time-series measured. For +Similar to [`ASOF JOIN`](/docs/reference/sql/asof-join/), `LT JOIN` joins two different time-series measured. For each row in the first time-series, the `LT JOIN` takes from the second time-series a timestamp that meets both of the following criteria: @@ -395,6 +395,42 @@ order to get preceding values for every row. The `ON` clause can also be used in combination with `LT JOIN` to join both by timestamp and column values. +### TOLERANCE clause +The `TOLERANCE` clause enhances LT JOIN by limiting how far back in time the join should look for a match in the right +table. The `TOLERANCE` parameter accepts a time interval value (e.g., 2s, 100ms, 1d). + +When specified, a record from the left table t1 at t1.ts will only be joined with a record from the right table t2 at +t2.ts if both conditions are met: `t2.ts < t1.ts` and `t1.ts - t2.ts <= tolerance_value` + +This ensures that the matched record from the right table is not only the latest one on or before t1.ts, but also within +the specified time window. + +```questdb-sql title="LT JOIN with a TOLERANCE parameter" +SELECT ... +FROM table1 +LT JOIN table2 TOLERANCE 10s +[WHERE ...] +``` + +The interval_literal must be a valid QuestDB interval string, like '5s' (5 seconds), '100ms' (100 milliseconds), +'2m' ( 2 minutes), '3h' (3 hours), or '1d' (1 day). + +#### Supported Units for interval_literal +The `TOLERANCE` interval literal supports the following time unit qualifiers: +- U: Microseconds +- T: Milliseconds +- s: Seconds +- m: Minutes +- h: Hours +- d: Days +- w: Weeks + +For example, '100U' is 100 microseconds, '50T' is 50 milliseconds, '2s' is 2 seconds, '30m' is 30 minutes, +'1h' is 1 hour, '7d' is 7 days, and '2w' is 2 weeks. Please note that months (M) and years (Y) are not supported as +units for the `TOLERANCE` clause. + +See [`ASOF JOIN documentation`](/docs/reference/sql/asof-join#tolerance-clause) for more examples with the `TOLERANCE` clause. + ## SPLICE JOIN `SPLICE JOIN` is a full `ASOF JOIN`. It will return all the records from both diff --git a/static/images/docs/diagrams/.railroad b/static/images/docs/diagrams/.railroad index 51eb5e09..b3683340 100644 --- a/static/images/docs/diagrams/.railroad +++ b/static/images/docs/diagrams/.railroad @@ -239,8 +239,8 @@ innerLeftJoin ::= ( 'INNER' | 'LEFT' )? 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( ( column operator anotherColumn ) ( 'AND' ( column operator anotherColumn ) )* | '(' column ( ',' column )* ')' ) )? AsofLtSpliceJoin - ::= 'ASOF' 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? ( 'TOLERANCE' interval_literal )? - | ( 'LT' | 'SPLICE' ) 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? + ::= ( 'ASOF' | 'LT' ) 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? ( 'TOLERANCE' interval_literal )? + | 'SPLICE' 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? AsofJoin ::= 'ASOF' 'JOIN' ( table | '(' sub-query ')' ) ( 'ON' ( column '=' anotherColumn ( 'AND' column '=' anotherColumn )* | '(' column ( ',' column )* ')' ) )? ( 'TOLERANCE' interval_literal )? diff --git a/static/images/docs/diagrams/AsofLtSpliceJoin.svg b/static/images/docs/diagrams/AsofLtSpliceJoin.svg index 6ada7afc..1b91bbe2 100644 --- a/static/images/docs/diagrams/AsofLtSpliceJoin.svg +++ b/static/images/docs/diagrams/AsofLtSpliceJoin.svg @@ -1,4 +1,4 @@ - + - - - ASOF - - - JOIN - - - table - - ( - - - sub-query - - ) - - - ON - - - column - - = - - - anotherColumn - - AND - - - ( - - - column - - , - - - ) - - - TOLERANCE - - - interval_literal - - LT - - - SPLICE - - - JOIN - - - table - - ( - - - sub-query - - ) - - - ON - - - column - - = - - - anotherColumn - - AND - - - ( - - - column - - , - - - ) - - - + + + ASOF + + + LT + + + JOIN + + + table + + ( + + + sub-query + + ) + + + ON + + + column + + = + + + anotherColumn + + AND + + + ( + + + column + + , + + + ) + + + TOLERANCE + + + interval_literal + + SPLICE + + + JOIN + + + table + + ( + + + sub-query + + ) + + + ON + + + column + + = + + + anotherColumn + + AND + + + ( + + + column + + , + + + ) + + + \ No newline at end of file