-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#303: Get flow checkpoints refactoring #304
Changes from 20 commits
6f37267
68e5622
e6a65b3
df7119d
ce05796
52a9243
0ac613a
e3d198c
e0f40a5
353af66
0c520c5
7e4513a
30c0766
93a64f0
ba52d4e
b5122c8
728ee5d
4d0175b
877fac3
57c5bbe
12466a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,3 +91,4 @@ utils/resources/*.conf | |
/server/certs/ | ||
/server/selfsigned.crt | ||
/server/selfsigned.p12 | ||
/.bloop/ | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
/* | ||
* Copyright 2021 ABSA Group Limited | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
DROP FUNCTION IF EXISTS flows.get_flow_checkpoints( | ||
i_flow_id BIGINT, | ||
i_checkpoints_limit INT, | ||
i_offset BIGINT, | ||
i_checkpoint_name TEXT | ||
); | ||
|
||
CREATE OR REPLACE FUNCTION flows.get_flow_checkpoints( | ||
IN i_flow_id BIGINT, | ||
IN i_checkpoints_limit INT DEFAULT 5, | ||
IN i_offset BIGINT DEFAULT 0, | ||
IN i_checkpoint_name TEXT DEFAULT NULL, | ||
IN i_latest_first BOOLEAN DEFAULT TRUE, | ||
OUT status INTEGER, | ||
OUT status_text TEXT, | ||
OUT id_checkpoint UUID, | ||
OUT checkpoint_name TEXT, | ||
OUT checkpoint_author TEXT, | ||
OUT measured_by_atum_agent BOOLEAN, | ||
OUT measure_name TEXT, | ||
OUT measured_columns TEXT[], | ||
OUT measurement_value JSONB, | ||
OUT checkpoint_start_time TIMESTAMP WITH TIME ZONE, | ||
OUT checkpoint_end_time TIMESTAMP WITH TIME ZONE, | ||
OUT id_partitioning BIGINT, | ||
OUT partitioning JSONB, | ||
OUT partitioning_author TEXT, | ||
OUT has_more BOOLEAN | ||
) RETURNS SETOF record AS | ||
$$ | ||
-------------------------------------------------------------------------------------------------------------------- | ||
-- | ||
-- Function: flows.get_flow_checkpoints(4) | ||
-- Retrieves all checkpoints (measures and their measurement details) related to a primary flow | ||
-- associated with the input partitioning. | ||
-- | ||
-- Note: a single row returned from this function doesn't contain all data related to a single checkpoint - it only | ||
-- represents one measure associated with a checkpoint. So even if only a single checkpoint would be retrieved, | ||
-- this function can potentially return multiple rows. | ||
-- | ||
-- Note: checkpoints will be retrieved in ordered fashion, by checkpoint_time and id_checkpoint | ||
-- | ||
-- Parameters: | ||
-- i_partitioning_of_flow - partitioning to use for identifying the flow associate with checkpoints | ||
-- that will be retrieved | ||
-- i_checkpoints_limit - (optional) maximum number of checkpoint to return, returns all of them if NULL | ||
-- i_offset - (optional) offset for checkpoints pagination | ||
-- i_checkpoint_name - (optional) if specified, returns data related to particular checkpoint's name | ||
-- i_latest_first - (optional) if true, checkpoints are ordered by process_start_time in descending order | ||
-- | ||
-- Note: i_checkpoint_limit and i_offset are used for pagination purposes; | ||
-- checkpoints are ordered by process_start_time in descending order | ||
-- and then by id_checkpoint in ascending order | ||
-- | ||
-- Returns: | ||
-- status - Status code | ||
-- status_text - Status text | ||
-- id_checkpoint - ID of retrieved checkpoint | ||
-- checkpoint_name - Name of the retrieved checkpoint | ||
-- checkpoint_author - Author of the checkpoint | ||
-- measured_by_atum_agent - Flag indicating whether the checkpoint was measured by Atum Agent | ||
-- (if false, data supplied manually) | ||
-- measure_name - measure name associated with a given checkpoint | ||
-- measured_columns - measure columns associated with a given checkpoint | ||
-- measurement_value - measurement details associated with a given checkpoint | ||
-- checkpoint_start_time - Time of the checkpoint | ||
-- checkpoint_end_time - End time of the checkpoint computation | ||
-- id_partitioning - ID of the partitioning | ||
-- partitioning - Partitioning value | ||
-- partitioning_author - Author of the partitioning | ||
-- has_more - flag indicating whether there are more checkpoints available, always `false` if `i_limit` is NULL | ||
-- | ||
-- Status codes: | ||
-- 11 - OK | ||
-- 42 - Flow not found | ||
--------------------------------------------------------------------------------------------------- | ||
DECLARE | ||
_has_more BOOLEAN; | ||
BEGIN | ||
-- Check if the flow exists by querying the partitioning_to_flow table. | ||
-- Rationale: | ||
-- This table is preferred over the flows table because: | ||
-- 1. Every flow has at least one record in partitioning_to_flow. | ||
-- 2. This table is used in subsequent queries, providing a caching advantage. | ||
-- 3. Improves performance by reducing the need to query the flows table directly. | ||
PERFORM 1 FROM flows.partitioning_to_flow WHERE fk_flow = i_flow_id; | ||
IF NOT FOUND THEN | ||
status := 42; | ||
status_text := 'Flow not found'; | ||
RETURN NEXT; | ||
RETURN; | ||
END IF; | ||
|
||
-- Determine if there are more checkpoints than the limit | ||
IF i_checkpoints_limit IS NOT NULL THEN | ||
SELECT count(*) > i_checkpoints_limit | ||
FROM runs.checkpoints C | ||
JOIN flows.partitioning_to_flow PF ON C.fk_partitioning = PF.fk_partitioning | ||
WHERE PF.fk_flow = i_flow_id | ||
AND (i_checkpoint_name IS NULL OR C.checkpoint_name = i_checkpoint_name) | ||
LIMIT i_checkpoints_limit + 1 OFFSET i_offset | ||
INTO _has_more; | ||
ELSE | ||
_has_more := false; | ||
END IF; | ||
|
||
-- Retrieve the checkpoints and their associated measurements | ||
RETURN QUERY | ||
WITH limited_checkpoints AS ( | ||
SELECT C.id_checkpoint, | ||
C.fk_partitioning, | ||
C.checkpoint_name, | ||
C.created_by, | ||
C.measured_by_atum_agent, | ||
C.process_start_time, | ||
C.process_end_time | ||
FROM runs.checkpoints C | ||
JOIN flows.partitioning_to_flow PF ON C.fk_partitioning = PF.fk_partitioning | ||
WHERE PF.fk_flow = i_flow_id | ||
AND (i_checkpoint_name IS NULL OR C.checkpoint_name = i_checkpoint_name) | ||
ORDER BY | ||
CASE | ||
WHEN i_latest_first THEN C.process_start_time | ||
END DESC, | ||
CASE | ||
WHEN NOT i_latest_first THEN C.process_start_time | ||
END ASC | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is tricky on NULL. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or just use a local variable at the beginning |
||
LIMIT i_checkpoints_limit OFFSET i_offset | ||
) | ||
SELECT | ||
11 AS status, | ||
'OK' AS status_text, | ||
LC.id_checkpoint, | ||
LC.checkpoint_name, | ||
LC.created_by AS author, | ||
LC.measured_by_atum_agent, | ||
MD.measure_name, | ||
MD.measured_columns, | ||
M.measurement_value, | ||
LC.process_start_time AS checkpoint_start_time, | ||
LC.process_end_time AS checkpoint_end_time, | ||
LC.fk_partitioning AS id_partitioning, | ||
P.partitioning AS partitioning, | ||
P.created_by AS partitioning_author, | ||
_has_more AS has_more | ||
FROM | ||
limited_checkpoints LC | ||
INNER JOIN | ||
runs.measurements M ON LC.id_checkpoint = M.fk_checkpoint | ||
INNER JOIN | ||
runs.measure_definitions MD ON M.fk_measure_definition = MD.id_measure_definition | ||
INNER JOIN | ||
runs.partitionings P ON LC.fk_partitioning = P.id_partitioning | ||
ORDER BY | ||
CASE | ||
WHEN i_latest_first THEN LC.process_start_time | ||
END DESC, | ||
CASE | ||
WHEN NOT i_latest_first THEN LC.process_start_time | ||
END ASC; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. used the coalesce |
||
END; | ||
$$ | ||
LANGUAGE plpgsql VOLATILE SECURITY DEFINER; | ||
|
||
GRANT EXECUTE ON FUNCTION flows.get_flow_checkpoints(BIGINT, INT, BIGINT, TEXT, BOOLEAN) TO atum_owner; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❓😉
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know which plugin/tool does that but there was .bloop folder generated which I believe shouldn't be part of VCS.