Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
salamonpavel committed Nov 21, 2024
1 parent 68e5622 commit e6a65b3
Show file tree
Hide file tree
Showing 19 changed files with 582 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,17 @@ CREATE OR REPLACE FUNCTION flows.get_flow_checkpoints(
OUT status_text TEXT,
OUT id_checkpoint UUID,
OUT checkpoint_name TEXT,
OUT checkpoint_author TEXT,
OUT author TEXT,
OUT measured_by_atum_agent BOOLEAN,
OUT measure_name TEXT,
OUT measured_columns TEXT[],
OUT measurement_value JSONB,
OUT checkpoint_start_time TIMESTAMP WITH TIME ZONE,
OUT checkpoint_end_time TIMESTAMP WITH TIME ZONE,
OUT id_partitioning BIGINT,
OUT o_partitioning JSONB,
OUT partitioning_author TEXT,
OUT has_more BOOLEAN
) RETURNS SETOF record AS
$$
--------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------
--
-- Function: flows.get_flow_checkpoints(4)
-- Retrieves all checkpoints (measures and their measurement details) related to a primary flow
Expand Down Expand Up @@ -64,17 +61,14 @@ $$
-- status_text - Status text
-- id_checkpoint - ID of retrieved checkpoint
-- checkpoint_name - Name of the retrieved checkpoint
-- checkpoint_author - Author of the checkpoint
-- author - Author of the checkpoint
-- measured_by_atum_agent - Flag indicating whether the checkpoint was measured by Atum Agent
-- (if false, data supplied manually)
-- measure_name - measure name associated with a given checkpoint
-- measured_columns - measure columns associated with a given checkpoint
-- measurement_value - measurement details associated with a given checkpoint
-- checkpoint_start_time - Time of the checkpoint
-- checkpoint_end_time - End time of the checkpoint computation
-- id_partitioning - ID of the partitioning
-- o_partitioning - Partitioning value
-- partitioning_author - Author of the partitioning
-- has_more - flag indicating whether there are more checkpoints available, always `false` if `i_limit` is NULL
--
-- Status codes:
Expand Down Expand Up @@ -115,7 +109,6 @@ BEGIN
RETURN QUERY
WITH limited_checkpoints AS (
SELECT C.id_checkpoint,
C.fk_partitioning,
C.checkpoint_name,
C.created_by,
C.measured_by_atum_agent,
Expand All @@ -125,7 +118,7 @@ BEGIN
JOIN flows.partitioning_to_flow PF ON C.fk_partitioning = PF.fk_partitioning
WHERE PF.fk_flow = i_flow_id
AND (i_checkpoint_name IS NULL OR C.checkpoint_name = i_checkpoint_name)
ORDER BY C.process_start_time desc
ORDER BY C.id_checkpoint, C.process_start_time
LIMIT i_checkpoints_limit OFFSET i_offset
)
SELECT
Expand All @@ -140,21 +133,17 @@ BEGIN
M.measurement_value,
LC.process_start_time AS checkpoint_start_time,
LC.process_end_time AS checkpoint_end_time,
LC.fk_partitioning AS id_partitioning,
P.partitioning AS o_partitioning,
P.created_by AS partitioning_author,
_has_more AS has_more
FROM
limited_checkpoints LC
INNER JOIN
runs.measurements M ON LC.id_checkpoint = M.fk_checkpoint
INNER JOIN
runs.measure_definitions MD ON M.fk_measure_definition = MD.id_measure_definition
INNER JOIN
runs.partitionings P ON LC.fk_partitioning = P.id_partitioning
ORDER BY LC.process_start_time desc;
ORDER BY
LC.id_checkpoint, LC.process_start_time;
END;
$$
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;

GRANT EXECUTE ON FUNCTION flows.get_flow_checkpoints(BIGINT, INT, BIGINT, TEXT) TO atum_owner;
167 changes: 167 additions & 0 deletions database/src/main/postgres/flows/V0.3.0.1__get_flow_checkpoints.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Copyright 2021 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

DROP FUNCTION IF EXISTS flows.get_flow_checkpoints(
i_flow_id BIGINT,
i_checkpoints_limit INT,
i_offset BIGINT,
i_checkpoint_name TEXT
);

CREATE OR REPLACE FUNCTION flows.get_flow_checkpoints(
IN i_flow_id BIGINT,
IN i_checkpoints_limit INT DEFAULT 5,
IN i_offset BIGINT DEFAULT 0,
IN i_checkpoint_name TEXT DEFAULT NULL,
OUT status INTEGER,
OUT status_text TEXT,
OUT id_checkpoint UUID,
OUT checkpoint_name TEXT,
OUT checkpoint_author TEXT,
OUT measured_by_atum_agent BOOLEAN,
OUT measure_name TEXT,
OUT measured_columns TEXT[],
OUT measurement_value JSONB,
OUT checkpoint_start_time TIMESTAMP WITH TIME ZONE,
OUT checkpoint_end_time TIMESTAMP WITH TIME ZONE,
OUT id_partitioning BIGINT,
OUT o_partitioning JSONB,
OUT partitioning_author TEXT,
OUT has_more BOOLEAN
) RETURNS SETOF record AS
$$
--------------------------------------------------------------------------------------------------------------------
--
-- Function: flows.get_flow_checkpoints(4)
-- Retrieves all checkpoints (measures and their measurement details) related to a primary flow
-- associated with the input partitioning.
--
-- Note: a single row returned from this function doesn't contain all data related to a single checkpoint - it only
-- represents one measure associated with a checkpoint. So even if only a single checkpoint would be retrieved,
-- this function can potentially return multiple rows.
--
-- Note: checkpoints will be retrieved in ordered fashion, by checkpoint_time and id_checkpoint
--
-- Parameters:
-- i_partitioning_of_flow - partitioning to use for identifying the flow associate with checkpoints
-- that will be retrieved
-- i_checkpoints_limit - (optional) maximum number of checkpoint to return, returns all of them if NULL
-- i_offset - (optional) offset for checkpoints pagination
-- i_checkpoint_name - (optional) if specified, returns data related to particular checkpoint's name
--
-- Note: i_checkpoint_limit and i_offset are used for pagination purposes;
-- checkpoints are ordered by process_start_time in descending order
-- and then by id_checkpoint in ascending order
--
-- Returns:
-- status - Status code
-- status_text - Status text
-- id_checkpoint - ID of retrieved checkpoint
-- checkpoint_name - Name of the retrieved checkpoint
-- checkpoint_author - Author of the checkpoint
-- measured_by_atum_agent - Flag indicating whether the checkpoint was measured by Atum Agent
-- (if false, data supplied manually)
-- measure_name - measure name associated with a given checkpoint
-- measured_columns - measure columns associated with a given checkpoint
-- measurement_value - measurement details associated with a given checkpoint
-- checkpoint_start_time - Time of the checkpoint
-- checkpoint_end_time - End time of the checkpoint computation
-- id_partitioning - ID of the partitioning
-- o_partitioning - Partitioning value
-- partitioning_author - Author of the partitioning
-- has_more - flag indicating whether there are more checkpoints available, always `false` if `i_limit` is NULL
--
-- Status codes:
-- 11 - OK
-- 42 - Flow not found
---------------------------------------------------------------------------------------------------
DECLARE
_has_more BOOLEAN;
BEGIN
-- Check if the flow exists by querying the partitioning_to_flow table.
-- Rationale:
-- This table is preferred over the flows table because:
-- 1. Every flow has at least one record in partitioning_to_flow.
-- 2. This table is used in subsequent queries, providing a caching advantage.
-- 3. Improves performance by reducing the need to query the flows table directly.
PERFORM 1 FROM flows.partitioning_to_flow WHERE fk_flow = i_flow_id;
IF NOT FOUND THEN
status := 42;
status_text := 'Flow not found';
RETURN NEXT;
RETURN;
END IF;

-- Determine if there are more checkpoints than the limit
IF i_checkpoints_limit IS NOT NULL THEN
SELECT count(*) > i_checkpoints_limit
FROM runs.checkpoints C
JOIN flows.partitioning_to_flow PF ON C.fk_partitioning = PF.fk_partitioning
WHERE PF.fk_flow = i_flow_id
AND (i_checkpoint_name IS NULL OR C.checkpoint_name = i_checkpoint_name)
LIMIT i_checkpoints_limit + 1 OFFSET i_offset
INTO _has_more;
ELSE
_has_more := false;
END IF;

-- Retrieve the checkpoints and their associated measurements
RETURN QUERY
WITH limited_checkpoints AS (
SELECT C.id_checkpoint,
C.fk_partitioning,
C.checkpoint_name,
C.created_by,
C.measured_by_atum_agent,
C.process_start_time,
C.process_end_time
FROM runs.checkpoints C
JOIN flows.partitioning_to_flow PF ON C.fk_partitioning = PF.fk_partitioning
WHERE PF.fk_flow = i_flow_id
AND (i_checkpoint_name IS NULL OR C.checkpoint_name = i_checkpoint_name)
ORDER BY C.process_start_time desc
LIMIT i_checkpoints_limit OFFSET i_offset
)
SELECT
11 AS status,
'OK' AS status_text,
LC.id_checkpoint,
LC.checkpoint_name,
LC.created_by AS author,
LC.measured_by_atum_agent,
MD.measure_name,
MD.measured_columns,
M.measurement_value,
LC.process_start_time AS checkpoint_start_time,
LC.process_end_time AS checkpoint_end_time,
LC.fk_partitioning AS id_partitioning,
P.partitioning AS o_partitioning,
P.created_by AS partitioning_author,
_has_more AS has_more
FROM
limited_checkpoints LC
INNER JOIN
runs.measurements M ON LC.id_checkpoint = M.fk_checkpoint
INNER JOIN
runs.measure_definitions MD ON M.fk_measure_definition = MD.id_measure_definition
INNER JOIN
runs.partitionings P ON LC.fk_partitioning = P.id_partitioning
ORDER BY LC.process_start_time desc;
END;
$$
LANGUAGE plpgsql VOLATILE SECURITY DEFINER;

GRANT EXECUTE ON FUNCTION flows.get_flow_checkpoints(BIGINT, INT, BIGINT, TEXT) TO atum_owner;
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2021 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.atum.model.dto

import io.circe.{Decoder, Encoder}
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}

import java.time.ZonedDateTime
import java.util.UUID

case class CheckpointWithPartitioningDTO(
id: UUID,
name: String,
author: String,
measuredByAtumAgent: Boolean = false,
processStartTime: ZonedDateTime,
processEndTime: Option[ZonedDateTime],
measurements: Set[MeasurementDTO],
partitioning: PartitioningWithIdDTO
)

object CheckpointWithPartitioningDTO {

implicit val decodeCheckpointWithPartitioningDTO: Decoder[CheckpointWithPartitioningDTO] = deriveDecoder
implicit val encodeCheckpointWithPartitioningDTO: Encoder[CheckpointWithPartitioningDTO] = deriveEncoder

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package za.co.absa.atum.server.api.controller

import za.co.absa.atum.model.dto.CheckpointV2DTO
import za.co.absa.atum.model.dto.CheckpointWithPartitioningDTO
import za.co.absa.atum.model.envelopes.ErrorResponse
import za.co.absa.atum.model.envelopes.SuccessResponse.PaginatedResponse
import zio.IO
Expand All @@ -29,6 +29,6 @@ trait FlowController {
limit: Option[Int],
offset: Option[Long],
checkpointName: Option[String],
): IO[ErrorResponse, PaginatedResponse[CheckpointV2DTO]]
): IO[ErrorResponse, PaginatedResponse[CheckpointWithPartitioningDTO]]

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package za.co.absa.atum.server.api.controller

import za.co.absa.atum.model.dto.CheckpointV2DTO
import za.co.absa.atum.model.dto.CheckpointWithPartitioningDTO
import za.co.absa.atum.model.envelopes.ErrorResponse
import za.co.absa.atum.model.envelopes.SuccessResponse.PaginatedResponse
import za.co.absa.atum.server.api.service.FlowService
Expand All @@ -31,8 +31,8 @@ class FlowControllerImpl(flowService: FlowService) extends FlowController with B
limit: Option[Int],
offset: Option[Long],
checkpointName: Option[String]
): IO[ErrorResponse, PaginatedResponse[CheckpointV2DTO]] = {
val flowData = serviceCall[PaginatedResult[CheckpointV2DTO], PaginatedResult[CheckpointV2DTO]](
): IO[ErrorResponse, PaginatedResponse[CheckpointWithPartitioningDTO]] = {
val flowData = serviceCall[PaginatedResult[CheckpointWithPartitioningDTO], PaginatedResult[CheckpointWithPartitioningDTO]](
flowService.getFlowCheckpoints(flowId, limit, offset, checkpointName)
)
mapToPaginatedResponse(limit.get, offset.get, flowData)
Expand Down
Loading

0 comments on commit e6a65b3

Please sign in to comment.