Release/2024 05 31 (#216)
* feature/PI-342-redact_logging_fields Updated logger to redact

* feature/PI-342-redact_logging_fields testing auth logger redaction

* feature/PI-342-redact_logging_fields removed auth test

* feature/PI-376-branch_needs_main_rebase Added rebase main check

* release/2024-05-31 Created release branch

* release/2024-05-31 increase boto3 retries to address transient errors

* release/2024-05-31 pass missing tests params

* release/2024-05-31 pass missing tests params

* release/2024-05-31 pass missing tests params again

* release/2024-05-31 pass missing tests params again

* release/2024-05-31 fix int tests

---------

Co-authored-by: Rowan Gill <[email protected]>
megan-bower4 and Rohoolio authored Jun 3, 2024
1 parent 69a1a23 commit bbcc01a
Showing 11 changed files with 99 additions and 53 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/pull-requests.yml
@@ -26,6 +26,16 @@ jobs:
with:
command: workflow--check--branch-name

workflow--check--rebased-on-main:
runs-on: [self-hosted, ci]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: ./.github/actions/make/
with:
command: workflow--check--rebased-on-main

parse-secrets:
runs-on: [self-hosted, ci]
needs: [workflow--check--branch-name]
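The new job checks out the full history (`fetch-depth: 0`) so that merge-base computations can see `main`. The implementation behind the `workflow--check--rebased-on-main` make target is not part of this diff; a minimal sketch of what such a check typically does, assuming it compares the branch's merge base against `origin/main`:

```python
# Hypothetical sketch only: the real make target is not shown in this diff.
import subprocess
import sys

def git(*args: str) -> str:
    return subprocess.check_output(["git", *args], text=True).strip()

def main() -> None:
    main_head = git("rev-parse", "origin/main")
    merge_base = git("merge-base", "HEAD", "origin/main")
    if merge_base != main_head:
        # The branch forked from an older main commit: it needs a rebase.
        sys.exit("Branch is not fully rebased on main; rebase and re-push.")

if __name__ == "__main__":
    main()
```
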
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# Changelog

## 2024-05-31
- [PI-342] Add redacted-fields to context loggers
- [PI-376] Check releases fully rebased on main

## 2024-05-29
- [PI-344] Added smoke test role

2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
2024.05.29
2024.05.31
2 changes: 2 additions & 0 deletions changelog/2024-05-31.md
@@ -0,0 +1,2 @@
- [PI-342] Add redacted-fields to context loggers
- [PI-376] Check releases fully rebased on main
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "connecting-party-manager"
version = "2024.05.29"
version = "2024.05.31"
description = "Repository for the Connecting Party Manager API and related services"
authors = ["NHS England"]
license = "LICENSE.md"
@@ -41,7 +41,7 @@ parse = "^1.19.1"
pytest-mock = "^3.12.0"
datamodel-code-generator = "^0.25.1"
pyyaml = "^6.0.1"
proxygen-cli = "^2.1.5"
proxygen-cli = "^2.1.14"
moto = "^5.0.1"
boto3-stubs = { extras = [
"dynamodb",
105 changes: 68 additions & 37 deletions src/etl/sds/tests/test_sds_etl_components.py
@@ -4,6 +4,7 @@

import boto3
import pytest
from botocore.config import Config
from domain.core.device import DeviceStatus, DeviceType
from domain.core.device_key import DeviceKeyType
from etl.clear_state_inputs import EMPTY_JSON_DATA, EMPTY_LDIF_DATA
@@ -24,6 +25,9 @@
from test_helpers.pytest_skips import long_running
from test_helpers.terraform import read_terraform_output

# Configure the boto3 retry settings
config = Config(retries={"max_attempts": 10, "mode": "standard"})

# Note that unique identifier "000428682512" is the identifier of 'GOOD_SDS_RECORD'
DELETION_REQUEST_000428682512 = """
dn: o=nhs,ou=Services,uniqueIdentifier=000428682512
@@ -40,19 +44,27 @@
"""


@pytest.fixture
def step_functions_client():
return boto3.client("stepfunctions", config=config)


@pytest.fixture
def s3_client():
return boto3.client("s3")


@pytest.fixture
def state_machine_input(request: pytest.FixtureRequest):
execute_state_machine(state_machine_input=request.param)
return request.param


@pytest.fixture
def worker_data(request: pytest.FixtureRequest):
client = boto3.client("s3")
def worker_data(request: pytest.FixtureRequest, s3_client):
etl_bucket = read_terraform_output("sds_etl.value.bucket")

for key, data in request.param.items():
client.put_object(Bucket=etl_bucket, Key=key, Body=data)
s3_client.put_object(Bucket=etl_bucket, Key=key, Body=data)
return request.param


@@ -65,9 +77,8 @@ def repository():


def execute_state_machine(
state_machine_input: StateMachineInput,
state_machine_input: StateMachineInput, client
) -> StartSyncExecutionOutputTypeDef:
client = boto3.client("stepfunctions")
state_machine_arn = read_terraform_output("sds_etl.value.state_machine_arn")
name = state_machine_input.name
execution_response = client.start_execution(
@@ -102,25 +113,22 @@ def execute_state_machine(
return response


def get_changelog_number_from_s3() -> str:
client = boto3.client("s3")
def get_changelog_number_from_s3(s3_client) -> str:
etl_bucket = read_terraform_output("sds_etl.value.bucket")
changelog_key = read_terraform_output("sds_etl.value.changelog_key")
response = client.get_object(Bucket=etl_bucket, Key=changelog_key)
response = s3_client.get_object(Bucket=etl_bucket, Key=changelog_key)
return json_loads(response["Body"].read())


def get_object(key: WorkerKey) -> str:
client = boto3.client("s3")
def get_object(s3_client, key: WorkerKey) -> str:
etl_bucket = read_terraform_output("sds_etl.value.bucket")
response = client.get_object(Bucket=etl_bucket, Key=key)
response = s3_client.get_object(Bucket=etl_bucket, Key=key)
return response["Body"].read()


def put_object(key: WorkerKey, body: bytes) -> str:
client = boto3.client("s3")
def put_object(s3_client, key: WorkerKey, body: bytes) -> str:
etl_bucket = read_terraform_output("sds_etl.value.bucket")
return client.put_object(Bucket=etl_bucket, Key=key, Body=body)
return s3_client.put_object(Bucket=etl_bucket, Key=key, Body=body)


@pytest.mark.integration
@@ -140,8 +148,14 @@ def put_object(key: WorkerKey, body: bytes) -> str:
[StateMachineInput.update(changelog_number_start=1, changelog_number_end=1)],
indirect=True,
)
def test_changelog_number_update(worker_data, state_machine_input: StateMachineInput):
changelog_number_from_s3 = get_changelog_number_from_s3()
def test_changelog_number_update(
worker_data,
state_machine_input: StateMachineInput,
step_functions_client,
s3_client,
):
execute_state_machine(state_machine_input, client=step_functions_client)
changelog_number_from_s3 = get_changelog_number_from_s3(s3_client)
assert changelog_number_from_s3 == state_machine_input.changelog_number_end


@@ -164,10 +178,18 @@ def test_changelog_number_update(worker_data, state_machine_input: StateMachineInput):
],
indirect=True,
)
def test_end_to_end(repository: MockDeviceRepository, worker_data, state_machine_input):
extract_data = get_object(key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
def test_end_to_end(
repository: MockDeviceRepository,
worker_data,
s3_client,
state_machine_input,
step_functions_client,
):
execute_state_machine(state_machine_input, client=step_functions_client)

extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

assert len(extract_data) == 0
assert len(transform_data) == 0
@@ -179,8 +201,9 @@ def test_end_to_end(repository: MockDeviceRepository, worker_data, state_machine_input):

@long_running
@pytest.mark.integration
def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
s3_client = boto3.client("s3")
def test_end_to_end_bulk_trigger(
repository: MockDeviceRepository, step_functions_client, s3_client
):
bucket = read_terraform_output("sds_etl.value.bucket")
test_data_bucket = read_terraform_output("test_data_bucket.value")
bulk_trigger_prefix = read_terraform_output("sds_etl.value.bulk_trigger_prefix")
@@ -208,14 +231,14 @@ def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
},
)

extract_data = get_object(key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))
while len(extract_data) + len(transform_data) + len(load_data) > 0:
time.sleep(15)
extract_data = get_object(key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

product_count = repository.count(by=DeviceType.PRODUCT)
endpoint_count = repository.count(by=DeviceType.ENDPOINT)
@@ -249,12 +272,18 @@ def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
indirect=True,
)
def test_end_to_end_changelog_delete(
repository: MockDeviceRepository, worker_data, state_machine_input
repository: MockDeviceRepository,
worker_data,
step_functions_client,
s3_client,
state_machine_input,
):
"""Note that the start of this test is the same as test_end_to_end, and then makes changes"""
extract_data = get_object(key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
execute_state_machine(state_machine_input, client=step_functions_client)

extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

assert len(extract_data) == 0
assert len(transform_data) == 0
@@ -264,11 +293,12 @@ def test_end_to_end_changelog_delete(
)

# Now execute a changelog initial state in the ETL
put_object(key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000428682512)
put_object(s3_client, key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000428682512)
response = execute_state_machine(
state_machine_input=StateMachineInput.update(
changelog_number_start=124, changelog_number_end=125
)
),
client=step_functions_client,
)
assert response["status"] == "SUCCEEDED"

@@ -289,11 +319,12 @@ def test_end_to_end_changelog_delete(
assert device.status == DeviceStatus.ACTIVE

# Execute another changelog initial state in the ETL
put_object(key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000842065542)
put_object(s3_client, key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000842065542)
response = execute_state_machine(
state_machine_input=StateMachineInput.update(
changelog_number_start=124, changelog_number_end=125
)
),
client=step_functions_client,
)
assert response["status"] == "SUCCEEDED"

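Two themes run through this test-file diff: boto3 clients become shared pytest fixtures threaded through the helpers, and the Step Functions client gains a retry configuration to absorb the transient errors mentioned in the commit messages. A minimal standalone sketch of the retry setup (the values mirror the diff; `"standard"` is boto3's documented retry mode with exponential backoff):

```python
import boto3
from botocore.config import Config

# "standard" retry mode retries throttling and transient API failures
# with exponential backoff; max_attempts caps total attempts per call.
retry_config = Config(retries={"max_attempts": 10, "mode": "standard"})

step_functions = boto3.client("stepfunctions", config=retry_config)
s3 = boto3.client("s3")  # the S3 fixture keeps boto3's default retry settings
```
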
2 changes: 2 additions & 0 deletions src/etl/sds/trigger/bulk/bulk.py
@@ -5,6 +5,7 @@
from etl_utils.trigger.notify import notify
from event.aws.client import dynamodb_client
from event.environment import BaseEnvironment
from event.logging.logger import setup_logger
from event.step_chain import StepChain

from .steps import steps
@@ -33,6 +34,7 @@ class BulkTriggerEnvironment(BaseEnvironment):

@event_source(data_class=S3Event)
def handler(event: S3Event, context):
setup_logger(service_name=__file__)
step_chain = StepChain(step_chain=steps, step_decorators=[log_action])
step_chain.run(init=(event.bucket_name, event.object_key), cache=CACHE)
return notify(
7 changes: 1 addition & 6 deletions src/etl/sds/trigger/update/steps.py
@@ -1,4 +1,3 @@
import base64
from itertools import starmap
from pathlib import Path
from typing import TYPE_CHECKING, TypedDict
@@ -55,11 +54,7 @@ def _prepare_ldap_client(data, cache: Cache):
cert_file=str(cache["cert_file"]),
key_file=str(cache["key_file"]),
ldap_changelog_user=cache["ldap_changelog_user"],
ldap_changelog_password=str(
base64.b64decode(cache["ldap_changelog_password"].encode("utf-8")).decode(
"utf-8"
)
),
ldap_changelog_password=cache["ldap_changelog_password"],
)


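The deleted lines undid a base64 round-trip applied where the password entered the trigger cache (see `update.py` below). Since encoding followed by decoding is the identity, removing both sides is behaviour-preserving, as this self-contained check illustrates (the password value is a placeholder):

```python
import base64

password = "s3cret"  # placeholder value
encoded = base64.b64encode(password.encode("utf-8")).decode("utf-8")
decoded = base64.b64decode(encoded.encode("utf-8")).decode("utf-8")
assert decoded == password  # encode-then-decode is the identity
```
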
11 changes: 5 additions & 6 deletions src/etl/sds/trigger/update/update.py
@@ -1,11 +1,12 @@
import base64
from pathlib import Path

import boto3
from etl_utils.trigger.logger import log_action
from etl_utils.trigger.model import StateMachineInputType
from etl_utils.trigger.notify import notify
from event.environment import BaseEnvironment
from event.logging.constants import LDAP_REDACTED_FIELDS
from event.logging.logger import setup_logger
from event.step_chain import StepChain

from .steps import steps
@@ -38,11 +39,7 @@ class ChangelogTriggerEnvironment(BaseEnvironment):
"etl_bucket": ENVIRONMENT.ETL_BUCKET,
"ldap_host": ENVIRONMENT.LDAP_HOST,
"ldap_changelog_user": ENVIRONMENT.LDAP_CHANGELOG_USER,
"ldap_changelog_password": str(
base64.b64encode(ENVIRONMENT.LDAP_CHANGELOG_PASSWORD.encode("utf-8")).decode(
"utf-8"
)
),
"ldap_changelog_password": ENVIRONMENT.LDAP_CHANGELOG_PASSWORD,
}


@@ -52,6 +49,8 @@ def handler(event={}, context=None):

CACHE["ldap"] = ldap

setup_logger(service_name=__file__, redacted_fields=LDAP_REDACTED_FIELDS)

step_chain = StepChain(step_chain=steps, step_decorators=[log_action])
step_chain.run(cache=CACHE)
return notify(
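Together with the matching change in `steps.py`, this drops base64 obfuscation of the LDAP changelog password in favour of redacting sensitive fields at the logging layer. A sketch of the resulting pattern, with placeholder values:

```python
from event.logging.constants import LDAP_REDACTED_FIELDS
from event.logging.logger import setup_logger

# Placeholder cache values; the real ones come from the environment.
CACHE = {
    "ldap_host": "ldap.example.invalid",
    "ldap_changelog_user": "changelog-reader",
    "ldap_changelog_password": "s3cret",  # stored as-is, no base64 round-trip
}

# Log events carrying these field names are masked before being written,
# so the plain-text password never reaches the log output.
setup_logger(service_name=__file__, redacted_fields=LDAP_REDACTED_FIELDS)
```
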
1 change: 1 addition & 0 deletions src/layers/event/logging/constants.py
@@ -0,0 +1 @@
LDAP_REDACTED_FIELDS = {"ldap_changelog_password", "ldap_changelog_user", "ldap_host"}
4 changes: 3 additions & 1 deletion src/layers/event/logging/logger.py
@@ -1,15 +1,17 @@
from pathlib import Path
from typing import Set
from uuid import uuid4

from nhs_context_logging import app_logger


def setup_logger(service_name: str, uuid: str = None):
def setup_logger(service_name: str, uuid: str = None, redacted_fields: Set[str] = None):
service_name = Path(service_name).parent.stem
if uuid is None:
uuid = str(uuid4())
app_logger._is_setup = False
app_logger.setup(
service_name="-".join((service_name, uuid)),
config_kwargs={"prepend_module_name": True},
redact_fields=redacted_fields,
)
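
A usage sketch of the extended `setup_logger` signature (the handler and field names are illustrative). Callers that omit `redacted_fields`, as all pre-existing ones implicitly do, pass `redact_fields=None` through to `nhs_context_logging` and so presumably keep its default behaviour:

```python
from event.logging.logger import setup_logger

# Illustrative field set; real callers pass e.g. LDAP_REDACTED_FIELDS.
SENSITIVE_FIELDS = {"password", "client_secret"}

def handler(event, context=None):
    # Keys matching SENSITIVE_FIELDS are masked in structured log entries
    # before nhs_context_logging emits them.
    setup_logger(service_name=__file__, redacted_fields=SENSITIVE_FIELDS)
    return {"ok": True}
```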
