diff --git a/.github/workflows/pull-requests.yml b/.github/workflows/pull-requests.yml
index b40c0a09c..789f19cc7 100644
--- a/.github/workflows/pull-requests.yml
+++ b/.github/workflows/pull-requests.yml
@@ -26,6 +26,16 @@ jobs:
         with:
           command: workflow--check--branch-name

+  workflow--check--rebased-on-main:
+    runs-on: [self-hosted, ci]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: ./.github/actions/make/
+        with:
+          command: workflow--check--rebased-on-main
+
   parse-secrets:
     runs-on: [self-hosted, ci]
     needs: [workflow--check--branch-name]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1db82efce..13cce9854 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog

+## 2024-05-31
+- [PI-342] Add redacted-fields to context loggers
+- [PI-376] Check releases fully rebased on main
+
 ## 2024-05-29
 - [PI-344] Added smoke test role
diff --git a/VERSION b/VERSION
index 618134d81..5b76f3d67 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2024.05.29
+2024.05.31
diff --git a/changelog/2024-05-31.md b/changelog/2024-05-31.md
new file mode 100644
index 000000000..cd029cae2
--- /dev/null
+++ b/changelog/2024-05-31.md
@@ -0,0 +1,2 @@
+- [PI-342] Add redacted-fields to context loggers
+- [PI-376] Check releases fully rebased on main
diff --git a/pyproject.toml b/pyproject.toml
index 79921bd8e..4a4326a94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "connecting-party-manager"
-version = "2024.05.29"
+version = "2024.05.31"
 description = "Repository for the Connecting Party Manager API and related services"
 authors = ["NHS England"]
 license = "LICENSE.md"
@@ -41,7 +41,7 @@ parse = "^1.19.1"
 pytest-mock = "^3.12.0"
 datamodel-code-generator = "^0.25.1"
 pyyaml = "^6.0.1"
-proxygen-cli = "^2.1.5"
+proxygen-cli = "^2.1.14"
 moto = "^5.0.1"
 boto3-stubs = { extras = [
     "dynamodb",
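Note on the new CI job above: `fetch-depth: 0` gives the runner the full git history, which any ancestry comparison against `main` requires (a shallow clone cannot answer "is `main` an ancestor of this branch?"). The `workflow--check--rebased-on-main` make target itself is not part of this diff; the sketch below is only a guess at the kind of check such a target typically performs, built on `git merge-base --is-ancestor` (the function name and flow are illustrative, not the repository's actual implementation):

```python
# Hypothetical sketch of a rebased-on-main check -- NOT the repository's
# actual make target, which this diff does not show.
import subprocess


def is_rebased_on_main(base_ref: str = "origin/main") -> bool:
    """True when base_ref is an ancestor of HEAD, i.e. the branch already
    contains every commit on main and is therefore fully rebased."""
    # `git merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B.
    result = subprocess.run(["git", "merge-base", "--is-ancestor", base_ref, "HEAD"])
    return result.returncode == 0


if __name__ == "__main__":
    # Non-zero exit fails the CI job when the branch is behind main.
    raise SystemExit(0 if is_rebased_on_main() else 1)
```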
diff --git a/src/etl/sds/tests/test_sds_etl_components.py b/src/etl/sds/tests/test_sds_etl_components.py
index 761f962d1..dec9bbd31 100644
--- a/src/etl/sds/tests/test_sds_etl_components.py
+++ b/src/etl/sds/tests/test_sds_etl_components.py
@@ -4,6 +4,7 @@
 import boto3
 import pytest
+from botocore.config import Config
 from domain.core.device import DeviceStatus, DeviceType
 from domain.core.device_key import DeviceKeyType
 from etl.clear_state_inputs import EMPTY_JSON_DATA, EMPTY_LDIF_DATA
@@ -24,6 +25,9 @@
 from test_helpers.pytest_skips import long_running
 from test_helpers.terraform import read_terraform_output

+# Configure the boto3 retry settings
+config = Config(retries={"max_attempts": 10, "mode": "standard"})
+
 # Note that unique identifier "000428682512" is the identifier of 'GOOD_SDS_RECORD'
 DELETION_REQUEST_000428682512 = """
 dn: o=nhs,ou=Services,uniqueIdentifier=000428682512
 changetype: delete
 """
@@ -40,19 +44,27 @@
 """


+@pytest.fixture
+def step_functions_client():
+    return boto3.client("stepfunctions", config=config)
+
+
+@pytest.fixture
+def s3_client():
+    return boto3.client("s3")
+
+
 @pytest.fixture
 def state_machine_input(request: pytest.FixtureRequest):
-    execute_state_machine(state_machine_input=request.param)
     return request.param


 @pytest.fixture
-def worker_data(request: pytest.FixtureRequest):
-    client = boto3.client("s3")
+def worker_data(request: pytest.FixtureRequest, s3_client):
     etl_bucket = read_terraform_output("sds_etl.value.bucket")
     for key, data in request.param.items():
-        client.put_object(Bucket=etl_bucket, Key=key, Body=data)
+        s3_client.put_object(Bucket=etl_bucket, Key=key, Body=data)
     return request.param
@@ -65,9 +77,8 @@ def repository():
 def execute_state_machine(
-    state_machine_input: StateMachineInput,
+    state_machine_input: StateMachineInput, client
 ) -> StartSyncExecutionOutputTypeDef:
-    client = boto3.client("stepfunctions")
     state_machine_arn = read_terraform_output("sds_etl.value.state_machine_arn")
     name = state_machine_input.name
     execution_response = client.start_execution(
@@ -102,25 +113,22 @@ def execute_state_machine(
     return response


-def get_changelog_number_from_s3() -> str:
-    client = boto3.client("s3")
+def get_changelog_number_from_s3(s3_client) -> str:
     etl_bucket = read_terraform_output("sds_etl.value.bucket")
     changelog_key = read_terraform_output("sds_etl.value.changelog_key")
-    response = client.get_object(Bucket=etl_bucket, Key=changelog_key)
+    response = s3_client.get_object(Bucket=etl_bucket, Key=changelog_key)
     return json_loads(response["Body"].read())


-def get_object(key: WorkerKey) -> str:
-    client = boto3.client("s3")
+def get_object(s3_client, key: WorkerKey) -> str:
     etl_bucket = read_terraform_output("sds_etl.value.bucket")
-    response = client.get_object(Bucket=etl_bucket, Key=key)
+    response = s3_client.get_object(Bucket=etl_bucket, Key=key)
     return response["Body"].read()


-def put_object(key: WorkerKey, body: bytes) -> str:
-    client = boto3.client("s3")
+def put_object(s3_client, key: WorkerKey, body: bytes) -> str:
     etl_bucket = read_terraform_output("sds_etl.value.bucket")
-    return client.put_object(Bucket=etl_bucket, Key=key, Body=body)
+    return s3_client.put_object(Bucket=etl_bucket, Key=key, Body=body)


 @pytest.mark.integration
@@ -140,8 +148,14 @@ def put_object(key: WorkerKey, body: bytes) -> str:
     [StateMachineInput.update(changelog_number_start=1, changelog_number_end=1)],
     indirect=True,
 )
-def test_changelog_number_update(worker_data, state_machine_input: StateMachineInput):
-    changelog_number_from_s3 = get_changelog_number_from_s3()
+def test_changelog_number_update(
+    worker_data,
+    state_machine_input: StateMachineInput,
+    step_functions_client,
+    s3_client,
+):
+    execute_state_machine(state_machine_input, client=step_functions_client)
+    changelog_number_from_s3 = get_changelog_number_from_s3(s3_client)

     assert changelog_number_from_s3 == state_machine_input.changelog_number_end
@@ -164,10 +178,18 @@ def test_changelog_number_update(worker_data, state_machine_input: StateMachineI
     ],
     indirect=True,
 )
-def test_end_to_end(repository: MockDeviceRepository, worker_data, state_machine_input):
-    extract_data = get_object(key=WorkerKey.EXTRACT)
-    transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
-    load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
+def test_end_to_end(
+    repository: MockDeviceRepository,
+    worker_data,
+    s3_client,
+    state_machine_input,
+    step_functions_client,
+):
+    execute_state_machine(state_machine_input, client=step_functions_client)
+
+    extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
+    transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
+    load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

     assert len(extract_data) == 0
     assert len(transform_data) == 0
@@ -179,8 +201,9 @@ def test_end_to_end(repository: MockDeviceRepository, worker_data, state_machine

 @long_running
 @pytest.mark.integration
-def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
-    s3_client = boto3.client("s3")
+def test_end_to_end_bulk_trigger(
+    repository: MockDeviceRepository, step_functions_client, s3_client
+):
     bucket = read_terraform_output("sds_etl.value.bucket")
     test_data_bucket = read_terraform_output("test_data_bucket.value")
     bulk_trigger_prefix = read_terraform_output("sds_etl.value.bulk_trigger_prefix")
@@ -208,14 +231,14 @@ def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
         },
     )

-    extract_data = get_object(key=WorkerKey.EXTRACT)
-    transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
-    load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
+    extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
+    transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
+    load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))
     while len(extract_data) + len(transform_data) + len(load_data) > 0:
         time.sleep(15)
-        extract_data = get_object(key=WorkerKey.EXTRACT)
-        transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
-        load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
+        extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
+        transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
+        load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

     product_count = repository.count(by=DeviceType.PRODUCT)
     endpoint_count = repository.count(by=DeviceType.ENDPOINT)
@@ -249,12 +272,18 @@ def test_end_to_end_bulk_trigger(repository: MockDeviceRepository):
     indirect=True,
 )
 def test_end_to_end_changelog_delete(
-    repository: MockDeviceRepository, worker_data, state_machine_input
+    repository: MockDeviceRepository,
+    worker_data,
+    step_functions_client,
+    s3_client,
+    state_machine_input,
 ):
     """Note that the start of this test is the same as test_end_to_end, and then makes changes"""
-    extract_data = get_object(key=WorkerKey.EXTRACT)
-    transform_data = pkl_loads_lz4(get_object(key=WorkerKey.TRANSFORM))
-    load_data = pkl_loads_lz4(get_object(key=WorkerKey.LOAD))
+    execute_state_machine(state_machine_input, client=step_functions_client)
+
+    extract_data = get_object(s3_client, key=WorkerKey.EXTRACT)
+    transform_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.TRANSFORM))
+    load_data = pkl_loads_lz4(get_object(s3_client, key=WorkerKey.LOAD))

     assert len(extract_data) == 0
     assert len(transform_data) == 0
@@ -264,11 +293,12 @@ def test_end_to_end_changelog_delete(
     )

     # Now execute a changelog initial state in the ETL
-    put_object(key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000428682512)
+    put_object(s3_client, key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000428682512)
     response = execute_state_machine(
         state_machine_input=StateMachineInput.update(
             changelog_number_start=124, changelog_number_end=125
-        )
+        ),
+        client=step_functions_client,
     )
     assert response["status"] == "SUCCEEDED"
@@ -289,11 +319,12 @@ def test_end_to_end_changelog_delete(
     assert device.status == DeviceStatus.ACTIVE

     # Execute another changelog initial state in the ETL
-    put_object(key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000842065542)
+    put_object(s3_client, key=WorkerKey.EXTRACT, body=DELETION_REQUEST_000842065542)
     response = execute_state_machine(
         state_machine_input=StateMachineInput.update(
             changelog_number_start=124, changelog_number_end=125
-        )
+        ),
+        client=step_functions_client,
     )
     assert response["status"] == "SUCCEEDED"
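Two behavioural points are worth calling out in the test changes above. First, `execute_state_machine` is no longer invoked inside the `state_machine_input` fixture: each test now starts the execution explicitly after `worker_data` has seeded S3, which removes the implicit dependence on fixture instantiation order. Second, the shared Step Functions client is built with standard-mode retries. A self-contained illustration of that retry configuration (plain boto3/botocore API, no repository code):

```python
import boto3
from botocore.config import Config

# Standard retry mode: botocore retries throttled and transient failures
# with exponential backoff, for up to max_attempts total attempts per call.
config = Config(retries={"max_attempts": 10, "mode": "standard"})
client = boto3.client("stepfunctions", config=config)

# Every API call on this client, e.g. client.start_execution(...), may now
# be attempted up to 10 times before a ThrottlingException reaches the test.
```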
diff --git a/src/etl/sds/trigger/bulk/bulk.py b/src/etl/sds/trigger/bulk/bulk.py
index c15093d7e..fa55c2878 100644
--- a/src/etl/sds/trigger/bulk/bulk.py
+++ b/src/etl/sds/trigger/bulk/bulk.py
@@ -5,6 +5,7 @@
 from etl_utils.trigger.notify import notify
 from event.aws.client import dynamodb_client
 from event.environment import BaseEnvironment
+from event.logging.logger import setup_logger
 from event.step_chain import StepChain

 from .steps import steps
@@ -33,6 +34,7 @@ class BulkTriggerEnvironment(BaseEnvironment):
 @event_source(data_class=S3Event)
 def handler(event: S3Event, context):
+    setup_logger(service_name=__file__)
     step_chain = StepChain(step_chain=steps, step_decorators=[log_action])
     step_chain.run(init=(event.bucket_name, event.object_key), cache=CACHE)
     return notify(
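`setup_logger(service_name=__file__)` relies on the path handling shown in the `logger.py` change at the end of this diff: `Path(service_name).parent.stem` reduces a handler's file path to its parent directory name, so the bulk trigger logs under the service name `bulk` (suffixed with a UUID). A minimal worked example of that derivation:

```python
from pathlib import Path

# What setup_logger derives from __file__ for the bulk trigger handler:
# the parent directory's name, here "bulk".
assert Path("src/etl/sds/trigger/bulk/bulk.py").parent.stem == "bulk"
```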
diff --git a/src/etl/sds/trigger/update/steps.py b/src/etl/sds/trigger/update/steps.py
index 30963a1d9..acaa8641f 100644
--- a/src/etl/sds/trigger/update/steps.py
+++ b/src/etl/sds/trigger/update/steps.py
@@ -1,4 +1,3 @@
-import base64
 from itertools import starmap
 from pathlib import Path
 from typing import TYPE_CHECKING, TypedDict
@@ -55,11 +54,7 @@ def _prepare_ldap_client(data, cache: Cache):
         cert_file=str(cache["cert_file"]),
         key_file=str(cache["key_file"]),
         ldap_changelog_user=cache["ldap_changelog_user"],
-        ldap_changelog_password=str(
-            base64.b64decode(cache["ldap_changelog_password"].encode("utf-8")).decode(
-                "utf-8"
-            )
-        ),
+        ldap_changelog_password=cache["ldap_changelog_password"],
     )
diff --git a/src/etl/sds/trigger/update/update.py b/src/etl/sds/trigger/update/update.py
index 2dd4d0d6b..d6fdf3f11 100644
--- a/src/etl/sds/trigger/update/update.py
+++ b/src/etl/sds/trigger/update/update.py
@@ -1,4 +1,3 @@
-import base64
 from pathlib import Path

 import boto3
@@ -6,6 +5,8 @@
 from etl_utils.trigger.model import StateMachineInputType
 from etl_utils.trigger.notify import notify
 from event.environment import BaseEnvironment
+from event.logging.constants import LDAP_REDACTED_FIELDS
+from event.logging.logger import setup_logger
 from event.step_chain import StepChain

 from .steps import steps
@@ -38,11 +39,7 @@ class ChangelogTriggerEnvironment(BaseEnvironment):
     "etl_bucket": ENVIRONMENT.ETL_BUCKET,
     "ldap_host": ENVIRONMENT.LDAP_HOST,
     "ldap_changelog_user": ENVIRONMENT.LDAP_CHANGELOG_USER,
-    "ldap_changelog_password": str(
-        base64.b64encode(ENVIRONMENT.LDAP_CHANGELOG_PASSWORD.encode("utf-8")).decode(
-            "utf-8"
-        )
-    ),
+    "ldap_changelog_password": ENVIRONMENT.LDAP_CHANGELOG_PASSWORD,
 }
@@ -52,6 +49,8 @@ def handler(event={}, context=None):

     CACHE["ldap"] = ldap

+    setup_logger(service_name=__file__, redacted_fields=LDAP_REDACTED_FIELDS)
+
     step_chain = StepChain(step_chain=steps, step_decorators=[log_action])
     step_chain.run(cache=CACHE)
     return notify(
diff --git a/src/layers/event/logging/constants.py b/src/layers/event/logging/constants.py
new file mode 100644
index 000000000..60c06ae51
--- /dev/null
+++ b/src/layers/event/logging/constants.py
@@ -0,0 +1 @@
+LDAP_REDACTED_FIELDS = {"ldap_changelog_password", "ldap_changelog_user", "ldap_host"}
diff --git a/src/layers/event/logging/logger.py b/src/layers/event/logging/logger.py
index 6f6388b61..f2c11b633 100644
--- a/src/layers/event/logging/logger.py
+++ b/src/layers/event/logging/logger.py
@@ -1,10 +1,11 @@
 from pathlib import Path
+from typing import Set
 from uuid import uuid4

 from nhs_context_logging import app_logger


-def setup_logger(service_name: str, uuid: str = None):
+def setup_logger(service_name: str, uuid: str = None, redacted_fields: Set[str] = None):
     service_name = Path(service_name).parent.stem
     if uuid is None:
         uuid = str(uuid4())
@@ -12,4 +13,5 @@ def setup_logger(service_name: str, uuid: str = None):
     app_logger.setup(
         service_name="-".join((service_name, uuid)),
         config_kwargs={"prepend_module_name": True},
+        redact_fields=redacted_fields,
     )
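Taken together, the update-trigger changes swap obfuscation for redaction: the LDAP changelog password is no longer base64-wrapped on its way through the cache (base64 is an encoding, not encryption, so it only ever obscured the value in log output), and the logger is instead told to redact the sensitive field names outright via `redact_fields`. A minimal usage sketch, assuming nhs_context_logging's `log_action` accepts a `log_args` list and masks any logged field whose name appears in `redact_fields` (the decorated function here is illustrative, not repository code):

```python
from nhs_context_logging import log_action

from event.logging.constants import LDAP_REDACTED_FIELDS
from event.logging.logger import setup_logger

setup_logger(service_name=__file__, redacted_fields=LDAP_REDACTED_FIELDS)


# Illustrative action: the argument names deliberately match the field
# names listed in LDAP_REDACTED_FIELDS.
@log_action(log_args=["ldap_host", "ldap_changelog_user", "ldap_changelog_password"])
def connect_to_ldap(ldap_host, ldap_changelog_user, ldap_changelog_password):
    ...

# Assumed behaviour: when the action is logged, the fields named in
# LDAP_REDACTED_FIELDS are masked in the emitted record rather than
# written out in plaintext.
```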