diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c174b09..e17e6e88 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,26 +2,18 @@ ci: autoupdate_schedule: quarterly skip: [pip-compile] repos: - - repo: https://github.com/psf/black - rev: 24.4.2 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.0 hooks: - - id: black - - repo: https://github.com/pycqa/flake8 - rev: 7.1.0 - hooks: - - id: flake8 - additional_dependencies: [flake8-comprehensions] - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort + - id: ruff + - id: ruff-format - repo: https://github.com/astral-sh/uv-pre-commit rev: 0.4.4 hooks: - id: pip-compile name: pip-compile requirements.in - args: [requirements.in, -o, requirements.txt, --no-strip-extras] + args: [requirements.in, -o, requirements.txt] - id: pip-compile name: pip-compile requirements_dev.in - args: [requirements_dev.in, -o, requirements_dev.txt, --no-strip-extras] + args: [requirements_dev.in, -o, requirements_dev.txt] files: ^requirements_dev\.(in|txt)$ diff --git a/contracting_process/field_level/definitions.py b/contracting_process/field_level/definitions.py index 4c30ad7d..5ed43a2c 100644 --- a/contracting_process/field_level/definitions.py +++ b/contracting_process/field_level/definitions.py @@ -26,7 +26,7 @@ def _definitions(properties, path=None, refs=None): refs = () for key, value in properties.items(): - new_path = path + (key,) + new_path = (*path, key) dot_path = ".".join(new_path) if "object" in value["type"] and "properties" in value: @@ -58,10 +58,7 @@ def _definitions(properties, path=None, refs=None): if key == "description": checks.append((document_description_length.calculate, document_description_length.name)) elif key == "documentType": - if refs[1] == "Implementation": - index = 1 - else: - index = 0 + index = 1 if refs[1] == "Implementation" else 0 checks.append( ( functools.partial(document_type.calculate_section, section=refs[index].lower()), @@ -84,7 +81,7 @@ def _definitions(properties, path=None, refs=None): elif refs[-1] == "Tender": if key == "numberOfTenderers": checks.append((number.calculate, number.name)) - elif refs[-1] == "Value": + elif refs[-1] == "Value": # noqa: SIM102 # consistency if key == "amount" and new_path[-3] in ("transactions", "unit"): checks.append((number.calculate, number.name)) diff --git a/contracting_process/field_level/report_examples.py b/contracting_process/field_level/report_examples.py index 5812a74b..dff511e2 100644 --- a/contracting_process/field_level/report_examples.py +++ b/contracting_process/field_level/report_examples.py @@ -118,7 +118,7 @@ def create(dataset_id): check_group["passed_examples"] = check_group["passed_examples"].sample check_group["failed_examples"] = check_group["failed_examples"].sample - for check_name, check in check_group["checks"].items(): + for check in check_group["checks"].values(): check["passed_examples"] = check["passed_examples"].sample check["failed_examples"] = check["failed_examples"].sample diff --git a/contracting_process/processor.py b/contracting_process/processor.py index cb0b1860..b666f377 100644 --- a/contracting_process/processor.py +++ b/contracting_process/processor.py @@ -7,7 +7,7 @@ from contracting_process.resource_level.definitions import definitions as resource_level_definitions from pelican.util import settings from pelican.util.getter import get_values -from pelican.util.services import Json, get_cursor, state, update_items_state +from pelican.util.services import 
Json, State, get_cursor, update_items_state from pelican.util.workers import is_step_required logger = logging.getLogger("pelican.contracting_process.processor") @@ -26,11 +26,13 @@ def do_work(dataset_id, items): logger.error("data_item %s has no ocid", item_id) continue if do_field_level: - field_level_check_arglist.append(field_level_checks(data, item_id, dataset_id, do_field_quality)) + field_level_check_arglist.append( + field_level_checks(data, item_id, dataset_id, do_field_quality=do_field_quality) + ) if do_resource_level: resource_level_check_arglist.append(resource_level_checks(data, item_id, dataset_id)) - update_items_state(dataset_id, (item_id for _, item_id in items), state.OK) + update_items_state(dataset_id, (item_id for _, item_id in items), State.OK) if do_field_level: save_field_level_checks(field_level_check_arglist) @@ -49,7 +51,7 @@ def resource_level_checks(data, item_id, dataset_id): return (Json(result), item_id, dataset_id) -def field_level_checks(data, item_id, dataset_id, do_field_quality=True): +def field_level_checks(data, item_id, dataset_id, *, do_field_quality=True): logger.debug("Dataset %s: Item %s: Calculating field-level checks", dataset_id, item_id) result = {"meta": {"ocid": data["ocid"], "item_id": item_id}, "checks": {}} @@ -92,7 +94,7 @@ def field_level_checks(data, item_id, dataset_id, do_field_quality=True): else: field_result["path"] = leaf - for check, check_name in coverage_checks: + for check, _ in coverage_checks: check_result = check(item, leaf) passed = check_result["result"] field_result["coverage"]["check_results"].append(check_result) @@ -101,7 +103,7 @@ def field_level_checks(data, item_id, dataset_id, do_field_quality=True): break else: # field_result["coverage"]["overall_result"] is True if do_field_quality: - for check, check_name in checks: + for check, _ in checks: check_result = check(item, leaf) passed = check_result["result"] field_result["quality"]["check_results"].append(check_result) diff --git a/contracting_process/resource_level/coherent/amendments_dates.py b/contracting_process/resource_level/coherent/amendments_dates.py index eb18c6fe..7ab2fc94 100644 --- a/contracting_process/resource_level/coherent/amendments_dates.py +++ b/contracting_process/resource_level/coherent/amendments_dates.py @@ -1,4 +1,6 @@ """ +Coherence check for amendment dates. + .. seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/awards_status.py b/contracting_process/resource_level/coherent/awards_status.py index 184f9841..5c43f6a7 100644 --- a/contracting_process/resource_level/coherent/awards_status.py +++ b/contracting_process/resource_level/coherent/awards_status.py @@ -1,6 +1,7 @@ """ -If an award's ``status`` is inactive ('pending', 'cancelled', 'unsuccessful'), then no contract's ``awardID`` matches -the award's ``id``. +If an award's ``status`` is inactive, then no contract's ``awardID`` matches the award's ``id``. + +Inactive statuses are: 'pending', 'cancelled' and 'unsuccessful'. """ from pelican.util.checks import complete_result_resource, get_empty_result_resource diff --git a/contracting_process/resource_level/coherent/dates.py b/contracting_process/resource_level/coherent/dates.py index 9d5f99b8..7e776f10 100644 --- a/contracting_process/resource_level/coherent/dates.py +++ b/contracting_process/resource_level/coherent/dates.py @@ -1,4 +1,6 @@ """ +Coherence check for the contracting process timeline. + ..
seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/documents_dates.py b/contracting_process/resource_level/coherent/documents_dates.py index e2ae4363..768fc36d 100644 --- a/contracting_process/resource_level/coherent/documents_dates.py +++ b/contracting_process/resource_level/coherent/documents_dates.py @@ -1,4 +1,6 @@ """ +Coherence check for document dates. + .. seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/milestones_dates.py b/contracting_process/resource_level/coherent/milestones_dates.py index 2a4a789c..dd505438 100644 --- a/contracting_process/resource_level/coherent/milestones_dates.py +++ b/contracting_process/resource_level/coherent/milestones_dates.py @@ -1,4 +1,6 @@ """ +Coherence check for milestone dates. + .. seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/period.py b/contracting_process/resource_level/coherent/period.py index 07f09690..c5f63a3d 100644 --- a/contracting_process/resource_level/coherent/period.py +++ b/contracting_process/resource_level/coherent/period.py @@ -1,4 +1,6 @@ """ +Coherence check for period objects. + .. seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py b/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py index d0b29a6c..5a0e314f 100644 --- a/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py +++ b/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py @@ -25,6 +25,6 @@ def calculate(item): return complete_result_resource_pass_fail( result, - number_of_tenderers == 0 or number_of_tenderers == 1, + number_of_tenderers in {0, 1}, {"numberOfTenderers": item["tender"]["numberOfTenderers"]}, ) diff --git a/contracting_process/resource_level/coherent/release_date.py b/contracting_process/resource_level/coherent/release_date.py index 8b3dd538..3e973547 100644 --- a/contracting_process/resource_level/coherent/release_date.py +++ b/contracting_process/resource_level/coherent/release_date.py @@ -1,4 +1,6 @@ """ +Coherence check for release date. + .. seealso:: :func:`pelican.util.checks.coherent_dates_check diff --git a/contracting_process/resource_level/coherent/tender_status.py b/contracting_process/resource_level/coherent/tender_status.py index 21950f96..17ab8519 100644 --- a/contracting_process/resource_level/coherent/tender_status.py +++ b/contracting_process/resource_level/coherent/tender_status.py @@ -1,6 +1,7 @@ """ -If ``tender.status`` is incomplete ('planning', 'planned', 'active', 'cancelled', 'unsuccessful' or 'withdrawn'), then -``awards`` and ``contracts`` are blank. +If ``tender.status`` is incomplete, then ``awards`` and ``contracts`` are blank. + +Incomplete statuses are: 'planning', 'planned', 'active', 'cancelled', 'unsuccessful' and 'withdrawn'.
""" from pelican.util.checks import complete_result_resource_pass_fail, get_empty_result_resource diff --git a/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py b/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py index 3f62c0a1..1c06138a 100644 --- a/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py +++ b/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py @@ -1,6 +1,7 @@ """ -For each contract, the sum of its transaction's values is less than or equal to the contract's value, after conversion -to USD if necessary. +For each contract, the sum of its transaction's values is less than or equal to the contract's value. + +Valeus are converted to USD if necessary. Since the test operates on all contract and transaction objects, the test silently ignores any missing or non-numeric amounts and any missing or unknown currencies. If currency conversion is necessary, but the release date is invalid, diff --git a/contracting_process/resource_level/consistent/contracts_value.py b/contracting_process/resource_level/consistent/contracts_value.py index cdece418..e5183513 100644 --- a/contracting_process/resource_level/consistent/contracts_value.py +++ b/contracting_process/resource_level/consistent/contracts_value.py @@ -1,6 +1,7 @@ """ -For each award, the sum of its contracts' values isn't less than 50%, or more than 150%, of the award's value, after -conversion to USD if necessary. +For each award, the sum of its contracts' values isn't less than 50%, or more than 150%, of the award's value. + +Values are converted to USD if necessary. Since the test operates on all award and contract values, the test silently ignores: diff --git a/contracting_process/resource_level/consistent/parties_role.py b/contracting_process/resource_level/consistent/parties_role.py index 208ea54e..45c21e5d 100644 --- a/contracting_process/resource_level/consistent/parties_role.py +++ b/contracting_process/resource_level/consistent/parties_role.py @@ -1,5 +1,7 @@ """ -For each role of each party, there is an organization reference. The roles to test are: +For each role of each party, there is an organization reference. + +The roles to test are: - procuringEntity - tenderer diff --git a/contracting_process/resource_level/consistent/period_duration_in_days.py b/contracting_process/resource_level/consistent/period_duration_in_days.py index 2915d40a..841cb970 100644 --- a/contracting_process/resource_level/consistent/period_duration_in_days.py +++ b/contracting_process/resource_level/consistent/period_duration_in_days.py @@ -1,6 +1,8 @@ """ -For each period, ``durationInDays`` is equal to the difference between ``startDate`` and ``endDate``. If ``endDate`` is -blank or unparsable, then ``durationInDays`` is equal to the difference between ``startDate`` and ``maxExtentDate``. +For each period, ``durationInDays`` is equal to the difference between ``startDate`` and ``endDate``. + +If ``endDate`` is blank or unparsable, then, for each period, ``durationInDays`` is equal to the difference between +``startDate`` and ``maxExtentDate``. Since the test operates on all period objects, the test silently ignores any dates that can't be parsed. 
""" diff --git a/contracting_process/resource_level/consistent/tender_value.py b/contracting_process/resource_level/consistent/tender_value.py index 461a9894..3a7436da 100644 --- a/contracting_process/resource_level/consistent/tender_value.py +++ b/contracting_process/resource_level/consistent/tender_value.py @@ -1,6 +1,7 @@ """ -``planning.budget.amount`` isn't less than 50%, or more than 150%, of ``tender.value``, after conversion to USD if -necessary. +``planning.budget.amount`` isn't less than 50%, or more than 150%, of ``tender.value``. + +Values are converted to USD if necessary. The test is skipped if an amount is missing, zero or non-numeric, if a currency is missing or unknown, if the two amounts aren't both positive or both negative, or if currency conversion is necessary and the release date is invalid, diff --git a/contracting_process/resource_level/reference/contract_in_awards.py b/contracting_process/resource_level/reference/contract_in_awards.py index a1deff97..3b24c059 100644 --- a/contracting_process/resource_level/reference/contract_in_awards.py +++ b/contracting_process/resource_level/reference/contract_in_awards.py @@ -32,27 +32,27 @@ def calculate(item): if not deep_has(value["value"], "awardID"): failed_paths.append({"path": path, "awardID": None, "reason": "contract has no awardID"}) else: - awardID = value["value"]["awardID"] - if awardID not in id_counts: + award_id = value["value"]["awardID"] + if award_id not in id_counts: if not ids: - failed_paths.append({"path": path, "awardID": awardID, "reason": "no award has an id"}) - elif str(awardID) in id_counts_str: + failed_paths.append({"path": path, "awardID": award_id, "reason": "no award has an id"}) + elif str(award_id) in id_counts_str: failed_paths.append( - {"path": path, "awardID": awardID, "reason": "id is not the same type as awardID"} + {"path": path, "awardID": award_id, "reason": "id is not the same type as awardID"} ) else: - failed_paths.append({"path": path, "awardID": awardID, "reason": "no award matches the awardID"}) - elif id_counts[awardID] > 1: + failed_paths.append({"path": path, "awardID": award_id, "reason": "no award matches the awardID"}) + elif id_counts[award_id] > 1: failed_paths.append( - {"path": path, "awardID": awardID, "reason": "multiple awards match the awardID"} # (same type) + {"path": path, "awardID": award_id, "reason": "multiple awards match the awardID"} # (same type) ) # Multiple matches across different types are currently designed to pass. (This assumes users do not coerce # IDs to strings.) If we change this to a failure, uncomment the following lines. # - # elif id_counts_str[str(awardID)] > 1: - # failed_paths.append( - # {"path": path, "awardID": awardID, "reason": "multiple awards match the awardID (types differ)"} - # ) + # > elif id_counts_str[str(award_id)] > 1: + # > failed_paths.append( + # > {"path": path, "awardID": award_id, "reason": "multiple awards match the awardID (types differ)"} + # > ) else: pass_count += 1 diff --git a/contracting_process/resource_level/reference/parties.py b/contracting_process/resource_level/reference/parties.py index 11774d02..636043b0 100644 --- a/contracting_process/resource_level/reference/parties.py +++ b/contracting_process/resource_level/reference/parties.py @@ -43,10 +43,10 @@ def calculate_path(item, path): # Multiple matches across different types are currently designed to pass. (This assumes users do not coerce # IDs to strings.) If we change this to a failure, uncomment the following lines. 
# - # elif id_counts_str[str(ident)] > 1: - # failed_paths.append( - # {"path": path, "id": ident, "reason": "multiple parties match the referencing id (types differ)"} - # ) + # > elif id_counts_str[str(ident)] > 1: + # > failed_paths.append( + # > {"path": path, "id": ident, "reason": "multiple parties match the referencing id (types differ)"} + # > ) else: pass_count += 1 diff --git a/dataset/consistent/related_process_title.py b/dataset/consistent/related_process_title.py index 101122dd..e114c2f2 100644 --- a/dataset/consistent/related_process_title.py +++ b/dataset/consistent/related_process_title.py @@ -1,6 +1,7 @@ """ -A related process object has the same value for its ``title`` field as the ``tender.title`` field of the compiled -release it references. The related process fields are: +The ``title`` of a related process is equal to the ``tender.title`` of the compiled release it references. + +The related process fields are: - ``contracts.relatedProcesses`` - ``relatedProcesses`` @@ -84,7 +85,7 @@ def add_item(scope, item, item_id): def get_result(scope): result = get_empty_result_dataset(version) - for ref, example in scope["related_processes"].items(): + for example in scope["related_processes"].values(): if scope["ocids"][example["related_ocid"]]["found"]: scope = _add_example(scope, example) diff --git a/dataset/distribution/buyer.py b/dataset/distribution/buyer.py index 179d92e7..d1ef0711 100644 --- a/dataset/distribution/buyer.py +++ b/dataset/distribution/buyer.py @@ -1,6 +1,8 @@ """ -Fewer than 50% of all buyers are identified in only one compiled release. Failure indicates issues in buyer -identification. Buyers are identified by ``buyer.identifier.scheme`` and ``buyer.identifier.id``. +Fewer than 50% of all buyers are identified in only one compiled release. + +Failure indicates issues in buyer identification. Buyers are identified by ``buyer.identifier.scheme`` and +``buyer.identifier.id``. The test is skipped if the ``buyer.identifier.scheme`` and ``buyer.identifier.id`` fields are both present in fewer than 1,000 compiled releases. diff --git a/dataset/distribution/buyer_repetition.py b/dataset/distribution/buyer_repetition.py index 1c35c819..f56a278c 100644 --- a/dataset/distribution/buyer_repetition.py +++ b/dataset/distribution/buyer_repetition.py @@ -1,6 +1,8 @@ """ -The most common buyer is identified in 1% to 50% of compiled releases. Failure indicates issues in buyer identification -or buyer over-representation. Buyers are identified by ``buyer.identifier.scheme`` and ``buyer.identifier.id``. +The most common buyer is identified in 1% to 50% of compiled releases. + +Failure indicates issues in buyer identification or buyer over-representation. Buyers are identified by +``buyer.identifier.scheme`` and ``buyer.identifier.id``. The test is skipped if the ``buyer.identifier.scheme`` and ``buyer.identifier.id`` fields are both present in fewer than 1,000 compiled releases. diff --git a/dataset/distribution/code_distribution.py b/dataset/distribution/code_distribution.py index cbe2eb0f..5fc32aca 100644 --- a/dataset/distribution/code_distribution.py +++ b/dataset/distribution/code_distribution.py @@ -1,6 +1,9 @@ """ -If ``test_values`` is set, then each test value occurs in between 0.1% and 99% of cases. Otherwise, no test is -performed. The test is skipped if the ``paths`` are never present. +If ``test_values`` is set, then each test value occurs in between 0.1% and 99% of cases. + +Otherwise, no test is performed. 
+ +The test is skipped if the ``paths`` are never present. """ from pelican.util.checks import ReservoirSampler, get_empty_result_dataset @@ -8,7 +11,7 @@ class CodeDistribution: - def __init__(self, paths, test_values=[], limit=20): + def __init__(self, paths, test_values=(), limit=20): self.paths = paths self.test_values = set(test_values) self.limit = limit diff --git a/dataset/distribution/value.py b/dataset/distribution/value.py index bb61d10a..ca374f04 100644 --- a/dataset/distribution/value.py +++ b/dataset/distribution/value.py @@ -1,6 +1,7 @@ """ -The total value of the top 1% of values is less than the total value of the remaining values. Failure indicates extreme -outliers in the top 1%. All values are converted to USD as of the compiled release's ``date``. +The total value of the top 1% of values is less than the total value of the remaining values. + +Failure indicates extreme outliers in the top 1%. Values are converted to USD as of the compiled release's ``date``. The test is skipped if fewer than 100 values are included. A value is excluded if an amount is missing, negative or non-numeric, if a currency is missing or unknown, or if currency conversion is necessary and the release date is diff --git a/dataset/metadata_aggregator.py b/dataset/metadata_aggregator.py index c6f84ce9..6768de9c 100644 --- a/dataset/metadata_aggregator.py +++ b/dataset/metadata_aggregator.py @@ -38,6 +38,8 @@ def get_result(scope): def get_kingfisher_metadata(kingfisher_process_cursor, collection_id): """ + Return metadata from Kingfisher Process. + :param kingfisher_process_cursor: the cursor must be initialized with `cursor_factory=psycopg2.extras.DictCursor` :param collection_id: the ID of the compiled collection """ @@ -192,7 +194,7 @@ def get_kingfisher_metadata(kingfisher_process_cursor, collection_id): for repository_url in deep_get(row["data"], "extensions", list): try: response = requests.get(repository_url, timeout=30) - if response.status_code != 200: + if response.status_code != requests.codes.ok: continue extension = response.json() diff --git a/dataset/misc/url_availability.py b/dataset/misc/url_availability.py index 54f0dcdf..94481def 100644 --- a/dataset/misc/url_availability.py +++ b/dataset/misc/url_availability.py @@ -1,5 +1,7 @@ """ -A random sample of 100 URL values return responses without HTTP error codes. The URL fields are: +A random sample of 100 URL values returns responses without HTTP error codes. + +The URL fields are: - ``planning.documents.url`` - ``tender.documents.url`` @@ -61,7 +63,7 @@ def get_result(scope): for sample in sampler: try: response = requests.get(sample["value"], timeout=settings.REQUESTS_TIMEOUT, stream=True) - if 200 <= response.status_code < 400: + if requests.codes.ok <= response.status_code < requests.codes.bad_request: sample["status"] = "OK" passed_examples.append(sample) passed_count += 1 diff --git a/dataset/reference/related_process_identifier.py b/dataset/reference/related_process_identifier.py index 9e027d28..2413dbad 100644 --- a/dataset/reference/related_process_identifier.py +++ b/dataset/reference/related_process_identifier.py @@ -1,6 +1,7 @@ """ -If a related process has a ``scheme`` of 'ocid' and its ``identifier`` is present, then its ``identifier`` matches the -``ocid`` of a compiled release. The related process fields are: +The ``identifier`` of a related process whose ``scheme`` is 'ocid' matches the ``ocid`` of a compiled release.
+ +The related process fields are: - ``contracts.relatedProcesses`` - ``relatedProcesses`` @@ -77,7 +78,7 @@ def add_item(scope, item, item_id): def get_result(scope): result = get_empty_result_dataset(version) - for ref, example in scope["related_processes"].items(): + for example in scope["related_processes"].values(): scope = _add_example(scope, example) if not scope["meta"]["total_processed"]: diff --git a/docs/conf.py b/docs/conf.py index dd2933c2..28084931 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,13 +6,6 @@ # -- Path setup -------------------------------------------------------------- -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) import os import sys diff --git a/manage.py b/manage.py index 13de7ca1..0e0d9511 100755 --- a/manage.py +++ b/manage.py @@ -2,7 +2,7 @@ import click from pelican.util import exchange_rates_db, settings -from pelican.util.services import commit, get_cursor, phase, publish, state, update_dataset_state +from pelican.util.services import Phase, State, commit, get_cursor, publish, update_dataset_state @click.group() @@ -52,18 +52,18 @@ def remove(dataset_id, include_filtered, force): {"dataset_id": dataset_id}, ) row = cursor.fetchone() - if not row or row[0] not in (phase.CHECKED, phase.DELETED) or row[1] != state.OK: + if not row or row[0] not in (Phase.CHECKED, Phase.DELETED) or row[1] != State.OK: if force: click.secho( f"Forcefully removing dataset {dataset_id} (phase={row[0]}, state={row[1]}). (Its phase should be " - f"{phase.CHECKED} or {phase.DELETED}, and its state should be {state.OK}.)", + f"{Phase.CHECKED} or {Phase.DELETED}, and its state should be {State.OK}.)", fg="yellow", err=True, ) else: click.secho( f"Dataset {dataset_id} (phase={row[0]}, state={row[1]}) can't be removed. Its phase must be " - f"{phase.CHECKED} or {phase.DELETED}, and its state must be {state.OK}.", + f"{Phase.CHECKED} or {Phase.DELETED}, and its state must be {State.OK}.", fg="red", err=True, ) @@ -88,8 +88,8 @@ def remove(dataset_id, include_filtered, force): ) """, { - "phases": [phase.CHECKED, phase.DELETED], - "state": state.OK, + "phases": [Phase.CHECKED, Phase.DELETED], + "state": State.OK, "dataset_ids": delete_dataset_ids, }, ) @@ -119,8 +119,8 @@ def remove(dataset_id, include_filtered, force): """, { "dataset_ids": delete_dataset_ids, - "phase": phase.DELETED, - "state": state.OK, + "phase": Phase.DELETED, + "state": State.OK, }, ) @@ -146,8 +146,8 @@ def remove(dataset_id, include_filtered, force): ) """, { - "phase": phase.DELETED, - "state": state.OK, + "phase": Phase.DELETED, + "state": State.OK, "dataset_ids": drop_dataset_ids, }, ) @@ -182,7 +182,6 @@ def dev(): """ Commands for administrators and developers of Pelican backend. """ - pass @dev.command() @@ -191,7 +190,7 @@ def restart_dataset_check(dataset_id): """ Restart the dataset check if the check.dataset worker failed. 
""" - update_dataset_state(dataset_id, phase.CONTRACTING_PROCESS, state.OK) + update_dataset_state(dataset_id, Phase.CONTRACTING_PROCESS, State.OK) commit() message = {"dataset_id": dataset_id} diff --git a/pelican/exceptions.py b/pelican/exceptions.py new file mode 100644 index 00000000..8e0c887c --- /dev/null +++ b/pelican/exceptions.py @@ -0,0 +1,10 @@ +class PelicanError(Exception): + pass + + +class EmptyExchangeRatesTableError(PelicanError): + pass + + +class NonPositiveLimitError(PelicanError): + pass diff --git a/pelican/util/checks.py b/pelican/util/checks.py index 820173a2..e081aa4b 100644 --- a/pelican/util/checks.py +++ b/pelican/util/checks.py @@ -2,6 +2,7 @@ from collections.abc import Callable, Sequence from typing import Any +from pelican.exceptions import NonPositiveLimitError from pelican.util.getter import parse_date @@ -119,6 +120,8 @@ def field_coverage_check( name: str, test: Callable[[dict[str, Any], str], tuple[bool, str]], version: float = 1.0 ) -> Callable[[dict[str, Any], str], dict[str, Any]]: """ + Return a function that calculates a coverage check. + :param name: the machine name of the check :param test: a function that accepts a dict and a key and returns a tuple of a boolean (whether the test passed) and a string (the reason for any failed test) @@ -149,6 +152,8 @@ def field_quality_check( return_value: Callable[[Any], Any] | None = None, ) -> Callable[[dict[str, Any], str], dict[str, Any]]: """ + Return a function that calculates a quality check. + :param name: the machine name of the check :param test: a function that accepts a value and returns a tuple of a boolean (whether the test passed) and a string (the reason for any failed test) @@ -249,7 +254,7 @@ def _prepare_field_result( class ReservoirSampler: def __init__(self, limit: int): if limit < 1: - raise ValueError("limit must be a positive integer") + raise NonPositiveLimitError self._limit = limit self.index = 0 diff --git a/pelican/util/codelists.py b/pelican/util/codelists.py index 5b04e4fc..8e615c4d 100644 --- a/pelican/util/codelists.py +++ b/pelican/util/codelists.py @@ -13,8 +13,8 @@ @cachetools.func.ttl_cache(ttl=86400) # 1 day def _get(url: str) -> list[dict[str, str]]: while True: - response = requests.get(url) - if response.status_code == 429: + response = requests.get(url, timeout=10) + if response.status_code == requests.codes.too_many_requests: logger.warning("HTTP 429 %s %s", url, response.headers) time.sleep(1) # time.sleep() blocks the IO loop. An asynchronous version like asyncio.sleep() wouldn't. 
else: @@ -52,5 +52,5 @@ def get_media_type_codelist() -> tuple[str, ...]: def get_ocid_prefix_codelist() -> tuple[str, ...]: # https://docs.google.com/spreadsheets/d/1E5ZVhc8VhGOakCq4GegvkyFYT974QQb-sSjvOfaxH7s/pubhtml?gid=506986894&single=true&widget=true - url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQP8EwbUhsfxN7Fx7vX3mTA6Y8CXyGi04bHUepdcfxvM6VRVP9f5BWAYEG6MPbnJjWJp-La81DgG8wx/pub?gid=506986894&single=true&output=csv" # noqa: E501 + url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQP8EwbUhsfxN7Fx7vX3mTA6Y8CXyGi04bHUepdcfxvM6VRVP9f5BWAYEG6MPbnJjWJp-La81DgG8wx/pub?gid=506986894&single=true&output=csv" return _codes(url, "OCID") diff --git a/pelican/util/currency_converter.py b/pelican/util/currency_converter.py index dcc9cbcf..05dc63a4 100644 --- a/pelican/util/currency_converter.py +++ b/pelican/util/currency_converter.py @@ -19,9 +19,6 @@ def bootstrap() -> None: def import_data(data: list[tuple[datetime.date, dict[str, float]]]) -> None: - global rates - global bounds - global currencies rates.clear() bounds.clear() currencies.clear() @@ -57,7 +54,7 @@ def import_data(data: list[tuple[datetime.date, dict[str, float]]]) -> None: elif settings.CURRENCY_CONVERTER_INTERPOLATION == "linear": interpolation_linear(currency, previous, date) else: - raise AttributeError() + raise AttributeError previous = date else: @@ -73,7 +70,7 @@ def import_data(data: list[tuple[datetime.date, dict[str, float]]]) -> None: def interpolation_closest(currency, start_date, end_date): """ - start_date exclusive, end_date exclusive + start_date and end_date are exclusive. """ start_date_rate = rates[start_date][currency] end_date_rate = rates[end_date][currency] @@ -94,7 +91,7 @@ def interpolation_closest(currency, start_date, end_date): days=distance_to_end - settings.CURRENCY_CONVERTER_INTERPOLATION_MAX_DAYS_FALLBACK ) continue - elif distance_to_start < distance_to_end: + if distance_to_start < distance_to_end: rates.setdefault(current_date, {}) rates[current_date][currency] = start_date_rate else: @@ -106,7 +103,7 @@ def interpolation_closest(currency, start_date, end_date): def interpolation_linear(currency, start_date, end_date): """ - start_date exclusive, end_date exclusive + start_date and end_date are exclusive. 
""" start_date_rate = rates[start_date][currency] end_date_rate = rates[end_date][currency] @@ -127,14 +124,13 @@ def interpolation_linear(currency, start_date, end_date): days=distance_to_end - settings.CURRENCY_CONVERTER_INTERPOLATION_MAX_DAYS_FALLBACK ) continue - else: - rates.setdefault(current_date, {}) - rates[current_date][currency] = round( - start_date_rate - + (current_date - start_date).days - * ((end_date_rate - start_date_rate) / (end_date - start_date).days), - 6, - ) + + rates.setdefault(current_date, {}) + rates[current_date][currency] = round( + start_date_rate + + (current_date - start_date).days * (end_date_rate - start_date_rate) / (end_date - start_date).days, + 6, + ) current_date += datetime.timedelta(days=1) @@ -148,12 +144,14 @@ def extrapolation_closest_rate(currency, rel_date): ): return rates[bound[0]][currency] - elif bound[1] < rel_date and ( + if bound[1] < rel_date and ( (rel_date - bound[1]).days <= settings.CURRENCY_CONVERTER_EXTRAPOLATION_MAX_DAYS_FALLBACK or settings.CURRENCY_CONVERTER_EXTRAPOLATION_MAX_DAYS_FALLBACK == -1 ): return rates[bound[1]][currency] + return None + def convert(amount, original_currency, target_currency, rel_date): if original_currency not in currencies or target_currency not in currencies: diff --git a/pelican/util/exchange_rates_db.py b/pelican/util/exchange_rates_db.py index eb7e2c0d..4dada4b7 100644 --- a/pelican/util/exchange_rates_db.py +++ b/pelican/util/exchange_rates_db.py @@ -1,31 +1,19 @@ +import datetime import logging -from datetime import date, timedelta import psycopg2 import requests +from pelican.exceptions import EmptyExchangeRatesTableError from pelican.util import settings from pelican.util.services import Json, commit, get_cursor, rollback logger = logging.getLogger("pelican.tools.exchange_rates_db") -# Datlab had already downloaded the exchange rates to EUR from another project. Changing the base would require -# re-downloading decades of exchange rates. It makes no difference to the application's logic, as all currency -# operations are performed in USD. -# -# The Basic plan is required to request rates for all base currencies. The Professional plan supports the Time-Series -# Endpoint, which can request rates for multiple dates at once. https://fixer.io/documentation -# -# "The Fixer API delivers EOD / End of Day historical exchange rates, which become available at 00:05am GMT for the -# previous day and are time stamped at one second before midnight." 
https://fixer.io/faq -FIXER_IO_URL = "https://data.fixer.io/api/{date}?access_key={access_key}&base=EUR&symbols={symbols}" +BASE_URL = "https://data.fixer.io/api" -class EmptyExchangeRatesTable(Exception): - pass - - -def load() -> list[tuple[date, dict[str, float]]]: +def load() -> list[tuple[datetime.date, dict[str, float]]]: with get_cursor() as cursor: cursor.execute("SELECT valid_on, rates FROM exchange_rates") return cursor.fetchall() @@ -39,10 +27,10 @@ def update_from_fixer_io() -> None: query_result = cursor.fetchone() if query_result is None: - raise EmptyExchangeRatesTable + raise EmptyExchangeRatesTableError max_date = query_result[0] - date_now = date.today() + date_now = datetime.datetime.now(tz=datetime.UTC).date() length = (date_now - max_date).days if not length: @@ -55,10 +43,10 @@ def update_from_fixer_io() -> None: try: # To get the list of currencies for testing: # curl 'https://data.fixer.io/api/symbols?access_key=' | jq '.symbols | keys | join(",")' - response = requests.get(f"https://data.fixer.io/api/symbols?access_key={access_key}") + response = requests.get(f"{BASE_URL}/symbols?access_key={access_key}", timeout=10) response.raise_for_status() - except requests.RequestException as e: - logger.error("Couldn't retrieve currency symbols: %s", e) + except requests.RequestException: + logger.exception("Couldn't retrieve currency symbols") return data = response.json() @@ -75,13 +63,21 @@ def update_from_fixer_io() -> None: date_str = target_date.strftime("%Y-%m-%d") logger.info("Fetching exchange rates for %s.", date_str) + # Datlab had already downloaded the exchange rates to EUR from another project. Changing the base would require + # re-downloading decades of exchange rates. It makes no difference to the application's logic, as all currency + # operations are performed in USD. + # + # The Basic plan is required to request rates for all base currencies. The Professional plan supports the Time-Series + # Endpoint, which can request rates for multiple dates at once. https://fixer.io/documentation + # + # "The Fixer API delivers EOD / End of Day historical exchange rates, which become available at 00:05am GMT for the + # previous day and are time stamped at one second before midnight." 
https://fixer.io/faq response = requests.get( - FIXER_IO_URL.format(date=date_str, access_key=access_key, symbols=symbols), - timeout=10, + f"{BASE_URL}/{date_str}?access_key={access_key}&base=EUR&symbols={symbols}", timeout=10 ) response.raise_for_status() - except requests.RequestException as e: - logger.error("Couldn't retrieve currency rates: %s", e) + except requests.RequestException: + logger.exception("Couldn't retrieve currency rates") break try: @@ -107,14 +103,14 @@ def update_from_fixer_io() -> None: """, parameters, ) - except psycopg2.Error as e: - logger.error("Couldn't insert exchange rate: %s", e) + except psycopg2.Error: + logger.exception("Couldn't insert exchange rate") rollback() break else: commit() - target_date += timedelta(days=1) + target_date += datetime.timedelta(days=1) logger.info("Exchange rates update finished.") diff --git a/pelican/util/getter.py b/pelican/util/getter.py index 93c92807..e42a6b92 100644 --- a/pelican/util/getter.py +++ b/pelican/util/getter.py @@ -1,5 +1,5 @@ +import datetime import re -from datetime import date, datetime from typing import Any from dateutil.parser import isoparse @@ -12,12 +12,13 @@ def get_amount(no_conversion, amount, currency, date): if no_conversion: return amount - elif date is not None: + if date is not None: return convert(amount, currency, "USD", date) + return None # https://datatracker.ietf.org/doc/html/rfc3339#section-5.6 -def parse_datetime(string: str | None) -> datetime | None: +def parse_datetime(string: str | None) -> datetime.datetime | None: """ Parse a string to a datetime. @@ -30,12 +31,12 @@ def parse_datetime(string: str | None) -> datetime | None: except ValueError: pass try: - return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S%z") + return datetime.datetime.strptime(string, "%Y-%m-%dT%H:%M:%S%z") except ValueError: return None -def parse_date(string: str | None) -> date | None: +def parse_date(string: str | None) -> datetime.date | None: """ Parse a string to a date. @@ -48,14 +49,14 @@ def parse_date(string: str | None) -> date | None: except ValueError: pass try: - return datetime.strptime(string[:10], "%Y-%m-%d").date() + return datetime.datetime.strptime(string[:10], "%Y-%m-%d").replace(tzinfo=datetime.UTC).date() except ValueError: return None def deep_has(value: Any, path: str) -> bool: """ - Returns whether a nested value exists in nested dicts, safely. + Return whether a nested value exists in nested dicts, safely. Use this instead of :func:`deep_get` to check for the presence of a key. For example, ``deep_get({"id": 0}, "id")`` is falsy. @@ -74,8 +75,9 @@ def deep_has(value: Any, path: str) -> bool: def deep_get(value: Any, path: str, force: type[Any] | None = None) -> Any: """ - Gets a nested value from nested dicts, safely. If ``force`` is provided and the nested value is not of that type, - then if ``force`` is ... + Get a nested value from nested dicts, safely. + + If ``force`` is provided and the nested value is not of that type, then if ``force`` is ... - ``datetime.date``, ``datetime.datetime``: Parse the nested value as ISO 8601. On failure, return ``None``. - ``dict``, ``list``: Return an empty ``dict`` or ``list``, respectively. 
@@ -97,11 +99,11 @@ def deep_get(value: Any, path: str, force: type[Any] | None = None) -> Any: return None if force and type(value) is not force: - if force is date: + if force is datetime.date: return parse_date(value) - elif force is datetime: + if force is datetime.datetime: return parse_datetime(value) - elif force in (dict, list): + if force in (dict, list): value = force() elif force in (float, int, str): try: @@ -114,7 +116,7 @@ def deep_get(value: Any, path: str, force: type[Any] | None = None) -> Any: return value -def get_values(item: Any, str_path: str, value_only: bool | None = False) -> list[Any]: +def get_values(item: Any, str_path: str, *, value_only: bool | None = False) -> list[Any]: index: int | None if item is None: @@ -124,8 +126,7 @@ def get_values(item: Any, str_path: str, value_only: bool | None = False) -> lis if not str_path or str_path == "": if value_only: return [item] - else: - return [{"path": str_path, "value": item}] + return [{"path": str_path, "value": item}] # return the value for key in the item if "." not in str_path and str_path in item: @@ -138,11 +139,9 @@ def get_values(item: Any, str_path: str, value_only: bool | None = False) -> lis values.append({"path": f"{str_path}[{index}]", "value": item[str_path][index]}) return values - else: - if value_only: - return [item[str_path]] - else: - return [{"path": str_path, "value": item[str_path]}] + if value_only: + return [item[str_path]] + return [{"path": str_path, "value": item[str_path]}] # indexing used field = None @@ -155,14 +154,14 @@ def get_values(item: Any, str_path: str, value_only: bool | None = False) -> lis except (IndexError, TypeError, ValueError): pass - if field is not None and index is not None and field in item: - if type(item[field]) is list and len(item[field]) > index: - if value_only: - values = [item[field][index]] - else: - values = [{"path": f"{field}[{index}]", "value": item[field][index]}] - - return values + if ( + field is not None + and index is not None + and field in item + and type(item[field]) is list + and len(item[field]) > index + ): + return [item[field][index]] if value_only else [{"path": f"{field}[{index}]", "value": item[field][index]}] # get new key identifying the new item path = str_path.split(".") @@ -183,29 +182,24 @@ def get_values(item: Any, str_path: str, value_only: bool | None = False) -> lis else: values = result - for list_item in values: - if not value_only and list_item and "path" in list_item: - list_item["path"] = f"{key}.{list_item['path']}" + for value in values: + if not value_only and value and "path" in value: + value["path"] = f"{key}.{value['path']}" return values # inner value is an array { "key" : [{"aaa":"bbb"}, {"ccc": "ddd"}]} # iterate over the items and read the rest of the path from the if type(item[key]) is list: - index_counter = 0 result = [] - for list_item in item[key]: + for index, list_item in enumerate(item[key]): values = get_values(list_item, ".".join(path[1:]), value_only=value_only) - for list_item in values: + for value in values: if value_only: - result.append(list_item) - else: - if list_item and "path" in list_item: - list_item["path"] = f"{key}[{index_counter}].{list_item['path']}" - - result.append(list_item) - - index_counter += 1 + result.append(value) + elif value and "path" in value: + value["path"] = f"{key}[{index}].{value['path']}" + result.append(value) return result @@ -228,22 +222,25 @@ def get_values(item: Any, str_path: str, value_only: bool | None = False) -> lis except (IndexError, TypeError, 
ValueError): pass - if field is not None and index is not None and field in item: - if type(item[field]) is list and len(item[field]) > index: - result = [] + if ( + field is not None + and index is not None + and field in item + and type(item[field]) is list + and len(item[field]) > index + ): + result = [] - values = get_values(item[field][index], ".".join(path[1:]), value_only=value_only) + values = get_values(item[field][index], ".".join(path[1:]), value_only=value_only) - for list_item in values: - if value_only: - result.append(list_item) - else: - if list_item and "path" in list_item: - list_item["path"] = f"{field}[{index}].{list_item['path']}" - - result.append(list_item) + for value in values: + if value_only: + result.append(value) + elif value and "path" in value: + value["path"] = f"{field}[{index}].{value['path']}" + result.append(value) - return result + return result # nothing found return [] diff --git a/pelican/util/services.py b/pelican/util/services.py index 4180fe04..a18e6a96 100644 --- a/pelican/util/services.py +++ b/pelican/util/services.py @@ -10,7 +10,6 @@ from pelican.util import settings -global db_connected, db_connection db_connected = False db_connection = None db_cursor_idx = 0 @@ -113,12 +112,12 @@ def rollback() -> None: db_connection.rollback() -class state: +class State: IN_PROGRESS = "IN_PROGRESS" OK = "OK" -class phase: +class Phase: CONTRACTING_PROCESS = "CONTRACTING_PROCESS" DATASET = "DATASET" TIME_VARIANCE = "TIME_VARIANCE" @@ -137,7 +136,7 @@ def initialize_dataset_state(dataset_id: int) -> None: VALUES (%(dataset_id)s, %(phase)s, %(state)s, 0) """ with get_cursor() as cursor: - cursor.execute(sql, {"dataset_id": dataset_id, "phase": phase.CONTRACTING_PROCESS, "state": state.IN_PROGRESS}) + cursor.execute(sql, {"dataset_id": dataset_id, "phase": Phase.CONTRACTING_PROCESS, "state": State.IN_PROGRESS}) def update_dataset_state(dataset_id: int, phase: str, state: str, size: int | None = None) -> None: @@ -178,7 +177,7 @@ def initialize_items_state(dataset_id: int, item_ids: list[int]) -> None: VALUES %s """ with get_cursor() as cursor: - psycopg2.extras.execute_values(cursor, sql, [(dataset_id, item_id, state.IN_PROGRESS) for item_id in item_ids]) + psycopg2.extras.execute_values(cursor, sql, [(dataset_id, item_id, State.IN_PROGRESS) for item_id in item_ids]) def update_items_state(dataset_id: int, item_ids: list[int], state: str) -> None: @@ -208,7 +207,7 @@ def get_processed_items_count(dataset_id: int) -> int: with get_cursor() as cursor: cursor.execute( "SELECT COUNT(*) cnt FROM progress_monitor_item WHERE dataset_id = %(dataset_id)s AND state = %(state)s", - {"dataset_id": dataset_id, "state": state.OK}, + {"dataset_id": dataset_id, "state": State.OK}, ) return cursor.fetchone()["cnt"] diff --git a/pelican/util/settings.py b/pelican/util/settings.py index fa00700c..cd86e420 100644 --- a/pelican/util/settings.py +++ b/pelican/util/settings.py @@ -57,17 +57,17 @@ class Steps: # Project configuration # Extractors insert this many rows into data_item at a time. -EXTRACTOR_PAGE_SIZE = int(os.getenv("EXTRACTOR_PAGE_SIZE", 1000)) +EXTRACTOR_PAGE_SIZE = int(os.getenv("EXTRACTOR_PAGE_SIZE", "1000")) # Extractors collect this number of items before publishing a message. To publish the least number of messages, it # should divide evenly into EXTRACTOR_PAGE_SIZE. 
-EXTRACTOR_MAX_BATCH_SIZE = min(EXTRACTOR_PAGE_SIZE, int(os.getenv("EXTRACTOR_MAX_BATCH_SIZE", 100))) +EXTRACTOR_MAX_BATCH_SIZE = min(EXTRACTOR_PAGE_SIZE, int(os.getenv("EXTRACTOR_MAX_BATCH_SIZE", "100"))) # Do not import compiled releases whose size is larger than this number of bytes. # # In practice, very large releases create very large results and cause processing to fail. Since less than 0.005% # of releases in Kingfisher Process exceed 300 kB, these releases are simply excluded instead of pursuing another # solution. (2021-10-27: n=6.12318e+07: >300 kB: 2650 0.005%; >30 kB: 195009 0.3%) -KINGFISHER_PROCESS_MAX_SIZE = int(os.getenv("KINGFISHER_PROCESS_MAX_SIZE", 300000)) +KINGFISHER_PROCESS_MAX_SIZE = int(os.getenv("KINGFISHER_PROCESS_MAX_SIZE", "300000")) # Timeout for URL availability check. REQUESTS_TIMEOUT = 30 @@ -84,7 +84,7 @@ class Steps: # If the distance to the nearby date(s) is greater than this number of days, the value is not converted. Set to -1 to # set the limit to infinity. CURRENCY_CONVERTER_INTERPOLATION_MAX_DAYS_FALLBACK = int( - os.getenv("CURRENCY_CONVERTER_INTERPOLATION_MAX_DAYS_FALLBACK", 90) + os.getenv("CURRENCY_CONVERTER_INTERPOLATION_MAX_DAYS_FALLBACK", "90") ) # Extrapolation refers to the behavior when the date is outside dates with known rates. If disabled, the value is not # converted. @@ -93,7 +93,7 @@ class Steps: # If the distance to the closest date is greater than this number of days, the value is not converted. Set to -1 to # set the limit to infinity. CURRENCY_CONVERTER_EXTRAPOLATION_MAX_DAYS_FALLBACK = int( - os.getenv("CURRENCY_CONVERTER_EXTRAPOLATION_MAX_DAYS_FALLBACK", 180) + os.getenv("CURRENCY_CONVERTER_EXTRAPOLATION_MAX_DAYS_FALLBACK", "180") ) # A comma-separated list of steps to run (default all). Note: If field_quality is set, then field_coverage is run, diff --git a/pelican/util/workers.py b/pelican/util/workers.py index 588380ac..8a2249de 100644 --- a/pelican/util/workers.py +++ b/pelican/util/workers.py @@ -6,15 +6,15 @@ import pika from yapw.methods import ack, publish -from yapw.types import State +from yapw.types import State as ClientState from pelican.util import settings from pelican.util.services import ( + Phase, + State, commit, initialize_dataset_state, initialize_items_state, - phase, - state, update_dataset_state, ) @@ -36,7 +36,7 @@ def is_step_required(*steps: str) -> bool: def process_items( - client_state: State, + client_state: ClientState, channel: pika.channel.Channel, method: pika.spec.Basic.Deliver, routing_key: str, @@ -46,6 +46,8 @@ def process_items( insert_items: Callable[[dict[str, Any], int, list[int]], None], ) -> None: """ + Load items into Pelican. + Ack the message, initialize the dataset's and items' progress, insert items into the database in batches, and publish messages to process the items in batches.
@@ -72,8 +74,8 @@ def process_items( items_inserted += len(item_ids_batch) initialize_items_state(dataset_id, item_ids_batch) - dataset_state = state.OK if items_inserted >= len(ids) else state.IN_PROGRESS - update_dataset_state(dataset_id, phase.CONTRACTING_PROCESS, dataset_state, size=items_inserted) + dataset_state = State.OK if items_inserted >= len(ids) else State.IN_PROGRESS + update_dataset_state(dataset_id, Phase.CONTRACTING_PROCESS, dataset_state, size=items_inserted) commit() publish(client_state, channel, {"item_ids": item_ids_batch, "dataset_id": dataset_id}, routing_key) @@ -89,7 +91,7 @@ def process_items( def finish_callback( - client_state: State, + client_state: ClientState, channel: pika.channel.Channel, method: pika.spec.Basic.Deliver, dataset_id: int, @@ -104,7 +106,7 @@ def finish_callback( :param routing_key: the routing key for the outgoing message """ if phase: - update_dataset_state(dataset_id, phase, state.OK) + update_dataset_state(dataset_id, phase, State.OK) commit() if routing_key: publish(client_state, channel, {"dataset_id": dataset_id}, routing_key) diff --git a/pyproject.toml b/pyproject.toml index 8656c702..ea657265 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,39 @@ -[tool.black] +[project] +name = "pelican-backend" +version = "0.0.0" + +[tool.ruff] line-length = 119 +target-version = "py311" + +[tool.ruff.lint] +select = ["ALL"] +ignore = [ + "ANN", "COM", "EM", + # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "W191", "E501", "D206", "Q000", "Q001", "Q002", "Q003", "ISC001", + "D203", "D212", # ignore incompatible rules + "D200", # documentation preferences + "C901", "PLR091", # complexity preferences + "D1", # docstrings + "PTH", # pathlib + "A001", # filter function + "ARG001", # yapw callbacks + "FBT001", # bool argument + "PLR2004", # magic values + "PLW0603", # db globals + "S311", # random +] + +[tool.ruff.lint.flake8-builtins] +builtins-ignorelist = ["copyright"] -[tool.isort] -profile = 'black' -line_length = 119 +[tool.ruff.lint.per-file-ignores] +"docs/conf.py" = ["INP001"] # no __init__.py file +"tests/*" = [ + "D", # docstring + "PLR2004", # Magic value used + "PT009", # unittest asserts + "RUF012", # unittest attributes + "S101", # assert +] diff --git a/requirements_dev.in b/requirements_dev.in index 9e6f33c6..d0bdcb78 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -1,8 +1,5 @@ -r requirements.txt -black coveralls -flake8 -isort jsonschema pre-commit psycopg2-binary @@ -11,3 +8,4 @@ pytest-cov pytest-subtests rfc3339-validator rfc3986-validator +ruff diff --git a/requirements_dev.txt b/requirements_dev.txt index 41671cf2..9dc3d519 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,11 +1,9 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements_dev.in -o requirements_dev.txt --no-strip-extras +# uv pip compile requirements_dev.in -o requirements_dev.txt attrs==21.2.0 # via # jsonschema # pytest -black==24.3.0 - # via -r requirements_dev.in cachetools==4.2.4 # via -r requirements.txt certifi==2024.7.4 @@ -20,10 +18,8 @@ charset-normalizer==3.1.0 # -r requirements.txt # requests click==8.1.6 - # via - # -r requirements.txt - # black -coverage[toml]==6.5.0 + # via -r requirements.txt +coverage==6.5.0 # via # coveralls # pytest-cov @@ -35,8 +31,6 @@ docopt==0.6.2 # via coveralls filelock==3.13.1 # via virtualenv -flake8==3.9.1 - # via -r requirements_dev.in identify==2.2.4 # via pre-commit idna==3.7 @@ -47,16 +41,10 @@ 
importlib-metadata==4.8.1 # via pluggy iniconfig==1.1.1 # via pytest -isort==5.8.0 - # via -r requirements_dev.in jsonref==1.0.0.post1 # via -r requirements.txt jsonschema==4.17.3 # via -r requirements_dev.in -mccabe==0.6.1 - # via flake8 -mypy-extensions==0.4.3 - # via black nodeenv==1.6.0 # via pre-commit orjson==3.9.15 @@ -64,11 +52,7 @@ orjson==3.9.15 # -r requirements.txt # yapw packaging==23.1 - # via - # black - # pytest -pathspec==0.9.0 - # via black + # via pytest phonenumbers==8.10.17 # via -r requirements.txt pika==1.3.2 @@ -76,9 +60,7 @@ pika==1.3.2 # -r requirements.txt # yapw platformdirs==4.1.0 - # via - # black - # virtualenv + # via virtualenv pluggy==0.12.0 # via pytest pre-commit==3.6.0 @@ -87,10 +69,6 @@ psycopg2==2.9.6 # via -r requirements.txt psycopg2-binary==2.9.6 # via -r requirements_dev.in -pycodestyle==2.7.0 - # via flake8 -pyflakes==2.3.1 - # via flake8 pyrsistent==0.18.0 # via jsonschema pytest==7.2.0 @@ -114,6 +92,8 @@ rfc3339-validator==0.1.4 # via -r requirements_dev.in rfc3986-validator==0.1.1 # via -r requirements_dev.in +ruff==0.6.3 + # via -r requirements_dev.in sentry-sdk==2.8.0 # via -r requirements.txt six==1.16.0 @@ -130,7 +110,7 @@ validate-email==1.3 # via -r requirements.txt virtualenv==20.25.0 # via pre-commit -yapw[perf]==0.1.4 +yapw==0.1.4 # via -r requirements.txt zipp==3.19.1 # via importlib-metadata diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 564a578c..00000000 --- a/setup.cfg +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 119 -extend-ignore = E203 diff --git a/tests/__init__.py b/tests/__init__.py index f09923d7..cc1d9962 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -14,12 +14,12 @@ def read(basename): return json.load(f) -class override_settings(AbstractContextManager): +class OverrideSettings(AbstractContextManager): def __init__(self, **kwargs): self.new = kwargs self.old = {} - for key, value in self.new.items(): + for key in self.new: self.old[key] = getattr(settings, key) def __enter__(self): @@ -62,10 +62,7 @@ def test_failing(self): for params in self.failing: item = params[0] reason = params[1] - if len(params) > 2: - return_value = params[2] - else: - return_value = None + return_value = params[2] if len(params) > 2 else None with self.subTest(item=item): result = self.module.calculate(item, "key") @@ -110,10 +107,7 @@ def test_failing(self): for params in self.failing: value = params[0] reason = params[1] - if len(params) > 2: - return_value = params[2] - else: - return_value = value + return_value = params[2] if len(params) > 2 else value with self.subTest(value=value): result = self.method({"xxx": value}, "xxx", **self.failing_kwargs) diff --git a/tests/compiled_release/coherent/test_dates.py b/tests/compiled_release/coherent/test_dates.py index de97e0a0..056e7003 100644 --- a/tests/compiled_release/coherent/test_dates.py +++ b/tests/compiled_release/coherent/test_dates.py @@ -27,11 +27,11 @@ def test_undefined(): "tenderPeriod": {"endDate": "2014-12-31T00:00:00Z"}, "contractPeriod": {"startDate": "2015-12-31T00:00:00Z"}, }, + "awards": [{"date": "2015-12-30T00:00:00Z", "id": "1"}, {"date": "2017-12-30T00:00:00Z", "id": "2"}], "contracts": [ {"dateSigned": "2015-12-31T00:00:00Z", "awardID": "1"}, {"dateSigned": "2017-12-31T00:00:00Z", "awardID": "2"}, ], - "awards": [{"date": "2015-12-30T00:00:00Z", "id": "1"}, {"date": "2017-12-30T00:00:00Z", "id": "2"}], } @@ -50,11 +50,11 @@ def test_ok(): "tenderPeriod": {"endDate": "2021-12-31T00:00:00Z"}, "contractPeriod": {"startDate": 
"2020-12-31T00:00:00Z"}, }, + "awards": [{"date": "2015-12-31T00:00:00Z", "id": "1"}, {"date": "2017-12-31T00:00:00Z", "id": "3"}], "contracts": [ {"dateSigned": "2015-12-30T00:00:00Z", "awardID": "1"}, {"dateSigned": "2017-12-30T00:00:00Z", "awardID": "2"}, ], - "awards": [{"date": "2015-12-31T00:00:00Z", "id": "1"}, {"date": "2017-12-31T00:00:00Z", "id": "3"}], } @@ -62,7 +62,6 @@ def test_failed(): result = calculate(item_failed) assert type(result) is dict assert result["result"] is False - # assert result["application_count"] == 11 assert result["application_count"] == 10 assert result["pass_count"] == 0 assert result["meta"] == { diff --git a/tests/compiled_release/coherent/test_period.py b/tests/compiled_release/coherent/test_period.py index 64b416d4..614d8684 100644 --- a/tests/compiled_release/coherent/test_period.py +++ b/tests/compiled_release/coherent/test_period.py @@ -124,7 +124,6 @@ def test_mixed_time_zones(): def test_multiple_fails_and_passes(): result = calculate(item_test_multiple_fails_and_passes) - print(result) assert type(result) is dict assert result["result"] is False assert result["application_count"] == 3 diff --git a/tests/conftest.py b/tests/conftest.py index 5b8b75fb..eaa8a634 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -149,9 +149,7 @@ def collection_file_item(kingfisher_process_cursor, collection_rows): {"collection_file_id": collection_file_id}, ) kingfisher_process_cursor.execute("SELECT MAX(id) FROM collection_file_item") - collection_file_item_id = kingfisher_process_cursor.fetchone()[0] - - return collection_file_item_id + return kingfisher_process_cursor.fetchone()[0] @pytest.fixture(scope="session") @@ -220,7 +218,7 @@ def data_and_package_data_rows(kingfisher_process_cursor): "license": "https://creativecommons.org/licenses/by/4.0/", "publicationPolicy": "https://example.com/policy", "extensions": [ - "https://raw.githubusercontent.com/open-contracting-extensions/ocds_process_title_extension/master/extension.json", # noqa: E501 + "https://raw.githubusercontent.com/open-contracting-extensions/ocds_process_title_extension/master/extension.json", ], } ) diff --git a/tests/dataset/consistent/test_related_process_title.py b/tests/dataset/consistent/test_related_process_title.py index 7e68f143..0e89e435 100644 --- a/tests/dataset/consistent/test_related_process_title.py +++ b/tests/dataset/consistent/test_related_process_title.py @@ -57,10 +57,8 @@ def test_undefined(): def test_passed(): scope = {} - item_id = 0 - for item in items_test_passed: + for item_id, item in enumerate(items_test_passed): scope = related_process_title.add_item(scope, item, item_id) - item_id += 1 result = related_process_title.get_result(scope) assert result["result"] is True @@ -124,10 +122,8 @@ def test_passed(): def test_failed(): scope = {} - item_id = 0 - for item in items_test_failed: + for item_id, item in enumerate(items_test_failed): scope = related_process_title.add_item(scope, item, item_id) - item_id += 1 result = related_process_title.get_result(scope) assert result["result"] is False diff --git a/tests/dataset/distribution/test_buyer.py b/tests/dataset/distribution/test_buyer.py index 83c7e672..f9f6dc45 100644 --- a/tests/dataset/distribution/test_buyer.py +++ b/tests/dataset/distribution/test_buyer.py @@ -37,10 +37,8 @@ def test_undefined(): def test_undefined_multiple(): scope = {} - item_id = 0 - for item in items_test_undefined_multiple1: + for item_id, item in enumerate(items_test_undefined_multiple1): scope = buyer.add_item(scope, item, item_id) 
diff --git a/tests/dataset/distribution/test_buyer.py b/tests/dataset/distribution/test_buyer.py
index 83c7e672..f9f6dc45 100644
--- a/tests/dataset/distribution/test_buyer.py
+++ b/tests/dataset/distribution/test_buyer.py
@@ -37,10 +37,8 @@ def test_undefined():

 def test_undefined_multiple():
     scope = {}
-    item_id = 0
-    for item in items_test_undefined_multiple1:
+    for item_id, item in enumerate(items_test_undefined_multiple1):
         scope = buyer.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer.get_result(scope)
     assert result["result"] is None
@@ -49,10 +47,8 @@ def test_undefined_multiple():

     scope = {}
-    item_id = 0
-    for item in items_test_undefined_multiple2:
+    for item_id, item in enumerate(items_test_undefined_multiple2):
         scope = buyer.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer.get_result(scope)
     assert result["result"] is None
@@ -75,10 +71,8 @@ def test_undefined_multiple():

 def test_failed():
     scope = {}
-    item_id = 0
-    for item in items_test_failed1:
+    for item_id, item in enumerate(items_test_failed1):
         scope = buyer.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer.get_result(scope)
     assert result["result"] is False
@@ -91,10 +85,8 @@ def test_failed():

     scope = {}
-    item_id = 0
-    for item in items_test_failed2:
+    for item_id, item in enumerate(items_test_failed2):
         scope = buyer.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer.get_result(scope)
     assert result["result"] is False
@@ -145,10 +137,8 @@ def test_failed():

 def test_passed_multiple():
     scope = {}
-    item_id = 0
-    for item in items_test_passed_multiple:
+    for item_id, item in enumerate(items_test_passed_multiple):
         scope = buyer.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer.get_result(scope)
     assert result["result"] is True
diff --git a/tests/dataset/distribution/test_buyer_repetition.py b/tests/dataset/distribution/test_buyer_repetition.py
index b220de91..e54c928f 100644
--- a/tests/dataset/distribution/test_buyer_repetition.py
+++ b/tests/dataset/distribution/test_buyer_repetition.py
@@ -31,10 +31,8 @@ def test_undefined():

 def test_passed():
     scope = {}
-    item_id = 0
-    for item in items_test_passed1:
+    for item_id, item in enumerate(items_test_passed1):
         scope = buyer_repetition.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer_repetition.get_result(scope)
     assert result["result"] is True
@@ -49,10 +47,8 @@ def test_passed():

     scope = {}
-    item_id = 0
-    for item in items_test_passed2:
+    for item_id, item in enumerate(items_test_passed2):
         scope = buyer_repetition.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer_repetition.get_result(scope)
     assert result["result"] is True
@@ -80,10 +76,8 @@ def test_passed():

 def test_failed():
     scope = {}
-    item_id = 0
-    for item in items_test_failed1:
+    for item_id, item in enumerate(items_test_failed1):
         scope = buyer_repetition.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer_repetition.get_result(scope)
     assert result["result"] is False
@@ -98,10 +92,8 @@ def test_failed():

     scope = {}
-    item_id = 0
-    for item in items_test_failed2:
+    for item_id, item in enumerate(items_test_failed2):
         scope = buyer_repetition.add_item(scope, item, item_id)
-        item_id += 1

     result = buyer_repetition.get_result(scope)
     assert result["result"] is False
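The buyer and buyer-repetition tests both exercise the dataset-level check protocol: `add_item(scope, item, item_id)` folds each compiled release into an accumulator dict, and `get_result(scope)` reduces the accumulator to a verdict, with `None` meaning the field never occurred (the "undefined" cases). A toy check written against the same protocol; the counting logic here is illustrative, not pelican's:

```python
def add_item(scope, item, item_id):
    # Fold one compiled release into the accumulator.
    scope.setdefault("total", 0)
    scope.setdefault("named", 0)
    scope["total"] += 1
    if item.get("buyer", {}).get("name"):
        scope["named"] += 1
    return scope


def get_result(scope):
    # Mirror the tests' three outcomes: None (undefined), True, False.
    if not scope.get("named"):
        return {"result": None}
    return {"result": scope["named"] / scope["total"] >= 0.5}  # threshold is made up


scope = {}
for item_id, item in enumerate([{"buyer": {"name": "Ministry"}}, {}]):
    scope = add_item(scope, item, item_id)

assert get_result(scope) == {"result": True}
```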
CodeDistribution(["contracts.implementation.milestones.status"], limit=20) scope = {} - item_id = 0 - for item in items_complex_structure: + for item_id, item in enumerate(items_complex_structure): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is True @@ -61,10 +57,8 @@ def test_no_test_values(): # items_multiple_paths distribution = CodeDistribution(["planning.documents.documentType", "tender.documents.documentType"], limit=20) scope = {} - item_id = 0 - for item in items_multiple_paths: + for item_id, item in enumerate(items_multiple_paths): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is True @@ -95,10 +89,8 @@ def test_passed(): # test_passed1 distribution = CodeDistribution(["awards.status"], ["pending", "active", "cancelled", "unsuccessful"], limit=20) scope = {} - item_id = 0 - for item in items_passed1: + for item_id, item in enumerate(items_passed1): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is True @@ -115,10 +107,8 @@ def test_passed(): # test_passed2 distribution = CodeDistribution(["awards.status"], ["pending"], limit=20) scope = {} - item_id = 0 - for item in items_passed2: + for item_id, item in enumerate(items_passed2): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is True @@ -131,10 +121,8 @@ def test_passed(): # test_passed3 distribution = CodeDistribution(["awards.status"], ["pending", "active"], limit=20) scope = {} - item_id = 0 - for item in items_passed3: + for item_id, item in enumerate(items_passed3): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is True @@ -157,10 +145,8 @@ def test_failed(): # test_failed1 distribution = CodeDistribution(["awards.status"], ["pending"], limit=20) scope = {} - item_id = 0 - for item in items_failed1: + for item_id, item in enumerate(items_failed1): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is False @@ -172,10 +158,8 @@ def test_failed(): # test_failed2 distribution = CodeDistribution(["awards.status"], ["pending"], limit=20) scope = {} - item_id = 0 - for item in items_failed2: + for item_id, item in enumerate(items_failed2): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is False @@ -188,10 +172,8 @@ def test_failed(): # test_failed2 distribution = CodeDistribution(["awards.status"], ["pending"], limit=20) scope = {} - item_id = 0 - for item in items_failed3: + for item_id, item in enumerate(items_failed3): scope = distribution.add_item(scope, item, item_id) - item_id += 1 result = distribution.get_result(scope) assert result["result"] is False diff --git a/tests/dataset/misc/test_url_availability.py b/tests/dataset/misc/test_url_availability.py index e634a6ca..1f5b3174 100644 --- a/tests/dataset/misc/test_url_availability.py +++ b/tests/dataset/misc/test_url_availability.py @@ -11,7 +11,7 @@ item_test_undefined = {"ocid": "0", "planning": {"documents": [{"url": f"{TEST_URL}/status/200"}]}} -class mock_settings: +class MockSettings: REQUESTS_TIMEOUT = 1 @@ -62,13 +62,11 @@ def test_passed(): @pytest.mark.skipif("CI" not in os.environ, 
reason="skipping slow test in development") @pytest.mark.filterwarnings("ignore:unclosed { +# > "dataset_id_original": 2, +# > "filter_message": { +# > "release_date_from": '2019-12-02', +# > "release_date_to": '2020-02-02', +# > "buyer": ["ministry_of_finance", "state"], +# > "buyer_regex": "Development$", +# > "procuring_entity": ["a", "b"], +# > "procuring_entity_regex": "(a|b)casdf+" +# > }, +# > "max_items": 5000 +# > } def callback(client_state, channel, method, properties, input_message): cursor = get_cursor() @@ -103,37 +100,41 @@ def callback(client_state, channel, method, properties, input_message): ) commit() - query = sql.SQL("SELECT id FROM data_item WHERE dataset_id = ") + sql.Literal(dataset_id_original) + variables = {"dataset_id_original": dataset_id_original} + parts = ["SELECT id FROM data_item WHERE dataset_id = %(dataset_id_original)s"] + if "release_date_from" in filter_message: - expr = sql.SQL("data->>'date' >= ") + sql.Literal(filter_message["release_date_from"]) - query += sql.SQL(" AND ") + expr + variables["release_date_from"] = filter_message["release_date_from"] + parts.append("AND data->>'date' >= %(release_date_from)s") + if "release_date_to" in filter_message: - expr = sql.SQL("data->>'date' <= ") + sql.Literal(filter_message["release_date_to"]) - query += sql.SQL(" AND ") + expr + variables["release_date_to"] = filter_message["release_date_to"] + parts.append("AND data->>'date' <= %(release_date_to)s") + if "buyer" in filter_message: - expr = sql.SQL(", ").join([sql.Literal(buyer) for buyer in filter_message["buyer"]]) - expr = sql.SQL("data->'buyer'->>'name' IN ") + sql.SQL("(") + expr + sql.SQL(")") - query += sql.SQL(" AND ") + expr + variables["buyer"] = tuple(filter_message["buyer"]) + parts.append("AND data->'buyer'->>'name' IN %(buyer)s") + if "buyer_regex" in filter_message: - expr = sql.SQL("data->'buyer'->>'name' ILIKE ") + sql.Literal(filter_message["buyer_regex"]) - query += sql.SQL(" AND ") + expr + variables["buyer_regex"] = filter_message["buyer_regex"] + parts.append("AND data->'buyer'->>'name' ILIKE %(buyer_regex)s") + if "procuring_entity" in filter_message: - expr = sql.SQL(", ").join( - [sql.Literal(procuring_entity) for procuring_entity in filter_message["procuring_entity"]] - ) - expr = sql.SQL("data->'tender'->'procuringEntity'->>'name' IN ") + sql.SQL("(") + expr + sql.SQL(")") - query += sql.SQL(" AND ") + expr + variables["procuring_entity"] = tuple(filter_message["procuring_entity"]) + parts.append("AND data->'tender'->'procuringEntity'->>'name' IN %(procuring_entity)s") + if "procuring_entity_regex" in filter_message: - expr = sql.SQL("data->'tender'->'procuringEntity'->>'name' ILIKE ") + sql.Literal( - filter_message["procuring_entity_regex"] - ) - query += sql.SQL(" AND ") + expr + variables["procuring_entity_regex"] = filter_message["procuring_entity_regex"] + parts.append("AND data->'tender'->'procuringEntity'->>'name' ILIKE %(procuring_entity_regex)s") + if max_items is not None: - query += sql.SQL(" LIMIT ") + sql.Literal(max_items) + variables["limit"] = max_items + parts.append("LIMIT %(limit)s") - logger.info(query.as_string(cursor)) + statement = sql.SQL(" ".join(parts)) + logger.info(statement.as_string(cursor)) - cursor.execute(query) + cursor.execute(statement, variables) ids = [row[0] for row in cursor] process_items( diff --git a/workers/extract/kingfisher_process.py b/workers/extract/kingfisher_process.py index b4945d6b..4e3bdccf 100644 --- a/workers/extract/kingfisher_process.py +++ 
diff --git a/workers/extract/kingfisher_process.py b/workers/extract/kingfisher_process.py
index b4945d6b..4e3bdccf 100644
--- a/workers/extract/kingfisher_process.py
+++ b/workers/extract/kingfisher_process.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import logging

 import click
diff --git a/workers/report.py b/workers/report.py
index 0933d10e..d71bbc1a 100644
--- a/workers/report.py
+++ b/workers/report.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import logging

 import click
@@ -8,7 +7,7 @@
 import contracting_process.resource_level.report as resource_level_report
 from dataset import metadata_aggregator
 from pelican.util import settings
-from pelican.util.services import consume, phase
+from pelican.util.services import Phase, consume
 from pelican.util.workers import finish_callback, is_step_required

 consume_routing_key = "time_variance_checker"
@@ -46,7 +45,7 @@ def callback(client_state, channel, method, properties, input_message):
     metadata = metadata_aggregator.get_pelican_metadata(dataset_id)
     metadata_aggregator.update_metadata(metadata, dataset_id)

-    finish_callback(client_state, channel, method, dataset_id, phase=phase.CHECKED)
+    finish_callback(client_state, channel, method, dataset_id, phase=Phase.CHECKED)


 if __name__ == "__main__":
diff --git a/workers/wipe.py b/workers/wipe.py
index a1810fff..1b0e5b86 100644
--- a/workers/wipe.py
+++ b/workers/wipe.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import click
 from yapw.methods import ack
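The `phase` to `Phase` rename in `workers/report.py` continues the migration from lowercase constant-holder classes to real enums. A sketch of what that migration buys; only the `CHECKED` member is visible in this diff, so everything else here is illustrative:

```python
import enum


# Before (roughly): a lowercase class used as a bag of string constants.
class phase:
    CHECKED = "CHECKED"


# After: a real Enum. Members are singletons, hashable and iterable, and a
# bare string passed where a Phase is expected is flagged by type checkers.
class Phase(enum.Enum):
    CHECKED = "CHECKED"


assert Phase.CHECKED.value == phase.CHECKED
assert Phase("CHECKED") is Phase.CHECKED  # round-trip from the stored value
```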