Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ruff as formatter #322

Merged
merged 11 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,10 @@ repos:
rev: v0.1.6
hooks:
- id: ruff

- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args:
[--add-import, from __future__ import annotations, --profile, black]

- repo: https://github.com/psf/black
rev: 23.11.0
hooks:
- id: black

- --fix
- id: ruff-format
files: ^((otg|utils|tests)/.+)?[^/]+\.py$
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.10.0
hooks:
Expand Down
2 changes: 0 additions & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
{
"recommendations": [
"charliermarsh.ruff",
"ms-python.isort",
"ms-python.mypy-type-checker",
"ms-python.python",
"ms-python.black-formatter",
"esbenp.prettier-vscode"
],
"unwantedRecommendations": ["ms-python.flake8"]
Expand Down
7 changes: 2 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnPaste": false,
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
Expand All @@ -19,14 +19,11 @@
}
},
"json.format.keepLines": true,
"isort.args": ["--profile", "black"],
"autoDocstring.docstringFormat": "google",
"python.testing.pytestArgs": [".", "--doctest-modules", "--cov=src/"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"mypy-type-checker.severity": {
"error": "Information"
},
"ruff.fixAll": false,
"ruff.organizeImports": false
}
}
3 changes: 0 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ check: ## Lint and format code
@echo "Linting docstrings..."
@poetry run pydoclint --config=pyproject.toml src
@poetry run pydoclint --config=pyproject.toml --skip-checking-short-docstrings=true tests
@echo "Formatting..."
@poetry run black src/otg .
@poetry run isort src/otg .

test: ## Run tests
@echo "Running Tests..."
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
[![docs](https://github.com/opentargets/genetics_etl_python/actions/workflows/docs.yaml/badge.svg)](https://opentargets.github.io/genetics_etl_python/)
[![codecov](https://codecov.io/gh/opentargets/genetics_etl_python/branch/main/graph/badge.svg?token=5ixzgu8KFP)](https://codecov.io/gh/opentargets/genetics_etl_python)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python/main.svg)](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python)

# Genetics Portal Data Pipeline (experimental)
Expand Down
1 change: 0 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ hide:
![docs](https://github.com/opentargets/genetics_etl_python/actions/workflows/docs.yaml/badge.svg)
[![codecov](https://codecov.io/gh/opentargets/genetics_etl_python/branch/main/graph/badge.svg?token=5ixzgu8KFP)](https://codecov.io/gh/opentargets/genetics_etl_python)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![pre-commit.ci status](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python/main.svg)](https://results.pre-commit.ci/badge/github/opentargets/genetics_etl_python)

---
Expand Down
50 changes: 5 additions & 45 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

98 changes: 88 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ scikit-learn = "^1.3.2"

[tool.poetry.dev-dependencies]
pre-commit = "^3.6.0"
black = {version = "^22.12.0", allow-prereleases = true}
mypy = "^1.7"
pep8-naming = "^0.13.2"
interrogate = "^1.5.0"
Expand Down Expand Up @@ -84,9 +83,6 @@ upload_to_release = false
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.isort]
profile = "black"

[tool.deptry]
extend_exclude = ["src/conftest.py", "src/airflow", "src/utils"]

Expand Down Expand Up @@ -135,12 +131,94 @@ ignore_missing_imports = true


[tool.ruff]
select = ["D", "E", "F", "B"]
ignore = ["D417", "E501", "E203"]
fixable = ["F401"]
unfixable = ["B"]
line-length = 88
target-version = "py310"
select = [
"B002", # Python does not support the unary prefix increment
"B007", # Loop control variable {name} not used within loop body
"B014", # Exception handler with duplicate exception
"B023", # Function definition does not bind loop variable {name}
"B026", # Star-arg unpacking after a keyword argument is strongly discouraged
"C", # complexity
"COM818", # Trailing comma on bare tuple prohibited
"D", # docstrings
"DTZ003", # Use datetime.now(tz=) instead of datetime.utcnow()
"DTZ004", # Use datetime.fromtimestamp(ts, tz=) instead of datetime.utcfromtimestamp(ts)
"E", # pycodestyle
"F", # pyflakes/autoflake
"G", # flake8-logging-format
"I", # isort
"ICN001", # import conventions; {name} should be imported as {asname}
"ISC001", # Implicitly concatenated string literals on one line
"N804", # First argument of a class method should be named cls
"N805", # First argument of a method should be named self
"N815", # Variable {name} in class scope should not be mixedCase
"PGH001", # No builtin eval() allowed
"PGH004", # Use specific rule codes when using noqa
"PLC0414", # Useless import alias. Import alias does not rename original package.
"PLC", # pylint
"PLE", # pylint
"PLR", # pylint
"PLW", # pylint
"Q000", # Double quotes found but single quotes preferred
"RUF006", # Store a reference to the return value of asyncio.create_task
"S102", # Use of exec detected
"S103", # bad-file-permissions
"S108", # hardcoded-temp-file
"S306", # suspicious-mktemp-usage
"S307", # suspicious-eval-usage
"S313", # suspicious-xmlc-element-tree-usage
"S314", # suspicious-xml-element-tree-usage
"S315", # suspicious-xml-expat-reader-usage
"S316", # suspicious-xml-expat-builder-usage
"S317", # suspicious-xml-sax-usage
"S318", # suspicious-xml-mini-dom-usage
"S319", # suspicious-xml-pull-dom-usage
"S320", # suspicious-xmle-tree-usage
"S601", # paramiko-call
"S602", # subprocess-popen-with-shell-equals-true
"S604", # call-with-shell-equals-true
"S608", # hardcoded-sql-expression
"S609", # unix-command-wildcard-injection
"SIM105", # Use contextlib.suppress({exception}) instead of try-except-pass
"SIM117", # Merge with-statements that use the same scope
"SIM118", # Use {key} in {dict} instead of {key} in {dict}.keys()
"SIM201", # Use {left} != {right} instead of not {left} == {right}
"SIM208", # Use {expr} instead of not (not {expr})
"SIM212", # Use {a} if {a} else {b} instead of {b} if not {a} else {a}
"SIM300", # Yoda conditions. Use 'age == 42' instead of '42 == age'.
"SIM401", # Use get from dict with default instead of an if block
"T100", # Trace found: {name} used
"T20", # flake8-print
"TID251", # Banned imports
"TRY004", # Prefer TypeError exception for invalid type
"TRY200", # Use raise from to specify exception cause
"TRY302", # Remove exception handler; error is immediately re-raised
"UP", # pyupgrade
"W", # pycodestyle
]

ignore = [
"E501", # line too long
"E731", # do not assign a lambda expression, use a def

# Ignore ignored, as the rule is now back in preview/nursery, which cannot
# be ignored anymore without warnings.
# https://github.com/astral-sh/ruff/issues/7491
# "PLC1901", # Lots of false positives

# False positives https://github.com/astral-sh/ruff/issues/5386
"PLC0208", # Use a sequence type instead of a `set` when iterating over values
"PLR0911", # Too many return statements ({returns} > {max_returns})
"PLR0912", # Too many branches ({branches} > {max_branches})
"PLR0913", # Too many arguments to function call ({c_args} > {max_args})
"PLR0915", # Too many statements ({statements} > {max_statements})
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"PLW2901", # Outer {outer_kind} variable {name} overwritten by inner {inner_kind} target
"UP006", # keep type annotation style as is
"UP007", # keep type annotation style as is
# Ignored due to performance: https://github.com/charliermarsh/ruff/issues/2923
"UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)`

]

[tool.ruff.per-file-ignores]
"__init__.py" = ["E402"]
Expand Down
28 changes: 13 additions & 15 deletions src/airflow/dags/common_airflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,16 @@


# Shared DAG construction parameters.
shared_dag_args = dict(
owner="Open Targets Data Team",
retries=0,
)
shared_dag_kwargs = dict(
tags=["genetics_etl", "experimental"],
start_date=pendulum.now(tz="Europe/London").subtract(days=1),
schedule="@once",
catchup=False,
)
shared_dag_args = {
"owner": "Open Targets Data Team",
"retries": 0,
}
shared_dag_kwargs = {
"tags": ["genetics_etl", "experimental"],
"start_date": pendulum.now(tz="Europe/London").subtract(days=1),
"schedule": "@once",
"catchup": False,
}


def create_cluster(
Expand Down Expand Up @@ -110,7 +110,7 @@ def create_cluster(
if num_local_ssds:
for worker_section in ("worker_config", "secondary_worker_config"):
# Create a disk config section if it does not exist.
cluster_config[worker_section].setdefault("disk_config", dict())
cluster_config[worker_section].setdefault("disk_config", {})
# Specify the number of local SSDs.
cluster_config[worker_section]["disk_config"][
"num_local_ssds"
Expand Down Expand Up @@ -288,7 +288,7 @@ def read_yaml_config(config_path: Path) -> Any:
Any: Parsed YAML config file.
"""
assert config_path.exists(), f"YAML config path {config_path} does not exist."
with open(config_path, "r") as config_file:
with open(config_path) as config_file:
return yaml.safe_load(config_file)


Expand Down Expand Up @@ -349,8 +349,6 @@ def submit_pyspark_job_no_operator(
},
},
}
res = job_client.submit_job(
job_client.submit_job(
project_id=GCP_PROJECT, region=GCP_REGION, job=job_description
)
job_id = res.reference.job_id
print(f"Submitted job ID {job_id}.")
14 changes: 7 additions & 7 deletions src/airflow/dags/gwas_catalog_harmonisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,20 @@ def create_to_do_list(**kwargs: Any) -> Any:
raw_harmonised = ti.xcom_pull(
task_ids="list_raw_harmonised", key="return_value"
)
print("Number of raw harmonised files: ", len(raw_harmonised))
print("Number of raw harmonised files: ", len(raw_harmonised)) # noqa: T201
to_do_list = []
# Remove the ones that have been processed
parquets = ti.xcom_pull(task_ids="list_harmonised_parquet", key="return_value")
print("Number of parquet files: ", len(parquets))
print("Number of parquet files: ", len(parquets)) # noqa: T201
for path in raw_harmonised:
match_result = re.search(
"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", path
r"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", path
)
if match_result:
study_id = match_result.group(2)
if f"harmonised/{study_id}.parquet/_SUCCESS" not in parquets:
to_do_list.append(path)
print("Number of jobs to submit: ", len(to_do_list))
print("Number of jobs to submit: ", len(to_do_list)) # noqa: T201
ti.xcom_push(key="to_do_list", value=to_do_list)

# Submit jobs to dataproc
Expand All @@ -78,18 +78,18 @@ def submit_jobs(**kwargs: Any) -> None:
"""
ti = kwargs["ti"]
todo = ti.xcom_pull(task_ids="create_to_do_list", key="to_do_list")
print("Number of jobs to submit: ", len(todo))
print("Number of jobs to submit: ", len(todo)) # noqa: T201
for i in range(len(todo)):
# Not to exceed default quota 400 jobs per minute
if i > 0 and i % 399 == 0:
time.sleep(60)
input_path = todo[i]
match_result = re.search(
"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", input_path
r"raw-harmonised/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", input_path
)
if match_result:
study_id = match_result.group(2)
print("Submitting job for study: ", study_id)
print("Submitting job for study: ", study_id) # noqa: T201
common.submit_pyspark_job_no_operator(
cluster_name=CLUSTER_NAME,
step_id="gwas_catalog_sumstat_preprocess",
Expand Down
2 changes: 1 addition & 1 deletion src/otg/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def main(cfg: DictConfig) -> None:
Args:
cfg (DictConfig): hydra configuration object
"""
print(OmegaConf.to_yaml(cfg))
print(OmegaConf.to_yaml(cfg)) # noqa: T201
# Initialise and run step
instantiate(cfg.step)

Expand Down
Loading