Merge pull request #9 from smart-on-fhir/mikix/cleanup

Switch to ruff and update the README with more API examples
smart-on-fhir · Dec 13, 2024 · 76bfc63
2 parents 34aa45e + 247b315
commit 76bfc63
Show file tree

Hide file tree

Showing 6 changed files with 145 additions and 47 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -37,11 +37,11 @@ jobs:
 
       - name: Check coverage report
         if: github.ref != 'refs/heads/main'
-        uses: orgoro/coverage@v3.1
+        uses: orgoro/coverage@v3.2
         with:
           coverageFile: coverage.xml
           token: ${{ secrets.GITHUB_TOKEN }}
-          thresholdAll: .99
+          thresholdAll: 1
           thresholdNew: 1
           thresholdModified: 1
 
@@ -51,29 +51,9 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Install linters
-        # black is synced with the .pre-commit-hooks version
         run: |
           python -m pip install --upgrade pip
-          python -m pip install .[dev] bandit[toml] pycodestyle pylint
+          python -m pip install .[dev]
 
-      - name: Run pycodestyle
-        # E203: pycodestyle is a little too rigid about slices & whitespace
-        #  See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
-        # W503: a default ignore that we are restoring
-        run: |
-          pycodestyle --max-line-length=100 --ignore=E203,W503 .
-
-      - name: Run pylint
-        if: success() || failure() # still run pylint if above checks fail
-        run: |
-          pylint cumulus_fhir_support tests
-
-      - name: Run bandit
-        if: success() || failure() # still run bandit if above checks fail
-        run: |
-          bandit -c pyproject.toml -r .
-
-      - name: Run black
-        if: success() || failure() # still run black if above checks fails
-        run: |
-          black --check --verbose .
+      - name: Run ruff
+        run: ruff check --output-format=github .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,10 @@
 repos:
-  - repo: https://github.com/psf/black
-    #this version is synced with the black mentioned in .github/workflows/ci.yml
-    rev: 24.4.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.8.3  # keep in rough sync with pyproject.toml
     hooks:
-      - id: black
-        entry: bash -c 'black "$@"; git add -u' --
+      - name: Ruff formatting
+        id: ruff-format
+        entry: bash -c 'ruff format --force-exclude "$@"; git add -u' --
+      - name: Ruff linting
+        id: ruff
+        stages: [pre-push]
diff --git a/README.md b/README.md
@@ -8,10 +8,106 @@ This library holds FHIR support code for the Cumulus project as a whole.
 pip install cumulus-fhir-support
 ```
 
-## Examples
+## API
+
+### list_multiline_json_in_dir
+
+Lists available multiline JSON files in the target directory
+(allowing filtering by FHIR resource).
+
+```python3
+import cumulus_fhir_support
+
+cumulus_fhir_support.list_multiline_json_in_dir("/")
+# {
+#     "/random.jsonl": None,
+#     "/con1.ndjson": "Condition",
+#     "/pat1.jsonl": "Patient",
+# }
+
+cumulus_fhir_support.list_multiline_json_in_dir("/", "Patient")
+# {
+#     "/pat1.jsonl": "Patient",
+# }
+
+cumulus_fhir_support.list_multiline_json_in_dir("/", ["Condition", "Patient"])
+# {
+#     "/con1.ndjson": "Condition",
+#     "/pat1.jsonl": "Patient",
+# }
+
+cumulus_fhir_support.list_multiline_json_in_dir("/does-not-exist/")
+# {}
+
+cumulus_fhir_support.list_multiline_json_in_dir("s3://mybucket/", fsspec_fs=s3_fs)
+# {
+#     "/mybucket/procs.ndjson": "Procedure",
+# }
+```
+
+### read_multiline_json
+
+Iterates over a single multiline JSON file.
+
+```python3
+import cumulus_fhir_support
+
+list(cumulus_fhir_support.read_multiline_json("/pat1.jsonl"))
+# [
+#     {"resourceType": "Patient", "id": "pat1", "birthDate": "2020-10-16"},
+#     {"resourceType": "Patient", "id": "pat2", "birthDate": "2013-04-18"},
+# ]
+
+list(cumulus_fhir_support.read_multiline_json("/does-not-exist.ndjson"))
+# []
+
+list(cumulus_fhir_support.read_multiline_json("/mybucket/procs.ndjson", fsspec_fs=s3_fs))
+# [
+#     {"resourceType": "Procedure", "id": "proc1", "status": "stopped"},
+# ]
+```
+
+### read_multiline_json_from_dir
+
+Iterates over every JSON object in a directory
+(allowing filtering by FHIR resource).
+
+```python3
+import cumulus_fhir_support
+
+list(cumulus_fhir_support.read_multiline_json_from_dir("/"))
+# [
+#     {"description": "not a fhir object"},
+#     {"resourceType": "Condition", "id": "con1", "onsetDateTime": "2011-11-24"},
+#     {"resourceType": "Patient", "id": "pat1", "birthDate": "2020-10-16"},
+#     {"resourceType": "Patient", "id": "pat2", "birthDate": "2013-04-18"},
+# ]
+
+list(cumulus_fhir_support.read_multiline_json_from_dir("/", "Condition"))
+# [
+#     {"resourceType": "Condition", "id": "con1", "onsetDateTime": "2011-11-24"},
+# ]
+
+list(cumulus_fhir_support.read_multiline_json_from_dir("/", ["Condition", "Patient"]))
+# [
+#     {"resourceType": "Condition", "id": "con1", "onsetDateTime": "2011-11-24"},
+#     {"resourceType": "Patient", "id": "pat1", "birthDate": "2020-10-16"},
+#     {"resourceType": "Patient", "id": "pat2", "birthDate": "2013-04-18"},
+# ]
+
+list(cumulus_fhir_support.read_multiline_json_from_dir("/does-not-exist/"))
+# []
+
+list(cumulus_fhir_support.read_multiline_json_from_dir("/mybucket/", fsspec_fs=s3_fs))
+# [
+#     {"resourceType": "Procedure", "id": "proc1", "status": "stopped"},
+# ]
+```
 
 ### pyarrow_schema_from_rows
 
+Calculates a schema that can cover a given collection of FHIR objects.
+
 ```python3
 import cumulus_fhir_support
 
@@ -27,9 +123,9 @@ rows = [
                     "code": "2135-2",
                     "display": "Hispanic or Latino",
                     "system": "urn:oid:2.16.840.1.113883.6.238",
-                }
+                },
             }],
-        }]
+        }],
     },
 ]
 

diff --git a/cumulus_fhir_support/json.py b/cumulus_fhir_support/json.py
@@ -41,7 +41,8 @@
 import logging
 import os
 import pathlib
-from typing import TYPE_CHECKING, Any, Iterable, Optional, Union
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 if TYPE_CHECKING:
     import fsspec  # pragma: no cover

diff --git a/cumulus_fhir_support/schemas.py b/cumulus_fhir_support/schemas.py
@@ -1,7 +1,8 @@
 """Detect FHIR resource schemas"""
 
 from collections import namedtuple
-from typing import Any, Iterable, Optional
+from collections.abc import Iterable
+from typing import Any, Optional
 
 import pyarrow
 from fhirclient.models import (
@@ -14,7 +15,6 @@
     fhirelementfactory,
 )
 
-
 FhirProperty = namedtuple(
     "FhirProperty", ["name", "json_name", "pytype", "is_list", "of_many", "required"]
 )
@@ -24,7 +24,9 @@
 LEVEL_INCLUSION = 1
 
 
-def pyarrow_schema_from_rows(resource_type: str, rows: Iterable[dict] = None) -> pyarrow.Schema:
+def pyarrow_schema_from_rows(
+    resource_type: str, rows: Optional[Iterable[dict]] = None
+) -> pyarrow.Schema:
     """
     Creates a PyArrow schema based off the named resource (like 'Observation') and row contents.
 
@@ -175,7 +177,7 @@ def _fhir_to_pyarrow_property(
     prop: FhirProperty,
     *,
     base_obj: Optional[fhirabstractbase.FHIRAbstractBase] = None,
-    batch_shape: dict = None,
+    batch_shape: Optional[dict] = None,
     level: int,
 ) -> Optional[pyarrow.Field]:
     """Converts a single FhirProperty to a PyArrow Field, or None if this field should be skipped"""

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ dependencies = [
 authors = [
   { name="Michael Terry", email="[email protected]" },
 ]
-description = "FHIR schema support code for the Cumulus project"
+description = "FHIR support code for the Cumulus project"
 readme = "README.md"
 license = { text="Apache License 2.0" }
 classifiers = [
@@ -33,19 +33,35 @@ include = [
     "*.md",
 ]
 
-[tool.bandit]
-exclude_dirs = ["tests"]
-
-[tool.black]
-line-length = 100
-
 [project.optional-dependencies]
 tests = [
     "ddt",
     "pytest",
     "pytest-cov",
 ]
 dev = [
-    "black >= 24, < 25",
     "pre-commit",
-]
+    # Ruff is using minor versions for breaking changes until their 1.0 release.
+    # See https://docs.astral.sh/ruff/versioning/
+    "ruff < 0.9",  # keep in rough sync with .pre-commit-config.yaml
+]
+
+[tool.ruff]
+line-length = 100
+
+[tool.ruff.lint]
+allowed-confusables = ["’"]  # allow proper apostrophes
+select = [
+    "A",  # prevent using names that shadow python builtins
+    "E",  # pycodestyle
+    "F",  # pyflakes
+    "I",  # isort
+    "PLE",  # pylint errors
+    "PLW",  # pylint warnings
+    "RUF",  # the ruff developers' own rules
+    "S",  # bandit security warnings
+    "UP",  # alert you when better syntax is available in your python version
+]
+
+[tool.ruff.lint.per-file-ignores]
+"**/__init__.py" = ["F401"]  # init files hold API, so not using imports is intentional