Skip to content
This repository has been archived by the owner on Oct 15, 2020. It is now read-only.

Commit

Permalink
Mypy #18
Browse files Browse the repository at this point in the history
  • Loading branch information
eric-czech committed Aug 25, 2020
1 parent f80601b commit 5a70bc6
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
16 changes: 15 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,21 @@ repos:
- id: black
language_version: python3
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
rev: 3.8.3
hooks:
- id: flake8
language_version: python3
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.782
hooks:
- id: mypy
args: ["--strict", "--show-error-codes"]
additional_dependencies:
- dask[dataframe,array]
- fsspec
- numpy
- scipy
- xarray
- zarr
- bgen_reader>=4.0.5
- git+https://github.com/pystatgen/sgkit
9 changes: 8 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ line_length = 88

[mypy-numpy.*]
ignore_missing_imports = True

[mypy-dask.*]
ignore_missing_imports = True
[mypy-setuptools.*]
ignore_missing_imports = True
[mypy-bgen_reader.*]
ignore_missing_imports = True
[mypy-sgkit.*]
ignore_missing_imports = True
[mypy-sgkit_bgen.tests.*]
disallow_untyped_defs = False
23 changes: 14 additions & 9 deletions sgkit_bgen/bgen_reader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""BGEN reader implementation (using bgen_reader)"""
from pathlib import Path
from typing import Any, Union
from typing import Any, Dict, Tuple, Union

import dask.array as da
import dask.dataframe as dd
import numpy as np
from bgen_reader._bgen_file import bgen_file
from bgen_reader._bgen_metafile import bgen_metafile
Expand All @@ -18,7 +19,7 @@
PathType = Union[str, Path]


def _to_dict(df, dtype=None):
def _to_dict(df: dd.DataFrame, dtype: Any = None) -> Dict[str, da.Array]:
return {
c: df[c].to_dask_array(lengths=True).astype(dtype[c] if dtype else df[c].dtype)
for c in df
Expand All @@ -42,7 +43,9 @@ class BgenReader:

name = "bgen_reader"

def __init__(self, path, persist=True, dtype=np.float32):
def __init__(
self, path: PathType, persist: bool = True, dtype: Any = np.float32
) -> None:
self.path = Path(path)

self.metafile_filepath = infer_metafile_filepath(Path(self.path))
Expand All @@ -63,11 +66,13 @@ def __init__(self, path, persist=True, dtype=np.float32):
self.contig = variant_arrs["chrom"]
self.pos = variant_arrs["pos"]

def split_alleles(alleles, block_info=None):
def split_alleles(
alleles: np.ndarray, block_info: Any = None
) -> np.ndarray:
if block_info is None or len(block_info) == 0:
return alleles

def split(allele_row: np.ndarray) -> np.ndarray:
    """Split a single-element allele row into its two allele strings.

    The row's only element is a comma-delimited allele string
    (e.g. ``"A,T"``); the result is a length-2 string array.
    """
    # The row carries exactly one comma-separated field.
    parts = allele_row[0].split(",")
    assert len(parts) == 2  # bi-allelic
    return np.array(parts)
Expand Down Expand Up @@ -98,7 +103,7 @@ def max_str_len(arr: ArrayLike) -> Any:
self.dtype = dtype
self.ndim = 2

def __getitem__(self, idx):
def __getitem__(self, idx: Any) -> np.ndarray:
if not isinstance(idx, tuple):
raise IndexError( # pragma: no cover
f"Indexer must be tuple (received {type(idx)})"
Expand Down Expand Up @@ -140,15 +145,15 @@ def __getitem__(self, idx):
return res


def _to_dosage(probs: ArrayLike) -> ArrayLike:
    """Calculate the dosage from genotype likelihoods (probabilities)"""
    # Expect a 2-D (variants/samples x genotypes) array with the three
    # bi-allelic genotype probabilities per row.
    assert len(probs.shape) == 2 and probs.shape[1] == 3
    het = probs[:, 1]
    hom_alt = probs[:, -1]
    # Dosage = expected count of the alternate allele.
    return het + hom_alt * 2


def read_bgen(
path: PathType,
chunks: Union[str, int, tuple] = "auto",
chunks: Union[str, int, Tuple[int, ...]] = "auto",
lock: bool = False,
persist: bool = True,
) -> Dataset:
Expand Down Expand Up @@ -198,7 +203,7 @@ def read_bgen(
name=f"{bgen_reader.name}:read_bgen:{path}",
)

ds = create_genotype_dosage_dataset(
ds: Dataset = create_genotype_dosage_dataset(
variant_contig_names=variant_contig_names,
variant_contig=variant_contig,
variant_position=variant_position,
Expand Down

0 comments on commit 5a70bc6

Please sign in to comment.