 import importlib
 import itertools
 import os
-import typing
 import warnings
 from copy import deepcopy
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import orjson
 from pymatgen.electronic_structure.core import Spin
 
 if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from typing import Any, ClassVar, TextIO
+
     from numpy.typing import ArrayLike, NDArray
-    from typing_extensions import Any, Self
+    from typing_extensions import Self
 
     from pymatgen.core.structure import IStructure
+    from pymatgen.util.typing import PathLike
 
 
 class VolumetricData(MSONable):
@@ -62,7 +65,7 @@ class VolumetricData(MSONable):
     def __init__(
         self,
         structure: Structure | IStructure,
-        data: dict[str, np.ndarray],
+        data: dict[str, NDArray],
        distance_matrix: dict | None = None,
        data_aug: dict[str, NDArray] | None = None,
    ) -> None:
@@ -81,15 +84,15 @@ def __init__(
8184 (typically augmentation charges)
8285 """
8386 self .structure = structure
84- self .is_spin_polarized = len (data ) >= 2
85- self .is_soc = len (data ) >= 4
87+ self .is_spin_polarized : bool = len (data ) >= 2
88+ self .is_soc : bool = len (data ) >= 4
8689 # convert data to numpy arrays in case they were jsanitized as lists
87- self .data = {k : np .array (v ) for k , v in data .items ()}
90+ self .data : dict [ str , NDArray ] = {k : np .asarray (v ) for k , v in data .items ()}
8891 self .dim = self .data ["total" ].shape
8992 self .data_aug = data_aug or {}
9093 self .ngridpts = self .dim [0 ] * self .dim [1 ] * self .dim [2 ]
9194 # lazy init the spin data since this is not always needed.
92- self ._spin_data : dict [Spin , float ] = {}
95+ self ._spin_data : dict [Spin , NDArray ] = {}
9396 self ._distance_matrix = distance_matrix if distance_matrix is not None else {}
9497 self .xpoints = np .linspace (0.0 , 1.0 , num = self .dim [0 ])
9598 self .ypoints = np .linspace (0.0 , 1.0 , num = self .dim [1 ])
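
A minimal construction sketch for reference (the `structure` object, the `rho_up`/`rho_down` arrays, and the grid size are placeholders, not taken from this diff):

    import numpy as np

    rho_up = rho_down = np.zeros((48, 48, 48))  # hypothetical spin densities on a 48x48x48 grid
    vd = VolumetricData(
        structure,  # a pymatgen Structure for the same cell
        data={"total": rho_up + rho_down, "diff": rho_up - rho_down},
    )
    assert vd.is_spin_polarized and vd.dim == (48, 48, 48)
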
@@ -101,8 +104,23 @@ def __init__(
         )
         self.name = "VolumetricData"
 
+    def __add__(self, other) -> Self:
+        return self.linear_add(other, 1.0)
+
+    def __radd__(self, other) -> Self:
+        if other == 0 or other is None:
+            # sum() calls 0 + self first; we treat 0 as the identity element
+            return self
+        if isinstance(other, self.__class__):
+            return self.__add__(other)
+
+        raise TypeError(f"Unsupported operand type(s) for +: '{type(other).__name__}' and '{type(self).__name__}'")
+
+    def __sub__(self, other) -> Self:
+        return self.linear_add(other, -1.0)
+
     @property
-    def spin_data(self):
+    def spin_data(self) -> dict[Spin, NDArray]:
         """The data decomposed into actual spin data as {spin: data}.
         Essentially, this provides the actual Spin.up and Spin.down data
         instead of the total and diff. Note that by definition, a
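
A short sketch of how the moved operators compose with builtin `sum()` (file names are illustrative; `Chgcar` is the VolumetricData subclass in `pymatgen.io.vasp`):

    from pymatgen.io.vasp import Chgcar

    parts = [Chgcar.from_file(f) for f in ("AECCAR0", "AECCAR2")]
    total = sum(parts)          # __radd__ treats the starting 0 as the identity, __add__ handles the rest
    diff = parts[0] - parts[1]  # __sub__ delegates to linear_add(other, -1.0)
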
@@ -115,7 +133,7 @@ def spin_data(self):
             self._spin_data = spin_data
         return self._spin_data
 
-    def get_axis_grid(self, ind):
+    def get_axis_grid(self, ind: int) -> list[float]:
         """Get the grid for a particular axis.
 
         Args:
@@ -126,21 +144,6 @@ def get_axis_grid(self, ind):
         lengths = self.structure.lattice.abc
         return [i / num_pts * lengths[ind] for i in range(num_pts)]
 
-    def __add__(self, other):
-        return self.linear_add(other, 1.0)
-
-    def __radd__(self, other):
-        if other == 0 or other is None:
-            # sum() calls 0 + self first; we treat 0 as the identity element
-            return self
-        if isinstance(other, self.__class__):
-            return self.__add__(other)
-
-        raise TypeError(f"Unsupported operand type(s) for +: '{type(other).__name__}' and '{type(self).__name__}'")
-
-    def __sub__(self, other):
-        return self.linear_add(other, -1.0)
-
     def copy(self) -> Self:
         """Make a copy of VolumetricData object."""
         return type(self)(
@@ -150,7 +153,7 @@ def copy(self) -> Self:
             data_aug=self.data_aug,
         )
 
-    def linear_add(self, other, scale_factor=1.0) -> VolumetricData:
+    def linear_add(self, other, scale_factor: float = 1.0) -> Self:
         """
         Method to do a linear sum of volumetric objects. Used by + and -
         operators as well. Returns a VolumetricData object containing the
@@ -181,12 +184,12 @@ def linear_add(self, other, scale_factor=1.0) -> VolumetricData:
             new.data_aug = {}
         return new
 
-    def scale(self, factor):
+    def scale(self, factor: float) -> None:
         """Scale the data in place by a factor."""
         for k in self.data:
             self.data[k] = np.multiply(self.data[k], factor)
 
-    def value_at(self, x, y, z):
+    def value_at(self, x: float, y: float, z: float) -> float:
         """Get a data value from self.data at a given point (x, y, z) in terms
         of fractional lattice parameters. Will be interpolated using a
         RegularGridInterpolator on self.data if (x, y, z) is not in the original
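
With the typed `scale_factor`, a charge-density difference is the explicit form of the `-` operator; a brief sketch with hypothetical variable names:

    # difference between a defect and a bulk calculation on the same FFT grid
    drho = chgcar_defect.linear_add(chgcar_bulk, scale_factor=-1.0)
    # equivalent: drho = chgcar_defect - chgcar_bulk; drho.scale(2.0) would then double it in place
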
@@ -227,7 +230,7 @@ def linear_slice(self, p1: ArrayLike, p2: ArrayLike, n=100):
         z_pts = np.linspace(p1[2], p2[2], num=n)
         return [self.value_at(x_pts[i], y_pts[i], z_pts[i]) for i in range(n)]
 
-    def get_integrated_diff(self, ind, radius, nbins=1):
+    def get_integrated_diff(self, ind: int, radius: float, nbins: int = 1) -> NDArray:
         """Get integrated difference of atom index ind up to radius. This can be
         an extremely computationally intensive process, depending on how many
         grid points are in the VolumetricData.
@@ -273,13 +276,13 @@ def get_integrated_diff(self, ind, radius, nbins=1):
         data_inds = np.rint(np.mod(list(data[inds, 0]), 1) * np.tile(a, (len(dists), 1))).astype(int)
         vals = [self.data["diff"][x, y, z] for x, y, z in data_inds]
 
-        hist, edges = np.histogram(dists, bins=nbins, range=[0, radius], weights=vals)
+        hist, edges = np.histogram(dists, bins=nbins, range=(0, radius), weights=vals)
         data = np.zeros((nbins, 2))
         data[:, 0] = edges[1:]
         data[:, 1] = [sum(hist[0 : i + 1]) / self.ngridpts for i in range(nbins)]
         return data
 
-    def get_average_along_axis(self, ind):
+    def get_average_along_axis(self, ind: int) -> NDArray:
         """Get the averaged total of the volumetric data a certain axis direction.
         For example, useful for visualizing Hartree Potentials from a LOCPOT
         file.
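
For orientation, a sketch of the now-annotated `NDArray` return value (the `chgcar` object and numbers are illustrative):

    prof = chgcar.get_integrated_diff(ind=0, radius=1.0, nbins=10)
    # prof[:, 0] holds the upper bin edges (radii), prof[:, 1] the cumulative integrated
    # "diff" (spin) density around site 0 out to each radius
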
@@ -300,7 +303,7 @@ def get_average_along_axis(self, ind):
         total = np.sum(np.sum(total_spin_dens, axis=0), 0)
         return total / ng[(ind + 1) % 3] / ng[(ind + 2) % 3]
 
-    def to_hdf5(self, filename):
+    def to_hdf5(self, filename: PathLike) -> None:
         """Write the VolumetricData to a HDF5 format, which is a highly optimized
         format for reading storing large data. The mapping of the VolumetricData
         to this file format is as follows:
@@ -318,7 +321,7 @@ def to_hdf5(self, filename):
         """
         import h5py
 
-        with h5py.File(filename, mode="w") as file:
+        with h5py.File(str(filename), mode="w") as file:
             ds = file.create_dataset("lattice", (3, 3), dtype="float")
             ds[...] = self.structure.lattice.matrix
             ds = file.create_dataset("Z", (len(self.structure.species),), dtype="i")
@@ -336,7 +339,7 @@ def to_hdf5(self, filename):
             file.attrs["structure_json"] = orjson.dumps(self.structure.as_dict()).decode()
 
     @classmethod
-    def from_hdf5(cls, filename: str, **kwargs) -> VolumetricData:
+    def from_hdf5(cls, filename: PathLike, **kwargs) -> Self:
         """
         Reads VolumetricData from HDF5 file.
 
@@ -348,15 +351,15 @@ def from_hdf5(cls, filename: str, **kwargs) -> VolumetricData:
         """
         import h5py
 
-        with h5py.File(filename, mode="r") as file:
-            data = {k: np.array(v) for k, v in file["vdata"].items()}
+        with h5py.File(str(filename), mode="r") as file:
+            data = {k: np.asarray(v) for k, v in file["vdata"].items()}
             data_aug = None
             if "vdata_aug" in file:
-                data_aug = {k: np.array(v) for k, v in file["vdata_aug"].items()}
+                data_aug = {k: np.asarray(v) for k, v in file["vdata_aug"].items()}
             structure = Structure.from_dict(orjson.loads(file.attrs["structure_json"]))
             return cls(structure, data=data, data_aug=data_aug, **kwargs)  # type:ignore[arg-type]
 
-    def to_cube(self, filename, comment: str = ""):
+    def to_cube(self, filename: PathLike, comment: str = "") -> None:
         """Write the total volumetric data to a cube file format, which consists of two comment lines,
         a header section defining the structure IN BOHR, and the data.
 
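
An illustrative HDF5 round-trip under the new PathLike-typed signatures (the file name is arbitrary):

    from pathlib import Path

    chgcar.to_hdf5(Path("CHGCAR.h5"))         # Path objects now type-check; str(filename) is handed to h5py
    restored = Chgcar.from_hdf5("CHGCAR.h5")  # the classmethod now returns Self, i.e. the calling subclass
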
@@ -365,31 +368,32 @@ def to_cube(self, filename, comment: str = ""):
             comment (str): If provided, this will be added to the second comment line
         """
         with zopen(filename, mode="wt", encoding="utf-8") as file:
-            file.write(f"# Cube file for {self.structure.formula} generated by Pymatgen\n")  # type:ignore[arg-type]
-            file.write(f"# {comment}\n")  # type:ignore[arg-type]  # type:ignore[arg-type]
-            file.write(f"\t {len(self.structure)} 0.000000 0.000000 0.000000\n")  # type:ignore[arg-type]
+            file = cast("TextIO", file)
+            file.write(f"# Cube file for {self.structure.formula} generated by Pymatgen\n")
+            file.write(f"# {comment}\n")
+            file.write(f"\t {len(self.structure)} 0.000000 0.000000 0.000000\n")
 
             for idx in range(3):
                 lattice_matrix = self.structure.lattice.matrix[idx] / self.dim[idx] * ang_to_bohr
                 file.write(
-                    f"\t {self.dim[idx]} {lattice_matrix[0]:.6f} {lattice_matrix[1]:.6f} {lattice_matrix[2]:.6f}\n"  # type:ignore[arg-type]
+                    f"\t {self.dim[idx]} {lattice_matrix[0]:.6f} {lattice_matrix[1]:.6f} {lattice_matrix[2]:.6f}\n"
                 )
 
             for site in self.structure:
                 file.write(
-                    f"\t {Element(site.species_string).Z} 0.000000 "  # type:ignore[arg-type]
-                    f"{ang_to_bohr * site.coords[0]} "  # type:ignore[arg-type]
-                    f"{ang_to_bohr * site.coords[1]} "  # type:ignore[arg-type]
-                    f"{ang_to_bohr * site.coords[2]}\n"  # type:ignore[arg-type]
+                    f"\t {Element(site.species_string).Z} 0.000000 "
+                    f"{ang_to_bohr * site.coords[0]} "
+                    f"{ang_to_bohr * site.coords[1]} "
+                    f"{ang_to_bohr * site.coords[2]}\n"
                 )
 
             for idx, dat in enumerate(self.data["total"].flatten(), start=1):
-                file.write(f"{' ' if dat > 0 else ''}{dat:.6e} ")  # type:ignore[arg-type]
+                file.write(f"{' ' if dat > 0 else ''}{dat:.6e} ")
                 if idx % 6 == 0:
-                    file.write("\n")  # type:ignore[arg-type]
+                    file.write("\n")
 
     @classmethod
-    def from_cube(cls, filename: str | Path) -> Self:
+    def from_cube(cls, filename: PathLike) -> Self:
         """
         Initialize the cube object and store the data as pymatgen objects.
 
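
The `cast("TextIO", file)` only narrows zopen's return type for the type checker and replaces the per-line type:ignore comments; a cube round-trip then looks like this (file name illustrative):

    vd.to_cube("rho.cube", comment="total charge density")
    vd2 = VolumetricData.from_cube("rho.cube")  # accepts str or Path via PathLike
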
@@ -459,9 +463,9 @@ class PMGDir(collections.abc.Mapping):
     ```
     """
 
-    FILE_MAPPINGS: typing.ClassVar = {
-        n: f"pymatgen.io.vasp.{n.capitalize()}"
-        for n in [
+    FILE_MAPPINGS: ClassVar[dict[str, str]] = {
+        name: f"pymatgen.io.vasp.{name.capitalize()}"
+        for name in (
             "INCAR",
             "POSCAR",
             "KPOINTS",
@@ -478,41 +482,31 @@ class PMGDir(collections.abc.Mapping):
             "PROCAR",
             "ELFCAR",
             "DYNMAT",
-        ]
+        )
     } | {
         "CONTCAR": "pymatgen.io.vasp.Poscar",
         "IBZKPT": "pymatgen.io.vasp.Kpoints",
         "WSWQ": "pymatgen.io.vasp.WSWQ",
     }
 
-    def __init__(self, dirname: str | Path):
+    def __init__(self, dirname: PathLike) -> None:
         """
         Args:
             dirname: The directory containing the VASP calculation as a string or Path.
         """
         self.path = Path(dirname).absolute()
         self.reset()
 
-    def reset(self):
-        """
-        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory has
-        changed.
-        """
-        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
-        self._files: dict[str, Any] = {
-            str((Path(d) / f).relative_to(self.path)): None for d, _, fnames in os.walk(self.path) for f in fnames
-        }
-
-    def __contains__(self, item):
+    def __contains__(self, item) -> bool:
         return item in self._files
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self._files)
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str]:
         return iter(self._files)
 
-    def __getitem__(self, item):
+    def __getitem__(self, item: str) -> Any:
         if self._files.get(item):
             return self._files.get(item)
         fpath = self.path / item
@@ -539,6 +533,19 @@ def __getitem__(self, item):
         with zopen(fpath, mode="rt", encoding="utf-8") as f:
             return f.read()
 
+    def __repr__(self) -> str:
+        return f"PMGDir({self.path})"
+
+    def reset(self) -> None:
+        """
+        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory has
+        changed.
+        """
+        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
+        self._files: dict[str, Any] = {
+            str((Path(d) / f).relative_to(self.path)): None for d, _, fnames in os.walk(self.path) for f in fnames
+        }
+
     def get_files_by_name(self, name: str) -> dict[str, Any]:
         """
         Returns all files with a given name. E.g., if you want all the OUTCAR files, set name="OUTCAR".
@@ -547,6 +554,3 @@ def get_files_by_name(self, name: str) -> dict[str, Any]:
             {filename: object from PMGDir[filename]}
         """
         return {f: self[f] for f in self._files if name in f}
-
-    def __repr__(self):
-        return f"PMGDir({self.path})"
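
A small sketch of the Mapping interface after the method reordering (directory path and contents hypothetical):

    calc = PMGDir("/path/to/vasp_calc")
    outcars = calc.get_files_by_name("OUTCAR")  # e.g. {"OUTCAR": <Outcar>, "relax2/OUTCAR": <Outcar>}
    print(repr(calc))                           # -> PMGDir(/path/to/vasp_calc)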