From 9120c4db9fdbaebc523ead259728e678bb802c4a Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 16 Jan 2025 08:55:44 +0100 Subject: [PATCH] (chore): Update to Ruff 0.9 and add EM lints (#1825) --- .pre-commit-config.yaml | 4 +- benchmarks/benchmarks/utils.py | 5 +- ci/scripts/min-deps.py | 4 +- pyproject.toml | 3 +- src/anndata/__init__.py | 5 +- src/anndata/_core/aligned_df.py | 3 +- src/anndata/_core/aligned_mapping.py | 5 +- src/anndata/_core/anndata.py | 117 ++++++++++-------- src/anndata/_core/file_backing.py | 9 +- src/anndata/_core/index.py | 31 +++-- src/anndata/_core/merge.py | 48 +++---- src/anndata/_core/raw.py | 6 +- src/anndata/_core/sparse_dataset.py | 51 ++++---- src/anndata/_core/views.py | 6 +- src/anndata/_io/h5ad.py | 21 ++-- src/anndata/_io/read.py | 15 ++- src/anndata/_io/specs/lazy_methods.py | 6 +- src/anndata/_io/specs/methods.py | 21 ++-- src/anndata/_io/specs/registry.py | 14 +-- src/anndata/_io/utils.py | 14 ++- src/anndata/_io/write.py | 3 +- src/anndata/_settings.py | 4 +- src/anndata/compat/__init__.py | 3 +- src/anndata/experimental/merge.py | 44 ++++--- .../multi_files/_anncollection.py | 12 +- .../experimental/pytorch/_annloader.py | 3 +- src/anndata/io.py | 6 +- src/anndata/logging.py | 3 +- src/anndata/tests/helpers.py | 27 ++-- src/anndata/utils.py | 23 ++-- tests/test_concatenate.py | 8 +- tests/test_concatenate_disk.py | 3 +- tests/test_helpers.py | 3 +- tests/test_layers.py | 3 +- tests/test_readwrite.py | 16 ++- tests/test_settings.py | 3 +- 36 files changed, 311 insertions(+), 241 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44159cc2d..3b547f702 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,10 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.1 hooks: - id: ruff - types_or: [python, pyi, jupyter] args: ["--fix"] - id: ruff-format - types_or: [python, pyi, jupyter] - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.8 hooks: diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py index 05438811d..398dd4177 100644 --- a/benchmarks/benchmarks/utils.py +++ b/benchmarks/benchmarks/utils.py @@ -63,9 +63,8 @@ def gen_indexer(adata, dim, index_kind, ratio): index_kinds = {"slice", "intarray", "boolarray", "strarray"} if index_kind not in index_kinds: - raise ValueError( - f"Argument 'index_kind' must be one of {index_kinds}. Was {index_kind}." - ) + msg = f"Argument 'index_kind' must be one of {index_kinds}. Was {index_kind}." + raise ValueError(msg) axis = dimnames.index(dim) subset = [slice(None), slice(None)] diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py index 0d49d151e..4efc304cb 100755 --- a/ci/scripts/min-deps.py +++ b/ci/scripts/min-deps.py @@ -71,7 +71,9 @@ def extract_min_deps( # If we are referring to other optional dependency lists, resolve them if req.name == project_name: - assert req.extras, f"Project included itself as dependency, without specifying extras: {req}" + assert req.extras, ( + f"Project included itself as dependency, without specifying extras: {req}" + ) for extra in req.extras: extra_deps = pyproject["project"]["optional-dependencies"][extra] dependencies += map(Requirement, extra_deps) diff --git a/pyproject.toml b/pyproject.toml index 41ee6fac9..04a4387a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,13 +172,14 @@ docstring-code-format = true [tool.ruff.lint] select = [ "E", # Error detected by Pycodestyle + "EM", # Traceback-friendly error messages "F", # Errors detected by Pyflakes "FBT", # Boolean positional arguments "W", # Warning detected by Pycodestyle "PLW", # Pylint "UP", # pyupgrade "I", # isort - "TCH", # manage type checking blocks + "TC", # manage type checking blocks "TID", # Banned imports "ICN", # Follow import conventions "PTH", # Pathlib instead of os.path diff --git a/src/anndata/__init__.py b/src/anndata/__init__.py index fec027c87..41b3a4d0b 100644 --- a/src/anndata/__init__.py +++ b/src/anndata/__init__.py @@ -15,9 +15,8 @@ try: from ._version import __version__ except ModuleNotFoundError: - raise RuntimeError( - "anndata is not correctly installed. Please install it, e.g. with pip." - ) + msg = "anndata is not correctly installed. Please install it, e.g. with pip." + raise RuntimeError(msg) # Allowing notes to be added to exceptions. See: https://github.com/scverse/anndata/issues/868 import sys diff --git a/src/anndata/_core/aligned_df.py b/src/anndata/_core/aligned_df.py index 321264886..8f3de7b37 100644 --- a/src/anndata/_core/aligned_df.py +++ b/src/anndata/_core/aligned_df.py @@ -82,7 +82,8 @@ def _gen_dataframe_1d( attr: Literal["obs", "var"], length: int | None = None, ): - raise ValueError(f"Cannot convert {type(anno)} to {attr} DataFrame") + msg = f"Cannot convert {type(anno)} to {attr} DataFrame" + raise ValueError(msg) def _mk_df_error( diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 9df5ac977..88d5dde0d 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -175,9 +175,8 @@ def __setitem__(self, key: str, value: Value) -> None: def __delitem__(self, key: str) -> None: if key not in self: - raise KeyError( - "'{key!r}' not found in view of {self.attrname}" - ) # Make sure it exists before bothering with a copy + msg = f"{key!r} not found in view of {self.attrname}" + raise KeyError(msg) # Make sure it exists before bothering with a copy warnings.warn( f"Removing element `.{self.attrname}['{key}']` of view, " "initializing view as actual.", diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 5651940ab..db22e4a60 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -81,9 +81,8 @@ def _check_2d_shape(X): Assure that X is always 2D: Unlike numpy we always deal with 2D arrays. """ if X.dtype.names is None and len(X.shape) != 2: - raise ValueError( - f"X needs to be 2-dimensional, not {len(X.shape)}-dimensional." - ) + msg = f"X needs to be 2-dimensional, not {len(X.shape)}-dimensional." + raise ValueError(msg) class AnnData(metaclass=utils.DeprecationMixinMeta): @@ -257,7 +256,8 @@ def __init__( raise_value_error_if_multiindex_columns(attr, key) if asview: if not isinstance(X, AnnData): - raise ValueError("`X` has to be an AnnData object.") + msg = "`X` has to be an AnnData object." + raise ValueError(msg) self._init_as_view(X, oidx, vidx) else: self._init_as_actual( @@ -279,19 +279,22 @@ def __init__( def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index): if adata_ref.isbacked and adata_ref.is_view: - raise ValueError( + msg = ( "Currently, you cannot index repeatedly into a backed AnnData, " "that is, you cannot make a view of a view." ) + raise ValueError(msg) self._is_view = True if isinstance(oidx, int | np.integer): if not (-adata_ref.n_obs <= oidx < adata_ref.n_obs): - raise IndexError(f"Observation index `{oidx}` is out of range.") + msg = f"Observation index `{oidx}` is out of range." + raise IndexError(msg) oidx += adata_ref.n_obs * (oidx < 0) oidx = slice(oidx, oidx + 1, 1) if isinstance(vidx, int | np.integer): if not (-adata_ref.n_vars <= vidx < adata_ref.n_vars): - raise IndexError(f"Variable index `{vidx}` is out of range.") + msg = f"Variable index `{vidx}` is out of range." + raise IndexError(msg) vidx += adata_ref.n_vars * (vidx < 0) vidx = slice(vidx, vidx + 1, 1) if adata_ref.is_view: @@ -368,9 +371,8 @@ def _init_as_actual( # init from AnnData if isinstance(X, AnnData): if any((obs, var, uns, obsm, varm, obsp, varp)): - raise ValueError( - "If `X` is a dict no further arguments must be provided." - ) + msg = "If `X` is a dict no further arguments must be provided." + raise ValueError(msg) X, obs, var, uns, obsm, varm, obsp, varp, layers, raw = ( X._X, X.obs, @@ -405,7 +407,8 @@ def _init_as_actual( if X is not None: X = coerce_array(X, name="X") if shape is not None: - raise ValueError("`shape` needs to be `None` if `X` is not `None`.") + msg = "`shape` needs to be `None` if `X` is not `None`." + raise ValueError(msg) _check_2d_shape(X) # if type doesn’t match, a copy is made, otherwise, use a view if dtype is not None: @@ -445,7 +448,8 @@ def _init_as_actual( if isinstance(attr.index, pd.RangeIndex): attr.index = idx elif not idx.equals(attr.index): - raise ValueError(f"Index of {attr_name} must match {x_name} of X.") + msg = f"Index of {attr_name} must match {x_name} of X." + raise ValueError(msg) # unstructured annotations self.uns = uns or OrderedDict() @@ -463,9 +467,9 @@ def _init_as_actual( self._check_uniqueness() if self.filename: - assert not isinstance( - raw, Raw - ), "got raw from other adata but also filename?" + assert not isinstance(raw, Raw), ( + "got raw from other adata but also filename?" + ) if {"raw", "raw.X"} & set(self.file): raw = dict(X=None, **raw) if not raw: @@ -545,10 +549,11 @@ def __repr__(self) -> str: def __eq__(self, other): """Equality testing""" - raise NotImplementedError( + msg = ( "Equality comparisons are not supported for AnnData objects, " "instead compare the desired attributes." ) + raise NotImplementedError(msg) @property def shape(self) -> tuple[int, int]: @@ -590,9 +595,8 @@ def X(self) -> ArrayDataStructureType | None: def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): if value is None: if self.isbacked: - raise NotImplementedError( - "Cannot currently remove data matrix from backed object." - ) + msg = "Cannot currently remove data matrix from backed object." + raise NotImplementedError(msg) if self.is_view: self._init_as_actual(self.copy()) self._X = None @@ -665,10 +669,8 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): else: self._X = value else: - raise ValueError( - f"Data matrix has wrong shape {value.shape}, " - f"need to be {self.shape}." - ) + msg = f"Data matrix has wrong shape {value.shape}, need to be {self.shape}." + raise ValueError(msg) @X.deleter def X(self): @@ -735,7 +737,8 @@ def raw(self, value: AnnData): if value is None: del self.raw elif not isinstance(value, AnnData): - raise ValueError("Can only init raw attribute with an AnnData object.") + msg = "Can only init raw attribute with an AnnData object." + raise ValueError(msg) else: if self.is_view: self._init_as_actual(self.copy()) @@ -759,7 +762,8 @@ def n_vars(self) -> int: def _set_dim_df(self, value: pd.DataFrame, attr: Literal["obs", "var"]): if not isinstance(value, pd.DataFrame): - raise ValueError(f"Can only assign pd.DataFrame to {attr}.") + msg = f"Can only assign pd.DataFrame to {attr}." + raise ValueError(msg) raise_value_error_if_multiindex_columns(value, attr) value_idx = self._prep_dim_index(value.index, attr) if self.is_view: @@ -775,14 +779,14 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index: If a pd.Index is passed, this will use a reference, otherwise a new index object is created. """ if self.shape[attr == "var"] != len(value): - raise ValueError( - f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}" - ) + msg = f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}" + raise ValueError(msg) if isinstance(value, pd.Index) and not isinstance(value.name, str | type(None)): - raise ValueError( + msg = ( f"AnnData expects .{attr}.index.name to be a string or None, " f"but you passed a name of type {type(value.name).__name__!r}" ) + raise ValueError(msg) else: value = pd.Index(value) if not isinstance(value.name, str | type(None)): @@ -870,9 +874,8 @@ def uns(self) -> MutableMapping: @uns.setter def uns(self, value: MutableMapping): if not isinstance(value, MutableMapping): - raise ValueError( - "Only mutable mapping types (e.g. dict) are allowed for `.uns`." - ) + msg = "Only mutable mapping types (e.g. dict) are allowed for `.uns`." + raise ValueError(msg) if isinstance(value, DictView): value = value.copy() if self.is_view: @@ -1079,7 +1082,8 @@ def rename_categories(self, key: str, categories: Sequence[Any]): New categories, the same number as the old categories. """ if isinstance(categories, Mapping): - raise ValueError("Only list-like `categories` is supported.") + msg = "Only list-like `categories` is supported." + raise ValueError(msg) if key in self.obs: old_categories = self.obs[key].cat.categories.tolist() self.obs[key] = self.obs[key].cat.rename_categories(categories) @@ -1087,7 +1091,8 @@ def rename_categories(self, key: str, categories: Sequence[Any]): old_categories = self.var[key].cat.categories.tolist() self.var[key] = self.var[key].cat.rename_categories(categories) else: - raise ValueError(f"{key} is neither in `.obs` nor in `.var`.") + msg = f"{key} is neither in `.obs` nor in `.var`." + raise ValueError(msg) # this is not a good solution # but depends on the scanpy conventions for storing the categorical key # as `groupby` in the `params` slot @@ -1152,11 +1157,12 @@ def strings_to_categoricals(self, df: pd.DataFrame | None = None): if not np.array_equal(c.categories, sorted_categories): c = c.reorder_categories(sorted_categories) if dont_modify: - raise RuntimeError( + msg = ( "Please call `.strings_to_categoricals()` on full " "AnnData, not on this view. You might encounter this" "error message while copying or writing to disk." ) + raise RuntimeError(msg) df[key] = c logger.info(f"... storing {key!r} as categorical") @@ -1185,7 +1191,8 @@ def _inplace_subset_obs(self, index: Index1D): # TODO: Update, possibly remove def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix): if self.is_view: - raise ValueError("Object is view and cannot be accessed with `[]`.") + msg = "Object is view and cannot be accessed with `[]`." + raise ValueError(msg) obs, var = self._normalize_indices(index) if not self.isbacked: self._X[obs, var] = val @@ -1211,10 +1218,11 @@ def transpose(self) -> AnnData: else: X = self.file["X"] if self.is_view: - raise ValueError( + msg = ( "You’re trying to transpose a view of an `AnnData`, " "which is currently not implemented. Call `.copy()` before transposing." ) + raise ValueError(msg) return AnnData( X=_safe_transpose(X) if X is not None else None, @@ -1254,7 +1262,8 @@ def to_df(self, layer: str | None = None) -> pd.DataFrame: if layer is not None: X = self.layers[layer] elif not self._has_X(): - raise ValueError("X is None, cannot convert to dataframe.") + msg = "X is None, cannot convert to dataframe." + raise ValueError(msg) else: X = self.X if issparse(X): @@ -1268,15 +1277,17 @@ def _get_X(self, *, use_raw: bool = False, layer: str | None = None): """ is_layer = layer is not None if use_raw and is_layer: - raise ValueError( + msg = ( "Cannot use expression from both layer and raw. You provided:" f"`use_raw={use_raw}` and `layer={layer}`" ) + raise ValueError(msg) if is_layer: return self.layers[layer] elif use_raw: if self.raw is None: - raise ValueError("This AnnData doesn’t have a value in `.raw`.") + msg = "This AnnData doesn’t have a value in `.raw`." + raise ValueError(msg) return self.raw.X else: return self.X @@ -1371,10 +1382,11 @@ def _mutated_copy(self, **kwargs): """Creating AnnData with attributes optionally specified via kwargs.""" if self.isbacked: if "X" not in kwargs or (self.raw is not None and "raw" not in kwargs): - raise NotImplementedError( + msg = ( "This function does not currently handle backed objects " "internally, this should be dealt with before." ) + raise NotImplementedError(msg) new = {} for key in ["obs", "var", "obsm", "varm", "obsp", "varp", "layers"]: @@ -1459,11 +1471,12 @@ def copy(self, filename: PathLike | None = None) -> AnnData: from ..io import read_h5ad, write_h5ad if filename is None: - raise ValueError( + msg = ( "To copy an AnnData object in backed mode, " "pass a filename: `.copy(filename='myfilename.h5ad')`. " "To load the object into memory, use `.to_memory()`." ) + raise ValueError(msg) mode = self.file._filemode write_h5ad(filename, self) return read_h5ad(filename, backed=mode) @@ -1699,7 +1712,8 @@ def concatenate( from .merge import concat, merge_dataframes, merge_outer, merge_same if self.isbacked: - raise ValueError("Currently, concatenate only works in memory mode.") + msg = "Currently, concatenate only works in memory mode." + raise ValueError(msg) if len(adatas) == 0: return self.copy() @@ -1773,9 +1787,8 @@ def _check_uniqueness(self): utils.warn_names_duplicates("var") def __contains__(self, key: Any): - raise AttributeError( - "AnnData has no attribute __contains__, don’t check `in adata`." - ) + msg = "AnnData has no attribute __contains__, don’t check `in adata`." + raise AttributeError(msg) def _check_dimensions(self, key=None): if key is None: @@ -1787,19 +1800,21 @@ def _check_dimensions(self, key=None): not all([axis_len(o, 0) == self.n_obs for o in self.obsm.values()]) and len(self.obsm.dim_names) != self.n_obs ): - raise ValueError( + msg = ( "Observations annot. `obsm` must have number of rows of `X`" f" ({self.n_obs}), but has {len(self.obsm)} rows." ) + raise ValueError(msg) if "varm" in key: if ( not all([axis_len(v, 0) == self.n_vars for v in self.varm.values()]) and len(self.varm.dim_names) != self.n_vars ): - raise ValueError( + msg = ( "Variables annot. `varm` must have number of columns of `X`" f" ({self.n_vars}), but has {len(self.varm)} rows." ) + raise ValueError(msg) def write_h5ad( self, @@ -1873,7 +1888,8 @@ def write_h5ad( from ..io import write_h5ad if filename is None and not self.isbacked: - raise ValueError("Provide a filename!") + msg = "Provide a filename!" + raise ValueError(msg) if filename is None: filename = self.filename @@ -1995,7 +2011,8 @@ def chunk_X( elif isinstance(select, np.ndarray | Sequence): choice = np.asarray(select) else: - raise ValueError("select should be int or array") + msg = "select should be int or array" + raise ValueError(msg) reverse = None if self.isbacked: diff --git a/src/anndata/_core/file_backing.py b/src/anndata/_core/file_backing.py index 5fce9a5d3..005a47b97 100644 --- a/src/anndata/_core/file_backing.py +++ b/src/anndata/_core/file_backing.py @@ -94,7 +94,8 @@ def open( if filemode is not None: self._filemode = filemode if self.filename is None: - raise ValueError("Cannot open backing file if backing not initialized.") + msg = "Cannot open backing file if backing not initialized." + raise ValueError(msg) self._file = h5py.File(self.filename, self._filemode) def close(self): @@ -163,7 +164,8 @@ def _(x: AwkArray, *, copy: bool = False): @singledispatch def filename(x): - raise NotImplementedError(f"Not implemented for {type(x)}") + msg = f"Not implemented for {type(x)}" + raise NotImplementedError(msg) @filename.register(h5py.Group) @@ -180,7 +182,8 @@ def _(x): @singledispatch def get_elem_name(x): - raise NotImplementedError(f"Not implemented for {type(x)}") + msg = f"Not implemented for {type(x)}" + raise NotImplementedError(msg) @get_elem_name.register(h5py.Group) diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index 53434186a..32f69f182 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -87,27 +87,31 @@ def name_idx(i): ): indexer_int = indexer.astype(int) if np.all((indexer - indexer_int) != 0): - raise IndexError(f"Indexer {indexer!r} has floating point values.") + msg = f"Indexer {indexer!r} has floating point values." + raise IndexError(msg) if issubclass(indexer.dtype.type, np.integer | np.floating): return indexer # Might not work for range indexes elif issubclass(indexer.dtype.type, np.bool_): if indexer.shape != index.shape: - raise IndexError( + msg = ( f"Boolean index does not match AnnData’s shape along this " f"dimension. Boolean index has shape {indexer.shape} while " f"AnnData index has shape {index.shape}." ) + raise IndexError(msg) return indexer else: # indexer should be string array positions = index.get_indexer(indexer) if np.any(positions < 0): not_found = indexer[positions < 0] - raise KeyError( + msg = ( f"Values {list(not_found)}, from {list(indexer)}, " "are not valid obs/ var names or indices." ) + raise KeyError(msg) return positions # np.ndarray[int] - raise IndexError(f"Unknown indexer {indexer!r} of type {type(indexer)}") + msg = f"Unknown indexer {indexer!r} of type {type(indexer)}" + raise IndexError(msg) def _fix_slice_bounds(s: slice, length: int) -> slice: @@ -136,11 +140,13 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]: return index, slice(None) num_ellipsis = sum(i is Ellipsis for i in index) if num_ellipsis > 1: - raise IndexError("an index can only have a single ellipsis ('...')") + msg = "an index can only have a single ellipsis ('...')" + raise IndexError(msg) # If index has Ellipsis, filter it out (and if not, error) if len(index) > 2: if not num_ellipsis: - raise IndexError("Received a length 3 index without an ellipsis") + msg = "Received a length 3 index without an ellipsis" + raise IndexError(msg) index = tuple(i for i in index if i is not Ellipsis) return index # If index has Ellipsis, replace it with slice @@ -152,7 +158,8 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]: if index is Ellipsis: index = slice(None) return index, slice(None) - raise IndexError("invalid number of indices") + msg = "invalid number of indices" + raise IndexError(msg) @singledispatch @@ -231,13 +238,11 @@ def get_vector(adata, k, coldim, idxdim, layer=None): in_idx = k in idx if (in_col + in_idx) == 2: - raise ValueError( - f"Key {k} could be found in both .{idxdim}_names and .{coldim}.columns" - ) + msg = f"Key {k} could be found in both .{idxdim}_names and .{coldim}.columns" + raise ValueError(msg) elif (in_col + in_idx) == 0: - raise KeyError( - f"Could not find key {k} in .{idxdim}_names or .{coldim}.columns." - ) + msg = f"Could not find key {k} in .{idxdim}_names or .{coldim}.columns." + raise KeyError(msg) elif in_col: return getattr(adata, coldim)[k].values elif in_idx: diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 9fda2f74a..21bdcc414 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -318,11 +318,12 @@ def check_combinable_cols(cols: list[pd.Index], join: Literal["inner", "outer"]) if len(problem_cols) > 0: problem_cols = list(problem_cols) - raise pd.errors.InvalidIndexError( + msg = ( f"Cannot combine dataframes as some contained duplicated column names - " "causing ambiguity.\n\n" f"The problem columns are: {problem_cols}" ) + raise pd.errors.InvalidIndexError(msg) # TODO: open PR or feature request to cupy @@ -699,7 +700,8 @@ def _apply_to_awkward(self, el: AwkArray, *, axis, fill_value=None): return el[self.new_idx] else: # outer join # TODO: this code isn't actually hit, we should refactor - raise Exception("This should be unreachable, please open an issue.") + msg = "This should be unreachable, please open an issue." + raise Exception(msg) else: if len(self.new_idx) > len(self.old_idx): el = ak.pad_none(el, 1, axis=axis) # axis == 0 @@ -775,9 +777,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): isinstance(a, pd.DataFrame) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate a dataframe with other array types." - ) + msg = "Cannot concatenate a dataframe with other array types." + raise NotImplementedError(msg) # TODO: behaviour here should be chosen through a merge strategy df = pd.concat( unify_dtypes(f(x) for f, x in zip(reindexers, arrays)), @@ -792,9 +793,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): if not all( isinstance(a, AwkArray) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." - ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) return ak.concatenate([f(a) for f, a in zip(reindexers, arrays)], axis=axis) elif any(isinstance(a, CupySparseMatrix) for a in arrays): @@ -803,9 +803,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): if not all( isinstance(a, CupySparseMatrix | CupyArray) or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate a cupy array with other array types." - ) + msg = "Cannot concatenate a cupy array with other array types." + raise NotImplementedError(msg) sparse_stack = (cpsparse.vstack, cpsparse.hstack)[axis] return sparse_stack( [ @@ -818,9 +817,8 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): import cupy as cp if not all(isinstance(a, CupyArray) or 0 in a.shape for a in arrays): - raise NotImplementedError( - "Cannot concatenate a cupy array with other array types." - ) + msg = "Cannot concatenate a cupy array with other array types." + raise NotImplementedError(msg) return cp.concatenate( [ f(cp.asarray(x), fill_value=fill_value, axis=1 - axis) @@ -880,9 +878,8 @@ def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0): reindexers = [Reindexer(df_indices(el), common_ind) for el in els] elif any(isinstance(el, AwkArray) for el in els if not_missing(el)): if not all(isinstance(el, AwkArray) for el in els if not_missing(el)): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." - ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) common_keys = intersect_keys(el.fields for el in els) reindexers = [ Reindexer(pd.Index(el.fields), pd.Index(list(common_keys))) for el in els @@ -908,9 +905,8 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): import awkward as ak if not all(isinstance(el, AwkArray) for el in els if not_missing(el)): - raise NotImplementedError( - "Cannot concatenate an AwkwardArray with other array types." - ) + msg = "Cannot concatenate an AwkwardArray with other array types." + raise NotImplementedError(msg) warn_once( "Outer joins on awkward.Arrays will have different return values in the future. " "For details, and to offer input, please see:\n\n\t" @@ -1048,7 +1044,8 @@ def _resolve_axis( return (0, "obs") if axis in {1, "var"}: return (1, "var") - raise ValueError(f"`axis` must be either 0, 1, 'obs', or 'var', was {axis}") + msg = f"`axis` must be either 0, 1, 'obs', or 'var', was {axis}" + raise ValueError(msg) def axis_indices(adata: AnnData, axis: Literal["obs", 0, "var", 1]) -> pd.Index: @@ -1071,11 +1068,12 @@ def concat_Xs(adatas, reindexers, axis, fill_value): if all(X is None for X in Xs): return None elif any(X is None for X in Xs): - raise NotImplementedError( + msg = ( "Some (but not all) of the AnnData's to be concatenated had no .X value. " "Concatenation is currently only implemented for cases where all or none of" " the AnnData's have .X assigned." ) + raise NotImplementedError(msg) else: return concat_arrays(Xs, reindexers, axis=axis, fill_value=fill_value) @@ -1287,10 +1285,11 @@ def concat( if isinstance(adatas, Mapping): if keys is not None: - raise TypeError( + msg = ( "Cannot specify categories in both mapping keys and using `keys`. " "Only specify this once." ) + raise TypeError(msg) keys, adatas = list(adatas.keys()), list(adatas.values()) else: adatas = list(adatas) @@ -1351,7 +1350,8 @@ def concat( ) join_keys = union_keys else: - raise AssertionError(f"{join=} should have been validated above by pd.concat") + msg = f"{join=} should have been validated above by pd.concat" + raise AssertionError(msg) layers = concat_aligned_mapping( [a.layers for a in adatas], axis=axis, reindexers=reindexers diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index d138440b5..f71c8d74d 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -58,7 +58,8 @@ def __init__( self._var = adata.var.copy() self.varm = adata.varm.copy() elif adata.isbacked: - raise ValueError("Cannot specify X if adata is backed") + msg = "Cannot specify X if adata is backed" + raise ValueError(msg) def _get_X(self, layer=None): if layer is not None: @@ -78,10 +79,11 @@ def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: elif "raw.X" in self._adata.file: X = self._adata.file["raw.X"] # Backwards compat else: - raise AttributeError( + msg = ( f"Could not find dataset for raw X in file: " f"{self._adata.file.filename}." ) + raise AttributeError(msg) if isinstance(X, h5py.Group): X = sparse_dataset(X) # Check if we need to subset diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index d59c8bafd..1b9eabb5d 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -101,9 +101,8 @@ def _set_many(self, i: Iterable[int], j: Iterable[int], x): return else: - raise ValueError( - "You cannot change the sparsity structure of a SparseDataset." - ) + msg = "You cannot change the sparsity structure of a SparseDataset." + raise ValueError(msg) # replace where possible # mask = offsets > -1 # # offsets[mask] @@ -332,7 +331,8 @@ def get_memory_class( return memory_class elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): return memory_class - raise ValueError(f"Format string {format} is not supported.") + msg = f"Format string {format} is not supported." + raise ValueError(msg) def get_backed_class( @@ -344,7 +344,8 @@ def get_backed_class( return backed_class elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): return backed_class - raise ValueError(f"Format string {format} is not supported.") + msg = f"Format string {format} is not supported." + raise ValueError(msg) def _get_group_format(group: GroupStorageType) -> str: @@ -390,9 +391,8 @@ def group(self) -> GroupStorageType: @group.setter def group(self, val): - raise AttributeError( - f"Do not reset group on a {type(self)} with {val}. Instead use `sparse_dataset` to make a new class." - ) + msg = f"Do not reset group on a {type(self)} with {val}. Instead use `sparse_dataset` to make a new class." + raise AttributeError(msg) @property def backend(self) -> Literal["zarr", "hdf5"]: @@ -402,7 +402,8 @@ def backend(self) -> Literal["zarr", "hdf5"]: elif isinstance(self.group, H5Group): return "hdf5" else: - raise ValueError(f"Unknown group type {type(self.group)}") + msg = f"Unknown group type {type(self.group)}" + raise ValueError(msg) @property def dtype(self) -> np.dtype: @@ -519,42 +520,45 @@ def append(self, sparse_matrix: ss.csr_matrix | ss.csc_matrix | SpArray) -> None # Check input if not ss.issparse(sparse_matrix): - raise NotImplementedError( + msg = ( "Currently, only sparse matrices of equivalent format can be " "appended to a SparseDataset." ) + raise NotImplementedError(msg) if self.format not in {"csr", "csc"}: - raise NotImplementedError( - f"The append method for format {self.format} " f"is not implemented." - ) + msg = f"The append method for format {self.format} is not implemented." + raise NotImplementedError(msg) if self.format != sparse_matrix.format: - raise ValueError( + msg = ( f"Matrices must have same format. Currently are " f"{self.format!r} and {sparse_matrix.format!r}" ) + raise ValueError(msg) indptr_offset = len(self.group["indices"]) if self.group["indptr"].dtype == np.int32: new_nnz = indptr_offset + len(sparse_matrix.indices) if new_nnz >= np.iinfo(np.int32).max: - raise OverflowError( + msg = ( "This array was written with a 32 bit intptr, but is now large " "enough to require 64 bit values. Please recreate the array with " "a 64 bit indptr." ) + raise OverflowError(msg) # shape if self.format == "csr": - assert ( - shape[1] == sparse_matrix.shape[1] - ), "CSR matrices must have same size of dimension 1 to be appended." + assert shape[1] == sparse_matrix.shape[1], ( + "CSR matrices must have same size of dimension 1 to be appended." + ) new_shape = (shape[0] + sparse_matrix.shape[0], shape[1]) elif self.format == "csc": - assert ( - shape[0] == sparse_matrix.shape[0] - ), "CSC matrices must have same size of dimension 0 to be appended." + assert shape[0] == sparse_matrix.shape[0], ( + "CSC matrices must have same size of dimension 0 to be appended." + ) new_shape = (shape[0], shape[1] + sparse_matrix.shape[1]) else: - raise AssertionError("We forgot to update this branching to a new format") + msg = "We forgot to update this branching to a new format" + raise AssertionError(msg) if "h5sparse_shape" in self.group.attrs: del self.group.attrs["h5sparse_shape"] self.group.attrs["shape"] = new_shape @@ -691,7 +695,8 @@ def sparse_dataset(group: GroupStorageType) -> abc.CSRDataset | abc.CSCDataset: return _CSRDataset(group) elif encoding_type == "csc": return _CSCDataset(group) - raise ValueError(f"Unknown encoding type {encoding_type}") + msg = f"Unknown encoding type {encoding_type}" + raise ValueError(msg) @_subset.register(BaseCompressedSparseDataset) diff --git a/src/anndata/_core/views.py b/src/anndata/_core/views.py index ca9af9164..ab658968b 100644 --- a/src/anndata/_core/views.py +++ b/src/anndata/_core/views.py @@ -290,7 +290,8 @@ def __setattr__(self, key: str, value: Any): @singledispatch def as_view(obj, view_args): - raise NotImplementedError(f"No view type has been registered for {type(obj)}") + msg = f"No view type has been registered for {type(obj)}" + raise NotImplementedError(msg) @as_view.register(np.ndarray) @@ -403,10 +404,11 @@ def as_view_awkarray(array, view_args): # A better solution might be based on xarray-style "attrs", once this is implemented # https://github.com/scikit-hep/awkward/issues/1391#issuecomment-1412297114 if type(array).__name__ != "Array": - raise NotImplementedError( + msg = ( "Cannot create a view of an awkward array with __array__ parameter. " "Please open an issue in the AnnData repo and describe your use-case." ) + raise NotImplementedError(msg) array = ak.with_parameter(array, _PARAM_NAME, (parent_key, attrname, keys)) array = ak.with_parameter(array, "__list__", "AwkwardArrayView") return array diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index edf4977cc..ff33dc2f3 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -57,11 +57,11 @@ def write_h5ad( as_dense = list(as_dense) as_dense[as_dense.index("raw.X")] = "raw/X" if any(val not in {"X", "raw/X"} for val in as_dense): - raise NotImplementedError( - "Currently, only `X` and `raw/X` are supported values in `as_dense`" - ) + msg = "Currently, only `X` and `raw/X` are supported values in `as_dense`" + raise NotImplementedError(msg) if "raw/X" in as_dense and adata.raw is None: - raise ValueError("Cannot specify writing `raw/X` to dense if it doesn’t exist.") + msg = "Cannot specify writing `raw/X` to dense if it doesn’t exist." + raise ValueError(msg) if convert_strings_to_categoricals: adata.strings_to_categoricals() @@ -214,9 +214,8 @@ def read_h5ad( return read_h5ad_backed(filename, mode) if as_sparse_fmt not in (sparse.csr_matrix, sparse.csc_matrix): - raise NotImplementedError( - "Dense formats can only be read to CSR or CSC matrices at this time." - ) + msg = "Dense formats can only be read to CSR or CSC matrices at this time." + raise NotImplementedError(msg) if isinstance(as_sparse, str): as_sparse = [as_sparse] else: @@ -225,9 +224,8 @@ def read_h5ad( if as_sparse[i] in {("raw", "X"), "raw.X"}: as_sparse[i] = "raw/X" elif as_sparse[i] not in {"raw/X", "X"}: - raise NotImplementedError( - "Currently only `X` and `raw/X` can be read as sparse." - ) + msg = "Currently only `X` and `raw/X` can be read as sparse." + raise NotImplementedError(msg) rdasp = partial( read_dense_as_sparse, sparse_format=as_sparse_fmt, axis_chunk=chunk_size @@ -355,7 +353,8 @@ def read_dense_as_sparse( elif sparse_format == sparse.csc_matrix: return read_dense_as_csc(dataset, axis_chunk) else: - raise ValueError(f"Cannot read dense array as type: {sparse_format}") + msg = f"Cannot read dense array as type: {sparse_format}" + raise ValueError(msg) def read_dense_as_csr(dataset: h5py.Dataset, axis_chunk: int = 6000): diff --git a/src/anndata/_io/read.py b/src/anndata/_io/read.py index f22cff351..a472b6879 100644 --- a/src/anndata/_io/read.py +++ b/src/anndata/_io/read.py @@ -114,10 +114,11 @@ def read_hdf(filename: PathLike, key: str) -> AnnData: # a view and not a list is returned keys = [k for k in f.keys()] if key == "": - raise ValueError( + msg = ( f"The file {filename} stores the following sheets:\n{keys}\n" f"Call read/read_hdf5 with one of them." ) + raise ValueError(msg) # read array X = f[key][()] # try to find row and column names @@ -219,10 +220,11 @@ def read_loom( FutureWarning, ) if obsm_mapping != {}: - raise ValueError( + msg = ( "Received values for both `obsm_names` and `obsm_mapping`. This is " "ambiguous, only pass `obsm_mapping`." ) + raise ValueError(msg) obsm_mapping = obsm_names if varm_names is not None: warn( @@ -231,10 +233,11 @@ def read_loom( FutureWarning, ) if varm_mapping != {}: - raise ValueError( + msg = ( "Received values for both `varm_names` and `varm_mapping`. This is " "ambiguous, only pass `varm_mapping`." ) + raise ValueError(msg) varm_mapping = varm_names filename = fspath(filename) # allow passing pathlib.Path objects @@ -379,7 +382,8 @@ def _read_text( comments.append(comment) else: if delimiter is not None and delimiter not in line: - raise ValueError(f"Did not find delimiter {delimiter!r} in first line.") + msg = f"Did not find delimiter {delimiter!r} in first line." + raise ValueError(msg) line_list = line.split(delimiter) # the first column might be row names, so check the last if not is_float(line_list[-1]): @@ -441,10 +445,11 @@ def _read_text( # in the end, to separate row_names from float data, slicing takes # a lot of memory and CPU time if data[0].size != data[-1].size: - raise ValueError( + msg = ( f"Length of first line ({data[0].size}) is different " f"from length of last line ({data[-1].size})." ) + raise ValueError(msg) data = np.array(data, dtype=dtype) # logg.msg(" constructed array from list of list", t=True, v=4) # transform row_names diff --git a/src/anndata/_io/specs/lazy_methods.py b/src/anndata/_io/specs/lazy_methods.py index 75e585c83..68b588ac7 100644 --- a/src/anndata/_io/specs/lazy_methods.py +++ b/src/anndata/_io/specs/lazy_methods.py @@ -116,12 +116,14 @@ def read_sparse_as_dask( major_dim, minor_dim = (1, 0) if is_csc else (0, 1) if chunks is not None: if len(chunks) != 2: - raise ValueError("`chunks` must be a tuple of two integers") + msg = "`chunks` must be a tuple of two integers" + raise ValueError(msg) if chunks[minor_dim] not in {shape[minor_dim], -1, None}: - raise ValueError( + msg = ( "Only the major axis can be chunked. " f"Try setting chunks to {((-1, _DEFAULT_STRIDE) if is_csc else (_DEFAULT_STRIDE, -1))}" ) + raise ValueError(msg) stride = ( chunks[major_dim] if chunks[major_dim] not in {None, -1} diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 181b525c8..6289257e9 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -433,9 +433,8 @@ def write_basic_dask_h5( import dask.config as dc if dc.get("scheduler", None) == "dask.distributed": - raise ValueError( - "Cannot write dask arrays to hdf5 when using distributed scheduler" - ) + msg = "Cannot write dask arrays to hdf5 when using distributed scheduler" + raise ValueError(msg) g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs) da.store(elem, g) @@ -725,9 +724,8 @@ def as_int64_indices(x): elif sparse_format == "csc": axis = 1 else: - raise NotImplementedError( - f"Cannot write dask sparse arrays with format {sparse_format}" - ) + msg = f"Cannot write dask sparse arrays with format {sparse_format}" + raise NotImplementedError(msg) def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]: result = [slice(None), slice(None)] @@ -838,13 +836,13 @@ def write_dataframe( # Check arguments for reserved in ("_index",): if reserved in df.columns: - raise ValueError(f"{reserved!r} is a reserved name for dataframe columns.") + msg = f"{reserved!r} is a reserved name for dataframe columns." + raise ValueError(msg) group = _require_group_write_dataframe(f, key, df) if not df.columns.is_unique: duplicates = list(df.columns[df.columns.duplicated()]) - raise ValueError( - f"Found repeated column names: {duplicates}. Column names must be unique." - ) + msg = f"Found repeated column names: {duplicates}. Column names must be unique." + raise ValueError(msg) col_names = [check_key(c) for c in df.columns] group.attrs["column-order"] = col_names @@ -852,11 +850,12 @@ def write_dataframe( if df.index.name in col_names and not pd.Series( df.index, index=df.index ).equals(df[df.index.name]): - raise ValueError( + msg = ( f"DataFrame.index.name ({df.index.name!r}) is also used by a column " "whose values are different. This is not supported. Please make sure " "the values are the same, or use a different name." ) + raise ValueError(msg) index_name = df.index.name else: index_name = "_index" diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 798b812d4..3c1efbe68 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -105,10 +105,11 @@ def register_write( if src_type in self.write_specs and (spec != self.write_specs[src_type]): # First check for consistency current_spec = self.write_specs[src_type] - raise TypeError( + msg = ( "Cannot overwrite IO specifications. Attempted to overwrite encoding " f"for {src_type} from {current_spec} to {spec}" ) + raise TypeError(msg) else: self.write_specs[src_type] = spec @@ -167,7 +168,7 @@ def get_read( reader: Reader, ) -> R: if (src_type, spec, modifiers) not in self.read: - raise IORegistryError._from_read_parts("read", self.read, src_type, spec) + raise IORegistryError._from_read_parts("read", self.read, src_type, spec) # noqa: EM101 internal = self.read[(src_type, spec, modifiers)] return partial(internal, _reader=reader) @@ -196,10 +197,8 @@ def get_partial_read( ): if (src_type, spec, modifiers) in self.read_partial: return self.read_partial[(src_type, spec, modifiers)] - else: - raise IORegistryError._from_read_parts( - "read_partial", self.read_partial, src_type, spec - ) + name = "read_partial" + raise IORegistryError._from_read_parts(name, self.read_partial, src_type, spec) def get_spec(self, elem: Any) -> IOSpec: if isinstance(elem, DaskArray): @@ -217,7 +216,8 @@ def get_spec(self, elem: Any) -> IOSpec: @singledispatch def proc_spec(spec) -> IOSpec: - raise NotImplementedError(f"proc_spec not defined for type: {type(spec)}.") + msg = f"proc_spec not defined for type: {type(spec)}." + raise NotImplementedError(msg) @proc_spec.register(IOSpec) diff --git a/src/anndata/_io/utils.py b/src/anndata/_io/utils.py index bf3399a08..2e393613c 100644 --- a/src/anndata/_io/utils.py +++ b/src/anndata/_io/utils.py @@ -120,7 +120,8 @@ def check_key(key): # elif issubclass(typ, bytes): # return key else: - raise TypeError(f"{key} of type {typ} is an invalid key. Should be str.") + msg = f"{key} of type {typ} is an invalid key. Should be str." + raise TypeError(msg) # ------------------------------------------------------------------------------- @@ -166,7 +167,7 @@ def _get_display_path(store: Storage) -> str: if isinstance(store, BaseCompressedSparseDataset): store = store.group path = store.name or "??" # can be None - return f'/{path.removeprefix("/")}' + return f"/{path.removeprefix('/')}" def add_key_note( @@ -208,7 +209,8 @@ def func_wrapper(*args, **kwargs): store = cast("Storage", arg) break else: - raise ValueError("No element found in args.") + msg = "No element found in args." + raise ValueError(msg) try: return func(*args, **kwargs) except Exception as e: @@ -244,7 +246,8 @@ def func_wrapper(*args, **kwargs): store = cast("Storage", arg) break else: - raise ValueError("No element found in args.") + msg = "No element found in args." + raise ValueError(msg) try: return func(*args, **kwargs) except Exception as e: @@ -275,7 +278,8 @@ def _read_legacy_raw( if modern_raw: if any(k.startswith("raw.") for k in f): what = f"File {f.filename}" if hasattr(f, "filename") else "Store" - raise ValueError(f"{what} has both legacy and current raw formats.") + msg = f"{what} has both legacy and current raw formats." + raise ValueError(msg) return modern_raw raw = {} diff --git a/src/anndata/_io/write.py b/src/anndata/_io/write.py index a2b5ed569..467249e03 100644 --- a/src/anndata/_io/write.py +++ b/src/anndata/_io/write.py @@ -90,7 +90,8 @@ def write_loom(filename: PathLike, adata: AnnData, *, write_obsm_varm: bool = Fa col_attrs[col_dim] = col_names.values if adata.X is None: - raise ValueError("loompy does not accept empty matrices as data") + msg = "loompy does not accept empty matrices as data" + raise ValueError(msg) if write_obsm_varm: for key in adata.obsm.keys(): diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index f67633c08..ae066f9e5 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -235,7 +235,7 @@ def register( try: validate(default_value) except (ValueError, TypeError) as e: - add_note(e, f"for option {repr(option)}") + add_note(e, f"for option {option!r}") raise e option_type = type(default_value) if option_type is None else option_type self._registered_options[option] = RegisteredOption( @@ -329,7 +329,7 @@ def __getattr__(self, option: str) -> object: """ if option in self._deprecated_options: deprecated = self._deprecated_options[option] - msg = f"{repr(option)} will be removed in {deprecated.removal_version}. {deprecated.message}" + msg = f"{option!r} will be removed in {deprecated.removal_version}. {deprecated.message}" warnings.warn(msg, DeprecationWarning) if option in self._config: return self._config[option] diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 419c30c2d..e30e5d4bf 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -94,7 +94,8 @@ def __exit__(self, *_exc_info) -> None: import zarr if Version(zarr.__version__).major > 2: - raise ImportError("zarr-python major version > 2 is not supported") + msg = "zarr-python major version > 2 is not supported" + raise ImportError(msg) from zarr.core import Array as ZarrArray from zarr.hierarchy import Group as ZarrGroup diff --git a/src/anndata/experimental/merge.py b/src/anndata/experimental/merge.py index 21a678e2c..6c62665d7 100644 --- a/src/anndata/experimental/merge.py +++ b/src/anndata/experimental/merge.py @@ -101,7 +101,8 @@ def _gen_slice_to_append( @singledispatch def as_group(store, *args, **kwargs) -> ZarrGroup | H5Group: - raise NotImplementedError("This is not yet implemented.") + msg = "This is not yet implemented." + raise NotImplementedError(msg) @as_group.register(os.PathLike) @@ -284,15 +285,15 @@ def _write_concat_arrays( init_elem = arrays[0] init_type = type(init_elem) if not all(isinstance(a, init_type) for a in arrays): - raise NotImplementedError( - f"All elements must be the same type instead got types: {[type(a) for a in arrays]}" - ) + msg = f"All elements must be the same type instead got types: {[type(a) for a in arrays]}" + raise NotImplementedError(msg) if reindexers is None: if join == "inner": reindexers = gen_inner_reindexers(arrays, new_index=None, axis=axis) else: - raise NotImplementedError("Cannot reindex arrays with outer join.") + msg = "Cannot reindex arrays with outer join." + raise NotImplementedError(msg) if isinstance(init_elem, BaseCompressedSparseDataset): expected_sparse_fmt = ["csr", "csc"][axis] @@ -307,9 +308,8 @@ def _write_concat_arrays( fill_value, ) else: - raise NotImplementedError( - f"Concat of following not supported: {[a.format for a in arrays]}" - ) + msg = f"Concat of following not supported: {[a.format for a in arrays]}" + raise NotImplementedError(msg) else: write_concat_dense( arrays, output_group, output_path, axis, reindexers, fill_value @@ -335,14 +335,14 @@ def _write_concat_sequence( if join == "inner": reindexers = gen_inner_reindexers(arrays, None, axis=axis) else: - raise NotImplementedError("Cannot reindex dataframes with outer join.") + msg = "Cannot reindex dataframes with outer join." + raise NotImplementedError(msg) if not all( isinstance(a, pd.DataFrame) or a is MissingVal or 0 in a.shape for a in arrays ): - raise NotImplementedError( - "Cannot concatenate a dataframe with other array types." - ) + msg = "Cannot concatenate a dataframe with other array types." + raise NotImplementedError(msg) df = concat_arrays( arrays=arrays, reindexers=reindexers, @@ -366,9 +366,8 @@ def _write_concat_sequence( join, ) else: - raise NotImplementedError( - f"Concatenation of these types is not yet implemented: {[type(a) for a in arrays] } with axis={axis}." - ) + msg = f"Concatenation of these types is not yet implemented: {[type(a) for a in arrays]} with axis={axis}." + raise NotImplementedError(msg) def _write_alt_mapping(groups, output_group, alt_axis_name, alt_indices, merge): @@ -537,25 +536,29 @@ def concat_on_disk( Name: count, dtype: int64 """ if len(in_files) == 0: - raise ValueError("No objects to concatenate.") + msg = "No objects to concatenate." + raise ValueError(msg) # Argument normalization if pairwise: - raise NotImplementedError("pairwise concatenation not yet implemented") + msg = "pairwise concatenation not yet implemented" + raise NotImplementedError(msg) merge = resolve_merge_strategy(merge) uns_merge = resolve_merge_strategy(uns_merge) out_file = Path(out_file) if not out_file.parent.exists(): - raise FileNotFoundError(f"Parent directory of {out_file} does not exist.") + msg = f"Parent directory of {out_file} does not exist." + raise FileNotFoundError(msg) if isinstance(in_files, Mapping): if keys is not None: - raise TypeError( + msg = ( "Cannot specify categories in both mapping keys and using `keys`. " "Only specify this once." ) + raise TypeError(msg) keys, in_files = list(in_files.keys()), list(in_files.values()) else: in_files = list(in_files) @@ -582,7 +585,8 @@ def concat_on_disk( # All groups must be anndata if not all(g.attrs.get("encoding-type") == "anndata" for g in groups): - raise ValueError("All groups must be anndata") + msg = "All groups must be anndata" + raise ValueError(msg) # Write metadata output_group.attrs.update({"encoding-type": "anndata", "encoding-version": "0.1.0"}) diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index b8d8fea03..69c549c5e 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -146,7 +146,8 @@ def iterate_axis( Set to `True` to drop a batch with the length lower than `batch_size`. """ if axis not in {0, 1}: - raise ValueError("Axis should be either 0 or 1.") + msg = "Axis should be either 0 or 1." + raise ValueError(msg) n = self.shape[axis] @@ -193,7 +194,8 @@ def __init__( def __getitem__(self, key: str, *, use_convert: bool = True): if self._keys is not None and key not in self._keys: - raise KeyError(f"No {key} in {self.attr} view") + msg = f"No {key} in {self.attr} view" + raise KeyError(msg) arrs = [] for i, oidx in enumerate(self.adatas_oidx): @@ -706,10 +708,11 @@ def __init__( ): if isinstance(adatas, Mapping): if keys is not None: - raise TypeError( + msg = ( "Cannot specify categories in both mapping keys and using `keys`. " "Only specify this once." ) + raise TypeError(msg) keys, adatas = list(adatas.keys()), list(adatas.values()) else: adatas = list(adatas) @@ -731,10 +734,11 @@ def __init__( self.adatas_vidx.append(adata_vidx) self.var_names = var_names else: - raise ValueError( + msg = ( "Adatas have different variables. " "Please specify join_vars='inner' for intersection." ) + raise ValueError(msg) concat_indices = pd.concat( [pd.Series(a.obs_names) for a in adatas], ignore_index=True diff --git a/src/anndata/experimental/pytorch/_annloader.py b/src/anndata/experimental/pytorch/_annloader.py index ddc5e825f..156562b7c 100644 --- a/src/anndata/experimental/pytorch/_annloader.py +++ b/src/anndata/experimental/pytorch/_annloader.py @@ -191,7 +191,8 @@ def __init__( elif isinstance(adatas, _ConcatViewMixin): dataset = copy(adatas) else: - raise ValueError("adata should be of type AnnData or AnnCollection.") + msg = "adata should be of type AnnData or AnnCollection." + raise ValueError(msg) if use_default_converter: pin_memory = kwargs.pop("pin_memory", False) diff --git a/src/anndata/io.py b/src/anndata/io.py index 5f9ba323c..c60250d8f 100644 --- a/src/anndata/io.py +++ b/src/anndata/io.py @@ -22,10 +22,12 @@ else: # pragma: no cover def read_zarr(*args, **kw): - raise ImportError("zarr is not installed") + msg = "zarr is not installed" + raise ImportError(msg) def write_zarr(*args, **kw): - raise ImportError("zarr is not installed") + msg = "zarr is not installed" + raise ImportError(msg) __all__ = [ diff --git a/src/anndata/logging.py b/src/anndata/logging.py index d7849fd8e..7eb714091 100644 --- a/src/anndata/logging.py +++ b/src/anndata/logging.py @@ -48,8 +48,7 @@ def format_memory_usage( more = " \n... " if msg != "" else "" mem, diff = mem_usage return ( - f"{nl}{msg}{more}" - f"Memory usage: current {mem:.2f} GB, difference {diff:+.2f} GB" + f"{nl}{msg}{more}Memory usage: current {mem:.2f} GB, difference {diff:+.2f} GB" ) diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index a915ff115..95f3a036b 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -223,7 +223,8 @@ def gen_awkward(shape, dtype=np.int32): import awkward as ak if shape[0] is None: - raise ValueError("The first dimension must be fixed-length.") + msg = "The first dimension must be fixed-length." + raise ValueError(msg) rng = random.Random(123) shape = np.array(shape) @@ -466,9 +467,8 @@ def sparray_bool_subset(index, min_size=2): def array_subset(index, min_size=2): if len(index) < min_size: - raise ValueError( - f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" - ) + msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" + raise ValueError(msg) return np.random.choice( index, size=np.random.randint(min_size, len(index), ()), replace=False ) @@ -476,9 +476,8 @@ def array_subset(index, min_size=2): def array_int_subset(index, min_size=2): if len(index) < min_size: - raise ValueError( - f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" - ) + msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)}" + raise ValueError(msg) return np.random.choice( np.arange(len(index)), size=np.random.randint(min_size, len(index), ()), @@ -969,9 +968,9 @@ def check_error_or_notes_match(e: pytest.ExceptionInfo, pattern: str | re.Patter import traceback message = "".join(traceback.format_exception_only(e.type, e.value)) - assert re.search( - pattern, message - ), f"Could not find pattern: '{pattern}' in error:\n\n{message}\n" + assert re.search(pattern, message), ( + f"Could not find pattern: '{pattern}' in error:\n\n{message}\n" + ) def resolve_cupy_type(val): @@ -987,7 +986,8 @@ def resolve_cupy_type(val): elif issubclass(input_typ, sparse.csc_matrix): typ = CupyCSCMatrix else: - raise NotImplementedError(f"No default target type for input type {input_typ}") + msg = f"No default target type for input type {input_typ}" + raise NotImplementedError(msg) return typ @@ -1024,9 +1024,8 @@ def as_cupy(val, typ=None): else: return cpsparse.csc_matrix(val) else: - raise NotImplementedError( - f"Conversion from {type(val)} to {typ} not implemented" - ) + msg = f"Conversion from {type(val)} to {typ} not implemented" + raise NotImplementedError(msg) # TODO: test diff --git a/src/anndata/utils.py b/src/anndata/utils.py index 06c15bfb7..fa6a77d47 100644 --- a/src/anndata/utils.py +++ b/src/anndata/utils.py @@ -37,7 +37,8 @@ def import_name(name: str) -> Any: try: obj = getattr(obj, name) except AttributeError: - raise RuntimeError(f"{parts[:i]}, {parts[i+1:]}, {obj} {name}") + msg = f"{parts[:i]}, {parts[i + 1 :]}, {obj} {name}" + raise RuntimeError(msg) return obj @@ -91,10 +92,11 @@ def convert_to_dict_dict(obj: dict): @convert_to_dict.register(np.ndarray) def convert_to_dict_ndarray(obj: np.ndarray): if obj.dtype.fields is None: - raise TypeError( + msg = ( "Can only convert np.ndarray with compound dtypes to dict, " f"passed array had “{obj.dtype}”." ) + raise TypeError(msg) return {k: obj[k] for k in obj.dtype.fields.keys()} @@ -127,7 +129,8 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): shape = layout.shape numpy_axis = lateral_context["axis"] - depth + 1 if not (1 <= numpy_axis < len(shape)): - raise TypeError(f"axis={lateral_context['axis']} is too deep") + msg = f"axis={lateral_context['axis']} is too deep" + raise TypeError(msg) lateral_context["out"] = shape[numpy_axis] return ak.contents.EmptyArray() @@ -136,7 +139,8 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): # Strings are implemented like an array of lists of uint8 (ListType(NumpyType(...))) # which results in an extra hierarchy-level that shouldn't show up in dim_len # See https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3736747 - raise TypeError(f"axis={lateral_context['axis']} is too deep") + msg = f"axis={lateral_context['axis']} is too deep" + raise TypeError(msg) if layout.is_regular: # if it's a regular list, you want the size @@ -154,9 +158,8 @@ def _size_at_depth(layout, depth, lateral_context, **kwargs): # currently, we don't recurse into records # in theory we could, just not sure how to do it at the moment # Would need to consider cases like: scalars, unevenly sized values - raise TypeError( - f"Cannot recurse into record type found at axis={lateral_context['axis']}" - ) + msg = f"Cannot recurse into record type found at axis={lateral_context['axis']}" + raise TypeError(msg) elif layout.is_union: # if it's a union, you could get the result of each union branch @@ -187,7 +190,8 @@ def axis_len_awkward(array, axis: Literal[0, 1]) -> int | None: Code adapted from @jpivarski's solution in https://github.com/scikit-hep/awkward/discussions/1654#discussioncomment-3521574 """ if axis < 0: # negative axis is another can of worms... maybe later - raise NotImplementedError("Does not support negative axis") + msg = "Does not support negative axis" + raise NotImplementedError(msg) elif axis == 0: return len(array) else: @@ -318,10 +322,11 @@ def convert_dictionary_to_structured_array(source: Mapping[str, Sequence[Any]]): for col in source.values() ] except UnicodeEncodeError: - raise ValueError( + msg = ( "Currently only support ascii strings. " "Don’t use “ö” etc. for sample annotation." ) + raise ValueError(msg) # if old_index_key not in source: # names.append(new_index_key) diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index b43d4666e..378807059 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -1253,9 +1253,9 @@ def test_concat_categories_maintain_dtype(): result = concat({"a": a, "b": b, "c": c}, join="outer") - assert isinstance( - result.obs["cat"].dtype, pd.CategoricalDtype - ), f"Was {result.obs['cat'].dtype}" + assert isinstance(result.obs["cat"].dtype, pd.CategoricalDtype), ( + f"Was {result.obs['cat'].dtype}" + ) assert pd.api.types.is_string_dtype(result.obs["cat_ordered"]) @@ -1675,7 +1675,7 @@ def test_concat_dask_sparse_matches_memory(join_type, merge_strategy): X = sparse.random(50, 20, density=0.5, format="csr") X_dask = da.from_array(X, chunks=(5, 20)) var_names_1 = [f"gene_{i}" for i in range(20)] - var_names_2 = [f"gene_{i}{'_foo' if (i%2) else ''}" for i in range(20, 40)] + var_names_2 = [f"gene_{i}{'_foo' if (i % 2) else ''}" for i in range(20, 40)] ad1 = AnnData(X=X, var=pd.DataFrame(index=var_names_1)) ad2 = AnnData(X=X, var=pd.DataFrame(index=var_names_2)) diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index bdeb5c6e6..6d0af6142 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -108,7 +108,8 @@ def get_array_type(array_type, axis): return sparse.csr_array if axis == 0 else sparse.csc_array if array_type == "array": return asarray - raise NotImplementedError(f"array_type {array_type} not implemented") + msg = f"array_type {array_type} not implemented" + raise NotImplementedError(msg) @pytest.mark.parametrize("reindex", [True, False], ids=["reindex", "no_reindex"]) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 4645fedd5..9623a2f68 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -107,7 +107,8 @@ def test_gen_random_column(dtype): # Does this work for every warning? def test_report_name(): def raise_error(): - raise Exception("an error occurred!") + msg = "an error occurred!" + raise Exception(msg) letters = np.array(list(ascii_letters)) tag = "".join(np.random.permutation(letters)) diff --git a/tests/test_layers.py b/tests/test_layers.py index 380e51d3b..6244a66ce 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -8,7 +8,8 @@ import pytest from numba.core.errors import NumbaDeprecationWarning -from anndata import AnnData, ImplicitModificationWarning, read_h5ad, read_loom +from anndata import AnnData, ImplicitModificationWarning, read_h5ad +from anndata.io import read_loom from anndata.tests.helpers import gen_typed_df_t2_size X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 1dab39e68..dc79dd23b 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -324,8 +324,12 @@ def check_compressed(key, value): f.visititems(check_compressed) if not_compressed: - msg = "\n\t".join(not_compressed) - raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}") + sep = "\n\t" + msg = ( + f"These elements were not compressed correctly:{sep}" + f"{sep.join(not_compressed)}" + ) + raise AssertionError(msg) expected = ad.read_h5ad(pth) assert_equal(adata, expected) @@ -350,8 +354,12 @@ def check_compressed(key, value): f.visititems(check_compressed) if not_compressed: - msg = "\n\t".join(not_compressed) - raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}") + sep = "\n\t" + msg = ( + f"These elements were not compressed correctly:{sep}" + f"{sep.join(not_compressed)}" + ) + raise AssertionError(msg) expected = ad.read_zarr(pth) assert_equal(adata, expected) diff --git a/tests/test_settings.py b/tests/test_settings.py index 3387b0cd8..7929c5068 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -29,7 +29,8 @@ def validate_int_list(val) -> bool: if not isinstance(val, list) or not [isinstance(type(e), int) for e in val]: - raise TypeError(f"{repr(val)} is not a valid int list") + msg = f"{val!r} is not a valid int list" + raise TypeError(msg) return True