diff --git a/.github/workflows/lint-black.yaml b/.github/workflows/lint-black.yaml index a1933b32..821edea9 100644 --- a/.github/workflows/lint-black.yaml +++ b/.github/workflows/lint-black.yaml @@ -1,13 +1,6 @@ name: Lint-black -on: - push: - branches: - - '*' - pull_request: - branches: - - '*' - +on: [push, pull_request] jobs: black: diff --git a/.github/workflows/lint-darglint.yaml b/.github/workflows/lint-darglint.yaml index 903b3ca5..0974e9bf 100644 --- a/.github/workflows/lint-darglint.yaml +++ b/.github/workflows/lint-darglint.yaml @@ -1,13 +1,6 @@ name: Lint-darglint -on: - push: - branches: - - '*' - pull_request: - branches: - - '*' - +on: [push, pull_request] jobs: darglint: diff --git a/.github/workflows/lint-isort.yaml b/.github/workflows/lint-isort.yaml index db09cb85..6737815d 100644 --- a/.github/workflows/lint-isort.yaml +++ b/.github/workflows/lint-isort.yaml @@ -1,13 +1,6 @@ name: Lint-isort -on: - push: - branches: - - '*' - pull_request: - branches: - - '*' - +on: [push, pull_request] jobs: isort: diff --git a/.github/workflows/lint-pydocstyle.yaml b/.github/workflows/lint-pydocstyle.yaml index d0037497..7e7e8e8f 100644 --- a/.github/workflows/lint-pydocstyle.yaml +++ b/.github/workflows/lint-pydocstyle.yaml @@ -1,12 +1,6 @@ name: Lint-pydocstyle -on: - push: - branches: - - '*' - pull_request: - branches: - - '*' +on: [push, pull_request] jobs: pydocstyle: diff --git a/.github/workflows/lint-flake8.yaml b/.github/workflows/lint-ruff.yaml similarity index 68% rename from .github/workflows/lint-flake8.yaml rename to .github/workflows/lint-ruff.yaml index 3b46b309..ba74f1ff 100644 --- a/.github/workflows/lint-flake8.yaml +++ b/.github/workflows/lint-ruff.yaml @@ -1,15 +1,9 @@ -name: Lint-flake8 +name: Lint-ruff -on: - push: - branches: - - '*' - pull_request: - branches: - - '*' +on: [push, pull_request] jobs: - flake8: + ruff: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -22,6 +16,6 @@ jobs: run: | python -m pip install 
--upgrade pip make install-lint - - name: Run flake8 + - name: Run ruff run: | - make flake8 + make ruff \ No newline at end of file diff --git a/.gitignore b/.gitignore index c7a3288d..375d0379 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ src/ .vscode/* .coverage .eggs +**.DS_Store diff --git a/curvlinops/_torch_base.py b/curvlinops/_torch_base.py index 00e71975..33df807c 100644 --- a/curvlinops/_torch_base.py +++ b/curvlinops/_torch_base.py @@ -191,7 +191,7 @@ def __check_tensor_and_preprocess( Raises: ValueError: If the input tensor has an invalid shape. """ - if X.ndim > 2 or X.shape[0] != self.shape[1]: + if X.ndim > 2 or X.shape[0] != self.shape[1]: # noqa: PLR2004 raise ValueError( f"Input tensor must have shape ({self.shape[1]},) or " + f"({self.shape[1]}, K), with K arbitrary. Got {X.shape}." @@ -583,8 +583,8 @@ def _loop_over_data( # Assume everything is handled by the model # if `X` is a custom data format if isinstance(X, Tensor): - X = X.to(self._device) - y = y.to(self._device) + X = X.to(self._device) # noqa: PLW2901 + y = y.to(self._device) # noqa: PLW2901 yield (X, y) def _get_normalization_factor( diff --git a/curvlinops/diagonal/hutchinson.py b/curvlinops/diagonal/hutchinson.py index b7647a5a..4f3e99c8 100644 --- a/curvlinops/diagonal/hutchinson.py +++ b/curvlinops/diagonal/hutchinson.py @@ -62,7 +62,7 @@ def __init__(self, A: LinearOperator): Raises: ValueError: If the operator is not square. """ - if len(A.shape) != 2 or A.shape[0] != A.shape[1]: + if len(A.shape) != 2 or A.shape[0] != A.shape[1]: # noqa: PLR2004 raise ValueError(f"A must be square. Got shape {A.shape}.") self._A = A diff --git a/curvlinops/fisher.py b/curvlinops/fisher.py index d4a1e5e5..d5bc9d9d 100644 --- a/curvlinops/fisher.py +++ b/curvlinops/fisher.py @@ -292,7 +292,7 @@ def sample_grad_output(self, output: Tensor, num_samples: int, y: Tensor) -> Ten NotImplementedError: If the prediction does not have two dimensions. 
NotImplementedError: If binary classification labels are not binary. """ - if output.ndim != 2: + if output.ndim != 2: # noqa: PLR2004 raise NotImplementedError(f"Only 2d outputs supported. Got {output.shape}") C = output.shape[1] diff --git a/curvlinops/inverse.py b/curvlinops/inverse.py index 0a35f578..7eb4afff 100644 --- a/curvlinops/inverse.py +++ b/curvlinops/inverse.py @@ -455,7 +455,8 @@ def _compute_inverse_factors( warn( f"Failed to compute Cholesky decomposition in {aaT.dtype} " f"precision with error {error}. " - "Retrying in double precision..." + "Retrying in double precision...", + stacklevel=2 ) # Retry in double precision original_type = aaT.dtype @@ -476,7 +477,8 @@ def _compute_inverse_factors( warn( f"Failed to compute Cholesky decomposition in {ggT.dtype} " f"precision with error {error}. " - "Retrying in double precision..." + "Retrying in double precision...", + stacklevel=2 ) # Retry in double precision original_dtype = ggT.dtype diff --git a/curvlinops/kfac.py b/curvlinops/kfac.py index 1f6e488e..b0435e33 100644 --- a/curvlinops/kfac.py +++ b/curvlinops/kfac.py @@ -548,7 +548,7 @@ def draw_label(self, output: Tensor) -> Tensor: ValueError: If the output is not 2d. NotImplementedError: If the loss function is not supported. """ - if output.ndim != 2: + if output.ndim != 2: # noqa: PLR2004 raise ValueError("Only a 2d output is supported.") if isinstance(self._loss_func, MSELoss): diff --git a/curvlinops/kfac_utils.py b/curvlinops/kfac_utils.py index d7061739..679cbf27 100644 --- a/curvlinops/kfac_utils.py +++ b/curvlinops/kfac_utils.py @@ -100,7 +100,7 @@ def loss_hessian_matrix_sqrt( NotImplementedError: If the loss function is ``BCEWithLogitsLoss`` but the target is not binary. 
""" - if output_one_datum.ndim != 2 or output_one_datum.shape[0] != 1: + if output_one_datum.ndim != 2 or output_one_datum.shape[0] != 1: # noqa: PLR2004 raise ValueError( f"Expected 'output_one_datum' to be 2d with shape [1, C], got " f"{output_one_datum.shape}" diff --git a/curvlinops/trace/hutchinson.py b/curvlinops/trace/hutchinson.py index a2d1506c..26f5e8a8 100644 --- a/curvlinops/trace/hutchinson.py +++ b/curvlinops/trace/hutchinson.py @@ -58,7 +58,7 @@ def __init__(self, A: LinearOperator): Raises: ValueError: If the operator is not square. """ - if len(A.shape) != 2 or A.shape[0] != A.shape[1]: + if len(A.shape) != 2 or A.shape[0] != A.shape[1]: # noqa: PLR2004 raise ValueError(f"A must be square. Got shape {A.shape}.") self._A = A diff --git a/curvlinops/trace/meyer2020hutch.py b/curvlinops/trace/meyer2020hutch.py index e37f32e3..a0f5b409 100644 --- a/curvlinops/trace/meyer2020hutch.py +++ b/curvlinops/trace/meyer2020hutch.py @@ -86,7 +86,7 @@ def __init__( ``basis_dim = s1`` and draw ``s2`` samples from Hutch++ such that ``2 * s1 + s2 = s``. """ - if len(A.shape) != 2 or A.shape[0] != A.shape[1]: + if len(A.shape) != 2 or A.shape[0] != A.shape[1]: # noqa: PLR2004 raise ValueError(f"A must be square. 
Got shape {A.shape}.") self._A = A diff --git a/docs/examples/basic_usage/example_benchmark.py b/docs/examples/basic_usage/example_benchmark.py index 925febfb..afd572cc 100644 --- a/docs/examples/basic_usage/example_benchmark.py +++ b/docs/examples/basic_usage/example_benchmark.py @@ -185,7 +185,7 @@ def setup_problem( for m in supported_layers: # ignore the last layer of GPT because it has 50k outputs, which # will yield an extremely large Kronecker factor - if all(d <= 50_000 for d in m.weight.shape): + if all(d <= 50_000 for d in m.weight.shape): # noqa: PLR2004 params.extend([p for p in m.parameters() if p.requires_grad]) else: params = [p for p in model.parameters() if p.requires_grad] @@ -523,7 +523,7 @@ def visualize_time_benchmark( num_gradients = x_max / reference spacing = 1 / 4 num_ticks = 1 + floor(num_gradients / spacing) - while num_ticks > 8: + while num_ticks > 8: # noqa: PLR2004 spacing *= 2 num_ticks = 1 + floor(num_gradients / spacing) @@ -701,7 +701,7 @@ def visualize_peakmem_benchmark( num_gradients = x_max / reference spacing = 1 / 4 num_ticks = 1 + floor(num_gradients / spacing) - while num_ticks > 8: + while num_ticks > 8: # noqa: PLR2004 spacing *= 2 num_ticks = 1 + floor(num_gradients / spacing) diff --git a/docs/examples/basic_usage/example_eigenvalues.py b/docs/examples/basic_usage/example_eigenvalues.py index 5fdbccdd..8916500e 100644 --- a/docs/examples/basic_usage/example_eigenvalues.py +++ b/docs/examples/basic_usage/example_eigenvalues.py @@ -169,11 +169,10 @@ def orthonormalize(v: numpy.ndarray, basis: List[numpy.ndarray]) -> numpy.ndarra if eigenvalue is None: eigenvalue = tmp_eigenvalue + elif abs(eigenvalue - tmp_eigenvalue) / (abs(eigenvalue) + 1e-6) < tol: + break else: - if abs(eigenvalue - tmp_eigenvalue) / (abs(eigenvalue) + 1e-6) < tol: - break - else: - eigenvalue = tmp_eigenvalue + eigenvalue = tmp_eigenvalue eigenvalues.append(eigenvalue) eigenvectors.append(v) diff --git a/docs/examples/basic_usage/example_inverses.py 
b/docs/examples/basic_usage/example_inverses.py index 849babd3..b68840c0 100644 --- a/docs/examples/basic_usage/example_inverses.py +++ b/docs/examples/basic_usage/example_inverses.py @@ -249,7 +249,7 @@ # of the matrix to be inverted: max_eigval = eigsh(damped_GGN, k=1, which="LM", return_eigenvectors=False)[0] # eigenvalues (scale * damped_GGN_mat) are in [0; 2) -scale = 1.0 if max_eigval < 2.0 else 1.99 / max_eigval +scale = 1.0 if max_eigval < 2.0 else 1.99 / max_eigval # noqa: PLR2004 # %% # diff --git a/docs/examples/basic_usage/example_model_merging.py b/docs/examples/basic_usage/example_model_merging.py index ed91e982..0295f307 100644 --- a/docs/examples/basic_usage/example_model_merging.py +++ b/docs/examples/basic_usage/example_model_merging.py @@ -115,7 +115,7 @@ def make_dataset() -> TensorDataset: for epoch in range(num_epochs): for batch_idx, (X, y) in enumerate(data_loader): optimizer.zero_grad() - X, y = X.to(DEVICE), y.to(DEVICE) + X, y = X.to(DEVICE), y.to(DEVICE) # noqa: PLW2901 loss = loss_function(model(X), y) loss.backward() optimizer.step() diff --git a/docs/examples/basic_usage/memory_benchmark.py b/docs/examples/basic_usage/memory_benchmark.py index 6e7f70cc..d16642eb 100644 --- a/docs/examples/basic_usage/memory_benchmark.py +++ b/docs/examples/basic_usage/memory_benchmark.py @@ -21,7 +21,7 @@ from curvlinops import KFACInverseLinearOperator, KFACLinearOperator -def run_peakmem_benchmark( # noqa: C901 +def run_peakmem_benchmark( # noqa: C901, PLR0915 linop_str: str, problem_str: str, device_str: str, op_str: str ): """Execute the memory benchmark for a given linear operator class and save results. 
diff --git a/makefile b/makefile index 84341dca..2a1dba50 100644 --- a/makefile +++ b/makefile @@ -29,8 +29,8 @@ help: @echo " Run black on the project" @echo "black-check" @echo " Check if black would change files" - @echo "flake8" - @echo " Run flake8 on the project" + @echo "ruff" + @echo " Run ruff on the project" @echo "conda-env" @echo " Create conda environment 'curvlinops' with dev setup" @echo "darglint-check" @@ -97,10 +97,10 @@ black: black-check: @black . --config=black.toml --check -.PHONY: flake8 +.PHONY: ruff -flake8: - @flake8 . +ruff: + @ruff check . .PHONY: darglint-check @@ -122,6 +122,6 @@ conda-env: lint: make black-check make isort-check - make flake8 + make ruff make darglint-check make pydocstyle-check diff --git a/pyproject.toml b/pyproject.toml index dd43effc..39128dbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,14 +65,7 @@ test = [ # Dependencies needed for linting. lint = [ "black", - "flake8", - "mccabe", - "pycodestyle", - "pyflakes", - "pep8-naming", - "flake8-bugbear", - "flake8-comprehensions", - "flake8-tidy-imports", + "ruff", "darglint", "pydocstyle", "isort", @@ -106,3 +99,38 @@ use_parentheses = true convention = "google" match = '.*\.py' match_dir = '^(?!(test|.git)).*' + +[tool.ruff] +# Same as flake8's max-line-length +line-length = 88 + +[tool.ruff.lint] +# Enable all rules from flake8 (E, F), plus additional ones +select = ["E", "F", "B", "C", "W", "B9", "PLE", "PLW", "PLR"] +ignore = [ + # E501 line too long (was replaced by B950 (max-line-length + 10%) under flake8; NOTE: ruff does not appear to implement B950 - verify whether a line-length check is still wanted) + "E501", + # C408 use {} instead of dict() (ignored because pytorch uses dict) + "C408", + # E203 whitespace before : + "E203", + # E231 missing whitespace after ',' + "E231", + # W291 trailing whitespace + "W291", + # E203 whitespace before punctuation (duplicate of the entry above; ruff has no equivalent of W503, line break before binary operator) + "E203", + # E226 missing whitespace around arithmetic operator (ruff has no equivalent of W504, line break after binary operator) + "E226", + # B905 `zip()` without an explicit `strict=` parameter + "B905", + # Too many arguments in function 
definition (9 > 5) + "PLR0913", +] + +[tool.ruff.lint.per-file-ignores] +# Add any per-file ignores here if needed + +[tool.ruff.lint.flake8-bugbear] +# Treat these pytest helpers as immutable calls (affects B008, function call in argument defaults) +extend-immutable-calls = ["pytest.raises", "pytest.warns", "pytest.mark.skip"] diff --git a/test/papyan2020traces/test_spectrum.py b/test/papyan2020traces/test_spectrum.py index e4b95623..a85a803f 100644 --- a/test/papyan2020traces/test_spectrum.py +++ b/test/papyan2020traces/test_spectrum.py @@ -47,7 +47,7 @@ def test_approximate_boundaries(): for inputs, results in cases: output = approximate_boundaries(A, boundaries=inputs) - assert len(output) == 2 + assert len(output) == 2 # noqa: PLR2004 assert isinstance(output[0], float) assert isinstance(output[1], float) assert allclose(output, results) @@ -69,7 +69,7 @@ def test_approximate_boundaries_abs(): for inputs, results in cases: output = approximate_boundaries_abs(A, boundaries=inputs) - assert len(output) == 2 + assert len(output) == 2 # noqa: PLR2004 assert isinstance(output[0], float) assert isinstance(output[1], float) assert allclose(output, results) diff --git a/test/test__torch_base.py b/test/test__torch_base.py index 5e5205bb..6e2cdeb9 100644 --- a/test/test__torch_base.py +++ b/test/test__torch_base.py @@ -136,7 +136,7 @@ def __iter__(self) -> Iterator[Tuple[Union[Tensor, MutableMapping], Tensor]]: if isinstance(value, Tensor): X[key] = X[key][permutation] else: - X = X[permutation] + X = X[permutation] # noqa: PLW2901 yield X, y[permutation] diff --git a/test/test_inverse.py b/test/test_inverse.py index c9f3a429..6e726bdd 100644 --- a/test/test_inverse.py +++ b/test/test_inverse.py @@ -137,7 +137,7 @@ def test_Neumann_inverse_damped_GGN_matvec(inv_case, delta: float = 1e-2): # set scale such that Neumann series converges eval_max = eigh(damped_GGN_functorch)[0][-1] - scale = 1.0 if eval_max < 2 else 1.9 / eval_max + scale = 1.0 if eval_max < 2 else 1.9 / eval_max # noqa: PLR2004 # NOTE This may break when other cases 
are added because slow convergence inv_GGN = NeumannInverseLinearOperator(GGN + damping, num_terms=7_000, scale=scale) @@ -303,7 +303,7 @@ def test_KFAC_inverse_damped_matmat( @mark.parametrize( "separate_weight_and_bias", [True, False], ids=["separate_bias", "joint_bias"] ) -def test_KFAC_inverse_heuristically_damped_matmat( # noqa: C901 +def test_KFAC_inverse_heuristically_damped_matmat( # noqa: C901, PLR0912, PLR0915 case: Tuple[ Module, Union[MSELoss, CrossEntropyLoss], diff --git a/test/test_kfac.py b/test/test_kfac.py index 59d455d3..37cafd14 100644 --- a/test/test_kfac.py +++ b/test/test_kfac.py @@ -560,7 +560,7 @@ def test_expand_setting_scaling( # MSE loss averages over number of output channels loss_term_factor *= output_random_variable_size for ggT in kfac_sum_torch._gradient_covariances.values(): - ggT /= kfac_sum_torch._N_data * loss_term_factor + ggT.div_(kfac_sum_torch._N_data * loss_term_factor) kfac_simulated_mean_mat = kfac_sum @ eye(kfac_sum.shape[1]) # KFAC with mean reduction diff --git a/test/utils.py b/test/utils.py index fdfeae3c..be0677e1 100644 --- a/test/utils.py +++ b/test/utils.py @@ -239,7 +239,7 @@ def forward(self, x: Tensor) -> Tensor: # Example: Transformer for translation: (batch, sequence_length, c) # (although second and third dimension would have to be transposed for # classification) - if x.ndim > 2 and self.loss == "CE": + if x.ndim > 2 and self.loss == "CE": # noqa: PLR2004 x = rearrange(x, "batch ... c -> batch c ...") return x