From c7a47809f0c35f97884cb781dc409809551d3651 Mon Sep 17 00:00:00 2001 From: Felix Dangel <48687646+f-dangel@users.noreply.github.com> Date: Fri, 12 Jan 2024 19:48:17 -0500 Subject: [PATCH] [DOC] Update changelog & contributors, fix PyPI upload, prepare `1.2.0` (#66) * [DOC] Update changelog & contributors, fix PyPI upload, prepare `1.2.0` * [FIX] Incorporate feedback --- .github/workflows/python-publish.yml | 4 +- LICENSE | 2 +- README.md | 10 ++- changelog.md | 94 +++++++++++++++++++++++++++- setup.cfg | 5 +- 5 files changed, 107 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 48c52e13..0b1c9739 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -23,8 +23,8 @@ jobs: pip install setuptools wheel twine - name: Build and publish env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | python setup.py sdist bdist_wheel twine upload dist/* diff --git a/LICENSE b/LICENSE index 50af3ba7..a851ac2e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021 Felix Dangel, Lukas Tatzel & Philipp Hennig +Copyright (c) 2021 Felix Dangel, Runa Eschenhagen, Lukas Tatzel & Philipp Hennig Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 808ee215..9c8f997e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ for deep learning matrices, such as - the Hessian - the Fisher/generalized Gauss-Newton (GGN) - the Monte-Carlo approximated Fisher +- the Fisher/GGN's KFAC approximation (Kronecker-Factored Approximate Curvature) - the uncentered gradient covariance (aka empirical Fisher) - the output-parameter Jacobian of a neural net and its transpose @@ -23,12 +24,15 @@ You 
can plug these linear operators into `scipy`, while carrying out the heavy lifting (matrix-vector multiplies) in PyTorch on GPU. My favorite example for such a routine is [`scipy.sparse.linalg.eigsh`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html) -that lets you compute a subset of eigenpairs. +that lets you compute a subset of eigen-pairs. The library also provides linear operator transformations, like taking the inverse (inverse matrix-vector product via conjugate gradients) or slicing out sub-matrices. +Finally, it offers functionality to probe properties of the represented +matrices, like their spectral density, trace, or diagonal. + - **Documentation:** https://curvlinops.readthedocs.io/en/latest/ - **Bug reports & feature requests:** @@ -43,7 +47,9 @@ pip install curvlinops-for-pytorch ## Examples - [Basic - usage](https://curvlinops.readthedocs.io/en/latest/basic_usage/example_matrix_vector_products.html#sphx-glr-basic-usage-example-matrix-vector-products-py) + usage](https://curvlinops.readthedocs.io/en/latest/basic_usage/example_matrix_vector_products.html) +- [Advanced + examples](https://curvlinops.readthedocs.io/en/latest/basic_usage/index.html) ## Future ideas diff --git a/changelog.md b/changelog.md index 82a9f944..b17f19ee 100644 --- a/changelog.md +++ b/changelog.md @@ -6,6 +6,97 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.2.0] - 2024-01-12 + +This release ships with many new features and requires PyTorch 2: + +### Added/New + +- Linear operator for KFAC (Kronecker-Factored Approximate Curvature) + with support for a broad range of options + + - Prototype (`torch.nn.MSELoss` and `torch.nn.Linear`) + ([PR](https://github.com/f-dangel/curvlinops/pull/43)) + + - Support with `torch.nn.CrossEntropyLoss` + ([PR](https://github.com/f-dangel/curvlinops/pull/52)) + + - Support empirical Fisher (using gradients from data distribution) + 
([PR](https://github.com/f-dangel/curvlinops/pull/54)) + and type-2 estimation (using columns from the Hessian's matrix square root) + ([PR](https://github.com/f-dangel/curvlinops/pull/56)) + + - Support arbitrary parameter order + ([PR](https://github.com/f-dangel/curvlinops/pull/51)), + weight-only or bias-only layers + ([PR](https://github.com/f-dangel/curvlinops/pull/55)), + and support treating weight and bias jointly + ([PR](https://github.com/f-dangel/curvlinops/pull/57)) + + - Support networks with in-place activations + ([PR](https://github.com/f-dangel/curvlinops/pull/59)) + + - Support models with >2d output + ([PR](https://github.com/f-dangel/curvlinops/pull/62)) + + - Support KFAC `'expand'` and `'reduce'` approximations + for general weight-sharing layers + ([PR](https://github.com/f-dangel/curvlinops/pull/63), + [paper](https://arxiv.org/abs/2311.00636)) + + - Support `torch.nn.Conv2d` + ([PR](https://github.com/f-dangel/curvlinops/pull/64)) + +- Linear operator for taking sub-matrices of another linear operator + ([PR](https://github.com/f-dangel/curvlinops/pull/25), + [example](https://curvlinops.readthedocs.io/en/main/basic_usage/example_submatrices.html) + ([PR](https://github.com/f-dangel/curvlinops/pull/26))) + +- Linear operator for approximate inversion via the Neumann series + ([PR](https://github.com/f-dangel/curvlinops/pull/28), + [example](https://curvlinops.readthedocs.io/en/main/basic_usage/example_inverses.html#neumann-inverse-cg-alternative) + ([PR](https://github.com/f-dangel/curvlinops/pull/29))) + +- Linear operator for a neural network's output-parameter Jacobian + ([PR](https://github.com/f-dangel/curvlinops/pull/32)) and its transpose + ([PR](https://github.com/f-dangel/curvlinops/pull/34)) + +- Implement `adjoint` from `scipy.sparse.linalg.LinearOperator` interface + ([PR](https://github.com/f-dangel/curvlinops/pull/33/files)) + +- [Example](https://curvlinops.readthedocs.io/en/main/basic_usage/example_model_merging.html) for 
Fisher-weighted model averaging + ([PR](https://github.com/f-dangel/curvlinops/pull/37)) + +- Trace estimation via vanilla Hutchinson + ([PR](https://github.com/f-dangel/curvlinops/pull/38)) + +- Trace estimation via [Hutch++](https://arxiv.org/abs/2010.09649) + ([PR](https://github.com/f-dangel/curvlinops/pull/39)) + +- Diagonal estimation via Hutchinson + ([PR](https://github.com/f-dangel/curvlinops/pull/40)) + +- Experimental: Linear operator for the Hessian of the loss w.r.t. an + intermediate feature + ([PR](https://github.com/f-dangel/curvlinops/pull/65)) + +### Fixed/Removed + +- Allow for partially specified boundaries of the spectrum inside the spectral + density estimation methods and only estimate the missing boundary + ([PR](https://github.com/f-dangel/curvlinops/pull/27)) + +- Deprecate Python 3.7 + ([PR](https://github.com/f-dangel/curvlinops/pull/32)) + +- For future releases, we will abandon the `development` branch and switch to a + workflow where new features are directly merged into `main`. 
+ +### Internal + +- Switch from `functorch` to `torch.func` in reference implementation of tests + ([PR](https://github.com/f-dangel/curvlinops/pull/36)) + ## [1.1.0] - 2023-02-19 Adds various new features: @@ -64,6 +155,7 @@ Adds various new features: Initial release -[Unreleased]: https://github.com/f-dangel/curvlinops/compare/1.1.0...HEAD +[Unreleased]: https://github.com/f-dangel/curvlinops/compare/1.2.0...HEAD +[1.2.0]: https://github.com/f-dangel/curvlinops/releases/tag/1.2.0 [1.1.0]: https://github.com/f-dangel/curvlinops/releases/tag/1.1.0 [1.0.0]: https://github.com/f-dangel/curvlinops/releases/tag/1.0.0 diff --git a/setup.cfg b/setup.cfg index 551f6de7..7fc895ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,9 +8,9 @@ [metadata] name = curvlinops-for-pytorch -author = Felix Dangel, Lukas Tatzel +author = Felix Dangel, Runa Eschenhagen, Lukas Tatzel URL = https://github.com/f-dangel/curvlinops -description = scipy Linear operator implementations of the GGN and Hessian in PyTorch +description = scipy Linear operators for curvature matrices in PyTorch long_description = file: README.md long_description_content_type = text/markdown; charset=UTF-8; variant=GFM license = MIT @@ -26,6 +26,7 @@ classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 [options] zip_safe = False