Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump minimal pytorch version to 2.6 #373

Merged
merged 7 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/linux-cuda-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ jobs:
uses: ./.github/workflows/python-quality.yml
test-ubuntu-cuda:
needs: [check-commits, python-quality]
runs-on:
runs-on:
group: aws-g5-4xlarge-plus
strategy:
fail-fast: false
matrix:
cuda-version: ["11.8", "12.1", "12.4"]
cuda-version: ["11.8", "12.4", "12.6"]
container:
image: pytorch/pytorch:2.4.0-cuda${{ matrix.cuda-version }}-cudnn9-devel
image: pytorch/pytorch:2.6.0-cuda${{ matrix.cuda-version }}-cudnn9-devel
options: --gpus 0

steps:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/linux-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ jobs:
uses: ./.github/workflows/python-quality.yml
run-examples:
needs: [check-commits, python-quality]
runs-on:
runs-on:
group: aws-g5-4xlarge-plus
strategy:
fail-fast: false
matrix:
device: ["cpu", "cuda"]
container:
image: pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
image: pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel
options: --gpus 0

steps:
Expand Down
2 changes: 1 addition & 1 deletion optimum/quanto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.2.6dev"
__version__ = "0.2.7dev"

from .calibrate import *
from .library import *
Expand Down
2 changes: 2 additions & 0 deletions optimum/quanto/library/extensions/mps/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ To add a new implementation for an operation defined in `library/ops.py`:
- add the corresponding `.mm` file to the list of sources in `__init__.py`,
- add a binding to `pybind_module.cpp`,
- provide an implementation calling the binding in `__init__.py`.

Note: torch JIT extensions for MPS require the Xcode command-line tools.
13 changes: 7 additions & 6 deletions optimum/quanto/tensor/weights/tinygemm/packed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from copy import copy

import torch
from packaging import version
from torch.utils import _pytree as pytree


Expand All @@ -27,7 +26,6 @@ class TinyGemmPackedTensor(torch.Tensor):
@staticmethod
def __new__(cls, data, size, stride, requires_grad=False):
# TinyGemmPackedTensor represents uint8 data and can therefore NEVER require gradient
assert data.dtype == torch.int32
assert requires_grad is False
return torch.Tensor._make_wrapper_subclass(
cls, size, strides=stride, dtype=torch.uint8, device=data.device, requires_grad=requires_grad
Expand All @@ -54,11 +52,11 @@ def pack(cls, t):
"""
inner_ktiles = 2
t = t.to(torch.int32).contiguous()
if version.parse(torch.__version__).release >= version.parse("2.5.0").release:
if t.device.type == "cpu":
data = torch._convert_weight_to_int4pack_for_cpu(t, innerKTiles=inner_ktiles)
else:
t_uint8 = (t[::, ::2] << 4 | t[::, 1::2]).to(torch.uint8)
data = torch._convert_weight_to_int4pack(t_uint8, innerKTiles=inner_ktiles)
else:
data = torch._convert_weight_to_int4pack(t, innerKTiles=inner_ktiles)
# We need to store size and stride to make sure the unpacked data has the correct shape
return TinyGemmPackedTensor(data, t.size(), t.stride())

Expand Down Expand Up @@ -86,7 +84,10 @@ def unpack(self):
id_scale_and_shift[:, :, 1] = 8

identity = torch.eye(in_features, dtype=torch.bfloat16, device=self.device)
unpacked_data = torch._weight_int4pack_mm(identity, self._data, group_size, id_scale_and_shift)
if self._data.device.type == "cpu":
unpacked_data = torch._weight_int4pack_mm_for_cpu(identity, self._data, group_size, id_scale_and_shift)
else:
unpacked_data = torch._weight_int4pack_mm(identity, self._data, group_size, id_scale_and_shift)

return unpacked_data.t().to(torch.uint8)

Expand Down
11 changes: 8 additions & 3 deletions optimum/quanto/tensor/weights/tinygemm/qbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ def forward(ctx, input, other, bias):
in_features = input.shape[-1]
out_features = other.shape[0]
output_shape = input.shape[:-1] + (out_features,)
output = torch._weight_int4pack_mm(
input.reshape(-1, in_features), other._data._data, other._group_size, other._scale_shift
)
if input.device.type == "cpu":
output = torch._weight_int4pack_mm_for_cpu(
input.reshape(-1, in_features), other._data._data, other._group_size, other._scale_shift
)
else:
output = torch._weight_int4pack_mm(
input.reshape(-1, in_features), other._data._data, other._group_size, other._scale_shift
)
output = output.reshape(output_shape)
if bias is not None:
output = output + bias
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ authors = [{ name = 'David Corvoysier' }]
maintainers = [
{name = "HuggingFace Inc. Special Ops Team", email="[email protected]"},
]
dependencies = ['torch>=2.4.0', 'ninja', 'numpy', 'safetensors', 'huggingface_hub']
dependencies = ['torch>=2.6.0', 'ninja', 'numpy', 'safetensors', 'huggingface_hub']
license = { text = 'Apache-2.0' }
readme = 'README.md'
dynamic = ['version']
Expand Down Expand Up @@ -66,4 +66,4 @@ lint.select = ['C', 'E', 'F', 'I', 'W']
# isort configuration (to sort imports)
[tool.ruff.lint.isort]
lines-after-imports = 2
known-first-party = ['optimum.quanto']
known-first-party = ['optimum.quanto']
3 changes: 1 addition & 2 deletions test/tensor/activations/test_activations_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def compile_for_device(f, device):
return torch.compile(f, backend=backend)


@torch_min_version("2.6.0")
@torch_min_version("2.7.0")
@pytest.mark.parametrize("input_shape", [(2, 10), (10, 32, 32)])
@pytest.mark.parametrize("qtype", [qint8], ids=["qint8"])
@pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16], ids=["fp32", "fp16", "bf16"])
Expand All @@ -48,7 +48,6 @@ def f(x, qtype):
assert qa.axis is None


@torch_min_version("2.3.0")
def test_compile_qtensor_to(device):
input_shape = (10, 32, 32)
a = random_tensor(input_shape).to(device)
Expand Down
2 changes: 1 addition & 1 deletion test/tensor/test_packed_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_packed_tensor_serialization(bits, device):
b = io.BytesIO()
torch.save(packed, b)
b.seek(0)
packed_reloaded = torch.load(b)
packed_reloaded = torch.load(b, weights_only=False)
assert isinstance(packed_reloaded, PackedTensor)
assert packed_reloaded.shape == packed.shape
assert packed_reloaded.dtype == packed.dtype
Expand Down
2 changes: 1 addition & 1 deletion test/tensor/weights/test_weight_qbits_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_weight_qbits_tensor_serialization(qtype, axis):
b = io.BytesIO()
torch.save(qa, b)
b.seek(0)
qa_reloaded = torch.load(b)
qa_reloaded = torch.load(b, weights_only=False)
assert isinstance(qa_reloaded, WeightQBitsTensor)
assert qa_reloaded.qtype == qa.qtype
assert qa_reloaded.dtype == qa.dtype
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_weights_qbytes_tensor_serialization(input_shape, qtype, dtype, axis):
b = io.BytesIO()
torch.save(qinputs, b)
b.seek(0)
qinputs_reloaded = torch.load(b)
qinputs_reloaded = torch.load(b, weights_only=False)
assert qinputs_reloaded.qtype == qtype
assert torch.equal(qinputs_reloaded._scale, qinputs._scale)
if qtype.is_floating_point:
Expand Down