Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Upcoming Version
* Improve handling of CPLEX solver quality attributes to ensure such metrics are extracted correctly when available.
* Fix Xpress IIS label mapping for masked constraints and add a regression test for matching infeasible coordinates.
* Enable quadratic problems with SCIP on Windows.
* Default internal integer arrays (labels, variable indices, ``_term`` coordinates) to ``int32`` instead of ``int64``, reducing memory usage by ~25% and improving model build speed by 10-35%. The dtype is configurable via ``linopy.options["label_dtype"]`` (e.g. set to ``np.int64`` to restore the old behavior). An overflow guard raises ``ValueError`` if labels exceed the int32 maximum (~2.1 billion).


Version 0.6.5
Expand Down
11 changes: 4 additions & 7 deletions linopy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from __future__ import annotations

import operator
import os
from collections.abc import Callable, Generator, Hashable, Iterable, Sequence
from functools import partial, reduce, wraps
from pathlib import Path
Expand All @@ -18,7 +17,7 @@
import numpy as np
import pandas as pd
import polars as pl
from numpy import arange, signedinteger
from numpy import signedinteger
from xarray import DataArray, Dataset, apply_ufunc, broadcast
from xarray import align as xr_align
from xarray.core import dtypes, indexing
Expand Down Expand Up @@ -340,11 +339,9 @@ def infer_schema_polars(ds: Dataset) -> dict[Hashable, pl.DataType]:
dict: A dictionary mapping column names to their corresponding Polars data types.
"""
schema = {}
np_major_version = int(np.__version__.split(".")[0])
use_int32 = os.name == "nt" and np_major_version < 2
for name, array in ds.items():
if np.issubdtype(array.dtype, np.integer):
schema[name] = pl.Int32 if use_int32 else pl.Int64
schema[name] = pl.Int32 if array.dtype.itemsize <= 4 else pl.Int64
elif np.issubdtype(array.dtype, np.floating):
schema[name] = pl.Float64 # type: ignore
elif np.issubdtype(array.dtype, np.bool_):
Expand Down Expand Up @@ -488,7 +485,7 @@ def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset:
)
arrs = xr_align(*dataarrays, join="outer")
if integer_dtype:
arrs = tuple([ds.fillna(-1).astype(int) for ds in arrs])
arrs = tuple([ds.fillna(-1).astype(options["label_dtype"]) for ds in arrs])
return Dataset({ds.name: ds for ds in arrs})


Expand Down Expand Up @@ -549,7 +546,7 @@ def fill_missing_coords(
# Fill in missing integer coordinates
for dim in ds.dims:
if dim not in ds.coords and dim not in skip_dims:
ds.coords[dim] = arange(ds.sizes[dim])
ds.coords[dim] = np.arange(ds.sizes[dim])

return ds

Expand Down
22 changes: 15 additions & 7 deletions linopy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,36 @@

from typing import Any

import numpy as np

_VALID_LABEL_DTYPES = {np.int32, np.int64}


class OptionSettings:
def __init__(self, **kwargs: int) -> None:
def __init__(self, **kwargs: Any) -> None:
self._defaults = kwargs
self._current_values = kwargs.copy()

def __call__(self, **kwargs: int) -> None:
def __call__(self, **kwargs: Any) -> None:
self.set_value(**kwargs)

def __getitem__(self, key: str) -> int:
def __getitem__(self, key: str) -> Any:
return self.get_value(key)

def __setitem__(self, key: str, value: int) -> None:
def __setitem__(self, key: str, value: Any) -> None:
return self.set_value(**{key: value})

def set_value(self, **kwargs: int) -> None:
def set_value(self, **kwargs: Any) -> None:
for k, v in kwargs.items():
if k not in self._defaults:
raise KeyError(f"{k} is not a valid setting.")
if k == "label_dtype" and v not in _VALID_LABEL_DTYPES:
raise ValueError(
f"label_dtype must be one of {_VALID_LABEL_DTYPES}, got {v}"
)
self._current_values[k] = v

def get_value(self, name: str) -> int:
def get_value(self, name: str) -> Any:
if name in self._defaults:
return self._current_values[name]
else:
Expand All @@ -57,4 +65,4 @@ def __repr__(self) -> str:
return f"OptionSettings:\n {settings}"


options = OptionSettings(display_max_rows=14, display_max_terms=6)
options = OptionSettings(display_max_rows=14, display_max_terms=6, label_dtype=np.int32)
5 changes: 4 additions & 1 deletion linopy/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,10 @@ def flat(self) -> pd.DataFrame:
return pd.DataFrame(columns=["coeffs", "vars", "labels", "key"])
df = pd.concat(dfs, ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
10 changes: 6 additions & 4 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,9 @@ def __init__(self, data: Dataset | Any | None, model: Model) -> None:
)

if np.issubdtype(data.vars, np.floating):
data = assign_multiindex_safe(data, vars=data.vars.fillna(-1).astype(int))
data = assign_multiindex_safe(
data, vars=data.vars.fillna(-1).astype(options["label_dtype"])
)
if not np.issubdtype(data.coeffs, np.floating):
data["coeffs"].values = data.coeffs.values.astype(float)

Expand Down Expand Up @@ -1436,7 +1438,7 @@ def sanitize(self: GenericExpression) -> GenericExpression:
linopy.LinearExpression
"""
if not np.issubdtype(self.vars.dtype, np.integer):
return self.assign(vars=self.vars.fillna(-1).astype(int))
return self.assign(vars=self.vars.fillna(-1).astype(options["label_dtype"]))

return self

Expand Down Expand Up @@ -1840,12 +1842,12 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray:
# Combined has dimensions (.., CV_DIM, TERM_DIM)

# Drop terms where all vars are -1 (i.e., empty terms across all coordinates)
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = combined.isel({CV_DIM: 0}).astype(options["label_dtype"])
non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM])
combined = combined.isel({TERM_DIM: non_empty_terms})

# Extract vars and coeffs from the combined result
vars = combined.isel({CV_DIM: 0}).astype(int)
vars = combined.isel({CV_DIM: 0}).astype(options["label_dtype"])
coeffs = combined.isel({CV_DIM: 1})

# Create new dataset with simplified data
Expand Down
3 changes: 2 additions & 1 deletion linopy/matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from scipy.sparse._csc import csc_matrix

from linopy import expressions
from linopy.config import options

if TYPE_CHECKING:
from linopy.model import Model
Expand Down Expand Up @@ -134,7 +135,7 @@ def clabels(self) -> ndarray:
"""Vector of labels of all non-missing constraints."""
df: pd.DataFrame = self.flat_cons
if df.empty:
return np.array([], dtype=int)
return np.array([], dtype=options["label_dtype"])
return create_vector(df.key, df.labels, fill_value=-1)

@property
Expand Down
21 changes: 19 additions & 2 deletions linopy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
set_int_index,
to_path,
)
from linopy.config import options
from linopy.constants import (
GREATER_EQUAL,
HELPER_DIMS,
Expand Down Expand Up @@ -633,7 +634,15 @@ def add_variables(

start = self._xCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
label_dtype = options["label_dtype"]
if end > np.iinfo(label_dtype).max:
raise ValueError(
f"Number of labels ({end}) exceeds the maximum value for "
f"{label_dtype.__name__} ({np.iinfo(label_dtype).max})."
)
data.labels.values = np.arange(
start, end, dtype=options["label_dtype"]
).reshape(data.labels.shape)
self._xCounter += data.labels.size

if mask is not None:
Expand Down Expand Up @@ -872,7 +881,15 @@ def add_constraints(

start = self._cCounter
end = start + data.labels.size
data.labels.values = np.arange(start, end).reshape(data.labels.shape)
label_dtype = options["label_dtype"]
if end > np.iinfo(label_dtype).max:
raise ValueError(
f"Number of labels ({end}) exceeds the maximum value for "
f"{label_dtype.__name__} ({np.iinfo(label_dtype).max})."
)
data.labels.values = np.arange(
start, end, dtype=options["label_dtype"]
).reshape(data.labels.shape)
self._cCounter += data.labels.size

if mask is not None:
Expand Down
22 changes: 17 additions & 5 deletions linopy/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@
to_polars,
)
from linopy.config import options
from linopy.constants import HELPER_DIMS, SOS_DIM_ATTR, SOS_TYPE_ATTR, TERM_DIM
from linopy.constants import (
HELPER_DIMS,
SOS_DIM_ATTR,
SOS_TYPE_ATTR,
TERM_DIM,
)
from linopy.solver_capabilities import SolverFeature, solver_supports
from linopy.types import (
ConstantLike,
Expand Down Expand Up @@ -1191,7 +1196,9 @@ def ffill(self, dim: str, limit: None = None) -> Variable:
.map(DataArray.ffill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign_multiindex_safe(labels=data.labels.astype(int))
return self.assign_multiindex_safe(
labels=data.labels.astype(options["label_dtype"])
)

def bfill(self, dim: str, limit: None = None) -> Variable:
"""
Expand All @@ -1218,7 +1225,7 @@ def bfill(self, dim: str, limit: None = None) -> Variable:
.map(DataArray.bfill, dim=dim, limit=limit)
.fillna(self._fill_value)
)
return self.assign(labels=data.labels.astype(int))
return self.assign(labels=data.labels.astype(options["label_dtype"]))

def sanitize(self) -> Variable:
"""
Expand All @@ -1229,7 +1236,9 @@ def sanitize(self) -> Variable:
linopy.Variable
"""
if issubdtype(self.labels.dtype, floating):
return self.assign(labels=self.labels.fillna(-1).astype(int))
return self.assign(
labels=self.labels.fillna(-1).astype(options["label_dtype"])
)
return self

def equals(self, other: Variable) -> bool:
Expand Down Expand Up @@ -1681,7 +1690,10 @@ def flat(self) -> pd.DataFrame:
"""
df = pd.concat([self[k].flat for k in self], ignore_index=True)
unique_labels = df.labels.unique()
map_labels = pd.Series(np.arange(len(unique_labels)), index=unique_labels)
map_labels = pd.Series(
np.arange(len(unique_labels), dtype=options["label_dtype"]),
index=unique_labels,
)
df["key"] = df.labels.map(map_labels)
return df

Expand Down
12 changes: 8 additions & 4 deletions test/test_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ def test_constraint_assignment() -> None:
assert "con0" in getattr(m.constraints, attr)

assert m.constraints.labels.con0.shape == (10, 10)
assert m.constraints.labels.con0.dtype == int
assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer)
assert m.constraints.coeffs.con0.dtype in (int, float)
assert m.constraints.vars.con0.dtype in (int, float)
assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype(
m.constraints.vars.con0.dtype, np.floating
)
assert m.constraints.rhs.con0.dtype in (int, float)

assert_conequal(m.constraints.con0, con0)
Expand Down Expand Up @@ -90,9 +92,11 @@ def test_anonymous_constraint_assignment() -> None:
assert "con0" in getattr(m.constraints, attr)

assert m.constraints.labels.con0.shape == (10, 10)
assert m.constraints.labels.con0.dtype == int
assert np.issubdtype(m.constraints.labels.con0.dtype, np.integer)
assert m.constraints.coeffs.con0.dtype in (int, float)
assert m.constraints.vars.con0.dtype in (int, float)
assert np.issubdtype(m.constraints.vars.con0.dtype, np.integer) or np.issubdtype(
m.constraints.vars.con0.dtype, np.floating
)
assert m.constraints.rhs.con0.dtype in (int, float)


Expand Down
75 changes: 75 additions & 0 deletions test/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Tests for int32 default label dtype."""

import numpy as np
import pytest

from linopy import Model
from linopy.config import options


def test_default_label_dtype_is_int32() -> None:
    """The ``label_dtype`` option compares equal to ``np.int32`` out of the box."""
    configured_dtype = options["label_dtype"]
    assert configured_dtype == np.int32


def test_variable_labels_are_int32() -> None:
    """Labels of a newly added variable are stored as ``np.int32``."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    label_dtype = var.labels.dtype
    assert label_dtype == np.int32


def test_constraint_labels_are_int32() -> None:
    """Labels of a newly added constraint are stored as ``np.int32``."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    model.add_constraints(var >= 1, name="c")
    constraint = model.constraints["c"]
    assert constraint.labels.dtype == np.int32


def test_expression_vars_are_int32() -> None:
    """The ``vars`` array of an expression built from a variable is ``np.int32``."""
    model = Model()
    var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
    affine = 2 * var + 1
    vars_dtype = affine.vars.dtype
    assert vars_dtype == np.int32


def test_solve_with_int32_labels() -> None:
    """Solve a small LP with HiGHS and check the optimal objective value.

    Maximises ``x + 2*y`` subject to ``x + y <= 15`` with both variables
    bounded to ``[0, 10]``; the optimum is ``x = 5, y = 10`` with value 25.
    The test is skipped when ``highspy`` cannot be imported.
    """
    # ``importorskip`` raises a skip that is allowed at module level. Calling
    # it inside a decorator runs it while the module is being imported, which
    # skips *every* test in this file when highspy is missing (and makes the
    # ``skipif`` condition always False when it is present). Calling it here
    # confines the skip to this single test.
    pytest.importorskip("highspy", reason="highspy not installed")

    m = Model()
    x = m.add_variables(lower=0, upper=10, name="x")
    y = m.add_variables(lower=0, upper=10, name="y")
    m.add_constraints(x + y <= 15, name="c1")
    m.add_objective(x + 2 * y, sense="max")
    m.solve("highs")
    assert m.objective.value == pytest.approx(25.0)


def test_overflow_guard_variables() -> None:
    """Adding variables whose labels would pass the int32 maximum raises."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    # Move the variable counter to just below the int32 limit so that five
    # additional labels cannot fit.
    model._xCounter = int32_max - 1
    with pytest.raises(ValueError, match="exceeds the maximum"):
        model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")


def test_overflow_guard_constraints() -> None:
    """Adding constraints whose labels would pass the int32 maximum raises."""
    int32_max = np.iinfo(np.int32).max
    model = Model()
    var = model.add_variables(lower=0, upper=1, coords=[range(5)], name="x")
    # Move the constraint counter to just below the int32 limit so that five
    # additional labels cannot fit.
    model._cCounter = int32_max - 1
    with pytest.raises(ValueError, match="exceeds the maximum"):
        model.add_constraints(var >= 0, name="c")


def test_label_dtype_option_int64() -> None:
    """With ``label_dtype`` set to ``np.int64``, labels and vars are int64."""
    # NOTE(review): ``options`` is used as a context manager; presumably it
    # restores the previous settings on exit -- confirm in linopy/config.py.
    with options:
        options["label_dtype"] = np.int64
        model = Model()
        var = model.add_variables(lower=0, upper=10, coords=[range(5)], name="x")
        assert var.labels.dtype == np.int64
        affine = 2 * var + 1
        assert affine.vars.dtype == np.int64


def test_label_dtype_rejects_invalid() -> None:
    """Assigning a floating dtype to ``label_dtype`` raises ``ValueError``."""
    rejected_dtype = np.float64
    with pytest.raises(ValueError, match="label_dtype must be one of"):
        options["label_dtype"] = rejected_dtype
Loading