Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make polars frames lazy and stream into csv #294

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
enh(constraints): Improve infer_schema_polars
coroa committed May 18, 2024
commit 4163c1bac6986a93b58cbe74e9e8ae62298f70a9
6 changes: 4 additions & 2 deletions linopy/common.py
Original file line number Diff line number Diff line change
@@ -260,7 +260,7 @@ def check_has_nulls(df: pd.DataFrame, name: str):
raise ValueError(f"{name} contains nan's in field(s) {fields}")


def infer_schema_polars(ds: pl.DataFrame) -> dict:
def infer_schema_polars(ds: Dataset, overwrites: dict[str, pl.DataType]) -> dict:
"""
Infer the schema for a Polars DataFrame based on the data types of its columns.

@@ -272,7 +272,9 @@ def infer_schema_polars(ds: pl.DataFrame) -> dict:
"""
schema = {}
for col_name, array in ds.items():
if np.issubdtype(array.dtype, np.integer):
if col_name in overwrites:
schema[col_name] = overwrites[col_name]
elif np.issubdtype(array.dtype, np.integer):
schema[col_name] = pl.Int32 if os.name == "nt" else pl.Int64
elif np.issubdtype(array.dtype, np.floating):
schema[col_name] = pl.Float64
3 changes: 1 addition & 2 deletions linopy/constraints.py
Original file line number Diff line number Diff line change
@@ -578,8 +578,7 @@ def to_polars(self):
check_has_nulls_polars(long, name=f"{self.type} {self.name}")

short = ds[[k for k in ds if "_term" not in ds[k].dims]]
schema = infer_schema_polars(short)
schema["sign"] = pl.Enum(["=", "<=", ">="])
schema = infer_schema_polars(short, overwrites={"sign": pl.Enum(["=", "<=", ">="])})
short = to_polars(short, schema=schema)
short = filter_nulls_polars(short)
check_has_nulls_polars(short, name=f"{self.type} {self.name}")