Skip to content

Commit b2b6248

Browse files
authored
fix: Filter sorted flag from physical in CategoricalChunked (#23827)
1 parent 58accb3 commit b2b6248

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

crates/polars-core/src/chunked_array/logical/categorical.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,13 @@ impl<T: PolarsCategoricalType> CategoricalChunked<T> {
3535
}
3636

3737
/// Returns the statistics flags for this categorical array.
///
/// After this change the flags are read from the physical array
/// (`self.phys`) and, when `uses_lexical_ordering()` is true, the sorted
/// flag is reset to `IsSorted::Not` before the flags are returned. All other
/// flags are passed through as reported by the physical array.
pub(crate) fn get_flags(&self) -> StatisticsFlags {
38-
self.phys.get_flags()
38+
// If we use lexical ordering then physical sortedness does not imply
39+
// our sortedness.
40+
let mut flags = self.phys.get_flags();
41+
if self.uses_lexical_ordering() {
42+
flags.set_sorted(IsSorted::Not);
43+
}
44+
flags
3945
}
4046

4147
/// Set flags for the ChunkedArray.

py-polars/tests/unit/operations/namespaces/test_categorical.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
from io import BytesIO
4+
35
import pytest
46

57
import polars as pl
@@ -236,3 +238,17 @@ def test_cat_slice() -> None:
236238
"",
237239
None,
238240
]
241+
242+
243+
# Regression test (the number 23823 comes from the test name): lazily scans an
# in-memory CSV that contains a "#"-prefixed comment line, with "colx"
# overridden to Categorical, sorts ascending on "colx", and checks the
# collected result against the expected two-row frame.
# NOTE(review): assert_frame_equal is not imported in the visible hunk;
# presumably it is imported elsewhere in this test module - confirm.
def test_cat_order_flag_csv_read_23823() -> None:
244+
data = BytesIO(b"colx,coly\nabc,123\n#not_a_row\nxyz,456")
245+
lf = pl.scan_csv(
246+
source=data,
247+
comment_prefix="#",
248+
schema_overrides={"colx": pl.Categorical},
249+
)
250+
expected = pl.DataFrame(
251+
{"colx": ["abc", "xyz"], "coly": [123, 456]},
252+
schema_overrides={"colx": pl.Categorical},
253+
)
254+
assert_frame_equal(expected, lf.sort("colx", descending=False).collect())

0 commit comments

Comments
 (0)