diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b81be3c0192..6bde6504a7f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,9 @@ Bug fixes `CFMaskCoder`/`CFScaleOffsetCoder` (:issue:`2304`, :issue:`5597`, :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. +- Do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided + (:issue:`8844`, :pull:`8852`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 3b11e7bfa02..52cf0fc3656 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -309,6 +309,9 @@ def encode(self, variable: Variable, name: T_Name = None): dtype = np.dtype(encoding.get("dtype", data.dtype)) fv = encoding.get("_FillValue") mv = encoding.get("missing_value") + # to properly handle _FillValue/missing_value below [a], [b] + # we need to check if unsigned data is written as signed data + unsigned = encoding.get("_Unsigned") is not None fv_exists = fv is not None mv_exists = mv is not None @@ -323,13 +326,19 @@ def encode(self, variable: Variable, name: T_Name = None): if fv_exists: # Ensure _FillValue is cast to same dtype as data's - encoding["_FillValue"] = dtype.type(fv) + # [a] need to skip this if _Unsigned is available + if not unsigned: + encoding["_FillValue"] = dtype.type(fv) fill_value = pop_to(encoding, attrs, "_FillValue", name=name) if mv_exists: # try to use _FillValue, if it exists to align both values # or use missing_value and ensure it's cast to same dtype as data's - encoding["missing_value"] = attrs.get("_FillValue", dtype.type(mv)) + # [b] need to provide mv verbatim if _Unsigned is available + encoding["missing_value"] = attrs.get( + "_FillValue", + (dtype.type(mv) if not unsigned else mv), + ) fill_value = pop_to(encoding, attrs, "missing_value", name=name) # apply fillna @@ -522,7 +531,6 @@ def encode(self, variable: Variable, name: T_Name = None) -> 
Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: if "_Unsigned" in variable.attrs: dims, data, attrs, encoding = unpack_for_decoding(variable) - unsigned = pop_to(attrs, encoding, "_Unsigned") if data.dtype.kind == "i":