diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d22a883cfa9..08016f18fbd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,15 +60,17 @@ Bug fixes `CFMaskCoder`/`CFScaleOffsetCoder` (:issue:`2304`, :issue:`5597`, :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`). By `Kai Mühlbauer `_. -- do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided +- Do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided (:issue:`8844`, :pull:`8852`). - Adapt handling of copy keyword argument for numpy >= 2.0dev - (:issue:`8844`, :pull:`8851`, :pull:`8865``). + (:issue:`8844`, :pull:`8851`, :pull:`8865`). By `Kai Mühlbauer `_. -- import trapz/trapezoid depending on numpy version. +- Import trapz/trapezoid depending on numpy version (:issue:`8844`, :pull:`8865`). By `Kai Mühlbauer `_. - +- Warn and return bytes undecoded in case of UnicodeDecodeError in h5netcdf-backend + (:issue:`5563`, :pull:`8874`). + By `Kai Mühlbauer `_. Documentation diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b7c1b2a5f03..81ba37f6707 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -28,6 +28,7 @@ from xarray.core import indexing from xarray.core.utils import ( FrozenDict, + emit_user_level_warning, is_remote_uri, read_magic_number_from_file, try_read_magic_number_from_file_or_path, @@ -58,13 +59,6 @@ def _getitem(self, key): return array[key] -def maybe_decode_bytes(txt): - if isinstance(txt, bytes): - return txt.decode("utf-8") - else: - return txt - - def _read_attributes(h5netcdf_var): # GH451 # to ensure conventions decoding works properly on Python 3, decode all @@ -72,7 +66,16 @@ def _read_attributes(h5netcdf_var): attrs = {} for k, v in h5netcdf_var.attrs.items(): if k not in ["_FillValue", "missing_value"]: - v = maybe_decode_bytes(v) + if isinstance(v, bytes): + try: + v = v.decode("utf-8") + except UnicodeDecodeError: + emit_user_level_warning( + f"'utf-8' codec can't decode bytes for attribute " + f"{k!r} of h5netcdf object {h5netcdf_var.name!r}, " + f"returning bytes undecoded.", + UnicodeWarning, + ) attrs[k] = v return attrs diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3fb137977e8..1d69b3adc63 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3560,6 +3560,16 @@ def test_dump_encodings_h5py(self) -> None: assert actual.x.encoding["compression"] == "lzf" assert actual.x.encoding["compression_opts"] is None + def test_decode_utf8_warning(self) -> None: + title = b"\xc3" + with create_tmp_file() as tmp_file: + with nc4.Dataset(tmp_file, "w") as f: + f.title = title + with pytest.warns(UnicodeWarning, match="returning bytes undecoded") as w: + ds = xr.load_dataset(tmp_file, engine="h5netcdf") + assert ds.title == title + assert "attribute 'title' of h5netcdf object '/'" in str(w[0].message) + @requires_h5netcdf @requires_netCDF4