
New defaults for concat, merge, combine_* #10062


Open: wants to merge 28 commits into base: main
5c56acf
Remove default values in private functions
jsignell Feb 14, 2025
5461a9f
Use sentinel value to change default with warnings
jsignell Feb 24, 2025
e16834f
Remove unnecessary warnings
jsignell Feb 24, 2025
9c50125
Use old kwarg values within map_blocks, concat dataarray
jsignell Feb 25, 2025
b0cf17a
Merge branch 'main' into concat_default_kwargs
jsignell Feb 25, 2025
0026ee8
Switch options back to old defaults
jsignell Feb 26, 2025
4d4deda
Update tests and add new ones to exercise options
jsignell Feb 26, 2025
5a4036b
Merge branch 'main' into concat_default_kwargs
jsignell Mar 4, 2025
912638b
Use `emit_user_level_warning` rather than `warnings.warn`
jsignell Mar 4, 2025
67fd4ff
Change hardcoded defaults
jsignell Mar 4, 2025
4f38292
Fix up test_concat
jsignell Mar 4, 2025
51ccc89
Add comment about why we allow data_vars='minimial' for concat over d…
jsignell Mar 4, 2025
aa3180e
Tidy up tests based on review
jsignell Mar 4, 2025
93d2abc
Merge branch 'main' into concat_default_kwargs
jsignell Mar 7, 2025
e517dcc
Trying to resolve mypy issues
jsignell Mar 10, 2025
0e678e5
Fix mypy in tests
jsignell Mar 10, 2025
37f0147
Fix doctests
jsignell Mar 10, 2025
dac337c
Ignore warnings on error tests
jsignell Mar 10, 2025
a0c16c3
Merge branch 'main' into concat_default_kwargs
jsignell Mar 13, 2025
4eb275c
Use typing.get_args when possible
jsignell Mar 13, 2025
03f1502
Allow `minimal` in concat options at the type level
jsignell Mar 13, 2025
f1649b8
Merge branch 'main' into concat_default_kwargs
dcherian Mar 13, 2025
7dbdd4a
Minimal docs update
jsignell Mar 13, 2025
c6a557b
Tighten up language
jsignell Mar 13, 2025
9667857
Merge branch 'main' into concat_default_kwargs
jsignell Mar 13, 2025
42cf522
Merge branch 'main' into concat_default_kwargs
jsignell Mar 17, 2025
8d0d390
Merge branch 'main' into concat_default_kwargs
jsignell Apr 18, 2025
ba45599
Add to deprecated section of whats new
jsignell Apr 18, 2025
44 changes: 37 additions & 7 deletions doc/user-guide/combining.rst
@@ -43,7 +43,6 @@ new dimension by stacking lower dimensional arrays together:

.. ipython:: python

da.sel(x="a")
xr.concat([da.isel(x=0), da.isel(x=1)], "x")

If the second argument to ``concat`` is a new dimension name, the arrays will
@@ -52,15 +51,18 @@ dimension:

.. ipython:: python

xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
da0 = da.isel(x=0).drop_vars("x")
da1 = da.isel(x=1).drop_vars("x")

xr.concat([da0, da1], "new_dim")
Contributor Author:

Dropping the overlapping "x" means that you don't get a future warning anymore and the outcome won't change with the new defaults. It seemed to me like it was maintaining the spirit of the docs.

Contributor:

I'd change to `xr.concat([da.isel(x=[0]), da.isel(x=[1])], dim="new_dim")`. I think that preserves the spirit, and gets users closer to what we'd like them to type and understand.

Contributor Author:

That one will give a FutureWarning about how join is going to change:

In [3]:  xr.concat([da.isel(x=[0]), da.isel(x=[1])], "new_dim")
<ipython-input-3-8d3fee24c8e4>:1: FutureWarning: In a future version of xarray the default value for join will change from join='outer' to join='exact'. This change will result in the following ValueError:cannot be aligned with join='exact' because index/labels/sizes are not equal along these coordinates (dimensions): 'x' ('x',) The recommendation is to set join explicitly for this case.
  xr.concat([da.isel(x=[0]), da.isel(x=[1])], "new_dim")
Out[3]: 
<xarray.DataArray (new_dim: 2, x: 2, y: 3)> Size: 96B
array([[[ 0.,  1.,  2.],
        [nan, nan, nan]],

       [[nan, nan, nan],
        [ 3.,  4.,  5.]]])
Coordinates:
  * x        (x) <U1 8B 'a' 'b'
  * y        (y) int64 24B 10 20 30
Dimensions without coordinates: new_dim

We can add an explicit join value to get rid of the warning, or we can let the docs build with the warning (I think that is not a good idea, because warnings in the docs might scare people).
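For reference, a runnable sketch of the explicit-join variant; the `da` construction is an assumption here, reconstructed to match the output shown in this thread:

```python
import numpy as np
import xarray as xr

# Reconstruction of the `da` used in the combining docs (an assumption):
da = xr.DataArray(
    np.arange(6).reshape(2, 3),
    coords=[("x", ["a", "b"]), ("y", [10, 20, 30])],
)

# Passing join explicitly keeps the outer-join result shown in this thread
# and avoids the FutureWarning during the deprecation cycle:
result = xr.concat([da.isel(x=[0]), da.isel(x=[1])], dim="new_dim", join="outer")
```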

Contributor Author:

Compare that with the example as it is on main:

In [3]:  xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
<ipython-input-8-5e17a4052d18>:1: FutureWarning: In a future version of xarray the default value for coords will change from coords='different' to coords='minimal'. This is likely to lead to different results when multiple datasets have matching variables with overlapping values. To opt in to new defaults and get rid of these warnings now use `set_options(use_new_combine_kwarg_defaults=True) or set coords explicitly.
  xr.concat([da.isel(x=0), da.isel(x=1)], "new_dim")
Out[3]: 
<xarray.DataArray (new_dim: 2, y: 3)> Size: 48B
array([[0, 1, 2],
       [3, 4, 5]])
Coordinates:
    x        (new_dim) <U1 8B 'a' 'b'
  * y        (y) int64 24B 10 20 30
Dimensions without coordinates: new_dim


The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or
:py:class:`~xarray.DataArray` object as well as a string, in which case it is
used to label the values along the new dimension:

.. ipython:: python

xr.concat([da.isel(x=0), da.isel(x=1)], pd.Index([-90, -100], name="new_dim"))
xr.concat([da0, da1], pd.Index([-90, -100], name="new_dim"))
Contributor:

Same here.


Of course, ``concat`` also works on ``Dataset`` objects:

@@ -75,6 +77,12 @@ between datasets. With the default parameters, xarray will load some coordinate
variables into memory to compare them between datasets. This may be prohibitively
expensive if you are manipulating your dataset lazily using :ref:`dask`.

.. note::

In a future version of xarray the default values for many of these options
will change. You can opt into the new default values early using
``xr.set_options(use_new_combine_kwarg_defaults=True)``.
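Until the option is widely available, an equivalent way to future-proof a call site (a sketch, not taken from the PR) is to pass the new default values explicitly:

```python
import xarray as xr

ds1 = xr.Dataset({"a": ("x", [1.0, 2.0])}, coords={"x": [0, 1]})
ds2 = xr.Dataset({"a": ("x", [3.0, 4.0])}, coords={"x": [2, 3]})

# Spelling out the future defaults gives the same result before and after
# the change, with no FutureWarning either way:
combined = xr.concat(
    [ds1, ds2],
    dim="x",
    data_vars="minimal",
    coords="minimal",
    compat="override",
    join="exact",
)
```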

.. _merge:

Merge
@@ -94,10 +102,18 @@ If you merge another dataset (or a dictionary including data array objects), by
default the resulting dataset will be aligned on the **union** of all index
coordinates:

.. note::

In a future version of xarray the default value for ``join`` and ``compat``
will change. This change will mean that xarray will no longer attempt
to align the indices of the merged dataset. You can opt into the new default
values early using ``xr.set_options(use_new_combine_kwarg_defaults=True)``.
Or explicitly set ``join='outer'`` to preserve old behavior.

.. ipython:: python

other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")})
xr.merge([ds, other])
xr.merge([ds, other], join="outer")
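For contrast, a self-contained sketch of how the old and new ``join`` defaults differ (the data here is invented to mirror the docs example):

```python
import xarray as xr

ds = xr.Dataset({"foo": ("x", [1.0, 2.0, 3.0])}, coords={"x": ["a", "b", "c"]})
other = xr.Dataset({"bar": ("x", [1, 2, 3, 4])}, coords={"x": ["a", "b", "c", "d"]})

# join="outer" (the old default) aligns on the union of the indexes,
# padding "foo" with NaN at "d"; join="exact" (the new default) would
# raise an alignment error here because the indexes differ.
merged = xr.merge([ds, other], join="outer")
```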

This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised
if you attempt to merge two variables with the same name but different values:
@@ -114,6 +130,16 @@ if you attempt to merge two variables with the same name but different values:
array([[ 1.4691123 , 0.71713666, -0.5090585 ],
[-0.13563237, 2.21211203, 0.82678535]])

.. note::

In a future version of xarray the default value for ``compat`` will change
from ``compat='no_conflicts'`` to ``compat='override'``. In this scenario
the values in the first object override all the values in other objects.

.. ipython:: python

xr.merge([ds, ds + 1], compat="override")

The same non-destructive merging between ``DataArray`` index coordinates is
used in the :py:class:`~xarray.Dataset` constructor:

@@ -144,6 +170,11 @@ For datasets, ``ds0.combine_first(ds1)`` works similarly to
there are conflicting values in variables to be merged, whereas
``.combine_first`` defaults to the calling object's values.

.. note::

In a future version of xarray the default options for ``xr.merge`` will change
such that the behavior matches ``combine_first``.
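A minimal illustration of the ``combine_first`` precedence described above (example data invented here):

```python
import numpy as np
import xarray as xr

ds0 = xr.Dataset({"a": ("x", [1.0, np.nan])}, coords={"x": [0, 1]})
ds1 = xr.Dataset({"a": ("x", [9.0, 2.0])}, coords={"x": [0, 1]})

# The calling object's values win wherever they are not missing;
# gaps (NaN) are filled from the argument:
filled = ds0.combine_first(ds1)
```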

.. _update:

Update
@@ -236,7 +267,7 @@ coordinates as long as any non-missing values agree or are disjoint:

ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]})
ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]})
xr.merge([ds1, ds2], compat="no_conflicts")
xr.merge([ds1, ds2], join="outer", compat="no_conflicts")

Note that due to the underlying representation of missing values as floating
point numbers (``NaN``), variable data type is not always preserved when merging
@@ -295,13 +326,12 @@ they are concatenated in order based on the values in their dimension
coordinates, not on their position in the list passed to ``combine_by_coords``.

.. ipython:: python
:okwarning:

x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])])
x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])])
xr.combine_by_coords([x2, x1])

These functions can be used by :py:func:`~xarray.open_mfdataset` to open many
These functions are used by :py:func:`~xarray.open_mfdataset` to open many
files as one dataset. The particular function used is specified by setting the
argument ``'combine'`` to ``'by_coords'`` or ``'nested'``. This is useful for
situations where your data is split across many files in multiple locations,
2 changes: 1 addition & 1 deletion doc/user-guide/terminology.rst
@@ -217,7 +217,7 @@ complete examples, please consult the relevant documentation.*
)

# combine the datasets
combined_ds = xr.combine_by_coords([ds1, ds2])
combined_ds = xr.combine_by_coords([ds1, ds2], join="outer")
combined_ds

lazy
23 changes: 18 additions & 5 deletions doc/whats-new.rst
@@ -32,6 +32,15 @@ Breaking changes
Deprecations
~~~~~~~~~~~~

- Start deprecation cycle for changing the default keyword arguments to ``concat``, ``merge``, ``combine``, ``open_mfdataset``.
Emits a ``FutureWarning`` when using old defaults and new defaults would result in different behavior.
Adds an option: ``use_new_combine_kwarg_defaults`` to opt in to new defaults immediately.
New values are:
- ``data_vars``: "minimal"
- ``coords``: "minimal"
- ``compat``: "override"
- ``join``: "exact"
By `Julia Signell <https://github.com/jsignell>`_.
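During the deprecation cycle, a generic sketch (not part of the PR) for finding call sites that still rely on the old defaults is to promote the warnings to errors; calls that pass the kwargs explicitly keep working:

```python
import warnings
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2])}, coords={"x": [0, 1]})

# Treat FutureWarning as an error to surface affected call sites; this call
# passes join/compat explicitly, so it succeeds on old and new defaults alike:
with warnings.catch_warnings():
    warnings.simplefilter("error", FutureWarning)
    merged = xr.merge([ds, ds], join="outer", compat="no_conflicts")
```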

Bug fixes
~~~~~~~~~
@@ -8028,13 +8037,17 @@ Backwards incompatible changes
Now, the default always concatenates data variables:

.. ipython:: python
:suppress:

ds = xray.Dataset({"x": 0})
:verbatim:

.. ipython:: python
In [1]: ds = xray.Dataset({"x": 0})

xray.concat([ds, ds], dim="y")
In [2]: xray.concat([ds, ds], dim="y")
Out[2]:
<xarray.Dataset> Size: 16B
Dimensions: (y: 2)
Dimensions without coordinates: y
Data variables:
x (y) int64 16B 0 0

To obtain the old behavior, supply the argument ``concat_over=[]``.

25 changes: 16 additions & 9 deletions xarray/backends/api.py
@@ -34,7 +34,7 @@
)
from xarray.backends.locks import _get_scheduler
from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
from xarray.core import indexing
from xarray.core import dtypes, indexing
from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset
from xarray.core.datatree import DataTree
@@ -50,6 +50,13 @@
_nested_combine,
combine_by_coords,
)
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
_COORDS_DEFAULT,
_DATA_VARS_DEFAULT,
_JOIN_DEFAULT,
CombineKwargDefault,
)

if TYPE_CHECKING:
try:
@@ -1404,14 +1411,16 @@ def open_mfdataset(
| Sequence[Index]
| None
) = None,
compat: CompatOptions = "no_conflicts",
compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
preprocess: Callable[[Dataset], Dataset] | None = None,
engine: T_Engine | None = None,
data_vars: Literal["all", "minimal", "different"] | list[str] = "all",
coords="different",
data_vars: Literal["all", "minimal", "different"]
| list[str]
| CombineKwargDefault = _DATA_VARS_DEFAULT,
coords=_COORDS_DEFAULT,
combine: Literal["by_coords", "nested"] = "by_coords",
parallel: bool = False,
join: JoinOptions = "outer",
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
attrs_file: str | os.PathLike | None = None,
combine_attrs: CombineAttrsOptions = "override",
**kwargs,
@@ -1598,9 +1607,6 @@ def open_mfdataset(

paths1d: list[str | ReadBuffer]
if combine == "nested":
if isinstance(concat_dim, str | DataArray) or concat_dim is None:
concat_dim = [concat_dim] # type: ignore[assignment]

# This creates a flat list which is easier to iterate over, whilst
# encoding the originally-supplied structure as "ids".
# The "ids" are not used at all if combine='by_coords`.
@@ -1649,13 +1655,14 @@
# along each dimension, using structure given by "ids"
combined = _nested_combine(
datasets,
concat_dims=concat_dim,
concat_dim=concat_dim,
compat=compat,
data_vars=data_vars,
coords=coords,
ids=ids,
join=join,
combine_attrs=combine_attrs,
fill_value=dtypes.NA,
)
elif combine == "by_coords":
# Redo ordering from coordinates, ignoring how they were ordered
21 changes: 17 additions & 4 deletions xarray/core/dataset.py
@@ -121,7 +121,13 @@
merge_coordinates_without_align,
merge_data_and_coords,
)
from xarray.util.deprecation_helpers import _deprecate_positional_args, deprecate_dims
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
_JOIN_DEFAULT,
CombineKwargDefault,
_deprecate_positional_args,
deprecate_dims,
)

if TYPE_CHECKING:
from dask.dataframe import DataFrame as DaskDataFrame
@@ -5279,7 +5285,14 @@ def stack_dataarray(da):

# concatenate the arrays
stackable_vars = [stack_dataarray(da) for da in self.data_vars.values()]
data_array = concat(stackable_vars, dim=new_dim)
data_array = concat(
stackable_vars,
dim=new_dim,
data_vars="all",
coords="different",
compat="equals",
join="outer",
)

if name is not None:
data_array.name = name
@@ -5523,8 +5536,8 @@ def merge(
self,
other: CoercibleMapping | DataArray,
overwrite_vars: Hashable | Iterable[Hashable] = frozenset(),
compat: CompatOptions = "no_conflicts",
join: JoinOptions = "outer",
compat: CompatOptions | CombineKwargDefault = _COMPAT_DEFAULT,
join: JoinOptions | CombineKwargDefault = _JOIN_DEFAULT,
fill_value: Any = xrdtypes.NA,
combine_attrs: CombineAttrsOptions = "override",
) -> Self:
18 changes: 16 additions & 2 deletions xarray/core/groupby.py
@@ -1608,7 +1608,14 @@ def _combine(self, applied, shortcut=False):
if shortcut:
combined = self._concat_shortcut(applied, dim, positions)
else:
combined = concat(applied, dim)
combined = concat(
applied,
dim,
data_vars="all",
coords="different",
compat="equals",
join="outer",
)
Contributor Author:

I hard-coded these to the old defaults since there is no way for the user to set them.

Contributor:

I agree with this approach. These options result in confusing groupby behaviour (#2145), but we can tackle that later.

combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size)

if isinstance(combined, type(self._obj)):
@@ -1768,7 +1775,14 @@ def _combine(self, applied):
"""Recombine the applied objects like the original."""
applied_example, applied = peek_at(applied)
dim, positions = self._infer_concat_args(applied_example)
combined = concat(applied, dim)
combined = concat(
applied,
dim,
data_vars="all",
coords="different",
compat="equals",
join="outer",
)
combined = _maybe_reorder(combined, dim, positions, N=self.group1d.size)
# assign coord when the applied function does not return that coord
if dim not in applied_example.dims:
13 changes: 13 additions & 0 deletions xarray/core/options.py
@@ -29,6 +29,7 @@
"keep_attrs",
"warn_for_unclosed_files",
"use_bottleneck",
"use_new_combine_kwarg_defaults",
"use_numbagg",
"use_opt_einsum",
"use_flox",
@@ -57,6 +58,7 @@ class T_Options(TypedDict):
warn_for_unclosed_files: bool
use_bottleneck: bool
use_flox: bool
use_new_combine_kwarg_defaults: bool
use_numbagg: bool
use_opt_einsum: bool

@@ -84,6 +86,7 @@ class T_Options(TypedDict):
"warn_for_unclosed_files": False,
"use_bottleneck": True,
"use_flox": True,
"use_new_combine_kwarg_defaults": False,
"use_numbagg": True,
"use_opt_einsum": True,
}
@@ -113,6 +116,7 @@ def _positive_integer(value: Any) -> bool:
"file_cache_maxsize": _positive_integer,
"keep_attrs": lambda choice: choice in [True, False, "default"],
"use_bottleneck": lambda value: isinstance(value, bool),
"use_new_combine_kwarg_defaults": lambda value: isinstance(value, bool),
"use_numbagg": lambda value: isinstance(value, bool),
"use_opt_einsum": lambda value: isinstance(value, bool),
"use_flox": lambda value: isinstance(value, bool),
@@ -250,6 +254,15 @@ class set_options:
use_flox : bool, default: True
Whether to use ``numpy_groupies`` and `flox`` to
accelerate groupby and resampling reductions.
use_new_combine_kwarg_defaults : bool, default False
Whether to use new kwarg default values for combine functions:
:py:func:`~xarray.concat`, :py:func:`~xarray.merge`,
:py:func:`~xarray.open_mfdataset`. New values are:

* ``data_vars``: "minimal"
* ``coords``: "minimal"
* ``compat``: "override"
* ``join``: "exact"
use_numbagg : bool, default: True
Whether to use ``numbagg`` to accelerate reductions.
Takes precedence over ``use_bottleneck`` when both are True.
14 changes: 11 additions & 3 deletions xarray/core/parallel.py
@@ -351,7 +351,9 @@ def _wrapper(
result = func(*converted_args, **kwargs)

merged_coordinates = merge(
[arg.coords for arg in args if isinstance(arg, Dataset | DataArray)]
[arg.coords for arg in args if isinstance(arg, Dataset | DataArray)],
join="exact",
compat="override",
).coords

# check all dims are present
@@ -439,7 +441,11 @@ def _wrapper(
# rechunk any numpy variables appropriately
xarray_objs = tuple(arg.chunk(arg.chunksizes) for arg in xarray_objs)

merged_coordinates = merge([arg.coords for arg in aligned]).coords
merged_coordinates = merge(
[arg.coords for arg in aligned],
join="exact",
compat="override",
).coords

_, npargs = unzip(
sorted(
@@ -472,7 +478,9 @@ def _wrapper(
)

coordinates = merge(
(preserved_coords, template.coords.to_dataset()[new_coord_vars])
(preserved_coords, template.coords.to_dataset()[new_coord_vars]),
join="outer",
compat="override",
).coords
output_chunks: Mapping[Hashable, tuple[int, ...]] = {
dim: input_chunks[dim] for dim in template.dims if dim in input_chunks