Merge branch 'main' into ig/fix_equality_checl
ilan-gold authored May 17, 2024
2 parents 5262440 + 31111b3 commit 89acd11
Showing 29 changed files with 886 additions and 324 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/benchmarks.yml
@@ -28,8 +28,11 @@ jobs:
           environment-name: xarray-tests
           cache-environment: true
           cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark"
+          # add "build" because of https://github.com/airspeed-velocity/asv/issues/1385
           create-args: >-
             asv
+            build
+            mamba
       - name: Run benchmarks
@@ -47,9 +50,6 @@ jobs:
           asv machine --yes
           echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
           echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
-          # Use mamba for env creation
-          # export CONDA_EXE=$(which mamba)
-          export CONDA_EXE=$(which conda)
           # Run benchmarks for current commit against base
           ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
           asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
14 changes: 10 additions & 4 deletions .github/workflows/ci-additional.yaml
@@ -6,6 +6,12 @@ on:
   pull_request:
     branches:
       - "main"
+    paths:
+      - 'ci/**'
+      - '.github/**'
+      - '/*' # covers files such as `pyproject.toml`
+      - 'properties/**'
+      - 'xarray/**'
   workflow_dispatch: # allows you to trigger manually
 
 concurrency:
@@ -127,7 +133,7 @@ jobs:
         python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy
@@ -181,7 +187,7 @@ jobs:
         python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy39
@@ -242,7 +248,7 @@ jobs:
         python -m pyright xarray/
       - name: Upload pyright coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: pyright_report/cobertura.xml
           flags: pyright
@@ -301,7 +307,7 @@ jobs:
         python -m pyright xarray/
       - name: Upload pyright coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: pyright_report/cobertura.xml
           flags: pyright39
8 changes: 7 additions & 1 deletion .github/workflows/ci.yaml
@@ -6,6 +6,12 @@ on:
   pull_request:
     branches:
       - "main"
+    paths:
+      - 'ci/**'
+      - '.github/**'
+      - '/*' # covers files such as `pyproject.toml`
+      - 'properties/**'
+      - 'xarray/**'
   workflow_dispatch: # allows you to trigger manually
 
 concurrency:
@@ -156,7 +162,7 @@ jobs:
           path: pytest.xml
 
       - name: Upload code coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: ./coverage.xml
           flags: unittests
2 changes: 1 addition & 1 deletion .github/workflows/upstream-dev-ci.yaml
@@ -143,7 +143,7 @@ jobs:
       run: |
         python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
       - name: Upload mypy coverage to Codecov
-        uses: codecov/codecov-action@v4.3.0
+        uses: codecov/codecov-action@v4.3.1
         with:
           file: mypy_report/cobertura.xml
           flags: mypy
5 changes: 3 additions & 2 deletions .gitignore
@@ -50,7 +50,8 @@ nosetests.xml
 dask-worker-space/
 
 # asv environments
-.asv
+asv_bench/.asv
+asv_bench/pkgs
 
 # Translations
 *.mo
@@ -68,7 +69,7 @@ dask-worker-space/
 
 # xarray specific
 doc/_build
-generated/
+doc/generated/
 xarray/tests/data/*.grib.*.idx
 
 # Sync tools
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:
   exclude: 'xarray/datatree_.*'
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -13,24 +13,24 @@ repos:
       - id: mixed-line-ending
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: 'v0.3.4'
+    rev: 'v0.4.3'
    hooks:
       - id: ruff
         args: ["--fix", "--show-fixes"]
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 24.3.0
+    rev: 24.4.2
     hooks:
       - id: black-jupyter
   - repo: https://github.com/keewis/blackdoc
     rev: v0.3.9
     hooks:
       - id: blackdoc
         exclude: "generate_aggregations.py"
-        additional_dependencies: ["black==24.3.0"]
+        additional_dependencies: ["black==24.4.2"]
       - id: blackdoc-autoupdate-black
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.9.0
+    rev: v1.10.0
     hooks:
       - id: mypy
         # Copied from setup.cfg
12 changes: 8 additions & 4 deletions asv_bench/asv.conf.json
@@ -29,7 +29,7 @@
     // If missing or the empty string, the tool will be automatically
     // determined by looking for tools on the PATH environment
     // variable.
-    "environment_type": "conda",
+    "environment_type": "mamba",
     "conda_channels": ["conda-forge"],
 
     // timeout in seconds for installing any dependencies in environment
@@ -41,7 +41,7 @@
 
     // The Pythons you'd like to test against. If not provided, defaults
     // to the current version of Python used to run `asv`.
-    "pythons": ["3.10"],
+    "pythons": ["3.11"],
 
     // The matrix of dependencies to test. Each key is the name of a
     // package (in PyPI) and the values are version numbers. An empty
@@ -72,8 +72,12 @@
         "sparse": [""],
         "cftime": [""]
     },
-
-
+    // fix for bad builds
+    // https://github.com/airspeed-velocity/asv/issues/1389#issuecomment-2076131185
+    "build_command": [
+        "python -m build",
+        "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
+    ],
     // Combinations of libraries/python versions can be excluded/included
     // from the set to test. Each entry is a dictionary containing additional
     // key-value pairs to include/exclude.
17 changes: 9 additions & 8 deletions asv_bench/benchmarks/groupby.py
@@ -68,6 +68,7 @@ def setup(self, *args, **kwargs):
         self.ds2d_mean = self.ds2d.groupby("b").mean().compute()
 
 
+# TODO: These don't work now because we are calling `.compute` explicitly.
 class GroupByPandasDataFrame(GroupBy):
     """Run groupby tests using pandas DataFrame."""
 
@@ -111,11 +112,11 @@ def setup(self, *args, **kwargs):
             {
                 "b": ("time", np.arange(365.0 * 24)),
             },
-            coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
+            coords={"time": pd.date_range("2001-01-01", freq="h", periods=365 * 24)},
         )
         self.ds2d = self.ds1d.expand_dims(z=10)
-        self.ds1d_mean = self.ds1d.resample(time="48H").mean()
-        self.ds2d_mean = self.ds2d.resample(time="48H").mean()
+        self.ds1d_mean = self.ds1d.resample(time="48h").mean()
+        self.ds2d_mean = self.ds2d.resample(time="48h").mean()
 
     @parameterized(["ndim"], [(1, 2)])
     def time_init(self, ndim):
@@ -127,15 +128,15 @@ def time_init(self, ndim):
     def time_agg_small_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
         with xr.set_options(use_flox=use_flox):
-            getattr(ds.resample(time="3M"), method)().compute()
+            getattr(ds.resample(time="3ME"), method)().compute()
 
     @parameterized(
         ["method", "ndim", "use_flox"], [("sum", "mean"), (1, 2), (True, False)]
     )
     def time_agg_large_num_groups(self, method, ndim, use_flox):
         ds = getattr(self, f"ds{ndim}d")
         with xr.set_options(use_flox=use_flox):
-            getattr(ds.resample(time="48H"), method)().compute()
+            getattr(ds.resample(time="48h"), method)().compute()
 
 
 class ResampleDask(Resample):
@@ -154,13 +155,13 @@ def setup(self, *args, **kwargs):
             },
             coords={
                 "time": xr.date_range(
-                    "2001-01-01", freq="H", periods=365 * 24, calendar="noleap"
+                    "2001-01-01", freq="h", periods=365 * 24, calendar="noleap"
                 )
             },
         )
         self.ds2d = self.ds1d.expand_dims(z=10)
-        self.ds1d_mean = self.ds1d.resample(time="48H").mean()
-        self.ds2d_mean = self.ds2d.resample(time="48H").mean()
+        self.ds1d_mean = self.ds1d.resample(time="48h").mean()
+        self.ds2d_mean = self.ds2d.resample(time="48h").mean()
 
 
 @parameterized(["use_cftime", "use_flox"], [[True, False], [True, False]])
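The frequency-string churn above ("H" to "h", "48H" to "48h", "3M" to "3ME") tracks pandas 2.2, which deprecated the uppercase offset aliases. A minimal sketch of the new spellings (assuming pandas >= 2.2):

    import pandas as pd

    # Lowercase "h" replaces "H" for hourly; "ME" (month-end) replaces "M".
    # The old uppercase spellings still work but emit FutureWarning.
    hourly = pd.date_range("2001-01-01", freq="h", periods=24)
    month_end = pd.date_range("2001-01-01", freq="ME", periods=12)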
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/indexing.py
@@ -12,12 +12,14 @@
 nt = 500
 
 basic_indexes = {
+    "1scalar": {"x": 0},
     "1slice": {"x": slice(0, 3)},
     "1slice-1scalar": {"x": 0, "y": slice(None, None, 3)},
     "2slicess-1scalar": {"x": slice(3, -3, 3), "y": 1, "t": slice(None, -3, 3)},
 }
 
 basic_assignment_values = {
+    "1scalar": 0,
     "1slice": xr.DataArray(randn((3, ny), frac_nan=0.1), dims=["x", "y"]),
     "1slice-1scalar": xr.DataArray(randn(int(ny / 3) + 1, frac_nan=0.1), dims=["y"]),
     "2slicess-1scalar": xr.DataArray(
@@ -74,6 +76,10 @@ def setup(self, key):
                 "x_coords": ("x", np.linspace(1.1, 2.1, nx)),
             },
         )
+        # Benchmark how indexing is slowed down by adding many scalar variables
+        # to the dataset
+        # https://github.com/pydata/xarray/pull/9003
+        self.ds_large = self.ds.merge({f"extra_var{i}": i for i in range(400)})
 
 
 class Indexing(Base):
@@ -89,6 +95,11 @@ def time_indexing_outer(self, key):
     def time_indexing_vectorized(self, key):
         self.ds.isel(**vectorized_indexes[key]).load()
 
+    @parameterized(["key"], [list(basic_indexes.keys())])
+    def time_indexing_basic_ds_large(self, key):
+        # https://github.com/pydata/xarray/pull/9003
+        self.ds_large.isel(**basic_indexes[key]).load()
+
 
 class Assignment(Base):
     @parameterized(["key"], [list(basic_indexes.keys())])
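The new ds_large benchmark targets a specific pathology: indexing a dataset padded with hundreds of scalar variables. A minimal sketch of that scenario, mirroring the benchmark above (array shape and names here are illustrative):

    import numpy as np
    import xarray as xr

    # A modest 2-D array plus 400 scalar variables; per-variable overhead
    # in isel() starts to dominate once the dataset is this wide.
    ds = xr.Dataset({"a": (("x", "y"), np.random.randn(100, 100))})
    ds_large = ds.merge({f"extra_var{i}": i for i in range(400)})
    subset = ds_large.isel(x=0, y=slice(0, 10))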
2 changes: 1 addition & 1 deletion ci/min_deps_check.py
@@ -133,7 +133,7 @@ def process_pkg(
     - publication date of version suggested by policy (YYYY-MM-DD)
     - status ("<", "=", "> (!)")
     """
-    print("Analyzing %s..." % pkg)
+    print(f"Analyzing {pkg}...")
     versions = query_conda(pkg)
 
     try:
4 changes: 2 additions & 2 deletions doc/user-guide/io.rst
@@ -874,7 +874,7 @@ and then calling ``to_zarr`` with ``compute=False`` to write only metadata
     # The values of this dask array are entirely irrelevant; only the dtype,
     # shape and chunks are used
     dummies = dask.array.zeros(30, chunks=10)
-    ds = xr.Dataset({"foo": ("x", dummies)})
+    ds = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)})
     path = "path/to/directory.zarr"
     # Now we write the metadata without computing any array values
     ds.to_zarr(path, compute=False)
@@ -890,7 +890,7 @@ where the data should be written (in index space, not label space), e.g.,
     # For convenience, we'll slice a single dataset, but in the real use-case
     # we would create them separately possibly even from separate processes.
-    ds = xr.Dataset({"foo": ("x", np.arange(30))})
+    ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)})
     # Any of the following region specifications are valid
     ds.isel(x=slice(0, 10)).to_zarr(path, region="auto")
     ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"})
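Stitched together, the two io.rst hunks document a metadata-first, region-write workflow; the change adds an explicit "x" coordinate so that region="auto" has labels to infer from. A minimal end-to-end sketch (assuming the zarr backend is installed and "directory.zarr" is a writable local store):

    import dask.array
    import numpy as np
    import xarray as xr

    path = "directory.zarr"  # hypothetical local store

    # Step 1: write metadata only. The dummy dask values are never computed;
    # only their dtype, shape, and chunks matter.
    dummies = dask.array.zeros(30, chunks=10)
    template = xr.Dataset({"foo": ("x", dummies)}, coords={"x": np.arange(30)})
    template.to_zarr(path, compute=False)

    # Step 2: fill regions independently, in index space. With the "x"
    # coordinate written in step 1, region="auto" can infer the slice.
    ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)})
    ds.isel(x=slice(0, 10)).to_zarr(path, region="auto")
    ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"})
    ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)})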