
Commit 15bcb4c

Author: Giacomo Caria (committed)
Merge remote-tracking branch 'upstream/main' into add_boundaries_keyword_resample
2 parents 33a4d41 + e49cfc4 · commit 15bcb4c

File tree

12 files changed: +80, -31 lines


.pre-commit-config.yaml

Lines changed: 13 additions & 12 deletions
@@ -24,24 +24,23 @@ repos:
       - id: rst-inline-touching-normal
       - id: text-unicode-replacement-char
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.13.3
+    rev: v0.14.1
     hooks:
       - id: ruff-check
         args: ["--fix", "--show-fixes"]
       - id: ruff-format
-  # Disabled: blackdoc v0.4.3 has compatibility issues with Python 3.13
-  # Re-enable when blackdoc is updated to support Python 3.13
-  # - repo: https://github.com/keewis/blackdoc
-  #   rev: v0.4.3
-  #   hooks:
-  #     - id: blackdoc
-  #       exclude: "generate_aggregations.py"
-  #       additional_dependencies: ["black==24.8.0"]
+  - repo: https://github.com/keewis/blackdoc
+    rev: v0.4.5
+    hooks:
+      - id: blackdoc
+        exclude: "generate_aggregations.py"
+        # make sure this is the most recent version of black
+        additional_dependencies: ["black==25.9.0"]
   - repo: https://github.com/rbubley/mirrors-prettier
     rev: v3.6.2
     hooks:
       - id: prettier
-        args: [--cache-location=.prettier_cache/cache]
+        args: ["--cache-location=.prettier_cache/cache"]
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.18.2
     hooks:
@@ -61,20 +60,22 @@ repos:
             numpy,
           ]
   - repo: https://github.com/citation-file-format/cff-converter-python
-    rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
+    rev: 5295f87c0e261da61a7b919fc754e3a77edd98a7
     hooks:
       - id: validate-cff
   - repo: https://github.com/ComPWA/taplo-pre-commit
     rev: v0.9.3
     hooks:
       - id: taplo-format
         args: ["--option", "array_auto_collapse=false"]
+      - id: taplo-lint
+        args: ["--no-schema"]
   - repo: https://github.com/abravalheri/validate-pyproject
     rev: v0.24.1
     hooks:
       - id: validate-pyproject
         additional_dependencies: ["validate-pyproject-schema-store[all]"]
   - repo: https://github.com/adhtruong/mirrors-typos
-    rev: v1.37.2
+    rev: v1.38.1
     hooks:
       - id: typos

asv_bench/asv.conf.json

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@
     // If missing or the empty string, the tool will be automatically
     // determined by looking for tools on the PATH environment
     // variable.
-    "environment_type": "mamba",
+    "environment_type": "rattler",
     "conda_channels": ["conda-forge"],
 
     // timeout in seconds for installing any dependencies in environment
@@ -76,7 +76,7 @@
     // https://github.com/airspeed-velocity/asv/issues/1389#issuecomment-2076131185
     "build_command": [
         "python -m build",
-        "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
+        "python -m pip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
     ],
     // Combinations of libraries/python versions can be excluded/included
     // from the set to test. Each entry is a dictionary containing additional

asv_bench/benchmarks/combine.py

Lines changed: 16 additions & 0 deletions
@@ -5,6 +5,22 @@
 from . import requires_dask
 
 
+class Concat1d:
+    """Benchmark concatenating large datasets"""
+
+    def setup(self) -> None:
+        self.data_arrays = [
+            xr.DataArray(data=np.zeros(4 * 1024 * 1024, dtype=np.int8), dims=["x"])
+            for _ in range(10)
+        ]
+
+    def time_concat(self) -> None:
+        xr.concat(self.data_arrays, dim="x")
+
+    def peakmem_concat(self) -> None:
+        xr.concat(self.data_arrays, dim="x")
+
+
 class Combine1d:
     """Benchmark concatenating and merging large datasets"""
 
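For readers who want to reproduce the measurement outside of asv, here is a minimal standalone sketch of what ``Concat1d`` exercises (only numpy and xarray are assumed to be installed):

import numpy as np
import xarray as xr

# Ten 4 MiB int8 arrays sharing the dimension "x", mirroring Concat1d.setup.
data_arrays = [
    xr.DataArray(data=np.zeros(4 * 1024 * 1024, dtype=np.int8), dims=["x"])
    for _ in range(10)
]

# The call timed by time_concat and profiled by peakmem_concat.
combined = xr.concat(data_arrays, dim="x")
print(combined.sizes["x"])  # 41943040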

ci/requirements/environment-benchmark.yml

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ dependencies:
   - numba
   - numbagg
   - numexpr
+  - py-rattler
   - numpy>=2.2,<2.3 # https://github.com/numba/numba/issues/10105
   - opt_einsum
   - packaging

doc/examples/apply_ufunc_vectorize_1d.ipynb

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@
     "\n",
     "### Load data\n",
     "\n",
-    "First lets load an example dataset"
+    "First let's load an example dataset"
    ]
   },
   {

doc/user-guide/hierarchical-data.rst

Lines changed: 1 addition & 1 deletion
@@ -426,7 +426,7 @@ We can use :py:meth:`xarray.DataTree.match` for this:
 We can also subset trees by the contents of the nodes.
 :py:meth:`xarray.DataTree.filter` retains only the nodes of a tree that meet a certain condition.
 For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults:
-First lets recreate the tree but with an ``age`` data variable in every node:
+First let's recreate the tree but with an ``age`` data variable in every node:
 
 .. jupyter-execute::
 

doc/user-guide/io.rst

Lines changed: 1 addition & 1 deletion
@@ -128,7 +128,7 @@ The backends are tried in order: **netcdf4 → h5netcdf → scipy → pydap →
 .. code-block:: python
 
     # Prefer h5netcdf over netcdf4
-    xr.set_options(netcdf_engine_order=['h5netcdf', 'netcdf4', 'scipy'])
+    xr.set_options(netcdf_engine_order=["h5netcdf", "netcdf4", "scipy"])
 
 See :ref:`options` for more details on configuration options.
 
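A brief usage sketch of the option documented above, assuming the ``netcdf_engine_order`` option described on this page and a hypothetical file name; ``xr.set_options`` can also be used as a context manager so the preference applies only temporarily:

import xarray as xr

# Hypothetical file; the engine preference applies only inside the block.
with xr.set_options(netcdf_engine_order=["h5netcdf", "netcdf4", "scipy"]):
    ds = xr.open_dataset("example.nc")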

doc/whats-new.rst

Lines changed: 9 additions & 1 deletion
@@ -32,12 +32,20 @@ Bug Fixes
 ~~~~~~~~~
 - Fix h5netcdf backend for format=None, use same rule as netcdf4 backend (:pull:`10859`).
   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_
-
 - ``netcdf4`` and ``pydap`` backends now use stricter URL detection to avoid incorrectly claiming
   remote URLs. The ``pydap`` backend now only claims URLs with explicit DAP protocol indicators
   (``dap2://`` or ``dap4://`` schemes, or ``/dap2/`` or ``/dap4/`` in the URL path). This prevents
   both backends from claiming remote Zarr stores and other non-DAP URLs without an explicit
   ``engine=`` argument. (:pull:`10804`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
+- Fix indexing with empty arrays for scipy & h5netcdf backends which now resolves to empty slices (:issue:`10867`, :pull:`10870`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_
+
+Performance
+~~~~~~~~~~~
+
+- Speedup and reduce memory usage of :py:func:`concat`. Magnitude of improvement scales
+  with size of the concatenation dimension. By `Deepak Cherian <https://github.com/dcherian>`_.
+  :issue:`10864` :pull:`10866`.
 
 Documentation
 ~~~~~~~~~~~~~

xarray/backends/api.py

Lines changed: 4 additions & 4 deletions
@@ -1420,10 +1420,10 @@ def open_mfdataset(
     chunks : int, dict, 'auto' or None, optional
         Dictionary with keys given by dimension names and values given by chunk sizes.
         In general, these should divide the dimensions of each dataset. If int, chunk
-        each dimension by ``chunks``. By default, chunks will be chosen to load entire
-        input files into memory at once. This has a major impact on performance: please
-        see the full documentation for more details [2]_. This argument is evaluated
-        on a per-file basis, so chunk sizes that span multiple files will be ignored.
+        each dimension by ``chunks``. By default, chunks will be chosen to match the
+        chunks on disk. This may impact performance: please see the full documentation
+        for more details [2]_. This argument is evaluated on a per-file basis, so chunk
+        sizes that span multiple files will be ignored.
     concat_dim : str, DataArray, Index or a Sequence of these or None, optional
         Dimensions to concatenate files along. You only need to provide this argument
         if ``combine='nested'``, and if any of the dimensions along which you want to
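A hedged usage sketch of the ``chunks`` behaviour described in the updated docstring (the glob pattern and the ``time`` dimension are hypothetical, and dask is assumed to be installed):

import xarray as xr

# With the default, chunk sizes are chosen to match the chunking on disk.
ds_default = xr.open_mfdataset("data/*.nc")

# An explicit mapping overrides that default; it is evaluated per file,
# so chunk sizes spanning multiple files are ignored.
ds_explicit = xr.open_mfdataset("data/*.nc", chunks={"time": 100})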

xarray/core/indexing.py

Lines changed: 6 additions & 2 deletions
@@ -1366,15 +1366,19 @@ def _decompose_outer_indexer(
     gains = [
         (
             (np.max(k) - np.min(k) + 1.0) / len(np.unique(k))
-            if isinstance(k, np.ndarray)
+            if isinstance(k, np.ndarray) and k.size != 0
             else 0
         )
         for k in indexer_elems
     ]
     array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None
 
     for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=False)):
-        if isinstance(k, np.ndarray) and i != array_index:
+        if isinstance(k, np.ndarray) and k.size == 0:
+            # empty np.ndarray key is converted to empty slice
+            # see https://github.com/pydata/xarray/issues/10867
+            backend_indexer.append(slice(0, 0))
+        elif isinstance(k, np.ndarray) and i != array_index:
             # np.ndarray key is converted to slice that covers the entire
             # entries of this key.
             backend_indexer.append(slice(np.min(k), np.max(k) + 1))
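The intent of the new branch can be checked with plain numpy, independent of xarray's internals: an empty integer key selects nothing, which is exactly what the empty slice handed to the backend reproduces (a sketch; building ``slice(np.min(k), np.max(k) + 1)`` would fail on an empty key because ``np.min`` has no identity for zero-size arrays):

import numpy as np

arr = np.arange(10)
empty_key = np.array([], dtype=np.intp)

# An empty integer array selects an empty result ...
by_array = arr[empty_key]
# ... and the decomposed indexer now mimics that with an empty slice.
by_slice = arr[slice(0, 0)]

assert by_array.shape == by_slice.shape == (0,)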
