Skip to content

Commit 6e82a3a

Browse files
ianhi and dcherian
authored
Mostly restore netcdf backend behavior with URLs (#10931)
Co-authored-by: Deepak Cherian <[email protected]>
1 parent d057157 commit 6e82a3a

File tree

5 files changed

+76
-41
lines changed

5 files changed

+76
-41
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ Deprecations
3333

3434
Bug Fixes
3535
~~~~~~~~~
36+
37+
- The NetCDF4 backend will now claim to be able to read any URL except for one that contains
38+
the substring ``zarr`` (matched case-insensitively) and lacks a netCDF file extension. This restores backward compatibility after
39+
:pull:`10804` broke workflows that relied on ``xr.open_dataset("http://...")``
40+
(:pull:`10931`).
41+
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
3642
- Always normalize slices when indexing ``LazilyIndexedArray`` instances (:issue:`10941`, :pull:`10948`).
3743
By `Justus Magin <https://github.com/keewis>`_.
3844

xarray/backends/common.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,3 +849,37 @@ def open_groups_as_dict(
849849

850850
# mapping of engine name to (module name, BackendEntrypoint Class)
851851
BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {}
852+
853+
854+
def _is_likely_dap_url(url: str) -> bool:
855+
"""
856+
Determines if a URL is likely an OPeNDAP (DAP) endpoint based on
857+
known protocols, server software path patterns, and file extensions.
858+
859+
Parameters
860+
----------
861+
url : str
862+
863+
Returns
864+
-------
865+
True if the URL matches common DAP patterns, False otherwise.
866+
"""
867+
if not url:
868+
return False
869+
870+
url_lower = url.lower()
871+
872+
# For remote URIs, check for DAP server software path patterns
873+
if is_remote_uri(url_lower):
874+
dap_path_patterns = (
875+
"/dodsc/", # THREDDS Data Server (TDS) DAP endpoint (case-insensitive)
876+
"/dods/", # GrADS Data Server (GDS) DAP endpoint
877+
"/opendap/", # Generic OPeNDAP/Hyrax server
878+
"/erddap/", # ERDDAP data server
879+
"/dap2/", # Explicit DAP2 version in path
880+
"/dap4/", # Explicit DAP4 version in path
881+
"/dap/",
882+
)
883+
return any(pattern in url_lower for pattern in dap_path_patterns)
884+
885+
return False

xarray/backends/netCDF4_.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -715,10 +715,19 @@ def _has_netcdf_ext(path: str | os.PathLike, is_remote: bool = False) -> bool:
715715
_, ext = os.path.splitext(path)
716716
return ext in {".nc", ".nc4", ".cdf"}
717717

718-
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
719-
# For remote URIs, check extension (accounting for query params/fragments)
720-
# Remote netcdf-c can handle both regular URLs and DAP URLs
721-
return _has_netcdf_ext(filename_or_obj, is_remote=True)
718+
if isinstance(filename_or_obj, str):
719+
if is_remote_uri(filename_or_obj):
720+
# For remote URIs, check extension (accounting for query params/fragments)
721+
# Remote netcdf-c can handle both regular URLs and DAP URLs
722+
if _has_netcdf_ext(filename_or_obj, is_remote=True):
723+
return True
724+
elif "zarr" in filename_or_obj.lower():
725+
return False
726+
# return true for non-zarr URLs so we don't have a breaking change for people relying on this
727+
# netcdf backend guessing true for all remote sources.
728+
# TODO: emit a warning here about deprecation of this behavior
729+
# https://github.com/pydata/xarray/pull/10931
730+
return True
722731

723732
if isinstance(filename_or_obj, str | os.PathLike):
724733
# For local paths, check magic number first, then extension

xarray/backends/pydap_.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
BackendArray,
1313
BackendEntrypoint,
1414
T_PathFileOrDataStore,
15+
_is_likely_dap_url,
1516
_normalize_path,
1617
datatree_from_dict_with_io_cleanup,
1718
robust_getitem,
@@ -22,7 +23,6 @@
2223
Frozen,
2324
FrozenDict,
2425
close_on_error,
25-
is_remote_uri,
2626
)
2727
from xarray.core.variable import Variable
2828
from xarray.namedarray.pycompat import integer_types
@@ -252,23 +252,7 @@ class PydapBackendEntrypoint(BackendEntrypoint):
252252
def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
253253
if not isinstance(filename_or_obj, str):
254254
return False
255-
256-
# Check for explicit DAP protocol indicators:
257-
# 1. DAP scheme: dap2:// or dap4:// (case-insensitive, may not be recognized by is_remote_uri)
258-
# 2. Remote URI with /dap2/ or /dap4/ in URL path (case-insensitive)
259-
# Note: We intentionally do NOT check for .dap suffix as that would match
260-
# file extensions like .dap which trigger downloads of binary data
261-
url_lower = filename_or_obj.lower()
262-
if url_lower.startswith(("dap2://", "dap4://")):
263-
return True
264-
265-
# For standard remote URIs, check for DAP indicators in path
266-
if is_remote_uri(filename_or_obj):
267-
return (
268-
"/dap2/" in url_lower or "/dap4/" in url_lower or "/dodsC/" in url_lower
269-
)
270-
271-
return False
255+
return _is_likely_dap_url(filename_or_obj)
272256

273257
def open_dataset(
274258
self,

xarray/tests/test_backends.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@
7777
has_h5netcdf_1_4_0_or_above,
7878
has_netCDF4,
7979
has_numpy_2,
80-
has_pydap,
8180
has_scipy,
8281
has_zarr,
8382
has_zarr_v3,
@@ -7294,9 +7293,9 @@ def test_netcdf4_entrypoint(tmp_path: Path) -> None:
72947293
_check_guess_can_open_and_open(entrypoint, path, engine="netcdf4", expected=ds)
72957294
_check_guess_can_open_and_open(entrypoint, str(path), engine="netcdf4", expected=ds)
72967295

7297-
# Remote URLs without extensions are no longer claimed (stricter detection)
7298-
assert not entrypoint.guess_can_open("http://something/remote")
7299-
# Remote URLs with netCDF extensions are claimed
7296+
# Remote URLs without extensions return True (backward compatibility)
7297+
assert entrypoint.guess_can_open("http://something/remote")
7298+
# Remote URLs with netCDF extensions are also claimed
73007299
assert entrypoint.guess_can_open("http://something/remote.nc")
73017300
assert entrypoint.guess_can_open("something-local.nc")
73027301
assert entrypoint.guess_can_open("something-local.nc4")
@@ -7440,15 +7439,22 @@ def test_remote_url_backend_auto_detection() -> None:
74407439
f"URL {url!r} should select {expected_backend!r} but got {engine!r}"
74417440
)
74427441

7443-
# DAP URLs without extensions - pydap wins if available, netcdf4 otherwise
7444-
# When pydap is not installed, netCDF4 should handle these DAP URLs
7445-
expected_dap_backend = "pydap" if has_pydap else "netcdf4"
7442+
# DAP URLs - netcdf4 should handle these (it comes first in backend order)
7443+
# Both netcdf4 and pydap can open DAP URLs, but netcdf4 has priority
7444+
expected_dap_backend = "netcdf4"
74467445
dap_urls = [
7446+
# Explicit DAP protocol schemes
74477447
"dap2://opendap.earthdata.nasa.gov/collections/dataset",
74487448
"dap4://opendap.earthdata.nasa.gov/collections/dataset",
7449+
"dap://example.com/dataset",
74497450
"DAP2://example.com/dataset", # uppercase scheme
74507451
"DAP4://example.com/dataset", # uppercase scheme
7452+
# DAP path indicators
74517453
"https://example.com/services/DAP2/dataset", # uppercase in path
7454+
"http://test.opendap.org/opendap/data/nc/file.nc", # /opendap/ path
7455+
"https://coastwatch.pfeg.noaa.gov/erddap/griddap/erdMH1chla8day", # ERDDAP
7456+
"http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/", # THREDDS dodsC
7457+
"https://disc2.gesdisc.eosdis.nasa.gov/dods/TRMM_3B42", # GrADS /dods/
74527458
]
74537459

74547460
for url in dap_urls:
@@ -7457,20 +7463,16 @@ def test_remote_url_backend_auto_detection() -> None:
74577463
f"URL {url!r} should select {expected_dap_backend!r} but got {engine!r}"
74587464
)
74597465

7460-
# URLs that should raise ValueError (no backend can open them)
7461-
invalid_urls = [
7462-
"http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", # .dap suffix
7463-
"https://example.com/data.dap", # .dap suffix
7464-
"http://opendap.example.com/data", # no extension, no DAP indicators
7465-
"https://test.opendap.org/dataset", # no extension, no DAP indicators
7466+
# URLs with .dap suffix are claimed by netcdf4 (backward compatibility fallback)
7467+
# Note: .dap suffix is intentionally NOT recognized as a DAP dataset URL
7468+
fallback_urls = [
7469+
("http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap", "netcdf4"),
7470+
("https://example.com/data.dap", "netcdf4"),
74667471
]
74677472

7468-
for url in invalid_urls:
7469-
with pytest.raises(
7470-
ValueError,
7471-
match=r"did not find a match in any of xarray's currently installed IO backends",
7472-
):
7473-
guess_engine(url)
7473+
for url, expected_backend in fallback_urls:
7474+
engine = guess_engine(url)
7475+
assert engine == expected_backend
74747476

74757477

74767478
@requires_netCDF4

0 commit comments

Comments
 (0)