I am trying to prevent unnecessary downloading of large datasets by reading the publicly available files directly from their online location. Surprisingly I cannot find an answer to my question on StackOverflow already.
I use JupyterLab, and have tried the following:
import xarray as xr
url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
data = xr.open_dataset(url)
This produces the following error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'd2d8feab-7dab-434f-ae9c-a79c655b259b']
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In[4], line 3
1 #read in dataset from website
2 url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
----> 3 data = xr.open_dataset(url)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
599 decoders = _resolve_decoders_kwargs(
600 decode_cf,
601 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
607 decode_coords=decode_coords,
608 )
610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
612 filename_or_obj,
613 drop_variables=drop_variables,
614 **decoders,
615 **kwargs,
616 )
617 ds = _dataset_from_backend_dataset(
618 backend_ds,
619 filename_or_obj,
(...)
629 **kwargs,
630 )
631 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:649, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
628 def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs
629 self,
630 filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
(...)
646 autoclose=False,
647 ) -> Dataset:
648 filename_or_obj = _normalize_path(filename_or_obj)
--> 649 store = NetCDF4DataStore.open(
650 filename_or_obj,
651 mode=mode,
652 format=format,
653 group=group,
654 clobber=clobber,
655 diskless=diskless,
656 persist=persist,
657 lock=lock,
658 autoclose=autoclose,
659 )
661 store_entrypoint = StoreBackendEntrypoint()
662 with close_on_error(store):
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:410, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
404 kwargs = dict(
405 clobber=clobber, diskless=diskless, persist=persist, format=format
406 )
407 manager = CachingFileManager(
408 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
409 )
--> 410 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:357, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
355 self._group = group
356 self._mode = mode
--> 357 self.format = self.ds.data_model
358 self._filename = self.ds.filepath()
359 self.is_remote = is_remote_uri(self._filename)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:419, in NetCDF4DataStore.ds(self)
417 @property
418 def ds(self):
--> 419 return self._acquire()
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:413, in NetCDF4DataStore._acquire(self, needs_lock)
412 def _acquire(self, needs_lock=True):
--> 413 with self._manager.acquire_context(needs_lock) as root:
414 ds = _nc4_require_group(root, self._group, self._mode)
415 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
196 @contextlib.contextmanager
197 def acquire_context(self, needs_lock=True):
198 """Context manager for acquiring a file."""
--> 199 file, cached = self._acquire_with_cache_info(needs_lock)
200 try:
201 yield file
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2470, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2107, in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -75] NetCDF: Malformed or unexpected Constraint: 'https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc'
Is it in any way possible to read these files into an xarray dataset directly from their online location?
Thank you!
Your url is pointing to the catalog of the THREDDS server. This is a valid URL for a website, with a dataset constraint, but it is not the path to a netCDF file. If you open the url in a web browser and click the link, you'll be taken to a page that lists the correct URLs for downloading the data via all the access protocols that the server supports. You'll want the OPeNDAP link for use with xarray.
Usually (by default?), OPeNDAP links are available through the dodsC sub-path after the thredds path. So you should keep the root of the catalog link up to (but not including) the catalog path, branch to dodsC, and then append everything that follows the dataset constraint. That gives:
url = "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
In R, I get this:
library(ncdfCF)
url <- "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
(ds <- open_ncdf(url))
#> <Dataset> tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231
#> Resource : https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc
#> Format : classic
#> Type : generic netCDF data
#> Conventions: CF-1.4
#> Keep open : FALSE
#>
#> Variables:
#> name long_name units data_type axes
#> tas Near-Surface Air Temperature K NC_FLOAT x, y, time, height
#>
#> Axes:
#> id axis name long_name length unlim values unit
#> 0 T time Time 365 U [2014-01-01 11:30:00.288 ... 2014-12-31 11:30:0... days since 1984-09-01 00:00:00.0
#> 3 X x X Coordinate Of Projection 629 [0 ... 6908000] m
#> 4 Y y Y Coordinate Of Projection 709 [0 ... 7788000] m
#> 2 maxStrlen64 64 [1 ... 64]
#> Z height Height 1 [2] m
#>
#> Attributes:
#> id name type length value
#> 0 CDI NC_CHAR 64 Climate Data Interface version 2.0.5 (https://m...
#> 1 Conventions NC_CHAR 6 CF-1.4
#> 2 institute_id NC_CHAR 8 HCLIMcom
#> 3 model_id NC_CHAR 14 HCLIM43_Arctic
#> 4 experiment_id NC_CHAR 41 ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist
#> 5 domain NC_CHAR 5 ARC11
#> 6 frequency NC_CHAR 3 day
#> 7 driving_model_id NC_CHAR 4 ERA5
#> 8 creation_date NC_CHAR 24 Sun May 5 01:20:07 2024
#> 9 title NC_CHAR 28 Near-Surface Air Temperature
#> 10 comment NC_CHAR 21 Created with gl/xtool
#> 11 history NC_CHAR 1800 Wed Nov 13 14:44:44 2024: cdo mergetime tas_fp_...
#> 12 NCO NC_CHAR 95 netCDF Operators version 4.8.1 (Homepage = http...
#> 13 CDO NC_CHAR 64 Climate Data Operators version 2.0.5 (https://m...
#> 14 DODS.strlen NC_INT 1 0
#> 15 DODS_EXTRA.Unlimited_Dimension NC_CHAR 4 time