url python-xarray netcdf jupyterhub

Using xarray in JupyterLab to read NC file from url


I am trying to avoid unnecessarily downloading large datasets by reading the publicly available files directly from their online location. Surprisingly, I could not find an existing answer to this question on Stack Overflow.

I use JupyterLab, and have tried the following:

import xarray as xr
url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
data = xr.open_dataset(url)

This produces the following error message:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    210 try:
--> 211     file = self._cache[self._key]
    212 except KeyError:

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
     55 with self._lock:
---> 56     value = self._cache[key]
     57     self._cache.move_to_end(key)

KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'd2d8feab-7dab-434f-ae9c-a79c655b259b']

During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)
Cell In[4], line 3
      1 #read in dataset from website
      2 url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
----> 3 data = xr.open_dataset(url)

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    599 decoders = _resolve_decoders_kwargs(
    600     decode_cf,
    601     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    607     decode_coords=decode_coords,
    608 )
    610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
    612     filename_or_obj,
    613     drop_variables=drop_variables,
    614     **decoders,
    615     **kwargs,
    616 )
    617 ds = _dataset_from_backend_dataset(
    618     backend_ds,
    619     filename_or_obj,
   (...)
    629     **kwargs,
    630 )
    631 return ds

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:649, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
    628 def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
    629     self,
    630     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
   (...)
    646     autoclose=False,
    647 ) -> Dataset:
    648     filename_or_obj = _normalize_path(filename_or_obj)
--> 649     store = NetCDF4DataStore.open(
    650         filename_or_obj,
    651         mode=mode,
    652         format=format,
    653         group=group,
    654         clobber=clobber,
    655         diskless=diskless,
    656         persist=persist,
    657         lock=lock,
    658         autoclose=autoclose,
    659     )
    661     store_entrypoint = StoreBackendEntrypoint()
    662     with close_on_error(store):

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:410, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
    404 kwargs = dict(
    405     clobber=clobber, diskless=diskless, persist=persist, format=format
    406 )
    407 manager = CachingFileManager(
    408     netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
    409 )
--> 410 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:357, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
    355 self._group = group
    356 self._mode = mode
--> 357 self.format = self.ds.data_model
    358 self._filename = self.ds.filepath()
    359 self.is_remote = is_remote_uri(self._filename)

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:419, in NetCDF4DataStore.ds(self)
    417 @property
    418 def ds(self):
--> 419     return self._acquire()

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:413, in NetCDF4DataStore._acquire(self, needs_lock)
    412 def _acquire(self, needs_lock=True):
--> 413     with self._manager.acquire_context(needs_lock) as root:
    414         ds = _nc4_require_group(root, self._group, self._mode)
    415     return ds

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
    135 del self.args, self.kwds, self.func
    136 try:
--> 137     return next(self.gen)
    138 except StopIteration:
    139     raise RuntimeError("generator didn't yield") from None

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
    196 @contextlib.contextmanager
    197 def acquire_context(self, needs_lock=True):
    198     """Context manager for acquiring a file."""
--> 199     file, cached = self._acquire_with_cache_info(needs_lock)
    200     try:
    201         yield file

File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    215     kwargs = kwargs.copy()
    216     kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
    218 if self._mode == "w":
    219     # ensure file doesn't get overridden when opened again
    220     self._mode = "a"

File src/netCDF4/_netCDF4.pyx:2470, in netCDF4._netCDF4.Dataset.__init__()

File src/netCDF4/_netCDF4.pyx:2107, in netCDF4._netCDF4._ensure_nc_success()

OSError: [Errno -75] NetCDF: Malformed or unexpected Constraint: 'https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc' 

Is it in any way possible to read these files into an xarray dataset directly from their online location?

Thank you!


Solution

  • Your URL points to the catalog of the THREDDS server. This is a valid URL for a web page, with a dataset constraint, but it is not the path to a netCDF file. If you open the URL in a web browser and click the link, you'll be taken to a page that lists the correct URLs for accessing the data through each of the access protocols that the server supports. You'll want the OPeNDAP link for use with xarray.

    Usually (by default?), OPeNDAP links are available through the dodsC sub-path after the thredds path. So you should keep the root of the catalog link up to (but not including) the catalog path segment, replace that segment with dodsC, and then append everything that follows "dataset=" in the dataset constraint. That gives:

    url = "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
    
    

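    In Python, pass this URL to xr.open_dataset(url) in place of the catalog URL. As a check that the link works, in R I get this: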

    library(ncdfCF)
    
    url <- "https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
    
    (ds <- open_ncdf(url))
    #> <Dataset> tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231 
    #> Resource   : https://thredds.met.no/thredds/dodsC/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc 
    #> Format     : classic 
    #> Type       : generic netCDF data 
    #> Conventions: CF-1.4 
    #> Keep open  : FALSE 
    #> 
    #> Variables:
    #>  name long_name                    units data_type axes              
    #>  tas  Near-Surface Air Temperature K     NC_FLOAT  x, y, time, height
    #> 
    #> Axes:
    #>  id axis name        long_name                  length unlim values                                             unit                            
    #>  0  T    time        Time                       365    U     [2014-01-01 11:30:00.288 ... 2014-12-31 11:30:0... days since 1984-09-01 00:00:00.0
    #>  3  X    x           X Coordinate Of Projection 629          [0 ... 6908000]                                    m                               
    #>  4  Y    y           Y Coordinate Of Projection 709          [0 ... 7788000]                                    m                               
    #>  2       maxStrlen64                             64          [1 ... 64]                                                                         
    #>     Z    height      Height                       1          [2]                                                m                               
    #> 
    #> Attributes:
    #>  id name                           type    length value                                             
    #>   0 CDI                            NC_CHAR   64   Climate Data Interface version 2.0.5 (https://m...
    #>   1 Conventions                    NC_CHAR    6   CF-1.4                                            
    #>   2 institute_id                   NC_CHAR    8   HCLIMcom                                          
    #>   3 model_id                       NC_CHAR   14   HCLIM43_Arctic                                    
    #>   4 experiment_id                  NC_CHAR   41   ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist         
    #>   5 domain                         NC_CHAR    5   ARC11                                             
    #>   6 frequency                      NC_CHAR    3   day                                               
    #>   7 driving_model_id               NC_CHAR    4   ERA5                                              
    #>   8 creation_date                  NC_CHAR   24   Sun May  5 01:20:07 2024                          
    #>   9 title                          NC_CHAR   28   Near-Surface Air Temperature                      
    #>  10 comment                        NC_CHAR   21   Created with gl/xtool                             
    #>  11 history                        NC_CHAR 1800   Wed Nov 13 14:44:44 2024: cdo mergetime tas_fp_...
    #>  12 NCO                            NC_CHAR   95   netCDF Operators version 4.8.1 (Homepage = http...
    #>  13 CDO                            NC_CHAR   64   Climate Data Operators version 2.0.5 (https://m...
    #>  14 DODS.strlen                    NC_INT     1   0                                                 
    #>  15 DODS_EXTRA.Unlimited_Dimension NC_CHAR    4   time