Issue
I downloaded AQUA-MODIS netcdf files (total = 3422) for chlorophyll-a from the Earthdata website from 2012 to 2024 for The Northern Red Sea, Egypt.
I need to:
I have tried using packages such as ncdf4, terra, raster, and RNetCDF without any success (see my efforts below). I've also tried looking for solutions on Stackoverflow and did some troubleshooting with suggestions from ChatGPT and nothing has worked. I really don't understand how find a solution.
Many thanks if anyone can help.
Here's the manifest.txt link to get the data (expires Oct 22nd 2024)
Read the files into R:
library(terra)
library(ncdf4)
library(raster)
library(RNetCDF)
#Start the file path to all the netcdf files as a variable 'folder'
folder<-"~/Documents/GIS_Data/CHL-A/"
#Make a list of all the files within the folder
files <-list.files(folder, pattern='*.nc', full.names ="TRUE")
Loop through all the file paths and the AQUA MODIS netcdf files to gain access to them:
for (file in files) {
nc=open.nc(file)
print.nc(nc)
}
Contents of one of the AQUA MODIS files
netcdf netcdf4 {
// global attributes:
NC_CHAR :product_name = "AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc" ;
NC_CHAR :title = "MODIS Level-3 Binned Data" ;
NC_CHAR :instrument = "MODIS" ;
NC_CHAR :platform = "Aqua" ;
NC_CHAR :temporal_range = "day" ;
NC_INT :start_orbit_number = 109682 ;
NC_INT :end_orbit_number = 109698 ;
NC_CHAR :date_created = "2024289095750000" ;
NC_CHAR :processing_version = "unspecified" ;
NC_CHAR :source = "satellite observations from MODIS-Aqua" ;
NC_CHAR :history = "/sdps/sdpsoper/Science/OCSSW/DEVEL/bin/l3bin ifile=AQUA_MODIS.20221217.L3b.DAY.CHL.nc ofile=AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc latnorth=37.0000 latsouth=5.1000 lonwest=26.7200 loneast=54.1400" ;
NC_CHAR :time_coverage_start = "2022-12-17T00:25:01.000Z" ;
NC_CHAR :time_coverage_end = "2022-12-18T02:59:59.000Z" ;
NC_FLOAT :northernmost_latitude = 36.8958320617676 ;
NC_FLOAT :southernmost_latitude = 5.10416650772095 ;
NC_FLOAT :easternmost_longitude = 50.6941337585449 ;
NC_FLOAT :westernmost_longitude = 26.7268047332764 ;
NC_DOUBLE :geospatial_lat_max = 36.8958320617676 ;
NC_DOUBLE :geospatial_lat_min = 5.10416650772095 ;
NC_DOUBLE :geospatial_lon_max = 50.6941337585449 ;
NC_DOUBLE :geospatial_lon_min = 26.7268047332764 ;
NC_CHAR :geospatial_lat_units = "degrees_north" ;
NC_CHAR :geospatial_lon_units = "degrees_east" ;
NC_CHAR :geospatial_lat_resolution = "4.638312 km" ;
NC_CHAR :geospatial_lon_resolution = "4.638312 km" ;
NC_CHAR :spatialResolution = "4.638312 km" ;
NC_INT :data_bins = 31421 ;
NC_FLOAT :percent_data_bins = 0.132233932614326 ;
NC_CHAR :units = "chlor_a:mg m^-3" ;
NC_CHAR :binning_scheme = "Integerized Sinusoidal Grid" ;
NC_CHAR :project = "Ocean Biology Processing Group (NASA/GSFC/OBPG)" ;
NC_CHAR :institution = "NASA Goddard Space Flight Center, Ocean Ecology Laboratory, Ocean Biology Processing Group" ;
NC_CHAR :standard_name_vocabulary = "CF Standard Name Table v36" ;
NC_CHAR :Conventions = "CF-1.6 ACDD-1.3" ;
NC_CHAR :naming_authority = "gov.nasa.gsfc.sci.oceandata" ;
NC_CHAR :id = "AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc/L3/AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc" ;
NC_CHAR :license = "https://science.nasa.gov/earth-science/earth-science-data/data-information-policy/" ;
NC_CHAR :creator_name = "NASA/GSFC/OBPG" ;
NC_CHAR :publisher_name = "NASA/GSFC/OBPG" ;
NC_CHAR :creator_email = "data@oceancolor.gsfc.nasa.gov" ;
NC_CHAR :publisher_email = "data@oceancolor.gsfc.nasa.gov" ;
NC_CHAR :creator_url = "https://oceandata.sci.gsfc.nasa.gov" ;
NC_CHAR :publisher_url = "https://oceandata.sci.gsfc.nasa.gov" ;
NC_CHAR :processing_level = "L3 Binned" ;
NC_CHAR :cdm_data_type = "point" ;
NC_CHAR :keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords" ;
NC_CHAR :keywords = "Earth Science > Oceans > Ocean Chemistry > Pigments > Chlorophyll; Earth Science > Oceans > Ocean Chemistry > Chlorophyll" ;
group: level-3_binned_data {
dimensions:
binListDim = UNLIMITED ; // (31421 currently)
binDataDim = UNLIMITED ; // (31421 currently)
binIndexDim = UNLIMITED ; // (4320 currently)
types:
compound binListType {
NC_UINT bin_num ;
NC_SHORT nobs ;
NC_SHORT nscenes ;
NC_FLOAT weights ;
NC_FLOAT time_rec ;
}; // binListType
compound binDataType {
NC_FLOAT sum ;
NC_FLOAT sum_squared ;
}; // binDataType
compound binIndexType {
NC_UINT start_num ;
NC_UINT begin ;
NC_UINT extent ;
NC_UINT max ;
}; // binIndexType
variables:
binListType BinList(binListDim) ;
binDataType chlor_a(binDataDim) ;
binIndexType BinIndex(binIndexDim) ;
} // group level-3_binned_data
group: processing_control {
// group attributes:
NC_CHAR :software_name = "L3BIN" ;
NC_CHAR :software_version = "5.14" ;
NC_CHAR :input_sources = "AQUA_MODIS.20221217.L3b.DAY.CHL.nc" ;
NC_CHAR :l2_flag_names = "ATMFAIL,LAND,HILT,HISATZEN,STRAYLIGHT,CLDICE,COCCOLITH,LOWLW,CHLWARN,CHLFAIL,NAVWARN,MAXAERITER,ATMWARN,HISOLZEN,NAVFAIL,FILTER,HIGLINT" ;
group: input_parameters {
// group attributes:
NC_CHAR :infile = "AQUA_MODIS.20221217.L3b.DAY.CHL.nc" ;
NC_CHAR :ofile = "AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc" ;
NC_CHAR :pfile = "AQUA_MODIS.20221217.L3b.DAY.CHL.x.nc" ;
NC_CHAR :oformat = "netCDF4" ;
NC_CHAR :syear = "9999" ;
NC_CHAR :eyear = "9999" ;
NC_CHAR :sday = "1970001" ;
NC_CHAR :eday = "2038018" ;
NC_CHAR :sorbit = "-1" ;
NC_CHAR :eorbit = "-1" ;
NC_CHAR :out_parm = ":DEFAULT:" ;
NC_CHAR :processing_version = "unspecified" ;
NC_CHAR :reduce_fac = "1" ;
NC_CHAR :resolve = "" ;
NC_CHAR :merged = "" ;
NC_CHAR :loneast = "54.139999" ;
NC_CHAR :lonwest = "26.719999" ;
NC_CHAR :latnorth = "37.000000" ;
NC_CHAR :latsouth = "5.100000" ;
NC_CHAR :verbose = "0" ;
NC_CHAR :unit_wgt = "0" ;
NC_CHAR :median = "0" ;
NC_CHAR :deflate = "5" ;
NC_CHAR :composite_prod = "" ;
NC_CHAR :composite_scheme = "" ;
NC_CHAR :doi = "" ;
} // group input_parameters
} // group processing_control
}
Attempts to Stack the netcdf files:
Attempt 1:
chla1<-terra::rast(files)
Error Message
Error: [rast] cannot open this file as a SpatRaster: /Documents/GIS_Data/CHL-A/AQUA_MODIS.20120101.L3b.DAY.CHL.x.nc
In addition: Warning message:
`Documents/GIS_Data/CHL-A/AQUA_MODIS.20120101.L3b.DAY.CHL.x.nc' not recognized as a supported file format. (GDAL error 4)
Attempt 2:
chla1<-terra::rast(nc)
Error Message:
Error in methods::as(x, "SpatRaster") :
no method or default for coercing “NetCDF” to “SpatRaster”
Attempt 3:
chl4<-raster::stack(files)
Error Message
Error in ncvar_type_to_string(rv$precint) :
Error, unrecognized type code of variable supplied: -1
Screenshot of my R Environment
Screenshot of some of my files in my directory
Package ncdfCF can read L3b files directly:
ds <- open_ncdf("~/AQUA_MODIS.20030101.L3b.DAY.CHL.nc")
#> <Dataset> AQUA_MODIS.20030101.L3b.DAY.CHL
#> Resource : ~/AQUA_MODIS.20030101.L3b.DAY.CHL.nc
#> Format : netcdf4
#> Type : NASA level-3 binned data
#> Conventions: CF-1.6 ACDD-1.3
#> Keep open : FALSE
#> Has groups : TRUE
#>
#> Variable:
#> name units data_type axes
#> /level-3_binned_data/chlor_a mg m^-3 binDataType latitude, longitude, time
#>
#> Attributes:
#> id name type length value
#> 0 product_name NC_CHAR 35 AQUA_MODIS.20030101.L3b.DAY.CHL.nc
#> 1 title NC_CHAR 26 MODIS Level-3 Binned Data
#> 2 instrument NC_CHAR 6 MODIS
#> 3 platform NC_CHAR 5 Aqua
... (many more attributes)
# Get the data variable and plot it
chl <- ds[["/level-3_binned_data/chlor_a"]]
plot(chl$terra())
plot(rnaturalearth::ne_coastline(), add = TRUE)
# Data variable details
chl
#> <Variable> chlor_a
#> Path name: /level-3_binned_data/chlor_a
#>
#> Values: [0.003207558 ... 86.26505] mg m^-3
#> NA: 22578965 (89.2%)
#>
#> Coordinate reference system:
#> name grid_mapping
#> geo latitude_longitude
#>
#> Axes:
#> axis name length values unit
#> Y latitude 3001 [-77.979167 ... 47.020833] degrees_north
#> X longitude 8432 [-179.979167 ... 171.3125] degrees_east
#> T time 1 [2003-01-01T12:00:00] seconds since 1970-01-01
#>
#> Attributes:
#> name type length value
#> units NC_CHAR 7 mg m^-3
#> actual_range NC_FLOAT 2 0.00320755835801588, 86.265...
These are "level-3 binned" data. That means that the data are packed by latitudinal rows and longitudinal bins per row. Neither terra nor raster will read that, and even ncdf4 reports an error upon opening the file. The only package that I know of that will open the files without error is ncdfCF but even that won't recognise the data set contained in each file. The below function is built on top of ncdfCF to read L5b data; you need the dev version from GitHub for this to work. Please note that the code is largely untested; it is also not very performant but it will get the job done.
#' Read a MODIS level-3 binned (L3b) file
#'
#' This function will read a MODIS L3b file and return a `CFData` object with
#' the data in physical units and registered to a latitude-longitude coordinate
#' system on an authalic sphere of radius 6,378,145 m.
#'
#' This is very crude code to test the processing of L3b files. There are no
#' significant checks to capture and correct for edge cases or other potential
#' problems. The code is also not very performant, emphasis is on proof-of-concept.
#' Report any problems in the GitHub issue dedicated to building this
#' functionality into ncdfCF: https://github.com/R-CF/ncdfCF/issues/4.
#'
#' Once tested, this code is likely to be merged into ncdfCF.
#'
#' This code requires version 0.2.1 or higher of ncdfCF and library abind.
#'
#' @param fn Fully qualified name of the MODIS L3b netCDF file.
#' @param aoi Subset of data to extract, vector of four values minimum and
#' maximum longitude, minimum and maximum latitude.
#'
#' @return A `CFData` object.
#' @export
#'
#' @noRd
read_l5b <- function(fn, aoi = c(-180, 180, -90, 90)) {
require(abind)
ds <- open_ncdf(fn)
if (length(names(ds)))
stop("This file has recognized data variables. It is probably not a L3b formatted file. Quitting.")
units <- ds$root$attribute("units")
if (nzchar(units)) {
units <- strsplit(units, ":")[[1L]]
variable <- units[1L]
units <- units[2L]
} else stop("Required attribute 'units' not found. Quitting.")
grp <- ds$root$subgroups[["level-3_binned_data"]]
if (is.null(grp) || length(names(grp$NCvars)) != 3L)
stop("This file does not have the required netCDF variables for L3b data. Quitting.")
# Read the data
h <- grp$handle
binList <- RNetCDF::var.get.nc(h, "BinList")
binIndex <- RNetCDF::var.get.nc(h, "BinIndex")
binData <- RNetCDF::var.get.nc(h, variable)
# Calculate global center lat-long and bounds
numRows <- length(binIndex$start_num)
lat <- (1:numRows - 0.5) * 180 / numRows - 90
lat_bnds <- 0:numRows * 180 / numRows - 90
lon_bins <- binIndex$max[numRows * 0.5]
lon <- (1:lon_bins - 0.5) * 360 / lon_bins - 180
lon_bnds <- 0:lon_bins * 360 / lon_bins - 180
# Get the binned data in rows
data_row <- findInterval(binList$bin_num, binIndex$start_num)
data_row_bin <- as.integer(binList$bin_num - binIndex$start_num[data_row]) + 1L
# Subset the latitude
startRow <- floor((aoi[3L] + 90) * numRows / 180) + 1L
endRow <- floor((aoi[4L] + 90) * numRows / 180)
aoi_rows <- endRow - startRow + 1L
lat <- lat[startRow:endRow]
lat_bnds <- lat_bnds[startRow:(endRow+1L)]
# Subset the longitude
startBin <- floor((aoi[1L] + 180) * lon_bins / 360) + 1L
endBin <- floor((aoi[2L] + 180) * lon_bins / 360)
aoi_bins <- endBin - startBin + 1L
lon <- lon[startBin:endBin]
lon_bnds <- lon_bnds[startBin:(endBin+1L)]
l <- lapply(startRow:endRow, function(r) {
out <- rep(NA_real_, lon_bins)
idx <- which(data_row == r)
if (!length(idx)) return(out[startBin:endBin])
d <- binData$sum[idx] / binList$weights[idx] # The data of the physical property
b <- data_row_bin[idx] # The bins to put the data in
# Expand to lon_bins grid cells
allocate <- ceiling(1:lon_bins * binIndex$max[r] / lon_bins)
# Allocate the data to the expanded grid cells
for (bin in seq_along(b))
out[which(allocate == b[bin])] <- d[bin]
out[startBin:endBin]
})
data <- abind::abind(l, along = 2)
out_group <- makeMemoryGroup(-1L, "Memory_group", "/Memory_group", paste("Regridding L5b file", ds$name),
paste0(format(Sys.time(), "%FT%T%z"), " R package ncdfCF(", packageVersion("ncdfCF"), ")::read_l5b()"))
axis_lon <- makeLongitudeAxis(-1L, "longitude", out_group, aoi_bins, lon, rbind(lon_bnds[-aoi_bins], lon_bnds[-1L]), "degrees_east")
axis_lat <- makeLatitudeAxis(-1L, "latitude", out_group, aoi_rows, lat, rbind(lat_bnds[-aoi_rows], lat_bnds[-1L]), "degrees_north")
atts <- ds$attributes()
atts <- atts[!(atts$name %in% c("data_bins", "percent_data_bins", "binning_scheme")), ]
atts[atts$name == "title", "value"] <- paste0(atts[atts$name == "title", ]$value[[1L]], " - regridded")
atts[atts$name == "units", "value"] <- units
atts[atts$name == "processing_level", "value"] <- paste0(atts[atts$name == "processing_level", ]$value[[1L]], "; regridded to lat-long using ncdfCF::read_l5b()")
crs <- 'GEOGCRS["Unknown",DATUM["unknown",ELLIPSOID["unknown",6378145,0,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],AXIS["Geodetic latitude (Lat)",north,ORDER[1]],AXIS["Geodetic longitude (Lon)",east,ORDER[2]],ANGLEUNIT["degree",0.0174532925199433]]'
CFData$new(variable, out_group, data, list(longitude = axis_lon, latitude = axis_lat), crs, atts)
}
The function returns a CFData instance which can be used for further processing. Reading and plotting a file looks like this:
(chlor_a <- read_l5b("/my_data/AQUA_MODIS.20030101.L3b.DAY.CHL.nc"))
#> <Data> chlor_a
#>
#> Values: [0.003207558 ... 86.26505]
#> NA: 34481801 (92.4%)
#>
#> Axes:
#> id axis name length values
#> -1 X longitude 8640 [-179.97917 ... 179.97917]
#> -1 Y latitude 4320 [-89.979167 ... 89.979167]
plot(chlor_a$terra())
plot(rnaturalearth::ne_coastline(), add = TRUE)
To do your analysis with all 3,422 files you need to run the above function for each of them. From the resulting CFData objects you can get access to the data with the CFData::array() method. You can then join them into an array with the abind::abind() function and then apply(<data array>, 1:2, mean, na.rm = TRUE) to get your desired result.
The code also works if your data is a subset for the northern Red Sea only. In this case you should call the function like chlor_a <- read_l5b("/my_data/AQUA_MODIS.20030101.L3b.DAY.CHL.nc", aio <- c(26, 55, 5, 37)), using your preferred extent.