python numpy interpolation spatial-interpolation pygrib

How to Deal with Lat/Lon Arrays with Multiple Dimensions?

I'm working with Pygrib trying to get surface temperatures for particular lat/lon coordinates using the NBM grib data (available here if it helps).

I've been stuck trying to get an index value to use with representative data for a particular latitude and longitude. I was able to derive an index, but the problem is the latitude and longitude appear to have 2 coordinates each. I'll use Miami, FL (25.7617° N, 80.1918° W) as an example to illustrate this. Formatted to be minimum reproducible IF a grib file is provided.

def get_grib_data(self, gribfile, shortName):
    grbs = pygrib.open(gribfile)
    # Temp needs level specified
    if shortName == '2t':
        grib_param = grbs.select(shortName=shortName, level=2)
    # Convention- use short name for less than 5 chars
    # Else, use name
    elif len(shortName) < 5:
        grib_param = grbs.select(shortName=shortName)
    else:
        grib_param = grbs.select(name=shortName)
        data_values = grib_param[0].values
    # Need varying returns depending on parameter
    grbs.close()
    if shortName == '2t':
        return data_values, grib_param
    else:
        return data_values

# This function is used to find the closest lat/lon value to the entered one
def closest(self, coordinate, value): 
    ab_array = np.abs(coordinate - value)
    smallest_difference_index = np.amin(ab_array)
    ind = np.unravel_index(np.argmin(ab_array, axis=None), ab_array.shape)
    return ind

def get_local_value(data, j, in_lats, in_lons, lats, lons):
    lat_ind = closest(lats, in_lats[j])
    lon_ind = closest(lons, in_lons[j])

    print(lat_ind[0])
    print(lat_ind[1])
    print(lon_ind[0])
    print(lon_ind[1])
       
    if len(lat_ind) > 1 or len(lon_ind) > 1:
        lat_ind = lat_ind[0]
        lon_ind = lon_ind[0]
        dtype = data[lat_ind][lon_ind]
    else:
        dtype = data[lat_ind][lon_ind]

    return dtype 

if __name__ == '__main__':
    tfile = # Path to grib file
    temps, param = grib_data.get_grib_data(tfile, '2t')
    lats, lons = param[0].latlons()
    j = 0
    in_lats = [25.7617, 0 , 0]
    in_lons = [-80.198, 0, 0]
    temp = grib_data.get_local_value(temps, j, in_lats, in_lons, lats, lons)

When I do the print listed, I get the following for indices:

lat_ind[0]: 182
lat_ind[1]: 1931
lon_ind[0]: 1226
lon_ind[1]: 1756

So if my lat/lon were 1 dimensional, I would just do temp = data[lat[0]][lon[0]], but in this case that would give non-representative data. How would I go about handling the fact that lat/lon are in 2 coordinates? I have verified that lats[lat_ind[0][lat_ind1] gives the input latitude and the same for longitude.

Solution

You cannot evaluate "closeness" of latitudes independently of longitudes - you have to evaluate how close the pair of coordinates is to your input coordinates.

Lat/Lon are really just spherical coordinates. Given two points (lat1,lon1) (lat2,lon2), closeness (in terms of great circles) is given by the angle between the spherical vectors between those two points (approximating the Earth as a sphere).

You can compute this by constructing cartesian vectors of the two points and taking the dot product, which is a * b * cos(theta) where theta is what you want.

import numpy as np

def lat_lon_cartesian(lats,lons):

    lats = np.ravel(lats) #make both inputs 1-dimensional
    lons = np.ravel(lons)

    x = np.cos(np.radians(lons))*np.cos(np.radians(lats))
    y = np.sin(np.radians(lons))*np.cos(np.radians(lats))
    z = np.sin(np.radians(lats))
    return np.c_[x,y,z]

def closest(in_lats,in_lons,data_lats,data_lons):
    in_vecs = lat_lon_cartesian(in_lats,in_lons)
    data_vecs = lat_lon_cartesian(data_lats,data_lons)
    indices = []
    for in_vec in in_vecs: # if input lats/lons is small list then doing a for loop is ok
        # otherwise can be vectorized with some array gymnastics
        dot_product = np.sum(in_vec*data_vecs,axis=1)
        angles = np.arccos(dot_product) # all are unit vectors so a=b=1
        indices.append(np.argmin(angles))
    return indices

def get_local_value(data, in_lats, in_lons, data_lats, data_lons):
    raveled_data = np.ravel(data)
    raveled_lats = np.ravel(data_lats)
    raveled_lons = np.ravel(data_lons)
    inds = closest(in_lats,in_lons,raveled_lats,raveled_lons)
    dtypes = []
    closest_lat_lons = []
    
    for ind in inds:
                
        #if data is 2-d with same shape as the lat and lon meshgrids, then
        #it should be raveled as well and indexed by the same index
        dtype = raveled_data[ind]
        dtypes.append(dtype)

        closest_lat_lons.append((raveled_lats[ind],raveled_lons[ind]))
        #can return the closes matching lat lon data in the grib if you want
    return dtypes

Edit: Alternatively use interpolation.

import numpyp as np
from scipy.interpolate import RegularGridInterpolator

#assuming a grb object from pygrib
#see https://jswhit.github.io/pygrib/api.html#example-usage


lats, lons = grb.latlons()
#source code for pygrib looks like it calls lons,lats = np.meshgrid(...)
#so the following should give the unique lat/lon sequences
lat_values = lats[:,0]
lon_values = lons[0,:]

grb_values = grb.values

#create interpolator
grb_interp = RegularGridInterpolator((lat_values,lon_values),grb_values)

#in_lats, in_lons = desired input points (1-d each)
interpolated_values = grb_interp(np.c_[in_lats,in_lons])

#the result should be the linear interpolation between the four closest lat/lon points in the data set around each of your input lat/lon points.

Dummy data interpolation example:

>>> import numpy as np
>>> lats = np.array([1,2,3])
>>> lons = np.array([4,5,6,7])
>>> lon_mesh,lat_mesh = np.meshgrid(lons,lats)
>>> lon_mesh
array([[4, 5, 6, 7],
       [4, 5, 6, 7],
       [4, 5, 6, 7]])
>>> lat_mesh
array([[1, 1, 1, 1],
       [2, 2, 2, 2],
       [3, 3, 3, 3]])
>>> z = lon_mesh + lat_mesh #some example function of lat/lon (simple sum)
>>> z
array([[ 5,  6,  7,  8],
       [ 6,  7,  8,  9],
       [ 7,  8,  9, 10]])
>>> from scipy.interpolate import RegularGridInterpolator
>>> lon_mesh[0,:] #should produce lons
array([4, 5, 6, 7])
>>> lat_mesh[:,0] #should produce lats
array([1, 2, 3])
>>> interpolator = RegularGridInterpolator((lats,lons),z)
>>> input_lats = np.array([1.5,2.5])
>>> input_lons = np.array([5.5,7])
>>> input_points = np.c_[input_lats,input_lons]
>>> input_points
array([[1.5, 5.5],
       [2.5, 7. ]])
>>> interpolator(input_points)
array([7. , 9.5])
>>> #7 = 1.5+5.5 : correct
... #9.5 = 2.5+7 : correct
...
>>>