pythonnumpyinterpolationspatial-interpolationpygrib

How to Deal with Lat/Lon Arrays with Multiple Dimensions?


I'm working with Pygrib trying to get surface temperatures for particular lat/lon coordinates using the NBM grib data (available here if it helps).

I've been stuck trying to get an index value to use with representative data for a particular latitude and longitude. I was able to derive an index, but the problem is the latitude and longitude appear to have 2 coordinates each. I'll use Miami, FL (25.7617° N, 80.1918° W) as an example to illustrate this. Formatted to be minimum reproducible IF a grib file is provided.

def get_grib_data(self, gribfile, shortName):
    grbs = pygrib.open(gribfile)
    # Temp needs level specified
    if shortName == '2t':
        grib_param = grbs.select(shortName=shortName, level=2)
    # Convention- use short name for less than 5 chars
    # Else, use name
    elif len(shortName) < 5:
        grib_param = grbs.select(shortName=shortName)
    else:
        grib_param = grbs.select(name=shortName)
        data_values = grib_param[0].values
    # Need varying returns depending on parameter
    grbs.close()
    if shortName == '2t':
        return data_values, grib_param
    else:
        return data_values

# This function is used to find the closest lat/lon value to the entered one
def closest(self, coordinate, value): 
    ab_array = np.abs(coordinate - value)
    smallest_difference_index = np.amin(ab_array)
    ind = np.unravel_index(np.argmin(ab_array, axis=None), ab_array.shape)
    return ind

def get_local_value(data, j, in_lats, in_lons, lats, lons):
    lat_ind = closest(lats, in_lats[j])
    lon_ind = closest(lons, in_lons[j])

    print(lat_ind[0])
    print(lat_ind[1])
    print(lon_ind[0])
    print(lon_ind[1])
       
    if len(lat_ind) > 1 or len(lon_ind) > 1:
        lat_ind = lat_ind[0]
        lon_ind = lon_ind[0]
        dtype = data[lat_ind][lon_ind]
    else:
        dtype = data[lat_ind][lon_ind]

    return dtype 

if __name__ == '__main__':
    tfile = # Path to grib file
    temps, param = grib_data.get_grib_data(tfile, '2t')
    lats, lons = param[0].latlons()
    j = 0
    in_lats = [25.7617, 0 , 0]
    in_lons = [-80.198, 0, 0]
    temp = grib_data.get_local_value(temps, j, in_lats, in_lons, lats, lons)

When I do the print listed, I get the following for indices:

lat_ind[0]: 182
lat_ind[1]: 1931
lon_ind[0]: 1226
lon_ind[1]: 1756

So if my lat/lon were 1 dimensional, I would just do temp = data[lat[0]][lon[0]], but in this case that would give non-representative data. How would I go about handling the fact that lat/lon are in 2 coordinates? I have verified that lats[lat_ind[0][lat_ind1] gives the input latitude and the same for longitude.


Solution

  • You cannot evaluate "closeness" of latitudes independently of longitudes - you have to evaluate how close the pair of coordinates is to your input coordinates.

    Lat/Lon are really just spherical coordinates. Given two points (lat1,lon1) (lat2,lon2), closeness (in terms of great circles) is given by the angle between the spherical vectors between those two points (approximating the Earth as a sphere).

    You can compute this by constructing cartesian vectors of the two points and taking the dot product, which is a * b * cos(theta) where theta is what you want.

    import numpy as np
    
    def lat_lon_cartesian(lats,lons):
    
        lats = np.ravel(lats) #make both inputs 1-dimensional
        lons = np.ravel(lons)
    
        x = np.cos(np.radians(lons))*np.cos(np.radians(lats))
        y = np.sin(np.radians(lons))*np.cos(np.radians(lats))
        z = np.sin(np.radians(lats))
        return np.c_[x,y,z]
    
    def closest(in_lats,in_lons,data_lats,data_lons):
        in_vecs = lat_lon_cartesian(in_lats,in_lons)
        data_vecs = lat_lon_cartesian(data_lats,data_lons)
        indices = []
        for in_vec in in_vecs: # if input lats/lons is small list then doing a for loop is ok
            # otherwise can be vectorized with some array gymnastics
            dot_product = np.sum(in_vec*data_vecs,axis=1)
            angles = np.arccos(dot_product) # all are unit vectors so a=b=1
            indices.append(np.argmin(angles))
        return indices
    
    def get_local_value(data, in_lats, in_lons, data_lats, data_lons):
        raveled_data = np.ravel(data)
        raveled_lats = np.ravel(data_lats)
        raveled_lons = np.ravel(data_lons)
        inds = closest(in_lats,in_lons,raveled_lats,raveled_lons)
        dtypes = []
        closest_lat_lons = []
        
        for ind in inds:
                    
            #if data is 2-d with same shape as the lat and lon meshgrids, then
            #it should be raveled as well and indexed by the same index
            dtype = raveled_data[ind]
            dtypes.append(dtype)
    
            closest_lat_lons.append((raveled_lats[ind],raveled_lons[ind]))
            #can return the closes matching lat lon data in the grib if you want
        return dtypes
    

    Edit: Alternatively use interpolation.

    import numpyp as np
    from scipy.interpolate import RegularGridInterpolator
    
    #assuming a grb object from pygrib
    #see https://jswhit.github.io/pygrib/api.html#example-usage
    
    
    lats, lons = grb.latlons()
    #source code for pygrib looks like it calls lons,lats = np.meshgrid(...)
    #so the following should give the unique lat/lon sequences
    lat_values = lats[:,0]
    lon_values = lons[0,:]
    
    grb_values = grb.values
    
    #create interpolator
    grb_interp = RegularGridInterpolator((lat_values,lon_values),grb_values)
    
    #in_lats, in_lons = desired input points (1-d each)
    interpolated_values = grb_interp(np.c_[in_lats,in_lons])
    
    #the result should be the linear interpolation between the four closest lat/lon points in the data set around each of your input lat/lon points.
    

    Dummy data interpolation example:

    >>> import numpy as np
    >>> lats = np.array([1,2,3])
    >>> lons = np.array([4,5,6,7])
    >>> lon_mesh,lat_mesh = np.meshgrid(lons,lats)
    >>> lon_mesh
    array([[4, 5, 6, 7],
           [4, 5, 6, 7],
           [4, 5, 6, 7]])
    >>> lat_mesh
    array([[1, 1, 1, 1],
           [2, 2, 2, 2],
           [3, 3, 3, 3]])
    >>> z = lon_mesh + lat_mesh #some example function of lat/lon (simple sum)
    >>> z
    array([[ 5,  6,  7,  8],
           [ 6,  7,  8,  9],
           [ 7,  8,  9, 10]])
    >>> from scipy.interpolate import RegularGridInterpolator
    >>> lon_mesh[0,:] #should produce lons
    array([4, 5, 6, 7])
    >>> lat_mesh[:,0] #should produce lats
    array([1, 2, 3])
    >>> interpolator = RegularGridInterpolator((lats,lons),z)
    >>> input_lats = np.array([1.5,2.5])
    >>> input_lons = np.array([5.5,7])
    >>> input_points = np.c_[input_lats,input_lons]
    >>> input_points
    array([[1.5, 5.5],
           [2.5, 7. ]])
    >>> interpolator(input_points)
    array([7. , 9.5])
    >>> #7 = 1.5+5.5 : correct
    ... #9.5 = 2.5+7 : correct
    ...
    >>>