pandasnumpynetcdfdata-extractionera5

Extract a time and space variable from a moving ship from the ERA5 reanalysis


I want to extract the measured wind from a station inside a moving ship, which I have the latitude, longitude and time values and the wind value for each time step in space. I can extract a fixed point in space for all time steps but I would like to extract for example the wind at time step x to a date longitude and latitude as the ship moves. How can I do this from the code below?

data = xr.open_dataset('C:/Users/William Jacondino/Desktop/Dados/ERA5\\ERA5_2017.nc', decode_times=False)

dir_out = 'C:/Users/William Jacondino/Desktop/MovingShip'
if not os.path.exists(dir_out):
    os.makedirs(dir_out)

print("\nReading the observation station names:\n")

stations = pd.read_csv(r"C:/Users/William Jacondino/Desktop/MovingShip/Date-TIME.csv",index_col=0, sep='\;')

print(stations)

Reading the observation station names:

                  Latitude  Longitude
Date-Time                            
16/11/2017 00:00  0.219547 -38.247914
16/11/2017 06:00  0.861717 -38.188858
16/11/2017 12:00  1.529534 -38.131039
16/11/2017 18:00  2.243760 -38.067467
17/11/2017 00:00  2.961202 -38.009050
...                    ...        ...
10/12/2017 00:00 -5.775127 -35.206581
10/12/2017 06:00 -5.775120 -35.206598
10/12/2017 12:00 -5.775119 -35.206583
10/12/2017 18:00 -5.775122 -35.206584
11/12/2017 00:00 -5.775115 -35.206590



# variável tempo e unidade 

times = data.variables['time'][:]
unit =  data.time.units

# variáveis latitude (lat) e longitude (lon)

lon = data.variables['longitude'][:]
lat = data.variables['latitude'][:]

# variável temperatura em 2 metros em celsius

temp = data.variables['t2m'][:]-275.15

# variável temperatura do ponto de orvalho em 2 metros em celsius

tempdw = data.variables['d2m'][:]-275.15

# variável sea surface temperature (sst) em celsius

sst = data.variables['sst'][:]-275.15

# variável Surface sensible heat flux sshf

sshf = data.variables['sshf'][:]
unitsshf = data.sshf.units

# variável Surface latent heat flux

slhf = data.variables['slhf'][:]
unitslhf = data.slhf.units

# variável Mean sea level pressure

msl = data.variables['msl'][:]/100
unitmsl = data.msl.units

# variável Total precipitation em mm/h

tp = data.variables['tp'][:]*1000

# componente zonal do vento em 100 metros

uten100 = data.variables['u100'][:]
unitu100 = data.u100.units

# componente meridional do vento em 100 metros

vten100  = data.variables['v100'][:]
unitv100 = data.v100.units

# componente zonal do vento em 10 metros

uten  = data.variables['u10'][:]
unitu = data.u10.units

# componente meridional do vento em 10 metros

vten  = data.variables['v10'][:]
unitv = data.v10.units

# calculando a velocidade do vento em 10 metros

ws    = (uten**2 + vten**2)**(0.5)

# calculando a velocidade do vento em 100 metros

ws100  = (uten100**2 + vten100**2)**(0.5)

# calculando os ângulos de U e V para obter a direção do vento em 10 metros

wdir = (180 + (np.degrees(np.arctan2(uten, vten)))) % 360

# calculando os ângulos de U e V para obter a direção do vento em 100 metros

wdir100 = (180 + (np.degrees(np.arctan2(uten100, vten100)))) % 360

for key, value in stations.iterrows():
    #print(key,value[0], value[1], value[2])
    station = value[0]
    file_name = "{}{}".format(station+'_1991',".csv")
    #print(file_name)
    lon_point = value[1]
    lat_point = value[2]
    ########################################
    
    # Encontrando o ponto de Latitude e Longitude mais próximo das estações
    
    # Squared difference of lat and lon
    sq_diff_lat = (lat - lat_point)**2
    sq_diff_lon = (lon - lon_point)**2
    
    # Identifying the index of the minimum value for lat and lon
    min_index_lat = sq_diff_lat.argmin()
    min_index_lon = sq_diff_lon.argmin()
    print("Generating time series for station {}".format(station))
    
    ref_date   = datetime.datetime(int(unit[12:16]),int(unit[17:19]),int(unit[20:22]))

    date_range   = list()
    temp_data    = list()
    tempdw_data = list()
    sst_data     = list()
    sshf_data    = list()
    slhf_data    = list()
    msl_data     = list()
    tp_data      = list()
    uten100_data = list()
    vten100_data = list()
    uten_data    = list()
    vten_data    = list()
    ws_data      = list()
    ws100_data   = list()
    wdir_data    = list()
    wdir100_data = list()
    
    for index, time in enumerate(times):
        date_time = ref_date+datetime.timedelta(hours=int(time))
        date_range.append(date_time)
        temp_data.append(temp[index, min_index_lat, min_index_lon].values)
        tempdw_data.append(tempdw[index, min_index_lat, min_index_lon].values)
        sst_data.append(sst[index, min_index_lat, min_index_lon].values)
        sshf_data.append(sshf[index, min_index_lat, min_index_lon].values)
        slhf_data.append(slhf[index, min_index_lat, min_index_lon].values)
        msl_data.append(msl[index, min_index_lat, min_index_lon].values)
        tp_data.append(tp[index, min_index_lat, min_index_lon].values)
        uten100_data.append(uten100[index, min_index_lat, min_index_lon].values)
        vten100_data.append(vten100[index, min_index_lat, min_index_lon].values)
        uten_data.append(uten[index, min_index_lat, min_index_lon].values)
        vten_data.append(vten[index, min_index_lat, min_index_lon].values)
        ws_data.append(ws[index,min_index_lat,min_index_lon].values)
        ws100_data.append(ws100[index,min_index_lat,min_index_lon].values)
        wdir_data.append(wdir[index,min_index_lat,min_index_lon].values)
        wdir100_data.append(wdir100[index,min_index_lat,min_index_lon].values)
    ################################################################################    
    
    #print(date_range)
    
    df = pd.DataFrame(date_range, columns = ["Date-Time"])
    df["Date-Time"] = date_range
    df = df.set_index(["Date-Time"])
    df["WS10  ({})".format(unitu)] = ws_data
    df["WDIR10  ({})".format(units.deg)] = wdir_data
    df["WS100  ({})".format(unitu)] = ws100_data
    df["WDIR100  ({})".format(units.deg)] = wdir100_data
    df["Chuva({})".format(units.mm)] = tp_data 
    df["MSLP ({})".format(units.hPa)] = msl_data
    df["T2M ({})".format(units.degC)] = temp_data
    df["Td2M ({})".format(units.degC)] = tempdw_data
    df["Surface Sensible Heat Flux ({})".format(unitsshf)] = sshf_data                      
    df["Surface latent heat flux ({})".format(unitslhf)] = slhf_data 
    df["U10  ({})".format(unitu)] = uten_data
    df["V10  ({})".format(unitv)] = vten_data
    df["U100  ({})".format(unitu100)] = uten100_data
    df["V100  ({})".format(unitv100)] = vten100_data
    df["TSM ({})".format(units.degC)] = sst_data

    print("The following time series is being saved as .csv files")
        
    df.to_csv(os.path.join(dir_out,file_name), sep=';',encoding="utf-8", index=True)
    
print("\n! !Successfuly saved all the Time Series the output Directory!!\n{}".format(dir_out))

My code to extract a fixed variable at a given point in space is like this, but I would like to extract during the ship's movement, for example at time 11/12/2017 00:00, latitude -5.775115 and longitude -35.206590 I have a value of the wind, and in the next time step for another latitude x longitude I have another value. How can I adapt my code for this?


Solution

  • This is another perfect use case for xarray's advanced indexing! I feel like this part of the user guide needs a cape and a theme song :)

    I'll use a made up dataset and set of stations which are similar (I think) to yours. First step is to reset your Date-Time index, so you can use it in pulling the nearest time value from the xarray.Dataset, since you want a common index for the time, lat, and lon:

    In [14]: stations = stations.reset_index(drop=False)
        ...: stations
    Out[14]:
                Date-Time   Latitude   Longitude
    0 2017-11-16 00:00:00   0.219547  -38.247914
    1 2017-11-16 06:00:00   0.861717  -38.188858
    2 2017-11-16 12:00:00   1.529534  -38.131039
    3 2017-11-16 18:00:00   2.243760  -38.067467
    4 2017-11-17 00:00:00   2.961202  -38.009050
    5 2017-12-10 00:00:00  -5.775127  -35.206581
    6 2017-12-10 06:00:00  -5.775120  -35.206598
    7 2017-12-10 12:00:00  -5.775119  -35.206583
    8 2017-12-10 18:00:00  -5.775122  -35.206584
    9 2017-12-11 00:00:00  -5.775115  -35.206590
    
    In [15]: ds
    Out[15]:
    <xarray.Dataset>
    Dimensions:  (lat: 40, lon: 40, time: 241)
    Coordinates:
      * lat      (lat) float64 -9.75 -9.25 -8.75 -8.25 -7.75 ... 8.25 8.75 9.25 9.75
      * lon      (lon) float64 -44.75 -44.25 -43.75 -43.25 ... -26.25 -25.75 -25.25
      * time     (time) datetime64[ns] 2017-11-01 2017-11-01T06:00:00 ... 2017-12-31
    Data variables:
        temp     (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        tempdw   (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        sst      (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        ws       (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        ws100    (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        wdir     (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
        wdir100  (lat, lon, time) float64 0.07366 0.3448 0.2456 ... 0.3081 0.4236
    

    Using the advanced indexing rules, if we select from the dataset using DataArrays as indexers, the result will be reshaped to match the indexer. What this means is that we can take your stations dataframe, which has the values time, lat, and lon, and pull the nearest indices from the xarray dataset:

    In [16]: ds_over_observations = ds.sel(
        ...:     time=stations["Date-Time"].to_xarray(),
        ...:     lat=stations["Latitude"].to_xarray(),
        ...:     lon=stations["Longitude"].to_xarray(),
        ...:     method="nearest",
        ...: )
    

    Now, our data has the same index as your dataframe!

    In [17]: ds_over_observations
    Out[17]:
    <xarray.Dataset>
    Dimensions:  (index: 10)
    Coordinates:
        lat      (index) float64 0.25 0.75 1.75 2.25 ... -5.75 -5.75 -5.75 -5.75
        lon      (index) float64 -38.25 -38.25 -38.25 ... -35.25 -35.25 -35.25
        time     (index) datetime64[ns] 2017-11-16 ... 2017-12-11
      * index    (index) int64 0 1 2 3 4 5 6 7 8 9
    Data variables:
        temp     (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        tempdw   (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        sst      (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        ws       (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        ws100    (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        wdir     (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
        wdir100  (index) float64 0.1887 0.222 0.6754 0.919 ... 0.1134 0.9231 0.6095
    

    You can dump this into pandas with .to_dataframe:

    In [18]: df = ds_over_observations.to_dataframe()
    
    In [19]: df
    Out[19]:
            lat    lon                time      temp    tempdw       sst        ws     ws100      wdir   wdir100
    index
    0      0.25 -38.25 2017-11-16 00:00:00  0.188724  0.188724  0.188724  0.188724  0.188724  0.188724  0.188724
    1      0.75 -38.25 2017-11-16 06:00:00  0.222025  0.222025  0.222025  0.222025  0.222025  0.222025  0.222025
    2      1.75 -38.25 2017-11-16 12:00:00  0.675417  0.675417  0.675417  0.675417  0.675417  0.675417  0.675417
    3      2.25 -38.25 2017-11-16 18:00:00  0.919019  0.919019  0.919019  0.919019  0.919019  0.919019  0.919019
    4      2.75 -38.25 2017-11-17 00:00:00  0.566266  0.566266  0.566266  0.566266  0.566266  0.566266  0.566266
    5     -5.75 -35.25 2017-12-10 00:00:00  0.652490  0.652490  0.652490  0.652490  0.652490  0.652490  0.652490
    6     -5.75 -35.25 2017-12-10 06:00:00  0.429541  0.429541  0.429541  0.429541  0.429541  0.429541  0.429541
    7     -5.75 -35.25 2017-12-10 12:00:00  0.113352  0.113352  0.113352  0.113352  0.113352  0.113352  0.113352
    8     -5.75 -35.25 2017-12-10 18:00:00  0.923058  0.923058  0.923058  0.923058  0.923058  0.923058  0.923058
    9     -5.75 -35.25 2017-12-11 00:00:00  0.609493  0.609493  0.609493  0.609493  0.609493  0.609493  0.609493
    

    The index in the result is the same one as the stations data. If you'd like, you could merge in the original values using pd.concat([stations, df], axis=1).set_index("Date-Time") to get your original index back, alongside all the weather data:

    
    In [20]: pd.concat([stations, df], axis=1).set_index("Date-Time")
    Out[20]:
                         Latitude  Longitude   lat    lon                time      temp    tempdw       sst        ws     ws100      wdir   wdir100
    Date-Time
    2017-11-16 00:00:00  0.219547 -38.247914  0.25 -38.25 2017-11-16 00:00:00  0.188724  0.188724  0.188724  0.188724  0.188724  0.188724  0.188724
    2017-11-16 06:00:00  0.861717 -38.188858  0.75 -38.25 2017-11-16 06:00:00  0.222025  0.222025  0.222025  0.222025  0.222025  0.222025  0.222025
    2017-11-16 12:00:00  1.529534 -38.131039  1.75 -38.25 2017-11-16 12:00:00  0.675417  0.675417  0.675417  0.675417  0.675417  0.675417  0.675417
    2017-11-16 18:00:00  2.243760 -38.067467  2.25 -38.25 2017-11-16 18:00:00  0.919019  0.919019  0.919019  0.919019  0.919019  0.919019  0.919019
    2017-11-17 00:00:00  2.961202 -38.009050  2.75 -38.25 2017-11-17 00:00:00  0.566266  0.566266  0.566266  0.566266  0.566266  0.566266  0.566266
    2017-12-10 00:00:00 -5.775127 -35.206581 -5.75 -35.25 2017-12-10 00:00:00  0.652490  0.652490  0.652490  0.652490  0.652490  0.652490  0.652490
    2017-12-10 06:00:00 -5.775120 -35.206598 -5.75 -35.25 2017-12-10 06:00:00  0.429541  0.429541  0.429541  0.429541  0.429541  0.429541  0.429541
    2017-12-10 12:00:00 -5.775119 -35.206583 -5.75 -35.25 2017-12-10 12:00:00  0.113352  0.113352  0.113352  0.113352  0.113352  0.113352  0.113352
    2017-12-10 18:00:00 -5.775122 -35.206584 -5.75 -35.25 2017-12-10 18:00:00  0.923058  0.923058  0.923058  0.923058  0.923058  0.923058  0.923058
    2017-12-11 00:00:00 -5.775115 -35.206590 -5.75 -35.25 2017-12-11 00:00:00  0.609493  0.609493  0.609493  0.609493  0.609493  0.609493  0.609493