r rnoaa

Better way to optimize my code for getting NOAA climate data


I've been working on pulling climate data (specifically temperature and precipitation) from NOAA's network of GHCN weather stations. I have a list of the stations relevant to my area (~200) and have built a loop that retrieves a given climate variable for every station on that list, for every day between a specified minimum and maximum date. Ultimately I need ~10 years' worth of data. However, my simple loop takes forever to fetch this data, and I was wondering whether there is a better way to optimize it. I would also really prefer to access monthly rather than daily data, but rnoaa doesn't seem to offer an option for GHCN monthly data; the only function available is ghcnd_search(). If anyone knows how to retrieve monthly rather than daily data, that would be appreciated.

Station list:

# GHCN weather-station IDs to query (200 stations), kept in their original
# order. Note: despite its name, `df` is a plain character vector.
df <- c(
  "US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006",
  "US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051",
  "US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005",
  "US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012",
  "US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018",
  "US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014",
  "US1FLSW0008", "US1FLVL0035", "US1FLWK0001", "USC00080228", "USC00080236",
  "USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737",
  "USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306",
  "USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008",
  "USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298",
  "USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915",
  "USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168",
  "USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909",
  "USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210",
  "USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412",
  "USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802",
  "USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359",
  "USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879",
  "USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129",
  "USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618",
  "USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842",
  "USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228",
  "USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760",
  "USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982",
  "USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782",
  "USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219",
  "USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795",
  "USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU",
  "USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH",
  "USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM",
  "USR0000FWIL", "USW00003818", "USW00003853", "USW00012812", "USW00012815",
  "USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833",
  "USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839",
  "USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849",
  "USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876",
  "USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895",
  "USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899",
  "USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806",
  "USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837",
  "USW00093841"
)

Code:

library(rnoaa)
options(noaakey = "your api key")
# Download daily TMAX observations for every station ID in `df` and stack
# them into a single table called `data`.
#
# Each station is requested exactly once, using its own ID (the earlier
# version passed `df[1]` on every pass, so only the first station was ever
# fetched). The per-station tables are collected in a list and row-bound a
# single time at the end, so the accumulated result is not re-copied on every
# iteration. `lapply()` also handles an empty `df` safely, unlike
# `1:length(df)`.
station_tmax <- lapply(df, function(station_id) {
  result <- ghcnd_search(
    stationid = station_id,
    var = "TMAX",
    date_min = "2010-1-30",
    date_max = "2015-12-31"
  )
  # `[[` yields NULL when the result has no "tmax" element; rbind() drops
  # NULL entries. NOTE(review): presumably stations without TMAX records
  # simply omit this element -- confirm against the rnoaa documentation.
  result[["tmax"]]
})

# One bind over the whole list. `data` is NULL if no station returned TMAX.
data <- do.call(rbind, station_tmax)

Solution

  • Assuming the station IDs are stored in a character vector called dat, we can use purrr's map() to call ghcnd_search() once per station, and then map_dfr() to extract the "tmax" table from each result and combine them into a single data frame.

    # Load packages
    library(rnoaa)
    library(purrr)
    
    # For each station ID in `dat`, request TMAX for the date range, then
    # extract the "tmax" element of every result and row-bind them into one
    # data frame.
    dat_df <- dat %>%
      map(function(station_id) {
        ghcnd_search(station_id, var = 'TMAX',
                     date_min = '2010-1-30', date_max = '2015-12-31')
      }) %>%
      map_dfr("tmax")
    

    DATA

    # GHCN weather-station IDs used by the solution (200 stations), kept in
    # their original order.
    dat <- c(
      "US1FLAL0048", "US1FLBK0003", "US1FLBV0002", "US1FLBV0006",
      "US1FLBV0023", "US1FLBV0040", "US1FLBW0099", "US1FLCT0012", "US1FLDV0051",
      "US1FLFR0006", "US1FLHL0003", "US1FLHN0009", "US1FLLB0001", "US1FLLE0005",
      "US1FLLK0012", "US1FLLN0004", "US1FLLN0018", "US1FLMN0013", "US1FLMR0012",
      "US1FLMR0033", "US1FLOK0017", "US1FLOR0028", "US1FLPS0002", "US1FLPS0018",
      "US1FLPT0007", "US1FLSJ0012", "US1FLSM0008", "US1FLSS0044", "US1FLST0014",
      "US1FLSW0008", "US1FLVL0035", "US1FLWK0001", "USC00080228", "USC00080236",
      "USC00080369", "USC00080414", "USC00080478", "USC00080598", "USC00080737",
      "USC00080945", "USC00080992", "USC00081163", "USC00081276", "USC00081306",
      "USC00081544", "USC00081641", "USC00081651", "USC00081978", "USC00082008",
      "USC00082046", "USC00082150", "USC00082229", "USC00082288", "USC00082298",
      "USC00082391", "USC00082418", "USC00082441", "USC00082850", "USC00082915",
      "USC00082944", "USC00083020", "USC00083153", "USC00083163", "USC00083168",
      "USC00083207", "USC00083209", "USC00083470", "USC00083874", "USC00083909",
      "USC00083956", "USC00083986", "USC00084050", "USC00084095", "USC00084210",
      "USC00084289", "USC00084320", "USC00084366", "USC00084394", "USC00084412",
      "USC00084461", "USC00084625", "USC00084662", "USC00084731", "USC00084802",
      "USC00085076", "USC00085099", "USC00085184", "USC00085275", "USC00085359",
      "USC00085377", "USC00085539", "USC00085612", "USC00085667", "USC00085879",
      "USC00085895", "USC00085973", "USC00086065", "USC00086078", "USC00086129",
      "USC00086240", "USC00086315", "USC00086406", "USC00086414", "USC00086618",
      "USC00086657", "USC00086764", "USC00086767", "USC00086828", "USC00086842",
      "USC00086999", "USC00087020", "USC00087025", "USC00087205", "USC00087228",
      "USC00087261", "USC00087304", "USC00087397", "USC00087429", "USC00087760",
      "USC00087826", "USC00087851", "USC00087869", "USC00087886", "USC00087982",
      "USC00088368", "USC00088529", "USC00088620", "USC00088756", "USC00088782",
      "USC00088824", "USC00088942", "USC00089120", "USC00089176", "USC00089219",
      "USC00089401", "USC00089430", "USC00089566", "USC00089640", "USC00089795",
      "USR0000FBLO", "USR0000FCAC", "USR0000FCEN", "USR0000FCHE", "USR0000FLSU",
      "USR0000FMER", "USR0000FMIL", "USR0000FNAV", "USR0000FOAS", "USR0000FOCH",
      "USR0000FOLU", "USR0000FRAC", "USR0000FSAN", "USR0000FSTM", "USR0000FSUM",
      "USR0000FWIL", "USW00003818", "USW00003853", "USW00012812", "USW00012815",
      "USW00012816", "USW00012818", "USW00012819", "USW00012832", "USW00012833",
      "USW00012834", "USW00012835", "USW00012836", "USW00012838", "USW00012839",
      "USW00012841", "USW00012842", "USW00012843", "USW00012844", "USW00012849",
      "USW00012850", "USW00012854", "USW00012871", "USW00012873", "USW00012876",
      "USW00012882", "USW00012885", "USW00012888", "USW00012894", "USW00012895",
      "USW00012896", "USW00012897", "USW00013884", "USW00013889", "USW00013899",
      "USW00053847", "USW00053853", "USW00053860", "USW00092805", "USW00092806",
      "USW00092809", "USW00092811", "USW00092821", "USW00093805", "USW00093837",
      "USW00093841"
    )