r · for-loop · dplyr · lubridate · volatility

For loop to compute historical volatility per year by selecting each year's rows


I am a beginner in R and I am currently trying to compute the historical volatility per year. My dataset looks like this:

date           prices

01/01/2000     100

03/01/2000     98

05/10/2000     103

08/03/2001     102

08/04/2001     110

I would like to write a for loop to quickly compute the historical volatility that I have so far been computing "manually", as follows:

# Read the price data, then cut it into one subset per year using
# hard-coded row ranges (rows 1-259 for 2000, rows 260-518 for 2001, ...).
data_hv <- read.csv("file")
data_hv_2000 <- data_hv[(1:259),]
data_hv_2001 <- data_hv[(260:518),]
data_hv_2002 <- data_hv[(519:778),]
data_hv_2003 <- data_hv[(779:1038),]

Then, once the data is split into one subset per year, I compute the historical volatility for each subset:

# Year 2000: difference of log prices between each row and the lagged series,
# then the standard deviation of those differences scaled by sqrt(259) and 100.
# NOTE(review): presumably lag() is meant to be dplyr::lag() here -- confirm
# that dplyr is attached.
price = data_hv_2000$price
ret <- log(lag(price)) - log(price)
print(ret)
vol <- sd(ret, na.rm = TRUE) * sqrt(259) * 100
vol

# Year 2001: same computation on the 2001 subset, scaled by sqrt(260).
price_2001 = data_hv_2001$price
ret <- log(lag(price_2001)) - log(price_2001)
vol_2001 <- sd(ret, na.rm = TRUE) * sqrt(260) * 100
vol_2001

Here 259 and 260 are the number of rows in the 2000 and 2001 subsets, respectively.

I have tried to start writing a for loop:

# Attempted loop over the years 2000 to 2022; it does not run as written.
data_hv %>%
  for (i in 2000:2022) {
    filter(contains(i)) %>%
    price_i = data_hv_i$Wheat
    ret <- log(lag(price_i)) - log(price_i)
    vol_i <- sd(ret, na.rm = TRUE) * sqrt(nrows(data_hv_i)) * 100
    vol_i 
}

But it doesn't work and I'm a bit lost. In the end I would like to store these values in a list (one value, the historical volatility, per year). Could anyone help me?


Solution

  • There are a couple of things to remember when writing a for loop. One of them is to create an empty object of the desired type before the loop runs, so that each iteration has somewhere to store its result. Also, your attempt pipes the data frame into a for loop and pipes the result of filter() into an assignment, which is probably what causes the error.

    # The original attempt, annotated with the reasons it fails.
    # Problem: the data frame is piped into a for statement, which is not a
    # function call that can receive it.
    data_hv %>%
      for (i in 2000:2022) {
        # Problem: contains() is a column-selection helper, not a row filter,
        # and the trailing pipe feeds this line into the assignment below.
        filter(contains(i)) %>%
        # Problem: data_hv_i is never created anywhere in this loop.
        price_i = data_hv_i$Wheat
        ret <- log(lag(price_i)) - log(price_i)
        # Problem: nrows() is not a base R function; the row count is nrow().
        vol_i <- sd(ret, na.rm = TRUE) * sqrt(nrows(data_hv_i)) * 100
        # Problem: inside a loop this value is neither printed nor stored, so
        # it is overwritten on the next iteration.
        vol_i 
    }
    

    Try this:

    # Compute one historical volatility figure (in percent) per year and store
    # the results in a list with one element per year.
    # Requires dplyr to be attached (for %>% and filter()).
    # NOTE(review): assumes data_hv has a numeric `year` column and a `prices`
    # column. If it only has `date`, derive the year first, e.g. with
    # lubridate::year() on the parsed dates -- confirm the date format.
    years <- 2000:2022

    # when using a for loop, pre-allocate the result object (one slot per
    # year) instead of growing it on every iteration
    vol_final <- vector("list", length(years))

    for (k in seq_along(years)) {
      i <- years[k]

      # keep only the rows belonging to year i
      data_hv_i <- data_hv %>% filter(year == i)

      # log returns between consecutive rows. diff() is base R, so the result
      # does not depend on whether lag() resolves to dplyr::lag() or to
      # stats::lag(); the latter does not shift a plain vector and would make
      # every return 0. The sign is flipped compared with
      # log(lag(p)) - log(p), which leaves the standard deviation unchanged.
      ret <- diff(log(data_hv_i$prices))

      # scale the standard deviation of the returns by sqrt(number of rows in
      # the year) and express it in percent
      vol_i <- sd(ret, na.rm = TRUE) * sqrt(nrow(data_hv_i)) * 100

      # store this year's result as a one-row data frame in its slot
      vol_final[[k]] <- data.frame(year = i, vol_i)
    }

    vol_final
    #
    

    Final Solution

    library(clock)
    #> Warning: package 'clock' was built under R version 4.1.3
    library(tidyverse)
    library(lubridate)
    #> 
    #> Attaching package: 'lubridate'
    #> The following object is masked from 'package:clock':
    #> 
    #>     as_date
    #> The following objects are masked from 'package:base':
    #> 
    #>     date, intersect, setdiff, union
    
    # Simulated example data: the 23 yearly dates for 2000-2022 repeated 263
    # times (23 * 263 = 6049 rows), random prices drawn around 100, and the
    # calendar year extracted from each date.
    # NOTE(review): date_build() is called with the year only, so presumably
    # every date is that year's 1 January -- confirm against the clock docs.
    data_hv <- tibble(
      date = rep(date_build(2000:2022), times = 263),
      prices = rnorm(n = 6049, mean = 100, sd = 2),
      year = year(date)
    )
    
    # Compute one historical volatility figure (in percent) per year and store
    # the results in a list with one element per year.
    # Requires dplyr to be attached (for %>% and filter()) and data_hv to have
    # a numeric `year` column and a `prices` column.
    years <- 2000:2022

    # when using a for loop, pre-allocate the result object (one slot per
    # year) instead of growing it on every iteration
    vol_final <- vector("list", length(years))

    for (k in seq_along(years)) {
      i <- years[k]

      # keep only the rows belonging to year i
      data_hv_i <- data_hv %>% filter(year == i)

      # log returns between consecutive rows. diff() is base R, so the result
      # does not depend on whether lag() resolves to dplyr::lag() or to
      # stats::lag(); the latter does not shift a plain vector and would make
      # every return 0. The sign is flipped compared with
      # log(lag(p)) - log(p), which leaves the standard deviation unchanged.
      ret <- diff(log(data_hv_i$prices))

      # scale the standard deviation of the returns by sqrt(number of rows in
      # the year) and express it in percent
      vol_i <- sd(ret, na.rm = TRUE) * sqrt(nrow(data_hv_i)) * 100

      # store this year's result as a one-row data frame in its slot
      vol_final[[k]] <- data.frame(year = i, vol_i)
    }

    vol_final
    #> [[1]]
    #>   year    vol_i
    #> 1 2000 44.07769
    #> 
    #> [[2]]
    #>   year    vol_i
    #> 1 2001 43.39855
    #> 
    #> [[3]]
    #>   year    vol_i
    #> 1 2002 41.52856
    #> 
    #> [[4]]
    #>   year   vol_i
    #> 1 2003 43.0145
    #> 
    #> [[5]]
    #>   year    vol_i
    #> 1 2004 46.15526
    #> 
    #> [[6]]
    #>   year    vol_i
    #> 1 2005 49.52201
    #> 
    #> [[7]]
    #>   year    vol_i
    #> 1 2006 49.14017
    #> 
    #> [[8]]
    #>   year    vol_i
    #> 1 2007 46.17739
    #> 
    #> [[9]]
    #>   year    vol_i
    #> 1 2008 46.81863
    #> 
    #> [[10]]
    #>   year    vol_i
    #> 1 2009 43.77617
    #> 
    #> [[11]]
    #>   year    vol_i
    #> 1 2010 47.30922
    #> 
    #> [[12]]
    #>   year    vol_i
    #> 1 2011 47.00433
    #> 
    #> [[13]]
    #>   year    vol_i
    #> 1 2012 45.93459
    #> 
    #> [[14]]
    #>   year    vol_i
    #> 1 2013 40.46577
    #> 
    #> [[15]]
    #>   year    vol_i
    #> 1 2014 48.49482
    #> 
    #> [[16]]
    #>   year    vol_i
    #> 1 2015 44.76563
    #> 
    #> [[17]]
    #>   year    vol_i
    #> 1 2016 45.18651
    #> 
    #> [[18]]
    #>   year    vol_i
    #> 1 2017 49.04513
    #> 
    #> [[19]]
    #>   year   vol_i
    #> 1 2018 45.6688
    #> 
    #> [[20]]
    #>   year    vol_i
    #> 1 2019 43.08453
    #> 
    #> [[21]]
    #>   year    vol_i
    #> 1 2020 46.07952
    #> 
    #> [[22]]
    #>   year    vol_i
    #> 1 2021 47.65877
    #> 
    #> [[23]]
    #>   year    vol_i
    #> 1 2022 45.06297
    

    Created on 2022-06-20 by the reprex package (v2.0.1)