rsubsetquantmod

Get percentage of months with positive return and average return per month


I'm running the following code to download stock's data with monthly periodicity.

Also I added a couple of columns with monthly percentage return and a positive or negative return ( 0 or 1) per month.

library(quantmod)

Symbols<-c  ("AA","AAL","AAOI","ABBV","ABC","ABNB","AAPL","TSLA","AMZN","AMD","NKE",
             "NVDA","AMC","META")

start_date=as.Date("2013-06-01")


getSymbols(Symbols,from=start_date, src = 'yahoo', periodicity = 'monthly')

stock_data = sapply(.GlobalEnv, is.xts)

all_stocks <- do.call(list, mget(names(stock_data)[stock_data]))




# Calculate return function
Return <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  column_names <- c(names(x), stock_name)
  
  # Calculate the range (difference between closing price and opening price)
  range <- quantmod::Cl(x) - quantmod::Op(x)
  
  # Merge the range with the original x object
  x <- merge.xts(x, range)
  colnames(x) <- column_names
  
  return(x)
}

# Function to determine if monthly return is positive
Positive <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  positive_name <- paste0(stock_name, ".Positive")
  column_names <- c(names(x), positive_name)
  
  # Calculate the positive indicator
  x$positive <- ifelse(x[, stock_name] > 0, TRUE, FALSE)
  
  # Update the column names
  colnames(x) <- column_names
  
  return(x)
}
all_stocks <- lapply(all_stocks, Return)

all_stocks <- lapply(all_stocks, Positive)


Now, I want to get a new list to get each month performance details overview per stock:

For example, January performance. That will include average Return of all January months and Percentage of positive return over the years)

i.e stock XYZ January Performance,Average return= 11%, % Positive = 80%

Same for February,March,April...

I'm trying this to calculate percentage of positive return but getting error. Not sure if this is the best approach or what I'm doing wrong.

# Filter stocks with 100% positive return
positive_stocks <- list()

for (i in seq_along(all_stocks)) {
  stock <- all_stocks[[i]]
  positive_name <- paste0(names(stock)[1], ".Return.Positive")
  
  if (!is.null(stock[, positive_name]) && all(stock[, positive_name])) {
    positive_stocks[[names(stock)[1]]] <- stock
  }
}

# Extract stock names with 100% positive return
positive_stock_names <- names(positive_stocks)

# Print the names of stocks with 100% positive return
print(positive_stock_names)
 

Solution

  • This is how I might attempt the problem using dplyr and lubridate with your initial setup.

    Helper to convert xts to tibble

    xts_as_tibble <- function(x, name = "time", value = "value", group = "group"){
      time <- attr(x, "index")
      class_match <- match(attr(time, "tclass"), c("POSIXt", "Date"))
      is_datetime <- isTRUE(1L %in% class_match)
      is_date <- isTRUE(2L %in% class_match)
      if (is_date || is_datetime){
        if (!is.null(attr(time, "tzone"))){
          time <- lubridate::as_datetime(
            `attributes<-`(time, NULL), tz = attr(time, "tzone")
            )
        } else {
          time <- lubridate::as_datetime(
            `attributes<-`(time, NULL)
            )
        }
        if (is_date){
          time <- lubridate::as_date(time)
        }
      } else {
        time <- as.numeric(time)
      }
      ncol <- ncol(x)
      groups <- rep(colnames(x), each = length(time))
      if (is.null(groups)){
        groups <- rep(seq_len(ncol), each = length(time))
      }
      time <- rep(time, times = ncol)
      dplyr::tibble(!!group := groups,
                    !!name := time,
                    !!value := as.vector(x))
    }
    

    Calculation

    library(dplyr)
    library(lubridate)
    library(tidyr)
    for (i in seq_along(all_stocks)) {
      stock <- xts_as_tibble(all_stocks[[i]])
      positive_name <- paste0(names(all_stocks)[i], ".Return.Positive")
      return_name <- paste0(names(all_stocks)[i], ".Return")
      
      positive_stocks[[i]] <- stock %>%
        filter(group %in% c(return_name, positive_name)) %>%
        pivot_wider(names_from = group, values_from = value) %>%
        mutate(month = month(time, label = TRUE)) %>%
        group_by(month) %>%
        summarise(mean_return = mean(.data[[return_name]]),
                  proportion_pos = sum(.data[[positive_name]])/n())
    }
    names(positive_stocks) <- names(all_stocks)
    positive_stocks[1]
    $AA
    # A tibble: 12 × 3
       month mean_return proportion_pos
       <ord>       <dbl>          <dbl>
     1 Jan       -0.459           0.4  
     2 Feb        1.33            0.4  
     3 Mar       -0.187           0.4  
     4 Apr       -1.26            0.6  
     5 May       -2.35            0.3  
     6 Jun       -1.50            0.455
     7 Jul        1.38            0.727
     8 Aug        0.370           0.417
     9 Sep       -0.963           0.6  
    10 Oct       -0.0321          0.6  
    11 Nov        1.46            0.7  
    12 Dec        1.52            0.6