Get percentage of months with positive return and average return per month

I'm running the following code to download stock's data with monthly periodicity.

Also I added a couple of columns with monthly percentage return and a positive or negative return ( 0 or 1) per month.

library(quantmod)

Symbols<-c  ("AA","AAL","AAOI","ABBV","ABC","ABNB","AAPL","TSLA","AMZN","AMD","NKE",
             "NVDA","AMC","META")

start_date=as.Date("2013-06-01")


getSymbols(Symbols,from=start_date, src = 'yahoo', periodicity = 'monthly')

stock_data = sapply(.GlobalEnv, is.xts)

all_stocks <- do.call(list, mget(names(stock_data)[stock_data]))




# Calculate return function
Return <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  column_names <- c(names(x), stock_name)
  
  # Calculate the range (difference between closing price and opening price)
  range <- quantmod::Cl(x) - quantmod::Op(x)
  
  # Merge the range with the original x object
  x <- merge.xts(x, range)
  colnames(x) <- column_names
  
  return(x)
}

# Function to determine if monthly return is positive
Positive <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  positive_name <- paste0(stock_name, ".Positive")
  column_names <- c(names(x), positive_name)
  
  # Calculate the positive indicator
  x$positive <- ifelse(x[, stock_name] > 0, TRUE, FALSE)
  
  # Update the column names
  colnames(x) <- column_names
  
  return(x)
}
all_stocks <- lapply(all_stocks, Return)

all_stocks <- lapply(all_stocks, Positive)

Now, I want to get a new list to get each month performance details overview per stock:

For example, January performance. That will include average Return of all January months and Percentage of positive return over the years)

i.e stock XYZ January Performance,Average return= 11%, % Positive = 80%

Same for February,March,April...

I'm trying this to calculate percentage of positive return but getting error. Not sure if this is the best approach or what I'm doing wrong.

# Filter stocks with 100% positive return
positive_stocks <- list()

for (i in seq_along(all_stocks)) {
  stock <- all_stocks[[i]]
  positive_name <- paste0(names(stock)[1], ".Return.Positive")
  
  if (!is.null(stock[, positive_name]) && all(stock[, positive_name])) {
    positive_stocks[[names(stock)[1]]] <- stock
  }
}

# Extract stock names with 100% positive return
positive_stock_names <- names(positive_stocks)

# Print the names of stocks with 100% positive return
print(positive_stock_names)

Solution

This is how I might attempt the problem using dplyr and lubridate with your initial setup.

Helper to convert xts to tibble

xts_as_tibble <- function(x, name = "time", value = "value", group = "group"){
  time <- attr(x, "index")
  class_match <- match(attr(time, "tclass"), c("POSIXt", "Date"))
  is_datetime <- isTRUE(1L %in% class_match)
  is_date <- isTRUE(2L %in% class_match)
  if (is_date || is_datetime){
    if (!is.null(attr(time, "tzone"))){
      time <- lubridate::as_datetime(
        `attributes<-`(time, NULL), tz = attr(time, "tzone")
        )
    } else {
      time <- lubridate::as_datetime(
        `attributes<-`(time, NULL)
        )
    }
    if (is_date){
      time <- lubridate::as_date(time)
    }
  } else {
    time <- as.numeric(time)
  }
  ncol <- ncol(x)
  groups <- rep(colnames(x), each = length(time))
  if (is.null(groups)){
    groups <- rep(seq_len(ncol), each = length(time))
  }
  time <- rep(time, times = ncol)
  dplyr::tibble(!!group := groups,
                !!name := time,
                !!value := as.vector(x))
}

Calculation

library(dplyr)
library(lubridate)
library(tidyr)
for (i in seq_along(all_stocks)) {
  stock <- xts_as_tibble(all_stocks[[i]])
  positive_name <- paste0(names(all_stocks)[i], ".Return.Positive")
  return_name <- paste0(names(all_stocks)[i], ".Return")
  
  positive_stocks[[i]] <- stock %>%
    filter(group %in% c(return_name, positive_name)) %>%
    pivot_wider(names_from = group, values_from = value) %>%
    mutate(month = month(time, label = TRUE)) %>%
    group_by(month) %>%
    summarise(mean_return = mean(.data[[return_name]]),
              proportion_pos = sum(.data[[positive_name]])/n())
}
names(positive_stocks) <- names(all_stocks)
positive_stocks[1]
$AA
# A tibble: 12 × 3
   month mean_return proportion_pos
   <ord>       <dbl>          <dbl>
 1 Jan       -0.459           0.4  
 2 Feb        1.33            0.4  
 3 Mar       -0.187           0.4  
 4 Apr       -1.26            0.6  
 5 May       -2.35            0.3  
 6 Jun       -1.50            0.455
 7 Jul        1.38            0.727
 8 Aug        0.370           0.417
 9 Sep       -0.963           0.6  
10 Oct       -0.0321          0.6  
11 Nov        1.46            0.7  
12 Dec        1.52            0.6