r dataframe loops for-loop distance

How can I use 2 for loops together (one for each year and one for each day)?


I think I have correctly calculated the average distance between three consecutive values when the condition (described in the code) is met. However, I did this calculation for only a single year, and I don't know how to do it for every year. Could you please help me? I think I need another for loop, but when I tried adding one it failed.

set.seed(123)

# Synthetic daily series: 5 years x 365 rows. Month and Day are plain repeating
# counters truncated to the series length, not a real calendar.
years <- rep(2010:2014, each = 365)
months <- rep(rep(1:12, each = 31), 5)[1:(5 * 365)]
days <- rep(1:31, times = 5 * 12)[1:(5 * 365)]

# Create a data frame with year, month, day and one random value per row.
x <- rnorm(5 * 365, mean = 15, sd = 5)
df <- data.frame(Year = years, Month = months, Day = days, Values = x)
library(magrittr)
df_filt <- df %>% dplyr::filter(Year == 2010)

# Average of the three pairwise absolute differences between the values at
# positions i - 2, i - 1 and i. abs(d) is the same quantity as sqrt(d^2).
mean_pairwise_distance <- function(v, i) {
  (1 / 3) * (abs(v[i] - v[i - 2]) +
               abs(v[i] - v[i - 1]) +
               abs(v[i - 2] - v[i - 1]))
}

# Distance  : value rose twice in a row      (v[i] > v[i-1] > v[i-2]).
# Distance2 : value rose after a prior drop  (v[i] > v[i-1] < v[i-2]).
# Rows matching neither pattern, and the first two rows, stay NA.
df_filt$Distance <- NA_real_
df_filt$Distance2 <- NA_real_

vals <- df_filt$Values

# Dropping the first two indices (instead of 3:nrow) yields an empty loop when
# there are fewer than three rows, rather than counting backwards.
for (i in seq_len(nrow(df_filt))[-(1:2)]) {
  rising_now <- vals[i] > vals[i - 1]
  if (rising_now && vals[i - 1] > vals[i - 2]) {
    df_filt$Distance[i] <- mean_pairwise_distance(vals, i)
  } else if (rising_now && vals[i - 1] < vals[i - 2]) {
    # Each pattern writes to its own column so neither overwrites the other.
    df_filt$Distance2[i] <- mean_pairwise_distance(vals, i)
  }
}

# Select the distance columns by name rather than by position. The variable
# `sum` only shadows base::sum as a value; the call sum(sum) below still
# resolves to the function.
sum <- colSums(df_filt[c("Distance", "Distance2")], na.rm = TRUE)
print(sum)

final_sum <- sum(sum)
print(final_sum)

Solution

  • Yes, as per the comment, the loops can be eliminated. Use the lag function to replace the first group of loops, then use the summarize function to handle the yearly totals.
    Depending on how you would like to handle the first couple days of the year you may need to change the placement of the distance calculations.

    library(dplyr)
    
    df_filt <- df
    
    # Per-year totals of the average pairwise distance among each value and
    # its two predecessors, split by pattern:
    #   Distance1: value rose twice in a row      (v > prev1 > prev2)
    #   Distance2: value rose after a prior drop  (v > prev1 < prev2)
    df_filt %>%
      # Group before calling lag() so the lags restart within each year: the
      # first two rows of every year get NA instead of borrowing values from
      # the end of the previous year. Move the mutate() above group_by() if
      # the lags should instead run across year boundaries.
      group_by(Year) %>%
      mutate(
        prev1 = lag(Values, 1),
        prev2 = lag(Values, 2),
        # abs(d) is the same quantity as sqrt(d^2).
        dist = (1 / 3) *
          (abs(Values - prev2) + abs(Values - prev1) + abs(prev2 - prev1)),
        # A comparison involving an NA lag is NA, and ifelse() then yields NA.
        Distance1 = ifelse(Values > prev1 & prev1 > prev2, dist, NA),
        Distance2 = ifelse(Values > prev1 & prev1 < prev2, dist, NA)
      ) %>%
      summarize(
        sumDistance1 = sum(Distance1, na.rm = TRUE),
        sumDistance2 = sum(Distance2, na.rm = TRUE)
      )