rscatter-plotabline

Error plotting lines on a graph made in R


Could you help me solve following issue:

I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).

Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.

Could the problem be with the ylim?

I hope someone helps me with this! =)

Thank you so much!

First code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))

graph <- function(dt, dta = data) {                                        
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                    date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

DS=c("Thursday","Friday","Saturday") 

i<-2

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week==DS[i])

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
    

  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

enter image description here

Second code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                        date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

# get the week day
  
my_day <- weekdays(as.Date(dt))

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week == my_day)

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))


dta %>%
  filter(date2 == ymd(dt)) %>%
  summarize(across(starts_with("DR"), sum)) %>%
  pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
  mutate(name = as.numeric(name)) %>%
  plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
       cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
  abline(h=(mean + sd), col='green',lty=2) 
abline(h=(mean - sd), col='orange',lty=2)

}  
graph("2021-04-09",data)

enter image description here


Solution

  • You've messed up a lot with these data transformations. Below, however, I present my code that works according to your expectations.

    The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?

    library(dplyr)
    library(tidyr)
    library(lubridate)
    
    
    data <- structure(
      list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
           date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
           date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                     "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                     "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                     "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
           Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
                   "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
                   "Thursday","Friday","Friday","Saturday","Saturday"),
           DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
           D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
           DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
           DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
      class = "data.frame", row.names = c(NA, -21L))
    
    
    graph <- function(dt, dta = data) {
      
      dim_data<-dim(data)
      
      day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                      length = dim_data[1]
      )) 
      
      data_grouped <- data %>%
        mutate(across(starts_with("date"), as.Date)) %>%
        group_by(date2) %>%
        summarise(Id = first(Id),
                  date1 = first(date1),
                  Week = first(Week),
                  DTPE = first(DTPE),
                  D1 = sum(D1)) %>%
        select(Id,date1,date2,Week,DTPE,D1)
      
      #data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
      #                                        date2=format(date2,"%d/%m/%Y"))
      #data_grouped<-data.frame(data_grouped)
      data_grouped %>% 
        mutate(DTPE = na_if(DTPE, ""))
      
      # get the week day
      
      #my_day <- weekdays(as.Date(dt))
      
      df_OC<-subset(data_grouped, DTPE == "")
      ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
      #ds_CO<-subset(df_OC,df_OC$Week == my_day)
      
      mean<-mean(ds_CO$D1)
      sd<-sd(ds_CO$D1)
      
      
      dta %>%
        filter(date2 == ymd(dt)) %>%
        summarize(across(starts_with("DR"), sum)) %>%
        pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
        mutate(name = as.numeric(name)) %>%
        plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
             cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
      abline(h=mean, col='blue') +
        abline(h=(mean + sd), col='green',lty=2) 
      abline(h=(mean - sd), col='orange',lty=2)
      
    }  
    graph("2021-04-09",data)
    
    

    Finally, I recommend:

    1. Keep your data in tibble,
    2. do not unnecessarily transform the date into a string several times and vice versa,
    3. use ggplo2. The charts will be much nicer.