rspline

Plot Spline in R


I want to plot a spline in R, but I get the error message "'newdata' has 38 rows, but the variables found have 700 rows". I am not sure why the spline has so many rows; it does indeed have too many.

My professor told me that I should work with the functions lm() and bs(). That is why I am using them.

I hope for your help :-)

Bonus question: Can I make a 3D plot that shows how all three variables (sales, visits and discount) are related to each other?

--

Here is my code. I hope that I am not missing anything. I still do not understand how to turn a spline model (spline_model) into a line. And why do I need to predict the data again with seq and pred (ChatGPT and some sources on the internet told me to do that)? Why can I not use the fitted spline_model directly for plotting?

library(dplyr) #Datenmanipulation
library(DataExplorer) #DatenExplore
library(skimr) #Datenüberblick
library(lmtest) #Regression
library(olsrr)
library(jtools)
library(moments)
library(highcharter) #Diagramme keine gewerbliche Nutzung
library(ggstatsplot)
library(ggplot2)
library(graphics) 
library(mgcv) #spline
library(ggeffects) #GAM <- Spline
library(DiagrammeR) #Kausalitätsdiagramm
library(splines) #Splines

# Fit a B-spline regression of sales on visits (plus a linear discount term)
# and draw the fitted curve over the observed data.
data_path <- "https://raw.githubusercontent.com/juanitorduz/website_projects/master/data/sales_dag.csv"
data <- read.csv(data_path)
data_clean <- data %>% select(visits, discount, sales)

# Refer to the columns by bare name inside the formula. Writing
# `data_clean$visits` hard-wires the 700 training values into the model, so
# predict() cannot substitute the columns of `newdata`; that is what produces
# "'newdata' has 38 rows, but the variables found have 700 rows".
# NOTE(review): `knots = 3` places one interior breakpoint at visits == 3; it
# does not request three knots. Confirm that this is intended.
spline_model <- lm(
  sales ~ bs(visits, df = 3, knots = 3) + discount - 1,
  data = data_clean
)

# A fitted model only stores coefficients. To draw a smooth line, evaluate
# the model on an ordered sequence of x values covering the observed range.
visits_range <- range(data_clean$visits) # min and max of visits
visits_seq <- seq(from = visits_range[1], to = visits_range[2])

# Every predictor in the formula must be present in `newdata`. Discount is
# held at its sample mean so the curve shows the effect of visits alone
# (assumes discount is numeric -- TODO confirm).
prediction_grid <- data.frame(
  visits = visits_seq,
  discount = mean(data_clean$discount)
)
spline_model_pred <- predict(
  spline_model,
  newdata = prediction_grid,
  se.fit = TRUE
)

plot(
  data_clean$visits, data_clean$sales,
  main = "Beobachtete vs. Vorhergesagte Werte",
  xlab = "Visits (IV)", ylab = "Sales (DV)"
)
lines(visits_seq, spline_model_pred$fit, col = "red", lwd = 3)

Solution

  • Update. Everything solved with your help.

    #Not every library is needed
    library(dplyr) #Datenmanipulation
    library(DataExplorer) #DatenExplore
    library(skimr) #Datenüberblick
    library(lmtest) #Regression
    library(olsrr)
    library(jtools)
    library(moments)
    library(highcharter) #Diagramme keine gewerbliche Nutzung
    library(ggstatsplot)
    library(ggplot2)
    library(graphics) 
    library(mgcv) #spline
    library(ggeffects) #GAM <- Spline
    library(DiagrammeR) #Kausalitätsdiagramm
    library(splines) #Splines
    
    # Laden der erforderlichen Bibliotheken für 3D
    library(mgcv)
    library(plot3D)
    library(scatterplot3d)
    library("rgl")
        
        # Spline-Modell erstellen
    # Cubic B-spline in visits plus a linear discount term, without intercept.
    # Bare column names are used so predict() can evaluate the model on new data.
    spline_model <- lm(
      sales ~ bs(visits, df = 3, knots = 3) + discount - 1,
      data = data_clean
    )

    # Build a regular 100 x 100 grid spanning the observed predictor ranges.
    visits_range <- range(data_clean$visits)
    discount_range <- range(data_clean$discount)
    visits_seq <- seq(visits_range[1], visits_range[2], length.out = 100)
    discount_seq <- seq(discount_range[1], discount_range[2], length.out = 100)
    visits_discount_grid <- expand.grid(
      visits = visits_seq,
      discount = discount_seq
    )

    # Evaluate the fitted model at every grid point.
    spline_model_pred <- predict(
      spline_model,
      newdata = visits_discount_grid,
      se.fit = TRUE
    )

    # expand.grid() varies `visits` fastest and matrix() fills column-wise, so
    # rows of the surface correspond to visits_seq and columns to discount_seq.
    spline_surface <- matrix(
      spline_model_pred$fit,
      nrow = length(visits_seq),
      ncol = length(discount_seq)
    )

    # 3D scatter plot of the raw observations.
    plot3d(
      data_clean$visits, data_clean$discount, data_clean$sales,
      xlab = "Visits", ylab = "Discount", zlab = "Sales",
      main = "3D-Modell der Spline-Funktion und Rohdaten",
      type = "s", col = "red", size = 2
    )

    # Overlay the fitted spline surface on the scatter plot.
    surface3d(visits_seq, discount_seq, spline_surface, col = "blue")

    # Add a legend: a line entry for the surface, a point entry for the raw data.
    legend3d(
      "topright",
      legend = c("Spline-Funktion", "Rohdaten"),
      col = c("blue", "red"),
      pch = c(NA, 16),
      lty = c(1, NA),
      lwd = c(4, NA),
      cex = 2
    )
    

    This is my current result