I want to plot a spline with R, but I get the error message "'newdata' has 38 rows, but the variables found have 700 rows". I am not sure why the spline prediction has so many rows; it clearly has too many.
My professor told me to work with the functions lm() and bs(), which is why I am using them.
I hope you can help :-)
Bonus question: Can I make a 3D plot that shows how all three variables (sales, visits and discount) are related to each other?
--
Here is my code; I hope I am not missing anything. I still do not understand how to turn a spline model (spline_model) into a line on a plot. And why do I need to predict the data again with seq() and predict() (ChatGPT and some sources on the internet told me to do that)? Why can I not use the fitted spline_model directly for plotting?
library(dplyr) #Datenmanipulation
library(DataExplorer) #DatenExplore
library(skimr) #Datenüberblick
library(lmtest) #Regression
library(olsrr)
library(jtools)
library(moments)
library(highcharter) #Diagramme keine gewerbliche Nutzung
library(ggstatsplot)
library(ggplot2)
library(graphics)
library(mgcv) #spline
library(ggeffects) #GAM <- Spline
library(DiagrammeR) #Kausalitätsdiagramm
library(splines) #Splines
# Load the sales data set and keep only the three columns used below.
data_path <- "https://raw.githubusercontent.com/juanitorduz/website_projects/master/data/sales_dag.csv"
data <- read.csv(data_path)
data_clean <- data %>% select(visits, discount, sales)

# Spline model: B-spline basis in visits plus a linear discount term, without
# an intercept. The formula refers to the columns by their bare names. Writing
# data_clean$visits inside the formula ties the term to the training data, so
# predict() cannot replace it with `newdata`; that is what produces the
# "'newdata' has 38 rows, but the variables found have 700 rows" message.
spline_model <- lm(
  sales ~ bs(visits, df = 3, knots = 3) + discount - 1,
  data = data_clean
)

# lines() needs explicit (x, y) points, so the fitted curve is evaluated on a
# sequence running from the smallest to the largest observed visits value.
visits_range <- range(data_clean$visits)
visits_seq <- seq(from = visits_range[1], to = visits_range[2])

# `newdata` must contain every predictor of the model. discount is held at its
# sample mean so the curve shows the effect of visits alone.
# NOTE(review): assumes discount is a numeric column - confirm.
prediction_grid <- data.frame(
  visits = visits_seq,
  discount = mean(data_clean$discount)
)
spline_model_pred <- predict(spline_model, newdata = prediction_grid, se.fit = TRUE)

# Observed data as points, fitted spline as a red line on top.
plot(data_clean$visits, data_clean$sales,
     main = "Beobachtete vs. Vorhergesagte Werte",
     xlab = "Visits (IV)", ylab = "Sales (DV)")
lines(visits_seq, spline_model_pred$fit, col = "red", lwd = 3)
Update: Everything is solved with your help.
#Not every library is needed
library(dplyr) #Datenmanipulation
library(DataExplorer) #DatenExplore
library(skimr) #Datenüberblick
library(lmtest) #Regression
library(olsrr)
library(jtools)
library(moments)
library(highcharter) #Diagramme keine gewerbliche Nutzung
library(ggstatsplot)
library(ggplot2)
library(graphics)
library(mgcv) #spline
library(ggeffects) #GAM <- Spline
library(DiagrammeR) #Kausalitätsdiagramm
library(splines) #Splines
# Laden der erforderlichen Bibliotheken für 3D
library(mgcv)
library(plot3D)
library(scatterplot3d)
library("rgl")
# Fit the spline model: B-spline basis in visits plus a linear discount term,
# without an intercept. Columns are referenced by bare name so that predict()
# can evaluate the terms on `newdata`.
# NOTE(review): both `df` and `knots` are passed to bs(); the splines
# documentation describes `df` as an alternative to `knots` - confirm which
# of the two is intended.
spline_model <- lm(
  sales ~ bs(visits, df = 3, knots = 3) + discount - 1,
  data = data_clean
)

# Build a regular 100 x 100 grid spanning the observed ranges of visits and
# discount, then predict sales at every grid point.
visits_range <- range(data_clean$visits)
discount_range <- range(data_clean$discount)
visits_seq <- seq(from = visits_range[1], to = visits_range[2], length.out = 100)
discount_seq <- seq(from = discount_range[1], to = discount_range[2], length.out = 100)
visits_discount_grid <- expand.grid(visits = visits_seq, discount = discount_seq)
# `se.fit` is written out in full rather than relying on partial matching.
spline_model_pred <- predict(spline_model, newdata = visits_discount_grid, se.fit = TRUE)

# 3D scatter plot of the raw observations, drawn as red spheres.
plot3d(data_clean$visits, data_clean$discount, data_clean$sales,
       xlab = "Visits", ylab = "Discount", zlab = "Sales",
       main = "3D-Modell der Spline-Funktion und Rohdaten",
       type = "s", col = "red", size = 2)

# Reshape the predictions into a matrix for surface3d(). expand.grid() varies
# its first variable (visits) fastest, so the column-wise fill puts visits
# along the rows and discount along the columns.
spline_surface <- matrix(spline_model_pred$fit,
                         nrow = length(visits_seq),
                         ncol = length(discount_seq),
                         byrow = FALSE)
# Overlay the fitted surface in blue on top of the raw data.
surface3d(visits_seq, discount_seq, spline_surface, col = "blue")

# Add a legend: a line entry for the spline fit, a point entry for the data.
legend3d("topright",
         legend = c("Spline-Funktion", "Rohdaten"),
         col = c("blue", "red"),
         pch = c(NA, 16), lty = c(1, NA), lwd = c(4, NA), cex = 2)