rlinear-interpolation

Linear interpolation between data points onto a fixed y-grid


I want to use linear interpolation to create a finer dataset where my responses y are non-monotonic and I need to interpolate onto a fixed y-grid (this is not negotiable). I have a script that does this, I am wondering if there is an easier approach I could use.

system("Rscript -e 'q(\"no\")'")  # command to restart R
gc()  # should clear global variables
rm(list = ls())  # clear the workspace

library(tidyverse)
library(dplyr)
library(ggplot2)

# the original data
x = c(0,1,2,3,4,5,6,7,8)  
y = c(3,6,1,2,4,6,7,5.5,3.5)
df <- data.frame(x = x, y = y)

# the y values I want to interpolate onto
ynew = seq(min(y), max(y), length.out = 21) 


interpolatedX <- function(i, x, y, ynew) {  # function that calculates the new X value given the points at the ends
  m = (y[i+1] - y[i]) / (x[i+1] - x[i])
  c = y[i] - m * x[i]
  
  Xnew = (ynew - c) / m
  return(Xnew)
}

mydata <- data.frame(X = numeric(), y_new = numeric()) 

for (i in 1:(length(y) - 1)) {  # work through each of the y-values in the observations
  ystart <- y[i]
  yend <- y[i + 1]
  
 # find location of values in ynew that are between the y-points
  if (i == length(y) - 1) {  # if we are at the last point
    if (ystart < yend) {
      Y_inbetween_start_and_end <- which(ynew >= ystart & ynew <= yend)  
    } else {
      Y_inbetween_start_and_end <- which(ynew >= yend & ynew <= ystart)
    }
  } else { # if we are not at the last point
    if (ystart < yend) {
      Y_inbetween_start_and_end <- which(ynew >= ystart & ynew < yend)
    } else {
      Y_inbetween_start_and_end <- which(ynew > yend & ynew <= ystart)
    }
  }
  
  for (j in 1:(length(Y_inbetween_start_and_end) - 1)) {
    pos_in_ynew <- Y_inbetween_start_and_end[j]
    newX <- interpolatedX(i, x, y, ynew[pos_in_ynew] )
    mydata<- rbind(mydata, data.frame(X = newX, y_new = ynew[pos_in_ynew]))
  }
}

dfi <- mydata %>% drop_na() %>% arrange(X)

ggplot() +
  geom_point(data = df, aes(x = x, y = y), color = "blue", size = 2, stroke = NA, alpha = 0.4) +
  geom_point(data = dfi, aes(x = X, y = y_new), color = "red", size = 1, stroke = NA) +
  labs(x = "X", y = "Y", title = "Original vs Interpolated Data") +
  theme_minimal()

Solution

  • I'm not sure this is much of an improvement, but it demonstrates an approach relying on some dplyr data wrangling and the base approx function.

    # prepare version of data with start and end coordinates of each segment
    bind_rows(df |> mutate(segment = row_number()),
              df |> mutate(segment = row_number() - 1)) |>
      filter(n() == 2, .by = segment) |>
      mutate(low = min(y), high = max(y), .by = segment) |>
    
      # create a row for every target y within each segment y range
      left_join(data.frame(y_pts = seq(min(y), max(y), length.out = 21)),
                join_by(low <= y_pts, high >= y_pts)) |>
    
      # use `approx` with x/y swapped since we are solving for x from y
      mutate(x_fit = approx(y, x, xout = y_pts)$y, .by = segment) |>
    
      # plot
      ggplot(aes(x_fit, y_pts)) +
      geom_point() +
      geom_point(aes(x,y), data = df, color = "red")
      
    

    enter image description here