I would like to apply a 2nd-order low-pass Butterworth filter to my data and then use cubic spline interpolation to resample it at 1-meter intervals in Python.
I tried to guard against non-finite values, but I still get the following ValueError:
cs_v = CubicSpline(distance, filtered_v)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
raise ValueError("`x` must contain only finite values.")
ValueError: `x` must contain only finite values.
Here are the relevant parts of my code:
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
from scipy.interpolate import CubicSpline, interp1d
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# Butterworth filter
def butterworth_filter(data, cutoff, fs, order=2):
    """Low-pass filter *data* with a digital Butterworth filter.

    The filter is designed with ``butter`` and applied with ``filtfilt``.

    Args:
        data: Sequence of samples to filter.
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order passed to the Butterworth design (default 2).

    Returns:
        The filtered samples as returned by ``filtfilt``.
    """
    # butter() expects the cut-off as a fraction of the Nyquist rate (fs / 2).
    coefficients = butter(order, cutoff / (0.5 * fs), btype='low', analog=False)
    return filtfilt(*coefficients, data)
# ensuring data arrays are finite
def ensure_finite(data):
    """Return *data* with NaN and +/-inf entries replaced by interpolation.

    Non-finite samples are re-estimated by linear interpolation over the
    sample index; gaps at either end are linearly extrapolated.

    Args:
        data: 1-D NumPy array of numeric samples.

    Returns:
        ``data`` itself when every entry is already finite, otherwise a
        repaired copy. The caller's array is never written to, so a buffer
        that is shared with a DataFrame (or read-only) is safe to pass in.

    Raises:
        ValueError: If fewer than two finite samples are available, since a
            line cannot be fitted through them.
    """
    bad = ~np.isfinite(data)
    if not bad.any():
        return data

    good = ~bad
    if good.sum() < 2:
        raise ValueError("No valid data.")

    positions = np.arange(len(data))
    fill = interp1d(positions[good], data[good], kind='linear', fill_value="extrapolate")
    # np.array() copies, so the repair never touches the caller's buffer.
    repaired = np.array(data)
    repaired[bad] = fill(positions[bad])
    return repaired
def handle_infinite(data, t):
    """Return *data* with NaN and +/-inf entries interpolated along *t*.

    Non-finite samples are re-estimated by linear interpolation of the finite
    samples against their ``t`` values; positions outside the finite range
    are linearly extrapolated.

    Args:
        data: 1-D NumPy array of numeric samples.
        t: 1-D NumPy array of the same length giving the abscissa of each
            sample.

    Returns:
        ``data`` itself when every entry is already finite, otherwise a
        repaired copy. The caller's array is never written to.

    Raises:
        ValueError: If fewer than two finite samples are available.
    """
    bad = ~np.isfinite(data)
    if not bad.any():
        return data

    good = ~bad
    if good.sum() < 2:
        raise ValueError("No valid data.")

    fill = interp1d(t[good], data[good], kind='linear', fill_value="extrapolate")
    # np.array() copies, so the repair never touches the caller's buffer.
    repaired = np.array(data)
    repaired[bad] = fill(t[bad])
    return repaired
# ...
# Load the recorded track; each column is pulled out as a 1-D array.
data = pd.read_excel(r"xy")
t = data['Time'].values
x = data['X'].values
y = data['Y'].values
v = data['speed'].values
z = data['altitude'].values
a = data['acceleration'].values
# Copy: the first sample may be overwritten below, and the array returned by
# .values may share memory with the DataFrame.
distance = data['distance'].values.copy()

# Sampling frequency from the first two raw timestamps, taken before any rows
# are dropped or reordered (assumes a numeric, uniformly sampled Time column).
fs = 1 / (t[1] - t[0])  # sampling frequency

# Set initial NaN value in distance to 0
if np.isnan(distance[0]):
    distance[0] = 0

# CubicSpline rejects NaN/inf in its x argument, which is what raised
# "`x` must contain only finite values". Rows without a usable distance
# cannot be placed on the distance axis, so they are removed from every
# column (including t, to keep the arrays aligned).
has_distance = np.isfinite(distance)
t, x, y, v, z, a, distance = (
    column[has_distance] for column in (t, x, y, v, z, a, distance)
)

# Ensuring distance array is increasing
# A stable sort keeps samples recorded at the same distance in their original
# order; t is reordered too because handle_infinite() interpolates against it.
sorted_indices = np.argsort(distance, kind='stable')
distance = distance[sorted_indices]
t = t[sorted_indices]
x = x[sorted_indices]
y = y[sorted_indices]
v = v[sorted_indices]
z = z[sorted_indices]
a = a[sorted_indices]

# Ensuring data arrays are finite
v = ensure_finite(v)
a = ensure_finite(a)
z = ensure_finite(z)
x = ensure_finite(x)
y = ensure_finite(y)

# Butterworth filter
cutoff = 0.3  # frequency
filtered_v = butterworth_filter(v, cutoff, fs)
filtered_a = butterworth_filter(a, cutoff, fs)
filtered_z = butterworth_filter(z, cutoff, fs)
filtered_v = handle_infinite(filtered_v, t)
filtered_a = handle_infinite(filtered_a, t)
filtered_z = handle_infinite(filtered_z, t)
print(filtered_v)
print(filtered_z)

# Resampling
# CubicSpline needs strictly increasing x. Distances repeat whenever the
# position does not change between samples, so only the first sample at each
# distance is used as a spline knot.
spline_distance, first_idx = np.unique(distance, return_index=True)
# The grid must run up to the LAST distance; distance[0] is the start of the
# track (0), which made the original grid empty.
distance_new = np.arange(0, distance[-1], 1)  # at every 1 meter
cs_v = CubicSpline(spline_distance, filtered_v[first_idx])
cs_a = CubicSpline(spline_distance, filtered_a[first_idx])
cs_z = CubicSpline(spline_distance, filtered_z[first_idx])
cs_x = CubicSpline(spline_distance, x[first_idx])
cs_y = CubicSpline(spline_distance, y[first_idx])
v_cubic = cs_v(distance_new)
a_cubic = cs_a(distance_new)
z_cubic = cs_z(distance_new)
x_cubic = cs_x(distance_new)
y_cubic = cs_y(distance_new)
In the end, I was able to solve the problem myself.
Below are the relevant parts of my script: a 2nd-order low-pass Butterworth filter with an adjustable cut-off frequency, followed by resampling with cubic spline interpolation to achieve higher accuracy.
The method can be used for GPS data processing.
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt
# Butterworth filter
def butterworth_filter(data, cutoff, fs, order=2):
    """Apply a low-pass digital Butterworth filter to *data*.

    Args:
        data: Sequence of samples to filter.
        cutoff: Cut-off frequency, in the same units as ``fs``.
        fs: Sampling frequency of ``data``.
        order: Order passed to the Butterworth design (default 2).

    Returns:
        The result of running ``filtfilt`` over ``data`` with the designed
        coefficients.
    """
    # The design routine wants the cut-off relative to the Nyquist rate.
    relative_cutoff = cutoff / (0.5 * fs)
    numerator, denominator = butter(
        order, relative_cutoff, btype='low', analog=False
    )
    smoothed = filtfilt(numerator, denominator, data)
    return smoothed
# Load data
data = pd.read_excel(r"")
t = data['Time'].values
x = data['X'].values
y = data['Y'].values
v = data['speed'].values
z = data['altitude'].values
a = data['acceleration'].values
# Copy: the first sample may be overwritten below, and the array returned by
# .values may share memory with the DataFrame.
distance = data['distance'].values.copy()

# Sampling frequency from the first two raw timestamps, taken before t is
# reduced to one entry per unique distance (assumes a numeric, uniformly
# sampled Time column).
fs = 1 / (t[1] - t[0])  # sampling frequency

# Set initial NaN value in distance to 0
if np.isnan(distance[0]):
    distance[0] = 0

# Rows with a NaN/inf distance cannot be grouped or used as spline knots
# (CubicSpline requires finite x), so they are removed from every column.
has_distance = np.isfinite(distance)
t, x, y, v, z, a, distance = (
    column[has_distance] for column in (t, x, y, v, z, a, distance)
)

# Ensuring distance array is increasing
# np.unique returns the sorted distinct distances together with, for each
# input row, the group it belongs to and, for each group, its size and the
# index of its first row.
unique_distances, indices, group_of_row, group_size = np.unique(
    distance, return_index=True, return_inverse=True, return_counts=True
)
# Average all samples recorded at the same distance: the per-group sum
# (bincount with weights) divided by the per-group count.
x = np.bincount(group_of_row, weights=x) / group_size
y = np.bincount(group_of_row, weights=y) / group_size
v = np.bincount(group_of_row, weights=v) / group_size
a = np.bincount(group_of_row, weights=a) / group_size
z = np.bincount(group_of_row, weights=z) / group_size
# Keep the timestamp of the first sample at each distance so t stays aligned
# with the averaged columns (slicing t[:n] would pair values with the wrong
# times whenever duplicates were merged).
t = t[indices]
distance = unique_distances

# Butterworth filtering
# NOTE(review): merging duplicate distances above removes samples, so the
# series passed to the filter may no longer be uniformly spaced at fs --
# confirm this ordering is intended.
cutoff = 0.1  # frequency
filtered_y = butterworth_filter(y, cutoff, fs)
filtered_x = butterworth_filter(x, cutoff, fs)
filtered_v = butterworth_filter(v, cutoff, fs)
filtered_a = butterworth_filter(a, cutoff, fs)
filtered_z = butterworth_filter(z, cutoff, fs)

# Resampling
if np.any(np.diff(distance) <= 0):
    raise ValueError("Distance array must be strictly increasing.")
distance_new = np.arange(0, distance[-1], 1)  # at every 1 meter
cs_v = CubicSpline(distance, filtered_v)
cs_a = CubicSpline(distance, filtered_a)
cs_z = CubicSpline(distance, filtered_z)
# NOTE(review): the position splines use the unfiltered x/y although
# filtered_x/filtered_y are available -- confirm this is intended.
cs_x = CubicSpline(distance, x)
cs_y = CubicSpline(distance, y)
v_cubic = cs_v(distance_new)
a_cubic = cs_a(distance_new)
z_cubic = cs_z(distance_new)
x_cubic = cs_x(distance_new)
y_cubic = cs_y(distance_new)

# Every per-distance array (t, x, y, v, z, a and the filtered_* series) now
# has exactly one entry per unique distance, so no truncation is needed.
min_len = len(distance)