I have written the following code for calculating the SNR-EMD of signal in python language:
from numpy import array, sign, zeros
from scipy.interpolate import interp1d
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from line_profiler import proflie
from math import exp, log10, sqrt
df = pd.read_csv("ps12017011-withNpW.csv",index_col=0)
df.drop("time2", axis=1, inplace=True)
df.drop("Null", axis=1, inplace=True)
FPData = df["pressure"].values
@profile
def mean_envelope(s):
q_u = zeros(s.shape)
q_l = zeros(s.shape)
u_x = [0,]
u_y = [s[0],]
l_x = [0,]
l_y = [s[0],]
#Detect peaks and troughs and mark their location in u_x,u_y,l_x,l_y respectively.
for k in range(1,len(s)-1):
if (sign(s[k]-s[k-1])==1) and (sign(s[k]-s[k+1])==1):
u_x.append(k)
u_y.append(s[k])
if (sign(s[k]-s[k-1])==-1) and ((sign(s[k]-s[k+1]))==-1):
l_x.append(k)
l_y.append(s[k])
u_x.append(len(s)-1)
u_y.append(s[-1])
l_x.append(len(s)-1)
l_y.append(s[-1])
q_u = np.zeros_like(s)
q_l = np.zeros_like(s)
u_p = interp1d(u_x, u_y, kind = 'cubic',bounds_error = False, fill_value=np.nan)
l_p = interp1d(l_x,l_y,kind = 'cubic',bounds_error = False, fill_value=np.nan)
#Evaluate each model over the domain of (s)
for k in range(len(s)):
q_u[k] = u_p(k)
q_l[k] = l_p(k)
return (q_u+q_l)/2
def signaltonoise_dB(a, axis=0, ddof=0):
a = np.asanyarray(a)
m = a.mean(axis)
sd = a.std(axis=axis, ddof=ddof)
return 20*np.log10(abs(np.where(sd == 0, 0, m/sd)))
def c(i=0):
if i == 0:
return 0.5
elif i>=1:
return 1/(1+exp(0.1*i))+0.15
r=10
x2 = FPData
lx5 = []
snr = signaltonoise_dB(x2)
print("SNR is ", snr)
k1 = sqrt(10**(-snr/10))
print("k1 is ", k1)
t = 1
h = 0
firstSet = True
while k1>1e-8:
x4 = x2+r*max(np.absolute(x2))
x5 = mean_envelope(x4)
x1hat = (x4**2-x5**2)/2/x5
x0hat = x2-x1hat*c(h)
x2 = x0hat
k1 = k1 - c(h)*(2*k1+k1**2)/2
snr = 10*log10((x0hat**2).sum()/(x1hat**2).sum())
h+=1
print("SNR is ", snr)
print("k1 is ", k1)
plt.plot(FPData, color="g", label="pressure sensor reading")
plt.plot(x2, color="black", label="denoised data using SNR-EMD")
plt.legend()
The SNR-EMD is an algorithm for signals noise reduction using their SNR coefficient(K1) and is an empirical method which uses upper and lower envelopes' mean to calculate the signal with no noise.
how can I optimize the time bottleneck, namely, "the interpolation"?
I have seen the EMD-signal
module that does the same logic but without k1
threshold.
I have tried chunking the data to smaller parts for interpolation range to become smaller with following code but it didn't improve the outcome that much.
def chunk_mean_envelope(s, chunk_size = 8192, intersection=1024):
ret = np.zeros_like(s)
num_iter = len(s)//chunk_size
for i in range(num_iter):
ret[0 if i == 0 else i*chunk_size-intersection: len(s) if i == num_iter-1 else(i+1)*chunk_size+intersection] = mean_envelope(
s[0 if i == 0 else i*chunk_size-intersection: len(s) if i == num_iter-1 else(i+1)*chunk_size+intersection])
return ret
and have seen implementation of 3 point interpolation in EMD-signal
but don't know how to use it.
SNR is 60.656993449518524
k1 is 0.0009271506935603248
SNR is 92.16515518385509
k1 is 9.750116531462205e-09
Wrote profile results to envelope.py.lprof
Timer unit: 1e-06 s
Total time: 49.6864 s
File: .\envelope.py
Function: mean_envelope at line 57
Line # Hits Time Per Hit % Time Line Contents
==============================================================
57 @line_profiler.profile
58 def mean_envelope(s):
59 19 418.6 22.0 0.0 q_u = zeros(s.shape)
60 19 267.7 14.1 0.0 q_l = zeros(s.shape)
61 19 27.7 1.5 0.0 u_x = [0,]
62 19 40.4 2.1 0.0 u_y = [s[0],]
63
64 19 16.3 0.9 0.0 l_x = [0,]
65 19 19.4 1.0 0.0 l_y = [s[0],]
66
67 #Detect peaks and troughs and mark their location in u_x,u_y,l_x,l_y respectively.
68
69 284962 199262.0 0.7 0.4 for k in range(1,len(s)-1):
70 284943 1181890.6 4.1 2.4 if (sign(s[k]-s[k-1])==1) and (sign(s[k]-s[k+1])==1):
71 15077 13487.5 0.9 0.0 u_x.append(k)
72 15077 13497.4 0.9 0.0 u_y.append(s[k])
73
74 284943 1244697.4 4.4 2.5 if (sign(s[k]-s[k-1])==-1) and ((sign(s[k]-s[k+1]))==-1):
75 15069 12457.2 0.8 0.0 l_x.append(k)
76 15069 14431.4 1.0 0.0 l_y.append(s[k])
77
78 #Append the last value of (s) to the interpolating values. This forces the model to use the same ending point for both the upper and lower envelope models.
79
80 19 101.8 5.4 0.0 u_x.append(len(s)-1)
81 19 26.4 1.4 0.0 u_y.append(s[-1])
82
83 19 34.0 1.8 0.0 l_x.append(len(s)-1)
84 19 23.7 1.2 0.0 l_y.append(s[-1])
85 19 1946.4 102.4 0.0 q_u = np.zeros_like(s)
86 19 643.5 33.9 0.0 q_l = np.zeros_like(s)
87
88 19 46290.1 2436.3 0.1 u_p = interp1d(u_x, u_y, kind = 'cubic',bounds_error = False, fill_value=np.nan)
89 19 21385.7 1125.6 0.0 l_p = interp1d(l_x,l_y,kind = 'cubic',bounds_error = False, fill_value=np.nan)
90
91 #Evaluate each model over the domain of (s)
92 285000 214460.2 0.8 0.4 for k in range(len(s)):
93 284981 23360016.9 82.0 47.0 q_u[k] = u_p(k)
94 284981 23359182.9 82.0 47.0 q_l[k] = l_p(k)
95
96 19 1797.4 94.6 0.0 return (q_u+q_l)/2
Thanks to line_profiler we can see what exactly is your problem, interpolating per point instead of using vectorized opration on all points.
Now, instead of:
for k in range(len(s)):
q_u[k] = u_p(k)
q_l[k] = l_p(k)
interpolate whole range of k values in single calls:
k_range = np.arange(len(s)) # use some better name
q_u = u_p(k_range)
q_l = l_p(k_range)
That should speed-up nicely your code and other parts will be bottleneck