pythonoptimizationnoise-reduction

How Can I Optimize the interpolation in calculation of SNR-EMD Denoising


I have written the following code for calculating the SNR-EMD of signal in python language:


from numpy import array, sign, zeros
from scipy.interpolate import interp1d

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from line_profiler import proflie

from math import exp, log10, sqrt

df = pd.read_csv("ps12017011-withNpW.csv",index_col=0)
df.drop("time2", axis=1, inplace=True)
df.drop("Null", axis=1, inplace=True)

FPData = df["pressure"].values
@profile
def mean_envelope(s):
    q_u = zeros(s.shape)
    q_l = zeros(s.shape)
    u_x = [0,]
    u_y = [s[0],]

    l_x = [0,]
    l_y = [s[0],]

    #Detect peaks and troughs and mark their location in u_x,u_y,l_x,l_y respectively.

    for k in range(1,len(s)-1):
        if (sign(s[k]-s[k-1])==1) and (sign(s[k]-s[k+1])==1):
            u_x.append(k)
            u_y.append(s[k])

        if (sign(s[k]-s[k-1])==-1) and ((sign(s[k]-s[k+1]))==-1):
            l_x.append(k)
            l_y.append(s[k])

    u_x.append(len(s)-1)
    u_y.append(s[-1])

    l_x.append(len(s)-1)
    l_y.append(s[-1])
    q_u = np.zeros_like(s)
    q_l = np.zeros_like(s)

    u_p = interp1d(u_x, u_y, kind = 'cubic',bounds_error = False, fill_value=np.nan)
    l_p = interp1d(l_x,l_y,kind = 'cubic',bounds_error = False, fill_value=np.nan)
        
        #Evaluate each model over the domain of (s)
    for k in range(len(s)):
        q_u[k] = u_p(k)
        q_l[k] = l_p(k)
    
    return (q_u+q_l)/2

def signaltonoise_dB(a, axis=0, ddof=0):
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=ddof)
    return 20*np.log10(abs(np.where(sd == 0, 0, m/sd)))
def c(i=0):
    if i == 0:
        return 0.5
    elif i>=1:
        return 1/(1+exp(0.1*i))+0.15
r=10
x2 = FPData
lx5 = []
snr = signaltonoise_dB(x2)
print("SNR is ", snr)
k1 = sqrt(10**(-snr/10))
print("k1 is ", k1)    

t = 1 
h = 0
firstSet = True
while k1>1e-8:
    x4 = x2+r*max(np.absolute(x2))

    
    x5 = mean_envelope(x4)

    x1hat = (x4**2-x5**2)/2/x5
        
    x0hat = x2-x1hat*c(h)

    x2 = x0hat
    k1 = k1 - c(h)*(2*k1+k1**2)/2
    snr = 10*log10((x0hat**2).sum()/(x1hat**2).sum())
    h+=1

print("SNR is ", snr)
print("k1 is ", k1)
plt.plot(FPData, color="g", label="pressure sensor reading")
plt.plot(x2, color="black", label="denoised data using SNR-EMD")
plt.legend()

The SNR-EMD is an algorithm for signals noise reduction using their SNR coefficient(K1) and is an empirical method which uses upper and lower envelopes' mean to calculate the signal with no noise.

how can I optimize the time bottleneck, namely, "the interpolation"?

Faster Implementation, EMD not SNR-EMD

I have seen the EMD-signal module that does the same logic but without k1 threshold.

I have tried chunking the data to smaller parts for interpolation range to become smaller with following code but it didn't improve the outcome that much.

def chunk_mean_envelope(s, chunk_size = 8192, intersection=1024):
    ret = np.zeros_like(s)
    num_iter = len(s)//chunk_size
    for i in range(num_iter):
        ret[0 if i == 0 else i*chunk_size-intersection: len(s) if i == num_iter-1 else(i+1)*chunk_size+intersection] = mean_envelope(
            s[0 if i == 0 else i*chunk_size-intersection: len(s) if i == num_iter-1 else(i+1)*chunk_size+intersection]) 
    return ret

and have seen implementation of 3 point interpolation in EMD-signal but don't know how to use it.

The Results of profiling

SNR is  60.656993449518524
k1 is  0.0009271506935603248
SNR is  92.16515518385509
k1 is  9.750116531462205e-09
Wrote profile results to envelope.py.lprof
Timer unit: 1e-06 s

Total time: 49.6864 s
File: .\envelope.py
Function: mean_envelope at line 57

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    57                                           @line_profiler.profile
    58                                           def mean_envelope(s):
    59        19        418.6     22.0      0.0      q_u = zeros(s.shape)
    60        19        267.7     14.1      0.0      q_l = zeros(s.shape)
    61        19         27.7      1.5      0.0      u_x = [0,]
    62        19         40.4      2.1      0.0      u_y = [s[0],]
    63
    64        19         16.3      0.9      0.0      l_x = [0,]
    65        19         19.4      1.0      0.0      l_y = [s[0],]
    66
    67                                               #Detect peaks and troughs and mark their location in u_x,u_y,l_x,l_y respectively.
    68
    69    284962     199262.0      0.7      0.4      for k in range(1,len(s)-1):
    70    284943    1181890.6      4.1      2.4          if (sign(s[k]-s[k-1])==1) and (sign(s[k]-s[k+1])==1):
    71     15077      13487.5      0.9      0.0              u_x.append(k)
    72     15077      13497.4      0.9      0.0              u_y.append(s[k])
    73
    74    284943    1244697.4      4.4      2.5          if (sign(s[k]-s[k-1])==-1) and ((sign(s[k]-s[k+1]))==-1):
    75     15069      12457.2      0.8      0.0              l_x.append(k)
    76     15069      14431.4      1.0      0.0              l_y.append(s[k])
    77
    78                                               #Append the last value of (s) to the interpolating values. This forces the model to use the same ending point for both the upper and lower envelope models.
    79
    80        19        101.8      5.4      0.0      u_x.append(len(s)-1)
    81        19         26.4      1.4      0.0      u_y.append(s[-1])
    82
    83        19         34.0      1.8      0.0      l_x.append(len(s)-1)
    84        19         23.7      1.2      0.0      l_y.append(s[-1])
    85        19       1946.4    102.4      0.0      q_u = np.zeros_like(s)
    86        19        643.5     33.9      0.0      q_l = np.zeros_like(s)
    87
    88        19      46290.1   2436.3      0.1      u_p = interp1d(u_x, u_y, kind = 'cubic',bounds_error = False, fill_value=np.nan)
    89        19      21385.7   1125.6      0.0      l_p = interp1d(l_x,l_y,kind = 'cubic',bounds_error = False, fill_value=np.nan)
    90
    91                                                   #Evaluate each model over the domain of (s)
    92    285000     214460.2      0.8      0.4      for k in range(len(s)):
    93    284981   23360016.9     82.0     47.0          q_u[k] = u_p(k)
    94    284981   23359182.9     82.0     47.0          q_l[k] = l_p(k)
    95
    96        19       1797.4     94.6      0.0      return (q_u+q_l)/2


Solution

  • Thanks to line_profiler we can see what exactly is your problem, interpolating per point instead of using vectorized opration on all points.

    Now, instead of:

    for k in range(len(s)):
        q_u[k] = u_p(k)
        q_l[k] = l_p(k)
    

    interpolate whole range of k values in single calls:

    k_range = np.arange(len(s))  # use some better name
    q_u = u_p(k_range)
    q_l = l_p(k_range) 
    

    That should speed-up nicely your code and other parts will be bottleneck