pythonmatplotlibcurve-fittingcovariance-matrix

Best fit of an increasing function that becomes constant


I am trying to fit a function to the following data.

x_data = np.array([0.01      , 0.01871795, 0.0274359 , 0.03615385, 0.04487179,
       0.05358974, 0.06230769, 0.07102564, 0.07974359, 0.08846154,
       0.09717949, 0.10589744, 0.11461538, 0.12333333, 0.13205128,
       0.14076923, 0.14948718, 0.15820513, 0.16692308, 0.17564103,
       0.18435897, 0.19307692, 0.20179487, 0.21051282, 0.21923077,
       0.22794872, 0.23666667, 0.24538462, 0.25410256, 0.26282051,
       0.27153846, 0.28025641, 0.28897436, 0.29769231, 0.30641026,
       0.31512821, 0.32384615, 0.3325641 , 0.34128205, 0.35      ])
y_data = array([0.07462271, 0.12197987, 0.15732335, 0.18376046, 0.2035856 ,
       0.21849438, 0.22974112, 0.23825469, 0.24472376, 0.24965963,
       0.25344248, 0.25635547, 0.2586099 , 0.26036377, 0.26173554,
       0.26281424, 0.26366699, 0.26434454, 0.26488545, 0.2653191 ,
       0.265668  , 0.26594946, 0.26617689, 0.26636074, 0.26650917,
       0.26662865, 0.2667243 , 0.26680021, 0.26685972, 0.26690551,
       0.26693978, 0.26696438, 0.26698081, 0.26699036, 0.2669941 ,
       0.26699297, 0.26698774, 0.26697912, 0.26696768, 0.26695394])

I have tried a general power function however it did not capture the fact that after a certain x value the y values becomes more or less constant. I also tried fitting a general piecewise function which is a power law when xa.

def fpiece(x, a, b, c, d):
    return np.where(x < a, b*np.power(x, c), d)

# initial guess
pars0 = (0.15, 0.4, 1, 0.25)

# perform fitting with custom weights
popt, pcov = curve_fit(fpiece, x_data, y_data, p0=pars0, maxfev=5000)

# plot data
plt.errorbar(x_data, y_data, yerr=0, fmt =".", color='black', label ='data', zorder=1, markersize=10)

# creating x interval to include in y fit
x_interval = np.linspace(0, max(x_data), len(x_data))
y_fit = fpiece( x_interval , *popt)

plt.plot( x_interval, y_fit, color = "red", label = "Best fit", zorder=2 , linewidth=3)

plt.grid(True)
plt.ylabel("y data")
plt.xlabel("x data")
plt.title("Best fit of data")
plt.legend()
plt.show()

However, you can see below that it doesn't fit the curve exactly. enter image description here


Solution

  • You need a "switch" function - one that goes through the origin but asymptotes/tops-out to your final value.

    I tried a few but the best I could get was y=a.(1-exp(-bx)).

    Here:

    a =  0.2664100717998893     b =  32.14540706754588
    

    (Note that you would get an even better visual fit to the curved part of the graph by removing the last 10 or so data points, because it is currently weighting heavily and unnecessarily to the points where y is essentially constant.)

    Code:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    
    
    x_data = np.array([0.01      , 0.01871795, 0.0274359 , 0.03615385, 0.04487179,
           0.05358974, 0.06230769, 0.07102564, 0.07974359, 0.08846154,
           0.09717949, 0.10589744, 0.11461538, 0.12333333, 0.13205128,
           0.14076923, 0.14948718, 0.15820513, 0.16692308, 0.17564103,
           0.18435897, 0.19307692, 0.20179487, 0.21051282, 0.21923077,
           0.22794872, 0.23666667, 0.24538462, 0.25410256, 0.26282051,
           0.27153846, 0.28025641, 0.28897436, 0.29769231, 0.30641026,
           0.31512821, 0.32384615, 0.3325641 , 0.34128205, 0.35      ])
    y_data = np.array([0.07462271, 0.12197987, 0.15732335, 0.18376046, 0.2035856 ,
           0.21849438, 0.22974112, 0.23825469, 0.24472376, 0.24965963,
           0.25344248, 0.25635547, 0.2586099 , 0.26036377, 0.26173554,
           0.26281424, 0.26366699, 0.26434454, 0.26488545, 0.2653191 ,
           0.265668  , 0.26594946, 0.26617689, 0.26636074, 0.26650917,
           0.26662865, 0.2667243 , 0.26680021, 0.26685972, 0.26690551,
           0.26693978, 0.26696438, 0.26698081, 0.26699036, 0.2669941 ,
           0.26699297, 0.26698774, 0.26697912, 0.26696768, 0.26695394])
    
    
    def fpiece( x, a, b ):
        return a * ( 1.0 - np.exp( -b * x ) )
    
    
    pars0 = ( 0.267, 1 )
    popt, pcov = curve_fit( fpiece, x_data, y_data, p0=pars0 )
    print( "a = ", popt[0], "    b = ", popt[1] )
    
    # plot data
    plt.errorbar(x_data, y_data, yerr=0, fmt =".", color='black', label ='data', zorder=1, markersize=10)
    # creating x interval to include in y fit
    x_interval = np.linspace(0, max(x_data), len(x_data))
    y_fit = fpiece( x_interval , *popt)
    plt.plot( x_interval, y_fit, color = "red", label = "Best fit", zorder=2 , linewidth=3)
    plt.grid(True)
    plt.ylabel("y data")
    plt.xlabel("x data")
    plt.title("Best fit of data")
    plt.legend()
    plt.show()
    

    enter image description here