I am trying to fit a double-exponential (i.e. mixture of two exponential or bi-exp) data using MLE. Though there is no direct example of such a problem, yet I found some hint of using MLE for linear (Maximum Likelihood Estimate pseudocode), sigmoidal (https://stats.stackexchange.com/questions/66199/maximum-likelihood-curve-model-fitting-in-python) and normal (Scipy MLE fit of a normal distribution) distribution fitting. Using these examples I have tested the following code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import scipy.stats as stats
size = 300
def simu_dt():
## simulate Exp2 data
np.random.seed(0)
## generate random values between 0 to 1
x = np.random.rand(size)
data = []
for n in x:
if n < 0.6:
# generating 1st exp data
data.append(np.random.exponential(scale=20)) # t1
else:
# generating 2nd exp data
data.append(np.random.exponential(scale=500)) # t2
return np.array(data)
ydata2 = simu_dt() # call to generate simulated data
## trimming the data at the beginning and the end a bit
ydata2 = ydata2[np.where(2 < ydata2)]
ydata2 = ydata2[np.where(ydata2 < 3000)]
## creating the normalized log binned histogram data
bins = 10 ** np.linspace(np.log10(np.min(ydata2)), np.log10(np.max(ydata2)), 10)
counts, bin_edges = np.histogram(ydata2, bins=bins)
bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = (bin_edges[1:] - bin_edges[:-1])
counts = counts / bin_width / np.sum(counts)
## generating arbitrary x value
x1 = np.linspace(bin_centres.min(), bin_centres.max(), len(ydata2))
def MLE(params):
""" find the max likelihood """
a1, k1, k2, sd = params
yPred = (1-a1)*k1*np.exp(-k1*x1) + a1*k2*np.exp(-k2*x1)
negLL = -np.sum(stats.norm.pdf(ydata2, loc=yPred, scale=sd))
return negLL
guess = np.array([0.4, 1/30, 1/320, 0.2])
bnds = ((0, 0.9), (1/200, 1/2), (1/1000, 1/100), (0, 1))
## best function used for global fitting
results = optimize.minimize(MLE, guess, method='SLSQP', bounds=bnds)
print(results)
A1, K1, K2, _ = results.x
y_fitted = (1-A1)*K1*np.exp(-K1*x1) + A1*K2*np.exp(-K2*x1)
## plot actual data
plt.plot(bin_centres, counts, 'ko', label=" actual data")
plt.xlabel("Dwell Times (s)")
plt.ylabel("Probability")
## plot fitted data on original data
plt.plot(x1, y_fitted, c='r', linestyle='dashed', label="fit")
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.show()
The fit summary shows:
Out:
fun: -1.7494005752178573e-16
jac: array([-3.24161825e-18, 0.00000000e+00, 4.07105635e-16, -6.38053319e-14])
message: 'Optimization terminated successfully.'
nfev: 6
nit: 1
njev: 1
status: 0
success: True
x: array([0.4 , 0.03333333, 0.003125 , 0.2 ])
This is the plot showing the fit. Though the fit seems working the result returns the guesses I have provided! Also, if I change the guesses the fit is also changing, meaning it probably not converging at all. I am not sure what wrong I am doing. Just to say I am not an expert in Python and math as well. So, any help is highly appreciated. Thanks in Advance.
There are several places where I would say you made mistakes. For example, you are passing x1 (equidistant x-values) instead of ydata2 directly. Then, you are using an inappropriate negativeLL, as you are supposed to take the negative log of your own probabilities under the assumption of some parameters. Thus, your fourth parameter is unnecessary.Your function should be:
def MLE(params):
""" find the max likelihood """
a1, k1, k2 = params
yPred = (1-a1)*k1*np.exp(-k1*ydata2) + a1*k2*np.exp(-k2*ydata2)
negLL = -np.sum(np.log(yPred))
return negLL
The code still fails to converge for numerical reasons (scales badly), and some suggestions of linearization might help. What you can easily do is change your optimization method to e.g. L-BFGS-B and the method should converge properly.
Full code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import scipy.stats as stats
size = 300000
nbins = 30
def simu_dt():
## simulate Exp2 data
np.random.seed(20)
## generate random values between 0 to 1
x = np.random.rand(size)
data = []
for n in x:
if n < 0.6:
# generating 1st exp data
data.append(np.random.exponential(scale=20)) # t1
else:
# generating 2nd exp data
data.append(np.random.exponential(scale=500)) # t2
return np.array(data)
ydata2 = simu_dt() # call to generate simulated data
## trimming the data at the beginning and the end a bit
ydata2 = ydata2[np.where(2 < ydata2)]
ydata2 = ydata2[np.where(ydata2 < 3000)]
## creating the normalized log binned histogram data
bins = 10 ** np.linspace(np.log10(np.min(ydata2)), np.log10(np.max(ydata2)), nbins)
counts, bin_edges = np.histogram(ydata2, bins=bins)
bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = (bin_edges[1:] - bin_edges[:-1])
counts = counts / bin_width / np.sum(counts)
## generating arbitrary x value
x1 = np.linspace(bin_centres.min(), bin_centres.max(), len(ydata2))
def MLE(params):
""" find the max likelihood """
k1, k2 = params
yPred = 0.6*k1*np.exp(-k1*ydata2) + 0.4*k2*np.exp(-k2*ydata2)
negLL = -np.sum(np.log(yPred))
return negLL
guess = np.array([1/30, 1/200])
bnds = ((1/100, 1/2), (1/1000, 1/100))
## best function used for global fitting
results = optimize.minimize(MLE, guess, bounds=bnds)
print(results)
K1, K2 = results.x
y_fitted = 0.6*K1*np.exp(-K1*x1) + 0.4*K2*np.exp(-K2*x1)
## plot actual data
plt.plot(bin_centres, counts, 'ko', label=" actual data")
plt.xlabel("Dwell Times (s)")
plt.ylabel("Probability")
## plot fitted data on original data
plt.plot(x1, y_fitted, c='r', linestyle='dashed', label="fit")
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.show()