I have a large dataset of diode measurements and I'm trying to extract the theoretical parameters for further modeling. I've had success with scipy curve_fit on simpler functions before, but here it returns nothing close to the measured data.
Here is the code I am running:
import numpy as np
from scipy.special import lambertw as W
import scipy.optimize
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
def shockley(v_arr, Is, Rs, n, Vt):
    i_fit = ((n*Vt)/Rs)*W(((Is*Rs)/(n*Vt))*(np.exp(v_arr/(n*Vt))))
    i_fit = i_fit.real
    return i_fit
v_arr = np.array((0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,
0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,
0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,
0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,
0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1,1.01,
1.02,1.03,1.04,1.05,1.06,1.07,1.08,1.09,1.1,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.2,1.21,
1.22,1.23,1.24,1.25,1.26,1.27,1.28,1.29,1.3,1.31,1.32,1.33,1.34,1.35,1.36,1.37,1.38,1.39,1.4,1.41,
1.42,1.43,1.44,1.45,1.46,1.47,1.48,1.49,1.5,1.51,1.52,1.53,1.54,1.55,1.56,1.57,1.58,1.59))
i_arr = np.array((-3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
-9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
-1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
-6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871))
guessIs = 2e-9
guessRs = 14.5
guessn = 1.3
guessVt = 0.026
guess_fit = shockley(v_arr, guessIs, guessRs, guessn, guessVt)
plt.plot(v_arr, guess_fit, label='guess')
plt.plot(v_arr, i_arr, label='data')
guess = np.array([guessIs, guessRs, guessn, guessVt])
popt, pcov = scipy.optimize.curve_fit(shockley, v_arr, i_arr, p0=guess, bounds=((2e-9, 0, 1, 0), (1, 20, 2, 1)))
Is, Rs, n, Vt = popt
print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')
fit = shockley(v_arr, Is, Rs, n, Vt)
plt.plot(v_arr, fit, label='fit')
plt.yscale('log')
plt.xlabel('V')
plt.ylabel('I')
plt.legend()
plt.show()
The plots show that the initial guess is quite close, but the fit always diverges. I have added bounds, which help a bit, but I don't know how much variability my results will have, so I can't make them too tight. I have tried changing the method to 'dogbox' and 'lm' (as per this example); 'dogbox' gets closest but seems to just follow the guess.
This is not a linear dataset (or function), so a fit in linear space is not appropriate - or, more specifically, it is not the first fit you should perform; you can use it as a secondary finishing step. Since the data are exponential, it's crucial that they be fitted and plotted on a semilog scale, which makes it obvious that your fit is way out of whack. Exponential functions are very touchy when it comes to initial conditions; below I demonstrate how to use sliders to get "in the neighbourhood" by hand when choosing initial conditions.
import numpy as np
from matplotlib.widgets import Slider
from scipy.special import lambertw as W
import scipy.optimize
import matplotlib
import matplotlib.pyplot as plt
def shockley(
    v: np.ndarray,
    Is: float,
    Rs: float,
    n: float,
    Vt: float,
) -> np.ndarray:
    # Explicit Lambert-W solution for a diode with series resistance
    i_fit = (n*Vt/Rs) * W(Is*Rs/(n*Vt) * np.exp(v/(n*Vt)))
    return i_fit.real


def shockley_log(
    v: np.ndarray,
    Is: float,
    Rs: float,
    n: float,
    Vt: float,
) -> np.ndarray:
    # Fit in log space so the small-current decades still carry weight
    return np.log(shockley(v, Is, Rs, n, Vt))
def load_data() -> tuple[np.ndarray, np.ndarray]:
    v_arr = np.arange(0, 1.6, 0.01)
    i_arr = np.array((
        -3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
        -9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
        -1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
        -6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
        2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
        6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
        1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
        2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
        5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
        0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
        0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
        0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
        0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
        0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
        0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
        0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
        0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871,
    ))
    return v_arr, i_arr
def fit(
    v: np.ndarray,
    i: np.ndarray,
    guessIs: float = 2e-16,
    guessRs: float = 15,
    guessn: float = 0.2,
    guessVt: float = 0.2,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    p0 = np.array((guessIs, guessRs, guessn, guessVt))
    i_guess = shockley(v, *p0)
    popt, pcov = scipy.optimize.curve_fit(
        f=shockley_log,
        xdata=v, ydata=np.log(i), p0=p0,
        # 'lm' does not support bounds; these are left for the bounded methods
        # bounds=(
        #     (1e-16, 0, 0.05, 0),
        #     (1e-9, 20, 1, 1),
        # ),
        method='lm',
    )
    i_fit = shockley(v, *popt)
    return popt, i_guess, i_fit
def plot(
    v: np.ndarray,
    i: np.ndarray,
    i_guess: np.ndarray,
    i_fit: np.ndarray,
) -> plt.Figure:
    fig, ax = plt.subplots()
    ax.semilogy(v, i, label='experiment')
    ax.semilogy(v[:i_guess.size], i_guess, label='estimate')
    ax.semilogy(v[:i_fit.size], i_fit, label='fit')
    ax.set_xlabel('v')
    ax.set_ylabel('i')
    ax.legend()
    return fig
def show_sliders(
    v: np.ndarray,
    i: np.ndarray,
) -> tuple[plt.Figure, Slider, Slider, Slider, Slider]:
    fig: plt.Figure
    ax: plt.Axes
    fig, (ax, *slider_axes) = plt.subplots(ncols=5)
    p0 = 1e-9, 20, 0.2, 0.2
    ax.semilogy(v, i, label='experiment')
    line, = ax.semilogy(v, shockley(v, *p0), label='parametric')
    ax.set_xlabel('v')
    ax.set_ylabel('i')
    # The Is slider works in log10 space; note the 10** in update()
    is_slider = Slider(
        ax=slider_axes[0], label='Is', valmin=-20, valmax=-9, valinit=-16, orientation='vertical',
    )
    rs_slider = Slider(
        ax=slider_axes[1], label='Rs', valmin=0.1, valmax=100, valinit=p0[1], orientation='vertical',
    )
    n_slider = Slider(
        ax=slider_axes[2], label='n', valmin=0.01, valmax=0.5, valinit=p0[2], orientation='vertical',
    )
    vt_slider = Slider(
        # valmin kept above zero to avoid division by zero in shockley()
        ax=slider_axes[3], label='Vt', valmin=0.001, valmax=1, valinit=p0[3], orientation='vertical',
    )

    def update(val):
        line.set_ydata(shockley(v, 10**is_slider.val, rs_slider.val, n_slider.val, vt_slider.val))
        fig.canvas.draw_idle()

    is_slider.on_changed(update)
    rs_slider.on_changed(update)
    n_slider.on_changed(update)
    vt_slider.on_changed(update)
    return fig, is_slider, rs_slider, n_slider, vt_slider
def main() -> None:
    v, i = load_data()
    # Keep only the forward-bias section with positive current,
    # so that log(i) is defined for the log-space fit
    section = (v > 0.3) & (i > 0)
    v = v[section]
    i = i[section]
    matplotlib.use('TkAgg')
    # fig, *sliders = show_sliders(v, i)
    popt, i_guess, i_fit = fit(v=v, i=i)
    Is, Rs, n, Vt = popt
    print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')
    plot(v, i, i_guess, i_fit)
    plt.show()


if __name__ == '__main__':
    main()
Is=1.8087970458977566e-16, Rs=14.46916151330944, n=1.0978775079999148, Vt=0.031461129386302314
Further: notice that n and Vt are not meaningfully distinct degrees of freedom, since they only ever appear as the product n*Vt in this form of the Shockley equation. That means you can choose any value for n and still fit a Vt that appears correct. For your purposes, you can merge those two variables into a single nVt parameter during the fit - or, better, fix Vt at the thermal voltage for standard ambient temperature, 25.85 mV, and only optimise n. This gets you
Is=1.8087964456455226e-16, Rs=14.469165848153489, n=1.336188473508956
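As a minimal sketch of that reduced fit (the helper names and starting values here are illustrative, not from the code above): pin Vt at 25.85 mV and let curve_fit see only Is, Rs and n, still fitting in log space.
import numpy as np
from scipy.special import lambertw as W
import scipy.optimize

VT = 0.02585  # thermal voltage at standard ambient temperature, held fixed


def shockley_nvt(v: np.ndarray, Is: float, Rs: float, n: float) -> np.ndarray:
    # Same Lambert-W expression as shockley() above, with Vt pinned;
    # n alone absorbs the single n*Vt degree of freedom.
    return ((n*VT/Rs) * W(Is*Rs/(n*VT) * np.exp(v/(n*VT)))).real


def fit_fixed_vt(v: np.ndarray, i: np.ndarray) -> np.ndarray:
    # Log-space fit as before; p0 is only an illustrative starting point.
    popt, _ = scipy.optimize.curve_fit(
        f=lambda v, Is, Rs, n: np.log(shockley_nvt(v, Is, Rs, n)),
        xdata=v, ydata=np.log(i), p0=(1e-16, 15, 1.3), method='lm',
    )
    return popt
Call fit_fixed_vt(v, i) on the same masked v and i that main() builds above.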