I have this code that aims to fit the data in here https://drive.google.com/file/d/1uBrHxuftALiQTcTeBl-s6AHGiC8DGBWV/view?usp=sharing.
Where the function read_file
is used to extract the information in a proper way. I do not what i am doing wrong that the gaussian fit is not right as you can see in the image (Gaussian fit) , and the standard deviation for all the fitted gaussian is the same or it varies very little (1e-09). Can anyone help me?
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os
file_path='/content/drive/MyDrive/ProjetoXRF_Manuel/April2024/Amostra4.mca' # You need to change this to your directory
def read_file(file_path):
# Abrir el archivo
with open(file_path, 'r', encoding='latin1') as f:
data = []
live_time = None # Inicializar variable live_time
real_time = None # Inicializar variable real_time
for line in f:
if 'LIVE_TIME' in line:
live_time = line.strip() # Almacenar la línea que contiene LIVE_TIME
elif 'REAL_TIME' in line:
real_time = line.strip() # Almacenar la línea que contiene REAL_TIME
elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
# Analizar y almacenar datos numéricos como flotante único
row = [float(x) for x in line.split()]
data.extend(row)
# Convertir 'data' a un arreglo de numpy
return np.array(data), live_time, real_time
data, live_time, real_time = read_numeric_data(file_path)
a = -0.0076702251954372525
b = 0.03952691936704189
Data=data
peaks, _ = find_peaks(Data[:, 0] / maxim, height=0.1)
# Get peak values and indices
peak_values = Data[peaks] / maxim
peak_indices = peaks
# Definición de la función gaussiana
def gaus(x, a, x0, sigma):
return a * np.exp(-(x - x0) ** 2 / (2 * sigma ** 2))
limite=(10-a)/b
print(limite)
# Datos
x_values = a + b * np.linspace(0, 270, 270)
y_values = np.squeeze(Data[:270]) / np.max(Data[:270])
# Parámetros iniciales
sigma = 2 # initial value
standar=[]
# Ajuste de curvas para diferentes conjuntos de datos
for i in range(len(peak_values)):
mean = a + b * peak_indices[i]
amplitude = peak_values[i][0]
# Ajustar la curva para cada conjunto de datos
popt, pcov = curve_fit(gaus, x_values, y_values, p0=[amplitude, mean, 1])
# Visualización del ajuste
m,n,p= popt
plt.plot(x_values, gaus(x_values,amplitude,mean,p), label='fit')
print(mean, amplitude,"Desviación standar",p)
standar.append(p)
plt.scatter(x_values, y_values, label='Data')
plt.xlim(4, 10)
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()
The two main issues were:
(amplitude, mean, sigma)
parameters for each gaussian, and add the resulting gaussians together.sigma
, so I've used a better starting value.The modified code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.signal import find_peaks
file_path='Amostra4.mca'
def read_file(file_path):
# Abrir el archivo
with open(file_path, 'r', encoding='latin1') as f:
data = []
live_time = None # Inicializar variable live_time
real_time = None # Inicializar variable real_time
for line in f:
if 'LIVE_TIME' in line:
live_time = line.strip() # Almacenar la línea que contiene LIVE_TIME
elif 'REAL_TIME' in line:
real_time = line.strip() # Almacenar la línea que contiene REAL_TIME
elif all(char.isdigit() or char in ".-+e" for char in line.strip()):
# Analizar y almacenar datos numéricos como flotante único
row = [float(x) for x in line.split()]
data.extend(row)
# Convertir 'data' a un arreglo de numpy
return np.array(data), live_time, real_time
def multi_gaussian(x, *params):
eps = 1e-10
y = np.zeros_like(x)
for i in range(0, len(params), 3):
amplitude, mean, sigma = params[i:i+3]
y += amplitude * np.exp(-(x - mean)**2 / (2 * sigma**2 + eps))
return y
#Load and prepare data
data, live_time, real_time = read_file(file_path)
a = -0.0076702251954372525
b = 0.03952691936704189
y_start = 150
y_end = 270
data_y = data[y_start:y_end] / data[y_start:y_end].max()
data_x = a + b * np.linspace(y_start, y_end, num=len(data_y))
#Some initial estimates that could help with curve_fit() convergence
peak_indices, _ = find_peaks(data_y / data_y.max(), height=0.1)
peak_amplitudes = data_y[peak_indices]
peak_means = data_x[peak_indices]
peak_sigmas = [0.1] * len(peak_indices)
params_init = list(zip(peak_amplitudes, peak_means, peak_sigmas))
params_init = np.concatenate(params_init)
#Fit
params, params_cov = curve_fit(multi_gaussian, data_x, data_y, p0=params_init)
#Get a high-resolution interpolation of the fit
x_fine = np.linspace(data_x.min(), data_x.max(), num=500)
y_fit = multi_gaussian(x_fine, *params)
#
# Plot
#
fig, ax = plt.subplots(figsize=(8, 2))
#Data
ax.scatter(data_x, data_y, label='data', color='black', marker='o', s=20)
#Fitted curve
ax.plot(x_fine, y_fit, label='fit', linewidth=1.5, color='tab:purple')
#Annotate the estimated locations
[ax.axvline(mean, color='darkslategray', linewidth=1, linestyle=':') for mean in params[1::3]]
#Formatting
ax.set(xlabel='X', ylabel='Y', title='Data and fit')
ax.spines[['right', 'top']].set_visible(False)
ax.legend()