python matplotlib curve-fitting scipy-optimize astronomy

What function would best fit the data I have from a galaxy?

I have the following set of data:

surface_brightnesses_o2 = [12076.0616666451, 11850.730704516911, 10265.598145816548, 9120.859898168235, 7070.26133100111, 5636.138833975608, 3968.1608109082404, 2923.2839406153525, 1963.9315683870766, 1417.3534005331746, 953.9023540784231, 705.6331341427699, 494.19332394388607, 368.6833467905476, 266.41823769096874, 209.98748543636287, 162.17577134818487, 125.70474388251918, 99.72308185010249, 77.89696236284223, 53.44842864009773, 44.01192443651109, 35.52192383706094, 28.055033719366026]

surface_brightnesses_o3 = [24172.942124480545, 23257.99074788583, 19560.86193185194, 16867.86523112749, 12362.182457744273, 9447.974865736134, 6155.667579526176, 4233.309154367383, 2589.6992946467008, 1744.3756532539348, 1096.6861498588305, 768.600975237508, 512.7340397075068, 378.58271663510016, 268.4441550825379, 206.52758729119557, 155.45645416835472, 124.71693391104529, 97.34230151849876, 79.90134896492059, 63.519334039447266, 52.12382464229779, 41.91733978896593, 37.68365343589249, 31.54091147651983, 25.80764998552268, 22.808177293717083, 20.4718551088832, 16.05156984850126, 15.497358990115051, 15.42389243808505, 13.54177847744223]

radii_o2 = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0]

radii_o3 = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14, 14.5, 15, 15.5, 16]

surface_brightnesses_error_o2 = 
[109.89113552  85.30012943  80.8548183   76.55283021  66.49162753
  58.35388488  49.4425817   43.48019603  36.48439283  32.13758154
  28.57971998  26.30618542  24.27602806  23.10048171  22.01106869
  21.3172123   20.77203895  20.41962288  20.12573286  19.8928839
  19.84192745  19.80754151  19.6515864   19.60323267]
  
  surface_brightnesses_error_o3 = 
  [155.47650023  84.28555314  74.17986129  66.93258861  54.67881726
  46.5099896   36.86637245  30.71396278  25.45559327  22.40018842
  19.83606727  18.43327984  16.94700871  16.13059484  15.55795461
  15.155422    14.7707935   14.59604581  14.30144021  14.13502224
  14.04555569  13.9530354   14.01473729  14.13623735  14.16959504
  14.1342218   13.9836842   13.87870645  13.88701116  13.91734777
  13.96048525  13.98621865]

I am trying to plot a fit such that the yscale (surface brightnesses) is log and the xscale (radii) is linear. I would also like to incorporate the errors for O2 and O3 in the corresponding plots for the surface brightnesses of O2 and O3.

I do not want to take log of the surface brightness values, I just want to plot the data as it is and set the yscale to log. However, I couldn't find a function that fits the data correctly.

I would appreciate some input on what would be a good fit here, and how to code it in.

I tried fitting a Sersic function, which is a brightness profile function used to study the surface brightness profiles of galaxies.

fig, ax = plt.subplots(figsize=(10, 7))

# Define Sersic function
def sersic(r, I_e, R_e, n):
    b_n = 1.9992*n - 0.3271  
    return I_e * np.exp(-b_n * ((r/R_e)**(1/n) - 1))

# Fit the model to the O2 data
popt_o2, pcov_o2 = curve_fit(sersic, radii_o2, surface_brightnesses_o2, sigma=surface_brightnesses_error_o2, p0=[100000, 16, 2])

# Fit the model to the O3 data
popt_o3, pcov_o3 = curve_fit(sersic, radii_o3, surface_brightnesses_o3, sigma=surface_brightnesses_error_o3, p0=[10000, 16, 2])

# O2 data with error bars and fitted line
plt.errorbar(radii_o2, surface_brightnesses_o2, yerr=surface_brightnesses_error_o2, fmt='o', label='O2 data', capsize=4)
plt.plot(radii_o2, sersic(radii_o2, *popt_o2), 'r-', label='O2 fit: I_e=%5.3f, R_e=%5.3f, n=%5.3f' % tuple(popt_o2), color = 'blue')

# O3 data with error bars and fitted line
plt.errorbar(radii_o3, surface_brightnesses_o3, yerr=surface_brightnesses_error_o3, fmt='o', label='O3 data', capsize=4)
plt.plot(radii_o3, sersic(radii_o3, *popt_o3), 'b-', label='O3 fit: I_e=%5.3f, R_e=%5.3f, n=%5.3f' % tuple(popt_o3), color = 'red')

plt.xlabel('Radii')
plt.ylabel('Surface Brightness')
plt.yscale('log')
plt.ylim(1, 30000)  # Adjust the y-axis limits here
plt.title('Sersic Fit to Surface Brightness vs Radii for O2 and O3')
plt.legend()

plt.show()

Sersic Plot

And then I tried fitting a log-Gaussian plot:

# Define the log-Gaussian function to fit to the data
def log_gaussian(x, amp, cen, wid):
    return amp * np.exp(-(np.log(x) - cen)**2 / wid**2)

# Initial guess for parameters (necessary for log-Gaussian)
popt_o2, pcov_o2 = curve_fit(power_law, radii_o2, surface_brightnesses_o2)
popt_o3, pcov_o3 = curve_fit(power_law, radii_o3, surface_brightnesses_o3

# Fit the log-Gaussian model to the data
params_o2, _ = curve_fit(log_gaussian, radii_o2, surface_brightnesses_o2, p0_o2)
params_o3, _ = curve_fit(log_gaussian, radii_o3, surface_brightnesses_o3, p0_o3)

# Generate points for the fitted log-Gaussian function
fit_o2 = power_law(radii_smooth_o2, *popt_o2)
fit_o3 = power_law(radii_smooth_o3, *popt_o3)

# Create the plot
plt.figure(figsize=(10, 6))

# Plot the original data
plt.errorbar(radii_o2, surface_brightnesses_o2, yerr=surface_brightnesses_error_o2, fmt='o', label='Data O2', capsize=4)
plt.errorbar(radii_o3, surface_brightnesses_o3, yerr=surface_brightnesses_error_o3, fmt='o', label='Data O3', capsize=4)

# Plot the fitted log-Gaussian function
plt.plot(radii_fit, fit_o2, label='Fit O2', color = 'blue')
plt.plot(radii_fit, fit_o3, label='Fit O3', color = 'red')

# Decorate the plot and set yscale to log
plt.xlabel('Radii')
plt.ylabel('Surface Brightnesses')
plt.title('Surface Brightnesses vs Radii')

plt.legend()
plt.yscale('log')

# Show the plot
plt.show()

enter image description here

Solution

Use a different model, and when you do, perform a log-fit. You've applied your log on x when I believe you should apply it on y during fit.

There's an infinite number of models to choose from; which are scientifically valid is up to you to determine. One that has a loosely reasonable fit is a generalized Gaussian with a linear decay term; there are others.

import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit

surface_brightnesses_o2 = np.array([
    12076.0616666451, 11850.730704516911, 10265.598145816548, 9120.859898168235, 7070.26133100111,
    5636.138833975608, 3968.1608109082404, 2923.2839406153525, 1963.9315683870766, 1417.3534005331746,
    953.9023540784231, 705.6331341427699, 494.19332394388607, 368.6833467905476, 266.41823769096874,
    209.98748543636287, 162.17577134818487, 125.70474388251918, 99.72308185010249, 77.89696236284223,
    53.44842864009773, 44.01192443651109, 35.52192383706094, 28.055033719366026
])

surface_brightnesses_o3 = np.array([
    24172.942124480545, 23257.99074788583, 19560.86193185194, 16867.86523112749, 12362.182457744273,
    9447.974865736134, 6155.667579526176, 4233.309154367383, 2589.6992946467008, 1744.3756532539348,
    1096.6861498588305, 768.600975237508, 512.7340397075068, 378.58271663510016, 268.4441550825379,
    206.52758729119557, 155.45645416835472, 124.71693391104529, 97.34230151849876, 79.90134896492059,
    63.519334039447266, 52.12382464229779, 41.91733978896593, 37.68365343589249, 31.54091147651983,
    25.80764998552268, 22.808177293717083, 20.4718551088832, 16.05156984850126, 15.497358990115051,
    15.42389243808505, 13.54177847744223
])

radii_o2 = np.array([
    0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5,
    10.0, 10.5, 11.0, 11.5, 12.0
])

radii_o3 = np.array([
    0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5,
    10.0, 10.5, 11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14, 14.5, 15, 15.5, 16
])

surface_brightnesses_error_o2 = [
    109.89113552,  85.30012943,  80.85481830,  76.55283021,  66.49162753,
    58.353884880,  49.44258170,  43.48019603,  36.48439283,  32.13758154,
    28.579719980,  26.30618542,  24.27602806,  23.10048171,  22.01106869,
    21.317212300,  20.77203895,  20.41962288,  20.12573286,  19.89288390,
    19.841927450,  19.80754151,  19.65158640,  19.60323267]

surface_brightnesses_error_o3 = [
    155.47650023, 84.28555314, 74.17986129, 66.93258861, 54.67881726,
     46.50998960, 36.86637245, 30.71396278, 25.45559327, 22.40018842,
     19.83606727, 18.43327984, 16.94700871, 16.13059484, 15.55795461,
     15.15542200, 14.77079350, 14.59604581, 14.30144021, 14.13502224,
     14.04555569, 13.95303540, 14.01473729, 14.13623735, 14.16959504,
     14.13422180, 13.98368420, 13.87870645, 13.88701116, 13.91734777,
     13.96048525, 13.98621865]


def gaussian(x: np.ndarray, amp: float, cen: float, wid: float, pow: float, slope: float, off: float) -> np.ndarray:
    return amp * np.exp(-np.abs((x - cen)/wid)**pow) + slope*x + off


def log_gaussian(x: np.ndarray, *params: float) -> np.ndarray:
    return np.log(gaussian(x, *params))


ax: plt.Axes
fig, ax = plt.subplots()

for title, brightness, radii, error, guess in (
    (
        'O2', surface_brightnesses_o2, radii_o2, surface_brightnesses_error_o2,
        (1e4, 0, 1, 2, 0, 0),
    ),
    (
        'O3', surface_brightnesses_o3, radii_o3, surface_brightnesses_error_o3,
        (1e4, 0, 1, 2, 0, 0),
    ),
):
    ax.errorbar(radii, brightness, yerr=error, fmt='o', capsize=4, label=f'{title} data')
    # ax.plot(radii, gaussian(radii, *guess), label=f'{title} guess')
    fit, _ = curve_fit(
        f=log_gaussian, xdata=radii, ydata=np.log(brightness), p0=guess,
        bounds=(
            (  1, -20,  0.01,  0.5, -1e6, -1e6),
            (1e9,  20, 10.00, 10.0,  1e6,  1e6),
        ),
    )
    print(fit)
    plot_radii = np.linspace(start=fit[1], stop=max(radii_o2.max(), radii_o3.max()), num=200)
    ax.plot(plot_radii, gaussian(plot_radii, *fit), label=f'{title} fit')

plt.title('Gaussian Fit to Surface Brightness vs Radii for O2 and O3')
ax.set_xlabel('Radii')
ax.set_ylabel('Surface brightness')
ax.set_yscale('log')
ax.set_ylim(1, 1e5)
ax.legend()
plt.show()