python numpy curve-fitting dtype

Unable to obtain the correct data type for curve_fit (e.g. unhashable type, setting an array element with a sequence, etc.)


I am working on a script that analyzes NMR relaxation rates using curve_fit, and I am bringing both the input and output values in from external files. When I actually try to run curve_fit I run into many error messages about the data types, such as the data being an unhashable type, an array being incorrectly converted to a sequence, the data having different shapes, etc. I have modified my script to ensure both arrays have the same data type (object string) and shape (36,), but I am still unable to run it without getting error messages. I sense there is a very simple fix and that I am just missing something, but I can't figure out what it is. Any help would be greatly appreciated!

This is the current full script:

import math 
import numpy as np
from numpy import *
import scipy
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import matplotlib.cm as cm
import nmrglue as ng
import sys
from mpl_toolkits.mplot3d import Axes3D
from pathlib import Path
import csv
import pandas as pd

def exp(x, a, b, c):
    """Exponential model ``a + c * exp(-b * x)`` used as the fit function.

    Args:
        x: Independent variable; a scalar or a NumPy array.
        a: Constant offset added to the exponential term.
        b: Rate constant multiplying ``x`` in the exponent.
        c: Amplitude of the exponential term.

    Returns:
        ``a + c * np.exp(-b * x)``, evaluated element-wise when ``x`` is an
        array.
    """
    # No reciprocal of ``b`` is computed here, so ``b == 0`` is a valid input
    # and simply yields ``a + c``.
    return a + c * np.exp(-b * x)

# Open the results file for writing; the handle stays bound to `out`.
# NOTE(review): nothing in the script as shown closes this handle.
out=open((f'/Users/haileyrude/Dropbox/OkunoNMR/1/output.txt'), 'w')

# Accumulators that the per-spectrum loop further down appends to.
List_area=[]
List_inten=[]

# Read the delay list with the csv module; every cell is returned as text.
with open(str(f'Okuno Lab Python Work/Okuno Test 1/tau.list'), 'r') as d:
    reader = csv.reader(d)
    prepretauList = list(reader)  # list of rows, each row a list of strings
    pretauList = np.array(prepretauList)
    tauList = pretauList.astype(str)  # astype(str) keeps text; no numeric conversion happens here

# Load one spectrum (test036.ft1) that is used to set up the axis and to pick
# the point whose intensity is tracked across all spectra.
dic,data = ng.pipe.read(str(Path(f'/Users/haileyrude/Dropbox/OkunoNMR/1/ft/test036.ft1')))#ZY

# Build a ppm scale and a unit-conversion object for dimension 0.
udic = ng.pipe.guess_udic(dic,data)
uc = ng.fileiobase.uc_from_udic(udic)
ppm_1H = uc.ppm_scale()
uc0 = ng.pipe.make_uc(dic,data,dim=0)
# Index bounds of the 6.5-9.5 ppm window. 9.5 ppm is used as the lower index
# (presumably because the ppm axis runs from high to low -- confirm).
ub=uc0("6.5ppm")
lb=uc0("9.5ppm")

# Restrict the axis and the data to that window.
ppm = ppm_1H[lb:ub]
inten = data[lb:ub]
# Largest real intensity inside the window ...
maxvalue=max(inten.real)
# ... located by value in the full spectrum, not just in the window.
pt=np.where(data.real==maxvalue)
intensity=data[pt]
maxpt=uc0.ppm(pt[0])
    
# NOTE(review): display() is not defined or imported in this script;
# presumably it is the IPython/Jupyter built-in -- confirm.
display(uc0.ppm(pt[0]))
# Counters advanced once per spectrum in the loop that follows.
off=0
count=0
# Read one spectrum per entry of tauList, record its window integral and the
# intensity at the reference point `pt`, and overlay the window on one plot.
for i in range(1, len(tauList)+1, 1):
    
    off=off+500
    count=count+1
    # Try the "test00<i>" file name first and fall back to "test0<i>" when
    # that read raises IOError.
    try:
        dic,data = ng.pipe.read(str(Path(f'/Users/haileyrude/Dropbox/OkunoNMR/1/ft/test00{str(i)}.ft1')))#ZY
    except IOError:
        dic,data = ng.pipe.read(str(Path(f'/Users/haileyrude/Dropbox/OkunoNMR/1/ft/test0{str(i)}.ft1')))#ZY

    #print(dic,data)
    # Define the integration area for the methyl, including some baseline.
    ppm = ppm_1H[lb:ub]
    inten = data[lb:ub]
    # data[pt] indexes with the tuple returned by np.where, so `intensity`
    # is an array rather than a scalar.
    intensity=data[pt]
    
    # Integrate: sum the points in the window and keep the real part.
    area=inten.sum()
    List_area.append(float(area.real)) 
    # Appends an array each time, so List_inten ends up a list of arrays.
    List_inten.append(intensity.real)

    # Colour table with len(tauList)+1 entries, indexed by `count`
    # (which runs from 1 to len(tauList)); rebuilt on every iteration.
    colors = cm.gist_heat(np.linspace(0, 1, len(tauList)+1))
    plt.xlim([6.5, 9.5])
    plt.plot(ppm, inten.real, color=colors[count]) 


# Flatten the delays into a 1-D array for the fit.
prex=array(tauList, dtype="object")
arx=prex.astype(str)  # NOTE(review): arx is never used afterwards
x=prex.flatten()  # built from prex, so x keeps dtype object

# Same for the intensities; here the str-typed copy is the one that is flattened.
prey=array(List_inten, dtype="object")
ary=prey.astype(str)
y=ary.flatten()

# NOTE(review): per the printed output in this post, x (dtype object) and
# y (dtype <U12) both hold text, not numbers, when they reach curve_fit.
popt, pcov = curve_fit(exp, x, y)
# One-sigma parameter errors from the diagonal of the covariance matrix.
errors=np.sqrt(pcov.diagonal())
print ('rate', popt[1], '+/-', errors[1] )   
# NOTE(review): `out` is passed as a positional value here, so this prints the
# file object to stdout; writing to the file would need print(..., file=out).
print(out,'rate', popt[1], '+/-', errors[1]) 
    

#print 'Relax rate:', popt[1], '+/-', float(errors[1])


# Plot the recorded intensities against the delays as markers and overlay the
# fitted curve as a red line.
rate_fig = plt.figure(1)
plt.plot(tauList, List_inten, marker='o', markersize=10, linestyle='None')
# NOTE(review): `xaxis` is not defined anywhere in the script as shown.
plt.plot(xaxis, exp(xaxis, *popt), marker='None', linestyle='-', linewidth=2, color='r')
plt.show()

When I run this exact script I get the error message: "unhashable type: 'numpy.ndarray'", but again whenever I try others I just get similar error messages about incompatible data types for curve_fit.

Additionally, while trying to understand the root of my problem I ran this cell:

print(x)
print(len(x))
print(x.shape)
print(x.dtype)

print(y)
print(len(y))
print(y.shape)
print(y.dtype)

and received this output:

x = ['0.001000' '0.001500' '0.002000' '0.002500' '0.003000' '0.004000'
 '0.005000' '0.007500' '0.010000' '0.012500' '0.015000' '0.017500'
 '0.020000' '0.022500' '0.025000' '0.027500' '0.030000' '0.035000'
 '0.050000' '0.075000' '0.100000' '0.125000' '0.150000' '0.175000'
 '0.250000' '0.500000' '0.750000' '1.000000' '1.250000' '1.500000'
 '1.750000' '2.000000' '2.500000' '3.000000' '3.500000' '5.000000']

len(x) = 36

x.shape = (36,)

x dtype = object

y = ['-157341216.0' '-156514192.0' '-156109792.0' '-155292736.0'
 '-155541808.0' '-154359088.0' '-153406112.0' '-152017536.0'
 '-150163360.0' '-148821312.0' '-147121408.0' '-145200544.0'
 '-143513360.0' '-142319280.0' '-140684704.0' '-139887536.0'
 '-138132720.0' '-135468448.0' '-127346128.0' '-114155104.0'
 '-101980152.0' '-90303376.0' '-78995040.0' '-68271560.0' '-38386176.0'
 '38851676.0' '89095032.0' '121053536.0' '142517744.0' '155447904.0'
 '163979280.0' '168583824.0' '174455648.0' '176974160.0' '177651056.0'
 '178662960.0']

len(y) = 36

y.shape = (36,)

y dtype = <U12

Again, I imagine there is a very simple solution, but I unfortunately can't seem to find it.


Solution

  • The main problem is probably the type of your data.

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import optimize
    

    You must cast it to a numeric type (float):

    # Delay values exactly as printed in the question: text, not numbers.
    x = [
        '0.001000', '0.001500', '0.002000', '0.002500', '0.003000', '0.004000',
        '0.005000', '0.007500', '0.010000', '0.012500', '0.015000', '0.017500',
        '0.020000', '0.022500', '0.025000', '0.027500', '0.030000', '0.035000',
        '0.050000', '0.075000', '0.100000', '0.125000', '0.150000', '0.175000',
        '0.250000', '0.500000', '0.750000', '1.000000', '1.250000', '1.500000',
        '1.750000', '2.000000', '2.500000', '3.000000', '3.500000', '5.000000',
    ]
    # Intensities exactly as printed in the question, also as text.
    y = [
        '-157341216.0', '-156514192.0', '-156109792.0', '-155292736.0',
        '-155541808.0', '-154359088.0', '-153406112.0', '-152017536.0',
        '-150163360.0', '-148821312.0', '-147121408.0', '-145200544.0',
        '-143513360.0', '-142319280.0', '-140684704.0', '-139887536.0',
        '-138132720.0', '-135468448.0', '-127346128.0', '-114155104.0',
        '-101980152.0', '-90303376.0', '-78995040.0', '-68271560.0',
        '-38386176.0', '38851676.0', '89095032.0', '121053536.0',
        '142517744.0', '155447904.0', '163979280.0', '168583824.0',
        '174455648.0', '176974160.0', '177651056.0', '178662960.0',
    ]

    # Parse the text into float64 arrays so they can be used for fitting.
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    

    Then rewrite your model as follows:

    def model(x, a, b, c):
        """Return ``a - c * exp(-b * x)`` for a scalar or array ``x``."""
        decay = np.exp(-b * x)
        return a - c * decay
    

    And you will be able to fit it like this:

    # Fit the model to the float arrays; the comment below shows the returned
    # pair as reported in this post: the fitted parameters (popt, in the order
    # a, b, c) followed by their covariance matrix (pcov).
    popt, pcov = optimize.curve_fit(model, x, y)
    # (array([1.78891661e+08, 1.75535824e+00, 3.35100511e+08]),
    #  array([[ 1.15003627e+11, -1.55092365e+03,  1.04426615e+11],
    #         [-1.55092365e+03,  4.71871162e-05, -1.02346030e+03],
    #         [ 1.04426615e+11, -1.02346030e+03,  1.27053653e+11]]))
    

    Which fits the dataset pretty well:

    enter image description here