Tags: python, r, minimization

nlm function equivalent in Python (Non-Linear-Minimization)


I am trying to translate the nlm function for non-linear minimization from R's stats package to Python. The R help page says the function "carries out a minimization of the function f using a Newton-type algorithm".

The original, working R code is the following:

m=3
la1 = 1
la2 = 3
la3 = 7
del = matrix(0,1,m);del
del[1] = 0.5
del[2] = 0.2
del[3] = 1 - (del[1]+del[2])
del
cd = cumsum(del);cd
# Generate random data for a m mixture model
N = 100 # number of data points
unifvec = runif(N)
d1 = rpois(sum(unifvec < cd[1]),la1);d1
d2 = rpois(sum(unifvec > cd[1] & unifvec < cd[2]),la2)
d3 = rpois(sum(unifvec > cd[2]),la3);d3
data = c(d1,d2,d3);data # Data vector
# Functions for parameter transformation
logit <- function(vec) log(vec/(1-sum(vec)))
invlogit <- function(vec) exp(vec)/(1+sum(exp(vec)))


# Make function for the negative log-likelihood
f <- function(PAR) {
  M = length(PAR)
  m = ceiling(M/2)
  LA = exp(PAR[1:m]) # transform lambdas
  DELs = invlogit(PAR[(m+1):M]) # transform deltas
  DEL = c(DELs,1-sum(DELs))
  # Equation (1.1) on p. 9
  L = DEL[1]*dpois(data,LA[1])
  for(i in 2:m){
    L = L+DEL[i]*dpois(data,LA[i])
  }
  -sum(log(L))
}

# Define starting guess for optimization
par = c(2,4,7,0.5,0.2)
PAR = par
PAR[1:3] = log(par[1:3])
PAR[4:5] = logit(par[4:5])
PAR
# Optimize using nlm
res = nlm(f,PAR);res

with results:

$minimum
[1] 211.2481

$estimate
[1] -0.2827635  1.1449476  1.8346681  0.7380511 -0.3305408

$gradient
[1] -1.185185e-05 -1.372745e-05 -3.084352e-05  4.720846e-05
[5]  1.506351e-06

$code
[1] 1

$iterations
[1] 22

Doing this in Python, I found scipy.optimize.minimize, which I think does the same thing.
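
Since nlm is a Newton-type optimizer, a gradient-based method such as method='BFGS' (a quasi-Newton method that, like nlm, approximates gradients numerically when none are supplied) is presumably the closer analogue than the derivative-free Nelder-Mead I use below; a minimal sketch on a toy function:

from scipy.optimize import minimize

def rosen(x):
    # toy objective (Rosenbrock), standing in for the mixture likelihood
    return (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2

res = minimize(rosen, [0.0, 0.0], method='BFGS')  # jac=None -> numerical gradient
res.x  # approximately [1, 1]

My full attempt: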

import numpy as np
from scipy.stats import poisson

m = 3
la1 = 1
la2 = 3
la3 = 7
de = np.zeros(m);de
de[0] = 0.5
de[1] = 0.2
de[2] = 1 - (de[0]+de[1])
de
cd = np.cumsum(de);cd
N = 100 # number of data points
unifvec = np.random.uniform(0,1,N)
d1 = np.random.poisson(la1, np.sum(unifvec < cd[0]));d1
d2 = np.random.poisson(la2, np.sum((unifvec > cd[0]) & (unifvec < cd[1])));d2
d3 = np.random.poisson(la3, np.sum(unifvec > cd[1]));d3
data = np.concatenate((d1,d2,d3), axis=None);data # Data vector
# Functions for parameter transformation
def logit(vec):
    return np.log(vec/(1-np.sum(vec)))
def invlogit(vec):
    return np.exp(vec)/(1+np.sum(np.exp(vec)))
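
# (Sanity check: logit and invlogit are inverses on the first m-1 weights,
#  so the optimizer can search over unconstrained values, e.g.
#  v = np.array([0.5, 0.2]); np.allclose(invlogit(logit(v)), v) -> True)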
# Make function for the negative log-likelihood
def f(PAR):
    M = len(PAR)
    m = int(np.ceil(M/2))
    LA = np.exp(PAR[:m])         # transform lambdas
    DELs = invlogit(PAR[m:M]) # transform deltas
    DEL = np.concatenate((DELs, 1-np.sum(DELs)), axis=None)
    # Equation (1.1) on p. 9
    L = DEL[0]*poisson.pmf(data,LA[0])
    for i in range(1,m):
        L = L+DEL[i]*poisson.pmf(data,LA[i])
    return -np.sum(np.log(L))
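
# (Aside: the loop in f can also be written with broadcasting, an equivalent
#  vectorized form of the same mixture likelihood:
#      L = poisson.pmf(data[:, None], LA[None, :]) @ DEL
#  i.e. an N x m matrix of Poisson pmfs weighted by the mixing vector DEL.)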

# Define starting guess for optimization
par = np.array([2,4,7,0.5,0.2])
PAR = par.copy() # copy, so the transformations below do not modify par
PAR[0:3] = np.log(par[:3])
PAR[3:5] = logit(par[3:5])


from scipy.optimize import minimize
res = minimize(f, PAR, method='Nelder-Mead', tol=1e-6)
res

But the results are not similar to those in R:

final_simplex: (array([[   0.29287179,    1.87973654, -175.80084603,    3.0604109 ,
          -0.95479   ],
       [   0.29287179,    1.87973654, -175.80084639,    3.06041089,
          -0.95479   ],
       [   0.29287179,    1.87973654, -175.80084608,    3.0604109 ,
          -0.95479   ],
       [   0.29287179,    1.87973654, -175.80084587,    3.0604109 ,
          -0.95479   ],
       [   0.29287179,    1.87973654, -175.80084553,    3.0604109 ,
          -0.95478999],
       [   0.29287179,    1.87973654, -175.80084576,    3.0604109 ,
          -0.95478999]]), array([211.89342437, 211.89342437, 211.89342437, 211.89342437,
       211.89342437, 211.89342437]))
           fun: 211.89342437364002
       message: 'Optimization terminated successfully.'
          nfev: 782
           nit: 457
        status: 0
       success: True
             x: array([   0.29287179,    1.87973654, -175.80084603,    3.0604109 ,
         -0.95479   ])

The minimum is similar, but the estimates are not. Specifically, the third estimate is dead wrong.


Solution

  • I tested that, if you use the same data, you get a similar minimum. The two scripts simulate different random data sets (R's runif and NumPy's uniform generator cannot produce matching draws), so their fits are not directly comparable. In R:

    data <- c(0L, 1L, 0L, 1L, 1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 
    1L, 3L, 1L, 1L, 0L, 3L, 2L, 0L, 1L, 2L, 1L, 2L, 1L, 3L, 3L, 0L, 
    0L, 0L, 3L, 1L, 0L, 0L, 0L, 1L, 3L, 2L, 0L, 3L, 2L, 2L, 2L, 0L, 
    0L, 0L, 0L, 3L, 3L, 3L, 5L, 1L, 3L, 0L, 1L, 2L, 3L, 1L, 3L, 5L, 
    3L, 2L, 2L, 1L, 2L, 5L, 10L, 5L, 8L, 8L, 10L, 8L, 7L, 10L, 6L, 
    5L, 9L, 6L, 3L, 5L, 9L, 7L, 10L, 5L, 4L, 6L, 8L, 8L, 6L, 8L, 
    5L, 4L, 13L, 9L, 9L, 4L, 10L)
    PAR <- c(0.693147180559945, 1.38629436111989, 1.94591014905531, 0.510825623765991, 
    -0.405465108108164)
    f(PAR) 
    # 239.3489
    

    Output:

    res = nlm(f,PAR);
    res 
    
    $minimum
    [1] 228.2598
    
    $estimate
    [1] -1.3531276  0.6409517  1.9662910 -0.6879096  0.5427875
    
    $gradient
    [1] -1.228761e-05 -4.965273e-05  1.113862e-04  1.637090e-05 -2.287948e-05
    
    $code
    [1] 1
    
    $iterations
    [1] 32
    

    In Python:

    data = np.array([0, 1, 0, 1, 1, 2, 1, 1, 0, 0, 1, 1, 0, 0, 2, 
              1, 3, 1, 1, 0, 3, 2, 0, 1, 2, 1, 2, 1, 3, 3, 0, 
              0, 0, 3, 1, 0, 0, 0, 1, 3, 2, 0, 3, 2, 2, 2, 0, 
              0, 0, 0, 3, 3, 3, 5, 1, 3, 0, 1, 2, 3, 1, 3, 5, 
              3, 2, 2, 1, 2, 5, 10, 5, 8, 8, 10, 8, 7, 10, 6, 
              5, 9, 6, 3, 5, 9, 7, 10, 5, 4, 6, 8, 8, 6, 8, 
              5, 4, 13, 9, 9, 4, 10])
    PAR = np.array([ 0.69314718,  1.38629436,  1.94591015,  0.51082562, -0.40546511])
    
    f(PAR)
    #239.34891885662626
    

    Output

    res = minimize(f, PAR, method='Nelder-Mead', tol=1e-6);
    res
    
    final_simplex: (array([[-1.35304276,  0.64096297,  1.96629305, -0.68786541,  0.54278448],
           [-1.35304245,  0.64096303,  1.96629305, -0.68786503,  0.54278438],
           [-1.35304179,  0.64096308,  1.96629302, -0.68786502,  0.54278439],
           [-1.35304231,  0.64096294,  1.96629301, -0.6878652 ,  0.54278434],
           [-1.35304278,  0.64096297,  1.966293  , -0.68786511,  0.5427845 ],
           [-1.3530437 ,  0.64096284,  1.96629297, -0.68786577,  0.54278438]]), array([228.25982274, 228.25982274, 228.25982274, 228.25982274,
           228.25982274, 228.25982274]))
               fun: 228.259822735201
           message: 'Optimization terminated successfully.'
              nfev: 768
               nit: 478
            status: 0
           success: True
                 x: array([-1.35304276,  0.64096297,  1.96629305, -0.68786541,  0.54278448])
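
    Since the transformed-scale estimates now agree, you can back-transform them to the original parameter scale to read off the fitted rates and mixing weights (a quick sketch, reusing the invlogit helper and the res object from above):

    lambdas = np.exp(res.x[:3])                 # Poisson rates
    dels = invlogit(res.x[3:])                  # first m-1 mixing weights
    deltas = np.append(dels, 1 - np.sum(dels))  # full weight vector
    lambdas, deltas

    Applying the same back-transformation to R's $estimate gives matching values, so the two optimizers find the same solution; the discrepancy in the original post comes from the two scripts fitting different randomly simulated data sets.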