I am doing an optimization where the parameters to be estimated form a variance-covariance matrix, and I am using differential_evolution from scipy for the optimization.
    import numpy as np
    from scipy.optimize import differential_evolution, NonlinearConstraint

    def likelihood(params):
        p = params[0]  # this parameter is not related to the var-covar matrix

        # Rebuild the correlation matrix X from the packed upper-triangular parameters
        dev = np.diag(params[8:15])
        X = np.eye(dev.shape[0])
        X[0, 1:7] = params[15:21]
        X[1, 2:7] = params[21:26]
        X[2, 3:7] = params[26:30]
        X[3, 4:7] = params[30:33]
        X[4, 5:7] = params[33:35]
        X[5, 6:7] = params[35:36]
        X = X + X.T - np.diag(np.diag(X))
        cov = dev.dot(X).dot(dev)
        L = np.linalg.cholesky(cov)  # raises LinAlgError if cov is not positive definite

        # Example objective from the scipy documentation (Ackley function)
        # plus params.sum(), for reproduction only.
        arg1 = -0.2 * np.sqrt(0.5 * (params[0] ** 2 + params[1] ** 2))
        arg2 = 0.5 * (np.cos(2. * np.pi * params[0]) + np.cos(2. * np.pi * params[1]))
        return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e + params.sum()

    def constrain(params):
        dev = np.diag(params[8:15])
        X = np.eye(dev.shape[0])
        X[0, 1:7] = params[15:21]
        X[1, 2:7] = params[21:26]
        X[2, 3:7] = params[26:30]
        X[3, 4:7] = params[30:33]
        X[4, 5:7] = params[33:35]
        X[5, 6:7] = params[35:36]
        X = X + X.T - np.diag(np.diag(X))
        cov = dev.dot(X).dot(dev)
        # 0 if cov is positive definite (Cholesky succeeds), 1 otherwise
        try:
            np.linalg.cholesky(cov)
            return 0
        except np.linalg.LinAlgError:
            return 1

    def print_de(x, convergence):
        print(x, "convergence: ", convergence)

    bounds = (((0.0, 1.0),) * 8 + ((0.0, 1.0),) + ((0.0, 0.5),) * 6 + ((-1.0, 1.0),) * 21)

    differential_evolution(likelihood, bounds=bounds, disp=True,
                           callback=print_de,
                           constraints=NonlinearConstraint(constrain, 0, 0))
For the first 100 steps reported by the callback, f(x)=inf, and the search progresses very fast. I verified one of the reported x vectors, and it does not satisfy the constraint, so I believe these first 100 steps are all spent ruling out candidates x that do not meet the constraint.
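For reference, the check is just a call to the constraint function on a candidate vector; a minimal sketch reusing constrain and bounds from above, where the random x only stands in for one of the vectors printed by the callback:

    # Stand-in for a candidate vector printed by print_de.
    rng = np.random.default_rng(0)
    x = np.array([rng.uniform(lo, hi) for lo, hi in bounds])

    print(constrain(x))  # 0 if cov is positive definite, 1 if the Cholesky fails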
However, this seems to cause a non-convergence problem. See here. So my question is: how can I validate the variance-covariance matrix without affecting convergence?
Edit: I added an optimization problem from the documentation, for reproduction only. In this example, the first couple of steps also have f(x)=inf, but it does give convergence > 0, which is different from my actual problem, where I am only getting convergence=0.0 so far (at step 458).
Taking a series of wild guesses about the nature of your data, you don't actually need differential evolution. Use plain old minimize, and, as I said in the comments, don't attempt a cholesky in the constraint. Instead, use a continuous proxy metric that implies positive definiteness when the bounds are met (here, the eigenvalues of the covariance matrix, which must all be non-negative). This runs without problem:
    import numpy as np
    import numpy.linalg
    from scipy.optimize import minimize, NonlinearConstraint


    def unpack(params: np.ndarray) -> tuple[
        float,       # p
        np.ndarray,  # variable means
        np.ndarray,  # standard errors
        np.ndarray,  # correlation coefficients
        np.ndarray,  # covariance
    ]:
        (p,), means, dev_diag, X_triu = np.split(params, (1, 8, 15))
        dev = np.diag(dev_diag)
        n = dev_diag.size
        # Fill the strict upper triangle, then symmetrise with a unit diagonal
        X = np.zeros((n, n))
        X[np.triu_indices(n=n, k=1)] = X_triu
        X += X.T + np.eye(n)
        cov = dev @ X @ dev
        return p, means, dev, X, cov


    def likelihood(params: np.ndarray) -> float:
        p, means, dev, X, cov = unpack(params)
        try:
            L = np.linalg.cholesky(cov)
        except numpy.linalg.LinAlgError:
            # Penalise non-positive-definite candidates with a large cost
            return means.size**2
        return p - L.sum()  # bogus


    def positive_definite(params: np.ndarray) -> np.ndarray:
        # Continuous proxy: all eigenvalues non-negative implies positive semi-definiteness
        p, means, dev, X, cov = unpack(params)
        return np.real(np.linalg.eigvals(cov))


    bounds = (
        (( 0.0, 1.0),)*8
        + (( 0.0, 1.0),)
        + (( 0.0, 0.5),)*6
        + ((-1.0, 1.0),)*21
    )

    result = minimize(
        fun=likelihood,
        x0=np.concatenate((
            (0,),
            np.zeros(7),
            np.ones(7),
            np.zeros(7*6//2),
        )),
        bounds=bounds,
        constraints=NonlinearConstraint(positive_definite, lb=0, ub=np.inf),
    )
    assert result.success

    p, means, dev, X, cov = unpack(result.x)
    np.set_printoptions(linewidth=200)
    print('cost:', result.fun)
    print('p:', p)
    print('means:', means)
    print('dev:')
    print(dev)
    print('X:')
    print(X)
    print('cov:')
    print(cov)
    print('eigenvalues:', positive_definite(result.x))
cost: -7.2387859198084925
p: 9.7150304110818e-16
means: [7.27948618e-15 5.87216902e-15 7.23738763e-14 2.36667516e-14 1.04145402e-15 3.84911146e-14 7.87661099e-15]
dev:
[[1. 0. 0. 0. 0. 0. 0. ]
[0. 0.5 0. 0. 0. 0. 0. ]
[0. 0. 0.5 0. 0. 0. 0. ]
[0. 0. 0. 0.5 0. 0. 0. ]
[0. 0. 0. 0. 0.5 0. 0. ]
[0. 0. 0. 0. 0. 0.5 0. ]
[0. 0. 0. 0. 0. 0. 0.5]]
X:
[[1. 0.70748057 0.57765563 0.50015331 0.4474081 0.40859447 0.37793046]
[0.70748057 1. 0.81654105 0.70737526 0.63253508 0.57763506 0.53445386]
[0.57765563 0.81654105 1. 0.8663462 0.77468924 0.70720782 0.65472736]
[0.50015331 0.70737526 0.8663462 1. 0.89428674 0.81661324 0.75591993]
[0.4474081 0.63253508 0.77468924 0.89428674 1. 0.91300002 0.84509787]
[0.40859447 0.57763506 0.70720782 0.81661324 0.91300002 1. 0.92577982]
[0.37793046 0.53445386 0.65472736 0.75591993 0.84509787 0.92577982 1. ]]
cov:
[[1. 0.35374028 0.28882782 0.25007665 0.22370405 0.20429723 0.18896523]
[0.35374028 0.25 0.20413526 0.17684382 0.15813377 0.14440876 0.13361346]
[0.28882782 0.20413526 0.25 0.21658655 0.19367231 0.17680196 0.16368184]
[0.25007665 0.17684382 0.21658655 0.25 0.22357168 0.20415331 0.18897998]
[0.22370405 0.15813377 0.19367231 0.22357168 0.25 0.22825 0.21127447]
[0.20429723 0.14440876 0.17680196 0.20415331 0.22825 0.25 0.23144496]
[0.18896523 0.13361346 0.16368184 0.18897998 0.21127447 0.23144496 0.25 ]]
eigenvalues: [1.72809096 0.52824861 0.12663946 0.05380617 0.03070819 0.01962472 0.01288189]
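If you do need differential_evolution (for instance, because the real likelihood is multimodal), the same eigenvalue-based constraint should drop in unchanged; a minimal sketch under that assumption, reusing likelihood, positive_definite, and bounds from above:

    from scipy.optimize import differential_evolution

    # The smooth eigenvalue constraint replaces the binary Cholesky check, so the
    # solver sees how far a candidate is from feasibility rather than a flat 0/1.
    result = differential_evolution(
        likelihood,
        bounds=bounds,
        constraints=NonlinearConstraint(positive_definite, lb=0, ub=np.inf),
        seed=0,        # for reproducibility
        polish=False,  # skip the final gradient-based polish
    )
    print(result.x, result.fun)

As a side note, since cov is symmetric by construction, np.linalg.eigvalsh would also work in positive_definite; it returns real eigenvalues directly and avoids the np.real cast.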