pythonoptimizationcvxpycvxopt

Python: CVXPY SolverError


Purpose: I am trying to use cvxpy in python to maximize the dual_func, however I get the below SolverError, I believe it may be Variables having different dimensions, but cant seem to figure out the issue. I have tried using other solvers like ECOS, but to no avail. I am new to convex optimization, so would be great if someone can help point me in the right direction of the error in the code.

Problem: The code works in cvxpy version 0.4, but not in the latest cvxpy version, giving me the error:

SolverError: Either candidate conic solvers (['CVXOPT']) do not support the
cones output by the problem (SOC, ExpCone, PSD), or there are not enough 
constraints in the problem.

The dual_func is convex and is DCP, hence I am still not sure why it can't be solved.

Additional Question: Is is possible to use strict inequality constraints in cvxpy? The documentation states it is not possible as it does not make sense in the real world, however, I don't really understand why it is the case.

from cvxpy import *
alpha_k = Variable()
mu_k_1 = Variable(shape=(int(len(h_undl_list))))
mu_k_2 = Variable(shape=(int(len(h_undl_list))))
covar_k_1 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
covar_k_2 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)

# fitting the first 1000 returns with normal GMM
from sklearn import mixture
clf = mixture.GaussianMixture(n_components=2, max_iter=1000, tol=1e-7).fit(ret_df_period)
clf = mixture.GaussianMixture(n_components=2, max_iter=500, tol=1e-7, weights_init=[0.2, 0.8]).fit(ret_df_period)

print('weights')
print(clf.weights_)

# sorting the values to the normal regime or the stressed regime
if clf.weights_[0] > clf.weights_[1]:
    alpha_k.value = clf.weights_[0]
    mu_k_1.value = clf.means_[0]
    mu_k_2.value = clf.means_[1]
    covar_k_1.value = clf.covariances_[0]
    covar_k_2.value = clf.covariances_[1]
else:
    alpha_k.value = clf.weights_[1]
    mu_k_1.value = clf.means_[1]
    mu_k_2.value = clf.means_[0]
    covar_k_1.value = clf.covariances_[1]
    covar_k_2.value = clf.covariances_[0]
iter_t = 3
N = ret_df_period.shape[0] #time step, e.g. return at t
K = 2

for t_iter in range(iter_t):
    q_k = [alpha_k.value, 1 - alpha_k.value]
    mu_sk_list = [np.array(mu_k_1.value).flatten(), np.array(mu_k_2.value).flatten()]
    covar_sk_list = [covar_k_1.value, covar_k_2.value]
    q_posterior_list = []
    q_posterior_map = {}
    
    
    # E-Step
    import math
    # marginal probability, lambda_k * multivariate_normal.pdf, returns a matrix with K rows, N columns
    mar_prob_list = np.zeros((K,N))
    for k_idx_temp in range(0, K):
        for j_temp in range(0, N):
            mar_prob_list[k_idx_temp, j_temp] = (q_k[k_idx_temp] * multivariate_normal.pdf(ret_df_period.iloc[j_temp].values, mean = mu_sk_list[k_idx_temp], cov = covar_sk_list[k_idx_temp]))
    
    # lambda_k_n, q_n, q_posterior_distribution, returns a matrix with K rows, N columns
    for k_idx in range(0, K):
        for j in range(0, N):
            joint_prob_k = q_k[k_idx] * multivariate_normal.pdf(ret_df_period.iloc[j], mean = mu_sk_list[k_idx], cov = covar_sk_list[k_idx])
            q_posterior = joint_prob_k / np.sum(mar_prob_list[:, j])
            q_posterior_list.append(q_posterior)
        
        q_posterior_map[k_idx] = q_posterior_list
        q_posterior_list = []
    
    
    #####a priori after E-Step, i.e. M-Step
    # Empirical probability, returns the two lambda_sk values
    alpha_sk_list = []
    for k_idx in range(0, K):
        alpha_sk = 0
        for j in range(0, N):
            alpha_sk += q_posterior_map[k_idx][j]
        alpha_sk_list.append(alpha_sk / N)
    alpha_sk_list[1] = 1 - alpha_sk_list[0]
        
    
    # Mean, returns two sets of means
    mu_sk_list = []
    weighted_xs_map = {}
    for k_idx in range(0, K):
        weighted_xs_map[k_idx] = np.empty([int(len(h_undl_list)), N])
    
    for k_idx in range(0, K):
        mu_sk = 0
        for j in range(0, N):
            weighted_x = (q_posterior_map[k_idx][j]) * ret_df_period.iloc[j].values
            mu_sk += weighted_x
            weighted_xs_map[k_idx][:, j] = weighted_x / alpha_sk_list[k_idx] # should divide twice of alpha_sk here? Come back and double check later!!!
        mu_sk_list.append(mu_sk / N / alpha_sk_list[k_idx])
    
    # Cov, returns two cov
    covar_sk_list = []
    for k_idx in range(0, K):
        sum_covar = 0
        for j in range(0, N):
            w0_temp = (ret_df_period.iloc[j] - mu_sk_list[k_idx]).values
            w0_covar = np.outer(w0_temp, w0_temp)
            weighted_covar = (q_posterior_map[k_idx][j]) * w0_covar
            sum_covar += weighted_covar
        sum_covar = sum_covar / (alpha_sk_list[k_idx] * N)
        covar_sk_list.append(sum_covar)


    #####
    #information params
    n_sk_list = []
    for k_idx in range(0, K - 1):
        n_sk = np.log(alpha_sk_list[k_idx] / (1 - (np.sum(alpha_sk_list[: -1]))))
        n_sk_list.append(n_sk)
    
    m_sk_list = []
    for k_idx in range(0, K):
        m_sk = np.dot(np.linalg.inv(covar_sk_list[k_idx]), mu_sk_list[k_idx])
        m_sk_list.append(m_sk)
    
    S_sk_list = []
    for k_idx in range(0, K):
        S_sk = np.linalg.inv(covar_sk_list[k_idx])
        S_sk_list.append(S_sk)
    
    
    alpha_sk_1 = Parameter(nonneg=True)
    alpha_sk_2 = Parameter(nonneg=True)
    n_sk_pr = Parameter()
    dim = Parameter(nonneg=True)
    m_sk_1 = Parameter(shape=(int(len(h_undl_list)),))
    m_sk_2 = Parameter(shape=(int(len(h_undl_list)),))
    alpha_sk_1.value = alpha_sk_list[0]
    alpha_sk_2.value = alpha_sk_list[1]
    n_sk_pr.value = n_sk_list[0]
    m_sk_1.value = m_sk_list[0]
    m_sk_2.value = m_sk_list[1]
    dim.value = int(len(h_undl_list))
    covar_k_tilde = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
    
    
    dual_func = entr(alpha_k) + entr(1 - alpha_k) + \
    alpha_sk_1*0.5*log_det(covar_k_1) + alpha_sk_1*dim*0.5*log(2*np.e*np.pi) + alpha_sk_2*0.5*log_det(covar_k_1 + covar_k_tilde) + alpha_sk_2*dim*0.5*log(2*np.e*np.pi) + alpha_k*n_sk_pr + \
    alpha_sk_1*(mu_k_1.T)*m_sk_1 - alpha_sk_1*0.5*trace(covar_k_1*S_sk_list[0]) - alpha_sk_1*0.5*quad_form(mu_k_1, S_sk_list[0]) + \
    alpha_sk_2*(mu_k_1.T)*m_sk_2 - alpha_sk_2*0.5*trace((covar_k_1 + covar_k_tilde)*S_sk_list[1]) - alpha_sk_2*0.5*quad_form(mu_k_2, S_sk_list[1])


    prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
    
    print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
    print(t_iter)
    covar_k_2.value = covar_k_1.value + covar_k_tilde.value
    print(is_pos_def(covar_k_1.value))
    print(is_pos_def(covar_k_tilde.value))
    print(is_pos_def(covar_k_2.value))
    
    covar_k_1_temp = np.array(covar_k_1.value)
    covar_k_2_temp = np.array(covar_k_2.value)
    covar_k_tilde_temp = np.array(covar_k_tilde.value)
    mu_k_1_temp = np.array(mu_k_1.value).flatten()
    mu_k_2_temp = np.array(mu_k_2.value).flatten()
    D_1 = np.diag(np.sqrt(np.diag(covar_k_1_temp)))
    correl_k_1 = np.dot(np.dot(np.linalg.inv(D_1), covar_k_1_temp), np.linalg.inv(D_1))
    D2 = np.diag(np.sqrt(np.diag(covar_k_2_temp)))
    correl_k_2 = np.dot(np.dot(np.linalg.inv(D_2), covar_k_2_temp), np.linalg.inv(D_2))

Error:

---------------------------------------------------------------------------
SolverError                               Traceback (most recent call last)
<ipython-input-28-1e1e9c7b0d73> in <module>
    109     prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
    110 
--> 111     print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
    112     print(t_iter)
    113     covar_k_2.value = covar_k_1.value + covar_k_tilde.value

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in solve(self, *args, **kwargs)
    287         else:
    288             solve_func = Problem._solve
--> 289         return solve_func(self, *args, **kwargs)
    290 
    291     @classmethod

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _solve(self, solver, warm_start, verbose, parallel, gp, qcp, **kwargs)
    565                     solver, warm_start, verbose, **kwargs)
    566 
--> 567         self._construct_chains(solver=solver, gp=gp)
    568         data, solving_inverse_data = self._solving_chain.apply(
    569             self._intermediate_problem)

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
    508 
    509             except Exception as e:
--> 510                 raise e
    511 
    512     def _solve(self,

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
    503                 self._solving_chain = \
    504                     construct_solving_chain(self._intermediate_problem,
--> 505                                             candidate_solvers)
    506 
    507                 self._cached_chain_key = chain_key

~\Anaconda3\lib\site-packages\cvxpy\reductions\solvers\solving_chain.py in construct_solving_chain(problem, candidates)
     93                       "enough constraints in the problem." % (
     94                           candidates['conic_solvers'],
---> 95                           ", ".join([cone.__name__ for cone in cones])))
     96 
     97 

SolverError: Either candidate conic solvers (['CVXOPT']) do not support the cones output by the problem (SOC, ExpCone, PSD), or there are not enough constraints in the problem.

Code referenced from: https://www.maths.ox.ac.uk/system/files/attachments/TT18_dissertation_Vu_0.pdf


Solution

  • Your objective contains PSD matrices, log_det and entr, therefore you need a solver that supports both the semidefinite cone and the exponential cone. As far as I know only MOSEK 9 and SCS can do both of these for you in CVXPY.

    Strict inequalities make no practical sense because of precision in floating point arithmetic and because of issues with non-attainment. Consider minimizing x subject to x>0. Problems with strict inequalities tend to have the optimal point in the limit, which means you are incorporating the non-strict closure anyway.