optimization scikit-learn gekko ray-tune elasticnet

Gekko solutions not found while trying to implement an elastic net regression

I am presently trying to build a elastic net regression model by using Gekko. I'm using Gekko instead of sklearn etc. because I'd also need to implement additional constraints on my variable coefficients. And the Gekko code works if I omit the regularization part. But whenever I try to add the l1 & l2 penalty, the solver either always return with the lowest value specified for the regularization parameters or can't find a solution. I get a solution more frequently if I set the lower bound of the regularization parameters to a very low value e.g. 0.001, but even then the solutions are not consistent. I'm wondering if I'm making an error while setting up these parameters in Gekko. Any help or alternate suggestion would be greatly helpful.

Following is the example code with a toy data:

#test case for stackoverflow

# load data
x1 = np.array([1,2,5,3,2,5,2])
x2 = np.array([5,6,7,2,1,3,2])
x3 = x1*2+1
x4 = x2*3-1
x = np.array(pd.DataFrame({'x1':x1,'x2':x2,'x3':x3,'x4':x4}))
y = np.array([3,2,3,5,6,7,8])
sign_constraint = [1,1,-1,-1]

coeffs_dict = defaultdict(dict)
#setting the optimizer
m = gk.GEKKO(remote=False)
m.options.EV_TYPE=2  #turning gekko solver to minimize SSE instead of default l1-norm
m.options.MAX_ITER = 100000
opt_path = m.path
model_iter = opt_path.rsplit("_",1)[1]
m.options.IMODE=2 #Regression mode
n = 4 #number of variables
#print(n)
c  = m.Array(m.FV, n) #array of parameters and intercept: c for coefficients
for i,ci in enumerate(c):
    ci.STATUS = 1 #calculate fixed parameter
    if sign_constraint[i] > 0:
        ci.LOWER = 0
    elif sign_constraint[i] < 0:
        ci.UPPER = 0

L = m.FV() #l1 ratio
L.STATUS = 1
L.value = 0.5
L.LOWER = 0.5
#L.UPPER = 1
R = m.FV() #lambda for regularization
R.STATUS = 1
R.LOWER = 100

x_pred = [None]*n  #array of gekko variable to predict the coefficients

xd = m.Array(m.Param,n)
yd = m.Param(value=y)

for i in range(n):
    xd[i].value = x[:,i]
    x_pred[i] = m.Var() # y = c0 + sum(c[i]*x[i])    
    m.Equation(x_pred[i]==c[i]*xd[i])

y_pred = m.Var()
m.Equation(y_pred==m.sum([x_pred[i] for i in range(n)]))
l1_penalty = m.Var()
l2_penalty = m.Var()
reg_factor = m.Var()
m.Equation(l1_penalty==L*m.sum([abs(c[i]) for i in range(n)]))
m.Equation(l2_penalty==(1-L)*m.sum([c[i]**2 for i in range(n)])/2)
#m.Equation(l2_penalty==R*m.sum([c[i]**2 for i in range(n)]))
m.Equation(reg_factor==R*(l1_penalty + l2_penalty))

#reg_obj = m.Intermediate(((yd-y_pred)**2)/n_obs + reg_factor)
m.Minimize(((yd-y_pred)**2)/n_obs + reg_factor)
#Minimize difference between actual and predicted y
#m.Minimize((yd-y_pred)**2)

#APOPT solver
m.options.SOLVER = 1
#Solve
m.solve(disp=False)
    
try:
    m.solve(disp=False)
except:
    print('solver failed: check infeasibilities')
#Retrieve parameter values
a = [i.value[0] for i in c]
for i in range(n):
    coeffs_dict[i] = a[i]
print(coeffs_dict) 
print(L.value[0], R.value[0])

This example is returning an error with the specified lower bound. Although if I replace L & R with scalar value 0.5 & 100 respectively I get a solution (although completely nonsensical), but if I replace R with 10 or 1000 it can't provide a solution. So replacing the parameters with scalar value and opting for a gridsearch wouldn't work as well.

I modified the code to standardize the variables and tried different variable bounds as following:

# load data
x1 = np.array([1,2,5,3,2,5,2])
x2 = np.array([5,6,7,9,10,3,12])
x3 = x1*2+1
x4 = x2*3-1
x1 = (x1-np.mean(x1))/np.std(x1)
x2 = (x2-np.mean(x2))/np.std(x2)
x3 = (x3-np.mean(x3))/np.std(x3)
x4 = (x4-np.mean(x4))/np.std(x4)
x = np.array(pd.DataFrame({'x1':x1,'x2':x2,'x3':x3,'x4':x4}))
y = np.array([3,2,3,5,6,7,8])
y = (y-np.mean(y))/np.std(y)
sign_constraint = [1,1,-1,-1]

coeffs_dict = defaultdict(dict)
#setting the optimizer
m = gk.GEKKO(remote=False)
#m.options.EV_TYPE=2 #turning gekko solver to minimize SSE instead of default l1-norm
m.options.MAX_ITER = 100000
opt_path = m.path
print(opt_path)
model_iter = opt_path.rsplit("_",1)[1]
m.options.IMODE=2 #Regression mode
n = 4 #number of variables
#print(n)
c  = m.Array(m.FV, n) #array of parameters and intercept: c for coefficients
for i,ci in enumerate(c):
    ci.STATUS = 1 #calculate fixed parameter

    if sign_constraint[i] > 0:
        ci.LOWER = 0
    elif sign_constraint[i] < 0:
        ci.UPPER = 0

L = m.FV() #l1 ratio
L.STATUS = 1
L.value = 0.5
L.LOWER = 0.001
L.UPPER = 1
R = m.FV() #lambda for regularization
R.STATUS = 1
R.LOWER = 10

x_pred = [None]*n  #array of gekko variable to predict the coefficients

#load data
xd = m.Array(m.Param,n)
yd = m.Param(value=y)

#I = np.identity(n)
#m.Equation(c==)  

for i in range(n):
    xd[i].value = x[:,i]
    x_pred[i] = m.Var() # y = c0 + sum(c[i]*x[i])    
    m.Equation(x_pred[i]==c[i]*xd[i])
    #m.Equation(abs(c[i])>0.001)

y_pred = m.Var()
m.Equation(y_pred==m.sum([x_pred[i] for i in range(n)]))
l1_penalty = m.Var()
l2_penalty = m.Var()
reg_factor = m.Var()
m.Equation(l1_penalty==m.sum([L*abs(c[i]) for i in range(n)]))
m.Equation(l2_penalty==m.sum([(1-L)*(c[i]**2) for i in range(n)])/2)
m.Equation(reg_factor==R*(l1_penalty + l2_penalty))
m.Minimize(((yd-y_pred)**2)/7 + reg_factor)

#m.Equation(l2_penalty==R*m.sum([c[i]**2 for i in range(n)]))
#m.Equation(reg_factor==(l1_penalty + l2_penalty))
#m.Minimize(((yd-y_pred)**2) + l2_penalty)
#m.Minimize(((yd-y_pred)**2) + reg_factor)

#reg_obj = m.Intermediate(((yd-y_pred)**2)/n_obs + reg_factor)

#Minimize difference between actual and predicted y
#m.Minimize((yd-y_pred)**2)

#APOPT solver
m.options.SOLVER = 1
#Solve
#m.solve(disp=False)
    
try:
    m.solve(disp=False)
except:
    print('solver failed: check infeasibilities')
#Retrieve parameter values
a = [i.value[0] for i in c]
for i in range(n):
    coeffs_dict[i] = a[i]
print(coeffs_dict) 
print(L.value[0], R.value[0])

In this code, I'm assigning variable L & R (line 37-44) as the l1-ratio & penalty factor respectively. With the posted example it throws a '@error: Solution Not Found' error. When I apply m.options.EV_TYPE=2 I get an infeasibility error but without any "***** POSSIBLE INFEASBILE EQUATIONS ************".

Also with m.options.EV_TYPE=2, if I change the R.LOWER to 1 I get a solution, but if I omit the L.LOWER statement it produces the same infeasibility error without any "***** POSSIBLE INFEASBILE EQUATIONS ************".

I've tried multiple such scenarios and they fail (more frequently when m.options.EV_TYPE=2) without any specific pattern visible to me. Also whenever the code successfully runs, the L & R values are always the pre-specified lower bounds for these variables. So what can be done/improved in this code to get the desired outcome and make the code stable? Or is it infeasible in Gekko and/or is there any other alternatives?

Solution

The problem is the abs(x) function. It is not continuously differentiable at x=0 and often causes the gradient-based solvers to struggle at finding a solution. Use the m.abs3(x) function instead. This uses a binary variable and evaluates both the x>0 and x<0 sides of the absolute value function with continuous first and second derivatives. Here is a modified script that solves successfully:

import numpy as np
import gekko as gk
import pandas as pd

# load data
x1 = np.array([1,2,5,3,2,5,2])
x2 = np.array([5,6,7,9,10,3,12])
x3 = x1*2+1
x4 = x2*3-1
x1 = (x1-np.mean(x1))/np.std(x1)
x2 = (x2-np.mean(x2))/np.std(x2)
x3 = (x3-np.mean(x3))/np.std(x3)
x4 = (x4-np.mean(x4))/np.std(x4)
x = np.array(pd.DataFrame({'x1':x1,'x2':x2,'x3':x3,'x4':x4}))
y = np.array([3,2,3,5,6,7,8])
y = (y-np.mean(y))/np.std(y)
sign_constraint = [1,1,-1,-1]

#setting the optimizer
m = gk.GEKKO(remote=True)
#m.options.EV_TYPE=2 #turning gekko solver to minimize SSE instead of default l1-norm
m.options.MAX_ITER = 200
opt_path = m.path
model_iter = opt_path.rsplit("_",1)[1]
m.options.IMODE=2 #Regression mode
n = 4 #number of variables
#print(n)
c  = m.Array(m.FV, n) #array of parameters and intercept: c for coefficients
for i,ci in enumerate(c):
    ci.STATUS = 1 #calculate fixed parameter

    if sign_constraint[i] > 0:
        ci.LOWER = 0
    elif sign_constraint[i] < 0:
        ci.UPPER = 0

L = m.FV() #l1 ratio
L.STATUS = 1
L.value = 0.5
L.LOWER = 0.001
L.UPPER = 1
R = m.FV() #lambda for regularization
R.STATUS = 1
R.LOWER = 10

x_pred = [None]*n  #array of gekko variable to predict the coefficients

#load data
xd = m.Array(m.Param,n)
yd = m.Param(value=y)

for i in range(n):
    xd[i].value = x[:,i]
    x_pred[i] = m.Var() # y = c0 + sum(c[i]*x[i])    
    m.Equation(x_pred[i]==c[i]*xd[i])

y_pred = m.Var()
m.Equation(y_pred==m.sum([x_pred[i] for i in range(n)]))
l1_penalty = m.Var()
l2_penalty = m.Var()
reg_factor = m.Var()
m.Equation(l1_penalty==m.sum([L*m.abs3(c[i]) for i in range(n)]))
m.Equation(l2_penalty==m.sum([(1-L)*(c[i]**2) for i in range(n)])/2)
m.Equation(reg_factor==R*(l1_penalty + l2_penalty))
m.Minimize(((yd-y_pred)**2)/7 + reg_factor)

#APOPT solver
m.options.SOLVER = 1
    
try:
    m.solve(disp=True)
except:
    print('solver failed: check infeasibilities')
#Retrieve parameter values
a = [i.value[0] for i in c]
print(L.value[0], R.value[0])

Here is more information on logical conditions in optimization for efficient gradient-based optimization.