"""
WDRO Estimators
"""
import numpy as np
import torch as pt
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_array
from typing import Optional
from skwdro.base.problems import EmpiricalDistributionWithoutLabels
from skwdro.base.losses_torch.portfolio import SimplePortfolio
from skwdro.solvers.utils import detach_tensor
from skwdro.wrap_problem import dualize_primal_loss
from skwdro.base.cost_decoder import cost_from_str
from skwdro.solvers.optim_cond import OptCondTorch
import skwdro.solvers.specific_solvers as spS
import skwdro.solvers.entropic_dual_torch as entTorch
[docs]
class Portfolio(BaseEstimator):
r""" A Wasserstein Distributionally Robust Mean-Risk Portfolio estimator.
Model for the portfolio optimization problem
.. math::
\mathbb{E}[ - \langle x ; \xi \rangle ] + \eta \mathrm{CVar}_\alpha[- \langle x ; \xi \rangle]
which amounts to using the following loss function
.. math::
\ell(x,\tau;\xi) = - \langle x ; \xi \rangle + \eta \tau + \frac{1}{\alpha} \max( - \langle x ; \xi \rangle - \tau ; 0)
where :math:`\tau` is an extra real parameter accounting for the threshold of the CVaR (see [Rockafellar and Uryasev (2000)]). The parameter :math:`x` is constrained to live in the simplex (This is encoded in the constraints of the problem in [Esfahani et al. (2018)] and by an exponential reparametrization for the entropy-regularized version).
Parameters
----------
rho : float, default=1e-2
Robustness radius
eta: float > 0, default=0
Risk-aversion parameter linked to the Conditional Value at Risk
alpha: float in (0,1], default=0.95
Confidence level of the Conditional Value at Risk
C : (nb_constraints, nb_assets), default=None
Matrix of constraints observed by the user.
d : (nb_constraints,), default=None
Vector of constraints observed by the user.
fit_intercept: boolean, default=None
Determines if an intercept is fit or not
cost: str, default="t-NC-1-1"
Tiret-separated code to define the transport cost: "<engine>-<cost id>-<k-norm type>-<power>" for :math:`c(x, y):=\|x-y\|_k^p`
solver: str, default='entropic'
Solver to be used: 'entropic' or 'dedicated'
solver_reg: float, default=1.0
regularization value for the entropic solver
reparam: str, default="softmax"
Reparametrization method of theta for the entropic torch loss
random_state: int, default=None
Seed used by the random number generator when using non-deterministic methods
on the estimator
Examples
>>> from skwdro.estimators import Portfolio
>>> import numpy as np
>>> X = np.random.normal(size=(10,2))
>>> estimator = Portfolio()
>>> estimator.fit(X)
Portfolio()
--------
"""
def __init__(
self,
rho=1e-2,
eta=0.,
alpha=.95,
C=None,
d=None,
fit_intercept=None,
cost="t-NC-1-1",
solver="dedicated",
solver_reg=1e-3,
reparam="softmax",
n_zeta_samples: int = 10,
seed: int = 0,
opt_cond: Optional[OptCondTorch] = OptCondTorch(
2
) # type: ignore
):
# Verifying conditions on rho, eta and alpha
if rho < 0:
raise ValueError("The Wasserstein radius cannot be negative")
elif eta < 0:
raise ValueError("Risk-aversion error eta cannot be negative")
elif alpha <= 0 or alpha > 1:
raise ValueError(
"Confidence level alpha needs to be in the (0,1] interval")
elif solver_reg < 0:
raise ValueError("The regularization parameter cannot be negative")
elif n_zeta_samples < 0:
raise ValueError("Cannot sample a negative number of zetas")
self.rho = rho
self.eta = eta
self.alpha = alpha
self.C = C
self.d = d
self.fit_intercept = fit_intercept
self.cost = cost
self.solver = solver
self.solver_reg = solver_reg
self.reparam = reparam
self.n_zeta_samples = n_zeta_samples
self.seed = seed
self.opt_cond = opt_cond
[docs]
def fit(self, X, y=None):
"""Fits the WDRO regressor.
Parameters
----------
X : array-like, shape (n_samples_train,m)
The training input samples.
y : None
The prediction. Always none for a portfolio estimator.
"""
del y
# Conversion to float to prevent torch.nn conversion errors
self.rho_ = float(self.rho)
self.eta_ = float(self.eta)
self.alpha_ = float(self.alpha)
self.solver_reg_ = float(self.solver_reg)
# Check that X has correct shape
X = check_array(X)
# Store data
self.X_ = X
# Setup problem parameters
N = np.shape(X)[0] # N samples for the empirical distribution
m = np.shape(X)[1] # m assets
self.n_features_in_ = m
emp = EmpiricalDistributionWithoutLabels(m=N, samples=X)
self.cost_ = cost_from_str(self.cost) # NormCost(1, 1., "L1 cost")
p = self.cost_.power
# Setup values C and d that define the polyhedron of xi_maj
if (self.C is None or self.d is None):
self.C_ = np.zeros((1, m))
self.d_ = np.zeros((1, 1))
else:
self.C_ = self.C
self.d_ = self.d
# Check that the matrix-vector product is well-defined
if np.shape(self.C_)[1] != m:
raise ValueError(
' '.join([
"The number of columns",
"of C don't match the",
"number of lines of",
"any xi"
])
)
if self.solver == "entropic":
raise (DeprecationWarning(
"The entropic (numpy) solver is now deprecated"
))
elif self.solver == "dedicated":
_res = spS.WDROPortfolioSpecificSolver(
C=self.C_,
d=self.d_,
m=self.n_features_in_,
p=p,
eta=self.eta,
alpha=self.alpha,
rho=self.rho,
samples=emp.samples
)
self.coef_, self.tau_, self.dual_var_, self.result_ = _res
elif "torch" in self.solver:
self._wdro_loss = dualize_primal_loss(
SimplePortfolio(m, risk_aversion=self.eta_,
risk_level=self.alpha_),
None,
pt.tensor(self.rho_),
pt.Tensor(emp.samples),
None,
"post" not in self.solver,
self.cost,
self.n_zeta_samples,
self.seed,
epsilon=self.solver_reg_,
l2reg=0.
)
_res = entTorch.solve_dual_wdro(
self._wdro_loss,
emp,
self.opt_cond, # type: ignore
)
(
self.coef_,
self.intercept_,
self.dual_var_,
self.robust_loss_
) = _res
self.coef_ = detach_tensor(
self._wdro_loss.primal_loss.loss.assets.weight # type: ignore
)
else:
raise NotImplementedError("Designation for solver not recognized")
self.is_fitted_ = True
# Return the estimator
return self
[docs]
def score(self, X, y=None):
'''
Score method to estimate the quality of the model.
Parameters
----------
X : array-like, shape (n_samples_test,m)
The testing input samples.
y : None
The prediction. Always None for a Portfolio estimator.
'''
del y
return -self.eval(X)
[docs]
def eval(self, X):
'''
Evaluates the loss with the theta obtained from the fit function.
Parameters
----------
X : array-like, shape (n_samples_test,m)
The testing input samples.
'''
# Check that X has correct shape
X = check_array(X)
assert self.is_fitted_ # We have to fit before evaluating
if "entropic" in self.solver:
return self._wdro_loss.primal_loss.forward(pt.from_numpy(X)).mean()
elif self.solver == "dedicated":
return -np.mean(X, axis=0) @ self.coef_
else:
raise (ValueError("Solver not recognized"))