Source code for skwdro.operations_research._weber

"""
Weber problem
"""
import numpy as np
import torch as pt

from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y, check_array

from typing import Optional

from skwdro.solvers.optim_cond import OptCondTorch

from skwdro.solvers.utils import detach_tensor
from skwdro.base.problems import EmpiricalDistributionWithLabels
from skwdro.base.losses_torch.weber import SimpleWeber
from skwdro.wrap_problem import dualize_primal_loss
import skwdro.solvers.entropic_dual_torch as entTorch



[docs]
class Weber(BaseEstimator):
    """ A Weber Wasserstein Distributionally Robust Estimator.

    The cost function is XXX
    Uncertainty is XXX

    Parameters
    ----------
    rho : float, default=1e-1
        Robustness radius
    kappa: float, default=10.
        For the cost
    solver_reg: float, default=1e-2
        regularization value for the entropic solver
    n_zeta_samples: int, default=10
        number of adversarial samples to draw
    solver: str, default='entropic_torch'
        Solver to be used: 'entropic_torch' (only this is implemented for now)

    Attributes
    ----------
    position_ : float
        parameter vector (:math:`w` in the cost function formula)

    Examples
    --------
    >>> from skwdro.operations_research import Weber
    >>> import numpy as np
    >>> m = 20
    >>> X = np.random.exponential(scale=2.0,size=(m,2))
    >>> w = np.ones(m)
    >>> estimator = Weber()
    >>> estimator.fit(X,w)
    Weber()
    """

    def __init__(
        self,
        rho: float = 1e-1,
        kappa: float = 10.0,
        solver_reg: float = 1e-2,
        sampler_reg: float = 1e-2,
        l2_reg: float = 0.,
        n_zeta_samples: int = 10,
        cost: str = "t-NLC-2-2",
        solver="entropic_torch",
        random_state: int = 0,
        opt_cond: Optional[OptCondTorch] = OptCondTorch(2)
    ):

        if rho < 0:
            raise ValueError(
                f"The uncertainty radius rho should be non-negative, received {rho}")

        self.rho = rho
        self.kappa = kappa
        self.solver = solver
        self.solver_reg = solver_reg
        self.sampler_reg = sampler_reg
        self.l2_reg = l2_reg
        self.n_zeta_samples = n_zeta_samples
        self.random_state = random_state
        self.cost = cost
        self.opt_cond = opt_cond


[docs]
    def fit(self, X, y):
        """Fits a Weber WDRO model

        Parameters
        ----------
        X : array-like, shape (n_samples,2)
            The training input positions.
        y : array-like, shape (n_samples,) or (n_samples,1)
            The training input importance weights

        Returns
        -------
        self : object
            Returns self.
        """

        X, y = check_X_y(X, y, y_numeric=True)

        if self.rho is not float:
            try:
                self.rho = float(self.rho)
            except ValueError:
                raise TypeError(
                    f"The uncertainty radius rho should be numeric, received {type(self.rho)}")

        m, d = np.shape(X)

        emp = EmpiricalDistributionWithLabels(
            m=m, samples_x=X, samples_y=y.reshape(-1, 1))

        if "torch" in self.solver:
            assert self.opt_cond is not None
            _post_sample = self.solver == "entropic_torch" or self.solver == "entropic_torch_post"
            self._wdro_loss = dualize_primal_loss(
                SimpleWeber(d),
                None,
                pt.tensor(self.rho),
                pt.Tensor(emp.samples_x),
                pt.Tensor(emp.samples_y),
                _post_sample,
                self.cost,
                self.n_zeta_samples,
                self.random_state,
                epsilon=self.solver_reg,
                sigma=self.sampler_reg,
                l2reg=self.l2_reg
            )
            self._wdro_loss.n_iter = 300
            self.coef_, self.intercept_, self.dual_var_, self.robust_loss_ = entTorch.solve_dual_wdro(
                self._wdro_loss,
                emp,
                self.opt_cond,
            )
            self.coef_ = detach_tensor(
                self.problem_.loss.primal_loss.loss.pos).flatten()  # type: ignore

        else:
            raise NotImplementedError

        self.is_fitted_ = True

        self.n_features_in_ = d

        self.position_ = self.coef_

        # `fit` should always return `self`
        return self



[docs]
    def score(self, X, y=None):
        '''
        Score method to estimate the quality of the model.

        Parameters
        ----------
        X : array-like, shape (n_samples_test,m)
            The testing input samples.
        y : None
            The prediction. Always None for a Newsvendor estimator.
        '''
        del y
        return -self.eval(X)



[docs]
    def eval(self, X):
        '''
        Evaluates the loss with the theta obtained from the fit function.

        Parameters
        ----------
        X : array-like, shape (n_samples_test,m)
            The testing input samples.
        '''

        assert self.is_fitted_  # We have to fit before evaluating

        # Check that X has correct shape
        X = check_array(X)

        if "entropic" in self.solver:
            return self._wdro_loss.primal_loss.forward(pt.from_numpy(X)).mean()
        else:
            raise (ValueError("Solver not recognized"))