Source code for deephyper.ensemble.selector._online_selector
import copy
import numpy as np
from typing import List, Callable
from deephyper.ensemble import EnsemblePredictor
from deephyper.ensemble.selector._selector import Selector
from deephyper.evaluator.callback import Callback
[docs]
class OnlineSelector(Callback):
    """This class performs ensemble selection after each hyperparameter optimization job completion.

    The ``run``-function passed to the ``Evaluator`` should return in its
    output the ``"online_selector"`` key. This key has for value a
    dictionary that includes both the ``"y_pred"`` key (i.e., predictions of
    the predictor on which the selection algorithm is applied) and the
    ``"y_pred_idx"`` key (i.e., indexes of the considered samples in ``y``
    used to score the selection):

    .. code-block:: python

        def run(job):
            ...
            return {
                "objective": objective,
                "online_selector": {"y_pred": y_pred, "y_pred_idx": idx},
            }

    the ``y_pred`` and ``y_pred_idx`` have same first dimension.

    Then, we can create an instance of ``OnlineSelector``:

    .. code-block:: python

        from deephyper.ensemble.aggregator import MeanAggregator
        from deephyper.ensemble.loss import SquaredError
        from deephyper.ensemble.selector import GreedySelector

        online_selector = OnlineSelector(
            y=valid_y,
            selector=GreedySelector(
                loss_func=SquaredError(),
                aggregator=MeanAggregator(),
                k=20,
            ),
            ensemble=ensemble,  # an ``EnsemblePredictor`` instance
            load_predictor_func=load_predictor_func,  # maps a ``job.id`` to a predictor
        )

    Finally pass this callback to the ``Evaluator`` used for hyperparameter optimization:

    .. code-block:: python

        evaluator = Evaluator.create(
            run,
            method_kwargs={
                "callbacks": [
                    online_selector,
                ],
            },
        )

    Args:
        y (np.ndarray): the data to use for the selector.
        selector (Selector): the selection strategy to use.
        ensemble (EnsemblePredictor): the template ensemble. The ``ensemble``
            property returns a shallow copy of it whose ``.predictors`` and
            ``.weights`` are set from the latest selection.
        load_predictor_func (Callable): a function called with a ``job.id`` whose
            return value is used as the corresponding entry of ``.predictors``.
    """

    def __init__(
        self,
        y: np.ndarray,
        selector: Selector,
        ensemble: EnsemblePredictor,
        load_predictor_func: Callable,
    ):
        #: the data to use for the ``selector``.
        self.y: np.ndarray = y
        #: the ensemble selection algorithm.
        self.selector: Selector = selector
        #: the list of received job.id from completed hyperparameters optimization jobs.
        self.y_predictors_job_ids: List[str] = []
        #: the list of received predictions mapped to the same shape as ``y``.
        self.y_predictors: List[np.ma.MaskedArray] = []
        #: the list of indexes of the first dimension of ``y_predictors`` from the ``selector``.
        self.selected_predictors_indexes: List[int] = []
        #: the weights of selected predictors.
        self.selected_predictors_weights: List[float] = []
        self._ensemble = ensemble
        self._load_predictor_func = load_predictor_func

    def on_done(self, job) -> None:
        """Register the predictions of a completed job and update the selection.

        Jobs whose ``"objective"`` is a string are ignored. Otherwise the
        predictions found under ``job.output["online_selector"]`` are scattered
        into a masked array shaped like ``y`` and ``selector.select`` is run
        again on all the predictions received so far.

        Args:
            job: the completed job; ``job.id`` and ``job.output`` are read.

        Raises:
            KeyError: if ``job.output`` lacks ``"objective"``, ``"online_selector"``,
                ``"y_pred"`` or ``"y_pred_idx"``. In that case no state is modified.
        """
        # ``isinstance`` (rather than an exact type comparison) also catches
        # ``str`` subclasses such as ``numpy.str_``.
        if isinstance(job.output["objective"], str):
            return

        # Build the prediction entirely before touching any state: if the job output
        # is malformed, ``y_predictors_job_ids`` and ``y_predictors`` must stay aligned
        # because selected indexes are later mapped back to job ids.
        selector_output = job.output["online_selector"]
        y_pred_idx = selector_output["y_pred_idx"]

        # All mask entries set to True represent invalid values in a MaskedArray
        # The most important part of this class is to build the MaskedArray
        y_pred_mask = np.ones_like(self.y, dtype=bool)
        y_pred_mask[y_pred_idx] = False
        y_pred = np.zeros_like(self.y, dtype=float)
        y_pred[y_pred_idx] = selector_output["y_pred"]
        m_y_pred = np.ma.masked_array(y_pred, mask=y_pred_mask)

        self.y_predictors_job_ids.append(job.id)
        self.y_predictors.append(m_y_pred)

        # Ensemble
        (
            self.selected_predictors_indexes,
            self.selected_predictors_weights,
        ) = self.selector.select(self.y, self.y_predictors)

    def on_done_other(self, job):
        """Handle the job exactly like :meth:`on_done`."""
        return self.on_done(job)

    @property
    def selected_predictors_job_ids(self) -> List[str]:
        """List of ``job.id`` corresponding to the selected set of predictors."""
        return [self.y_predictors_job_ids[idx] for idx in self.selected_predictors_indexes]

    @property
    def ensemble(self) -> EnsemblePredictor:
        """The ensemble with its weights.

        It will provide the ``ensemble`` with adapted ``.predictors`` and ``.weights`` from the
        latest selection.
        """
        # NOTE: a shallow copy is used on purpose; a deepcopy can create issues with the
        # evaluator used by the ensemble.
        ensemble = copy.copy(self._ensemble)
        ensemble.predictors = [
            self._load_predictor_func(job_id) for job_id in self.selected_predictors_job_ids
        ]
        ensemble.weights = self.selected_predictors_weights
        return ensemble