Source code for deephyper.ensemble.aggregator._mode
from typing import List, Optional, Union, Dict
import numpy as np
from deephyper.ensemble.aggregator._aggregator import Aggregator
[docs]
class ModeAggregator(Aggregator):
    """Aggregate predictions using the mode of categorical distributions from predictors.

    .. list-table::
        :widths: 25 25
        :header-rows: 1

        * - Array (Fixed Set)
          - MaskedArray
        * - ✅
          - ✅

    This aggregator is useful when the ensemble is composed of predictors that output categorical
    distributions. The mode of the ensemble is the mode of the modes of the predictors, minimizing
    the 0-1 loss.

    Args:
        with_uncertainty (bool, optional): a boolean that sets if the uncertainty should be
            returned when calling the aggregator. Defaults to ``False``.
    """

    def __init__(self, with_uncertainty: bool = False):
        self.with_uncertainty = with_uncertainty

    def aggregate(
        self,
        y: List[Union[np.ndarray, np.ma.MaskedArray]],
        weights: Optional[List[float]] = None,
    ) -> Union[
        Union[np.ndarray, np.ma.MaskedArray],
        Dict[str, Union[np.ndarray, np.ma.MaskedArray]],
    ]:
        """Aggregate predictions using the mode of categorical distributions.

        Every predictor votes for its most probable class (ties go to the lowest class index).
        A vote counts ``1 / n_predictors`` when ``weights`` is ``None`` and ``weights[i]``
        otherwise; the ensemble prediction is the class with the largest accumulated vote.

        Masked-array handling is enabled only when *every* element of ``y`` is a
        ``numpy.ma.MaskedArray``. In that case a predictor whose whole class distribution is
        masked for a sample abstains (casts no vote) for that sample, and the sample is masked in
        the output when all predictors abstain. Unweighted votes are still divided by the total
        number of predictors, so abstentions lower the winning share instead of being
        renormalized away.

        Args:
            y (List[Union[np.ndarray, np.ma.MaskedArray]]): List of ``n_predictors`` categorical
                probability arrays, each of shape ``(n_samples, ..., n_classes)``.
            weights (Optional[List[float]]): Weights for the predictors, one per element of
                ``y``. Default is ``None``.

        Returns:
            Union[Union[np.ndarray, np.ma.MaskedArray], Dict[str, Union[np.ndarray,
            np.ma.MaskedArray]]]: Aggregated results, as an array corresponding to the mode when
            ``with_uncertainty=False`` and as a dict otherwise including:

            - ``"loc"``: Aggregated mode of shape ``(n_samples, ...)``.
            - ``"uncertainty"``: Uncertainty values of shape ``(n_samples, ...)``, computed as
              one minus the accumulated vote of the winning class.

        Raises:
            TypeError: If `y` is not a list of ``numpy.ndarray`` or ``numpy.ma.MaskedArray``.
            ValueError: If `y` is empty, if the arrays in `y` cannot be stacked, or if `weights`
                length does not match `y`.
        """
        if not isinstance(y, list) or not all(
            isinstance(arr, (np.ndarray, np.ma.MaskedArray)) for arr in y
        ):
            raise TypeError("Input `y` must be a list of numpy.ndarray or numpy.ma.MaskedArray.")

        if not y:
            raise ValueError("Input `y` must contain at least one array.")

        # Without this check a short `weights` fails with IndexError in the loop below and a
        # long one is silently truncated.
        if weights is not None and len(weights) != len(y):
            raise ValueError("The length of `weights` must match the number of predictors in `y`.")

        is_masked = all(isinstance(pred, np.ma.MaskedArray) for pred in y)
        # Kept as an instance attribute, as in the original implementation.
        self._np = np.ma if is_masked else np

        # Categorical probabilities (n_predictors, n_samples, ..., n_classes)
        y_proba_models = self._np.stack(y, axis=0)
        n_predictors = y_proba_models.shape[0]
        num_classes = y_proba_models.shape[-1]

        # Mode of each predictor (n_predictors, n_samples, ...)
        y_mode_models = self._np.argmax(y_proba_models, axis=-1)

        # ``np.ma.argmax`` fills masked entries before searching, so a fully masked class
        # distribution yields index 0. Track those rows to keep them from voting for class 0.
        abstains = None
        if is_masked:
            # (n_predictors, n_samples, ..., 1)
            abstains = np.ma.getmaskarray(y_proba_models).all(axis=-1, keepdims=True)

        # Accumulator of shape (n_samples, ..., n_classes). Built through ``zeros_like(...).sum``
        # so that, for masked input, entries masked by every predictor stay masked.
        weighted_counts = self._np.zeros_like(y_proba_models, dtype=np.float64).sum(axis=0)

        # Row ``k`` of the identity matrix is the one-hot encoding of class ``k``.
        eye_arr = np.eye(num_classes, dtype=np.float64)
        for i in range(n_predictors):
            one_hot = eye_arr[y_mode_models[i]]
            if weights is None:
                votes = one_hot / n_predictors
            else:
                votes = one_hot * weights[i]
            if abstains is not None:
                votes = np.where(abstains[i], 0.0, votes)
            weighted_counts += votes

        # Mode of the ensemble (n_samples, ...)
        y_mode_ensemble = weighted_counts.argmax(axis=-1)
        if is_masked:
            # ``getmask`` also copes with the reduction collapsing to an unmasked scalar, which
            # has no ``.mask`` attribute.
            mask = np.ma.getmask(weighted_counts.sum(axis=-1))
            y_mode_ensemble = self._np.array(y_mode_ensemble, mask=mask)

        if not self.with_uncertainty:
            return y_mode_ensemble

        # Uncertainty of ensemble: share of the vote that did not go to the winning class.
        uncertainty = 1 - self._np.max(weighted_counts, axis=-1)
        return {
            "loc": y_mode_ensemble,
            "uncertainty": uncertainty,
        }