Source code for deephyper.hpo._regevo

from collections import deque
from typing import Dict, List, Literal, Optional

import numpy as np

from ConfigSpace.util import deactivate_inactive_hyperparameters

from deephyper.hpo._search import Search
from deephyper.hpo._solution import SolutionSelection
from deephyper.hpo.utils import get_inactive_value_of_hyperparameter

__all__ = ["RegularizedEvolution"]



[docs]
class RegularizedEvolution(Search):
    """Regularized evolution algorithm.

    This implementation is an example for the Search API to implement new search algorithms.

    .. list-table::
        :widths: 25 25 25
        :header-rows: 1

        * - Single-Objective
          - Multi-Objectives
          - Failures
        * - ✅
          - ❌
          - ✅

    Args:
        problem:
            object describing the search/optimization problem.

        random_state (np.random.RandomState, optional):
            Initial random state of the search. Defaults to ``None``.

        log_dir (str, optional):
            Path to the directoy where results of the search are stored. Defaults to ``"."``.

        verbose (int, optional):
            Use verbose mode. Defaults to ``0``.

        stopper (Stopper, optional):
            a stopper to leverage multi-fidelity when evaluating the function. Defaults to
            ``None`` which does not use any stopper.

        checkpoint_history_to_csv (bool, optional):
            wether the results from progressively collected evaluations should be checkpointed
            regularly to disc as a csv. Defaults to ``True``.

        solution_selection (Literal["argmax_obs", "argmax_est"] | SolutionSelection, optional):
            the solution selection strategy. It can be a string where ``"argmax_obs"`` would
            select the argmax of observed objective values, and ``"argmax_est"`` would select the
            argmax of estimated objective values (through a predictive model).

        population_size (int, optional):
            The size of the population. Defaults to ``100``.

        sample_size (int, optional):
            The number of samples to draw from the population. Defaults to ``10``.
    """

    def __init__(
        self,
        problem,
        random_state=None,
        log_dir=".",
        verbose=0,
        stopper=None,
        checkpoint_history_to_csv: bool = True,
        solution_selection: Optional[
            Literal["argmax_obs", "argmax_est"] | SolutionSelection
        ] = None,
        population_size: int = 100,
        sample_size: int = 10,
    ):
        super().__init__(
            problem,
            random_state,
            log_dir,
            verbose,
            stopper,
            checkpoint_history_to_csv,
            solution_selection,
        )
        self._problem.space.seed(self._random_state.randint(0, np.iinfo(np.int32).max))
        assert population_size > sample_size, "population_size must be greater than sample_size"
        self.population_size = population_size
        self.sample_size = sample_size
        self._population = deque(maxlen=self.population_size)

    def _ask(self, n: int = 1) -> List[Dict]:
        """Ask the search for new configurations to evaluate.

        Args:
            n (int, optional): The number of configurations to ask. Defaults to 1.

        Returns:
            List[Dict]: a list of hyperparameter configurations to evaluate.
        """
        space = self._problem.space

        # Random sampling
        if len(self._population) < self.population_size:
            import warnings

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)

                new_samples = space.sample_configuration(size=n)

            if not (isinstance(new_samples, list)):
                new_samples = [new_samples]

            for i, sample in enumerate(new_samples):
                sample = dict(sample)
                for hp_name in self._problem.hyperparameter_names:
                    # If the parameter is inactive due to some conditions then we attribute the
                    # lower bound value to break symmetries and enforce the same representation.
                    if hp_name not in sample:
                        sample[hp_name] = get_inactive_value_of_hyperparameter(space[hp_name])

                    # Make sure to have JSON serializable values
                    if type(sample[hp_name]).__module__ == np.__name__:
                        sample[hp_name] = sample[hp_name].tolist()

                new_samples[i] = sample

        # Regularized evolution
        else:
            new_samples = []
            for i in range(n):
                samples_idxs = self._random_state.choice(
                    self.population_size, size=self.sample_size, replace=False
                )

                samples = [self._population[i] for i in samples_idxs]

                parent_sample = max(samples, key=lambda x: x[1])[0]

                child_sample = parent_sample.copy()
                active_hyperparameter_names = list(
                    space.get_active_hyperparameters(
                        deactivate_inactive_hyperparameters(child_sample, space)
                    )
                )
                hp_name = self._random_state.choice(active_hyperparameter_names)
                hp = space[hp_name]
                hp_value = hp.rvs(size=None, random_state=space.random)

                child_sample[hp_name] = hp_value
                child_sample = dict(deactivate_inactive_hyperparameters(child_sample, space))

                for hp_name in self._problem.hyperparameter_names:
                    # If the parameter is inactive due to some conditions then we attribute the
                    # lower bound value to break symmetries and enforce the same representation.
                    if hp_name not in child_sample:
                        child_sample[hp_name] = get_inactive_value_of_hyperparameter(
                            self._problem.space[hp_name]
                        )

                    # Make sure to have JSON serializable values
                    if type(child_sample[hp_name]).__module__ == np.__name__:
                        child_sample[hp_name] = child_sample[hp_name].tolist()

                new_samples.append(child_sample)

        return new_samples

    def _tell(
        self, results: list[tuple[dict[str, Optional[str | int | float]], str | int | float]]
    ):
        """Tell the search the results of the evaluations.

        Args:
            results (list[tuple[dict[str, Optional[str | int | float]], str | int | float]]):
                a dictionary containing the results of the evaluations.
        """
        for config, obj in results:
            # Do not add failures to population
            if isinstance(obj, str):
                continue
            self._population.append((config, obj))