Source code for deephyper.search.nas._ambsmixed

import logging

import ConfigSpace as CS
import numpy as np
import deephyper.skopt
from deephyper.problem import HpProblem
from deephyper.search.nas._base import NeuralArchitectureSearch

# Adapt minimization -> maximization with DeepHyper
MAP_liar_strategy = {
    "cl_min": "cl_max",
    "cl_max": "cl_min",
}
MAP_acq_func = {
    "UCB": "LCB",
}


[docs]class AMBSMixed(NeuralArchitectureSearch): """Asynchronous Model-Based Search baised on the `Scikit-Optimized Optimizer <https://scikit-optimize.github.io/stable/modules/generated/deephyper.skopt.Optimizer.html#deephyper.skopt.Optimizer>`_. It is extended to the case of joint hyperparameter and neural architecture search. Args: problem (NaProblem): Neural architecture search problem describing the search space to explore. evaluator (Evaluator): An ``Evaluator`` instance responsible of distributing the tasks. random_state (int, optional): Random seed. Defaults to None. log_dir (str, optional): Log directory where search's results are saved. Defaults to ".". verbose (int, optional): Indicate the verbosity level of the search. Defaults to 0. surrogate_model (str, optional): Surrogate model used by the Bayesian optimization. Can be a value in ``["RF", "ET", "GBRT", "DUMMY"]``. Defaults to ``"RF"``. acq_func (str, optional): Acquisition function used by the Bayesian optimization. Can be a value in ``["UCB", "EI", "PI", "gp_hedge"]``. Defaults to ``"UCB"``. kappa (float, optional): Manage the exploration/exploitation tradeoff for the "UCB" acquisition function. Defaults to ``1.96`` for a balance between exploitation and exploration. xi (float, optional): Manage the exploration/exploitation tradeoff of ``"EI"`` and ``"PI"`` acquisition function. Defaults to ``0.001`` for a balance between exploitation and exploration. n_points (int, optional): The number of configurations sampled from the search space to infer each batch of new evaluated configurations. Defaults to ``10000``. liar_strategy (str, optional): Definition of the constant value use for the Liar strategy. Can be a value in ``["cl_min", "cl_mean", "cl_max"]`` . Defaults to ``"cl_max"``. n_jobs (int, optional): Number of parallel processes used to fit the surrogate model of the Bayesian optimization. A value of ``-1`` will use all available cores. Defaults to ``1``. """ def __init__( self, problem, evaluator, random_state=None, log_dir=".", verbose=0, surrogate_model: str = "RF", acq_func: str = "UCB", kappa: float = 1.96, xi: float = 0.001, n_points: int = 10000, liar_strategy: str = "cl_max", n_jobs: int = 1, **kwargs, ): super().__init__(problem, evaluator, random_state, log_dir, verbose) # Setup the search space na_search_space = self._problem.build_search_space() self.hp_space = self._problem._hp_space #! hyperparameters self.hp_size = len(self.hp_space.space.get_hyperparameter_names()) self.na_space = HpProblem() self.na_space._space.seed(self._random_state.get_state()[1][0]) for i, vnode in enumerate(na_search_space.variable_nodes): self.na_space.add_hyperparameter( (0, vnode.num_ops - 1), name=f"vnode_{i:05d}" ) self._space = CS.ConfigurationSpace(seed=self._random_state.get_state()[1][0]) self._space.add_configuration_space( prefix="1", configuration_space=self.hp_space.space ) self._space.add_configuration_space( prefix="2", configuration_space=self.na_space.space ) # check input parameters surrogate_model_allowed = ["RF", "ET", "GBRT", "DUMMY"] if not (surrogate_model in surrogate_model_allowed): raise ValueError( f"Parameter 'surrogate_model={surrogate_model}' should have a value in {surrogate_model_allowed}!" ) acq_func_allowed = ["UCB", "EI", "PI", "gp_hedge"] if not (acq_func in acq_func_allowed): raise ValueError( f"Parameter 'acq_func={acq_func}' should have a value in {acq_func_allowed}!" ) if not (np.isscalar(kappa)): raise ValueError(f"Parameter 'kappa' should be a scalar value!") if not (np.isscalar(xi)): raise ValueError("Parameter 'xi' should be a scalar value!") if not (type(n_points) is int): raise ValueError("Parameter 'n_points' shoud be an integer value!") liar_strategy_allowed = ["cl_min", "cl_mean", "cl_max"] if not (liar_strategy in liar_strategy_allowed): raise ValueError( f"Parameter 'liar_strategy={liar_strategy}' should have a value in {liar_strategy_allowed}!" ) if not (type(n_jobs) is int): raise ValueError(f"Parameter 'n_jobs' should be an integer value!") self._n_initial_points = self._evaluator.num_workers self._liar_strategy = MAP_liar_strategy.get(liar_strategy, liar_strategy) base_estimator = self._get_surrogate_model( surrogate_model, n_jobs, random_state=self._random_state.get_state()[1][0] ) self._opt = None self._opt_kwargs = dict( dimensions=self._space, base_estimator=base_estimator, acq_func=MAP_acq_func.get(acq_func, acq_func), acq_optimizer="sampling", acq_func_kwargs={"xi": xi, "kappa": kappa, "n_points": n_points}, n_initial_points=self._n_initial_points, random_state=self._random_state, ) def _setup_optimizer(self): self._opt = deephyper.skopt.Optimizer(**self._opt_kwargs) def _saved_keys(self, job): res = {"arch_seq": str(job.config["arch_seq"])} hp_names = self._problem._hp_space._space.get_hyperparameter_names() for hp_name in hp_names: if hp_name == "loss": res["loss"] = job.config["loss"] else: res[hp_name] = job.config["hyperparameters"][hp_name] return res def _search(self, max_evals, timeout): if self._opt is None: self._setup_optimizer() num_evals_done = 0 # Filling available nodes at start logging.info(f"Generating {self._evaluator.num_workers} initial points...") self._evaluator.submit(self._get_random_batch(size=self._n_initial_points)) # Main loop while max_evals < 0 or num_evals_done < max_evals: # Collecting finished evaluations new_results = list(self._evaluator.gather("BATCH", size=1)) num_received = len(new_results) if num_received > 0: self._evaluator.dump_evals( saved_keys=self._saved_keys, log_dir=self._log_dir ) num_evals_done += num_received if num_evals_done >= max_evals: break # Transform configurations to list to fit optimizer opt_X = [] opt_y = [] for cfg, obj in new_results: arch_seq = cfg["arch_seq"] hp_val = self._problem.extract_hp_values(cfg) x = replace_nan(hp_val + arch_seq) opt_X.append(x) opt_y.append(-obj) #! maximizing self._opt.tell(opt_X, opt_y) #! fit: costly new_X = self._opt.ask( n_points=len(new_results), strategy=self._liar_strategy ) new_batch = [] for x in new_X: new_cfg = self._problem.gen_config( x[self.hp_size :], x[: self.hp_size] ) new_batch.append(new_cfg) # submit_childs if len(new_results) > 0: self._evaluator.submit(new_batch) def _get_surrogate_model( self, name: str, n_jobs: int = None, random_state: int = None ): """Get a surrogate model from Scikit-Optimize. Args: name (str): name of the surrogate model. n_jobs (int): number of parallel processes to distribute the computation of the surrogate model. Raises: ValueError: when the name of the surrogate model is unknown. """ accepted_names = ["RF", "ET", "GBRT", "DUMMY"] if not (name in accepted_names): raise ValueError( f"Unknown surrogate model {name}, please choose among {accepted_names}." ) if name == "RF": surrogate = deephyper.skopt.learning.RandomForestRegressor( n_jobs=n_jobs, random_state=random_state ) elif name == "ET": surrogate = deephyper.skopt.learning.ExtraTreesRegressor( n_jobs=n_jobs, random_state=random_state ) elif name == "GBRT": surrogate = deephyper.skopt.learning.GradientBoostingQuantileRegressor( n_jobs=n_jobs, random_state=random_state ) else: # for DUMMY and GP surrogate = name return surrogate def _get_random_batch(self, size: int) -> list: batch = [] n_points = max(0, size - len(batch)) if n_points > 0: points = self._opt.ask(n_points=n_points) for point in points: point_as_dict = self._problem.gen_config( point[self.hp_size :], point[: self.hp_size] ) batch.append(point_as_dict) return batch
def replace_nan(x): """ :meta private: """ return [np.nan if x_i == "nan" else x_i for x_i in x]