import copy
import warnings
import ConfigSpace as cs
import ConfigSpace.hyperparameters as csh
import numpy as np
import pandas as pd
from sklearn.utils import check_random_state
import deephyper.skopt
from deephyper.skopt.joblib import Parallel, delayed
def convert_to_skopt_dim(cs_hp, surrogate_model=None):
    """Convert one ConfigSpace hyperparameter to a ``deephyper.skopt`` dimension.

    Args:
        cs_hp: the ConfigSpace hyperparameter to convert.
        surrogate_model (str, optional): identifier of the surrogate model. It only
            influences how categorical hyperparameters are encoded: ``"label"`` for the
            tree-like models ``"RF"``, ``"ET"``, ``"GBRT"``, ``"HGBRT"``, ``"MF"`` and
            ``"BT"``, ``"onehot"`` for anything else (including ``None``).

    Raises:
        TypeError: if the type of ``cs_hp`` is not one of the supported hyperparameters.

    Returns:
        The ``deephyper.skopt.space`` dimension (``Integer``, ``Real`` or ``Categorical``)
        carrying the same name as ``cs_hp``.
    """
    # Tree-like models are not sensitive to the metric space; every other model
    # (GP, neural networks, ...) is treated as distance based.
    rule_based = surrogate_model in ["RF", "ET", "GBRT", "HGBRT", "MF", "BT"]

    if isinstance(cs_hp, csh.UniformIntegerHyperparameter):
        prior = "log-uniform" if cs_hp.log else "uniform"
        return deephyper.skopt.space.Integer(
            low=cs_hp.lower, high=cs_hp.upper, prior=prior, name=cs_hp.name
        )

    if isinstance(cs_hp, csh.UniformFloatHyperparameter):
        prior = "log-uniform" if cs_hp.log else "uniform"
        return deephyper.skopt.space.Real(
            low=cs_hp.lower, high=cs_hp.upper, prior=prior, name=cs_hp.name
        )

    if isinstance(cs_hp, csh.CategoricalHyperparameter):
        # Label encoding keeps the complexity of trees from exploding with
        # categorical variables; distance-based models get a one-hot encoding.
        encoding = "label" if rule_based else "onehot"
        return deephyper.skopt.space.Categorical(
            categories=cs_hp.choices, name=cs_hp.name, transform=encoding
        )

    if isinstance(cs_hp, csh.OrdinalHyperparameter):
        levels = list(cs_hp.sequence)
        numeric_types = (int, np.integer, float, np.floating)
        # A purely numeric sequence is passed through unchanged, otherwise labelled.
        encoding = "label"
        if all(isinstance(level, numeric_types) for level in levels):
            encoding = "identity"
        return deephyper.skopt.space.Categorical(
            categories=levels, name=cs_hp.name, transform=encoding
        )

    if isinstance(cs_hp, csh.Constant):
        # A constant is a categorical dimension with a single category.
        return deephyper.skopt.space.Categorical(
            categories=[cs_hp.value], name=cs_hp.name, transform="label"
        )

    if isinstance(cs_hp, csh.NormalIntegerHyperparameter):
        return deephyper.skopt.space.Integer(
            low=cs_hp.lower,
            high=cs_hp.upper,
            prior="normal",
            name=cs_hp.name,
            loc=cs_hp.mu,
            scale=cs_hp.sigma,
        )

    if isinstance(cs_hp, csh.NormalFloatHyperparameter):
        return deephyper.skopt.space.Real(
            low=cs_hp.lower,
            high=cs_hp.upper,
            prior="normal",
            name=cs_hp.name,
            loc=cs_hp.mu,
            scale=cs_hp.sigma,
        )

    raise TypeError(f"Cannot convert hyperparameter of type {type(cs_hp)}")
def convert_to_skopt_space(cs_space, surrogate_model=None):
    """Build a ``deephyper.skopt`` Space out of a ConfigSpace ``ConfigurationSpace``.

    Args:
        cs_space (ConfigurationSpace): the configuration space to translate.
        surrogate_model (str, optional): the type of surrogate model/base estimator used
            to perform Bayesian optimization, forwarded to ``convert_to_skopt_dim`` for
            every hyperparameter. Defaults to ``None``.

    Raises:
        TypeError: if ``cs_space`` is not a ``ConfigurationSpace``.

    Returns:
        deephyper.skopt.space.Space: the converted space. The original ``cs_space`` is
        attached to it only when it defines conditions or forbidden clauses.
    """
    if not isinstance(cs_space, cs.ConfigurationSpace):
        raise TypeError("Input space should be of type ConfigurationSpace")

    # The ConfigurationSpace is only handed over when it has conditions or
    # forbidden clauses.
    has_constraints = len(cs_space.conditions) > 0 or len(cs_space.forbidden_clauses) > 0

    dims = [convert_to_skopt_dim(hp, surrogate_model) for hp in list(cs_space.values())]

    attached_space = cs_space if has_constraints else None
    return deephyper.skopt.space.Space(dims, config_space=attached_space)
def check_hyperparameter(parameter, name=None, default_value=None):
    """Check if the passed parameter is a valid description of an hyperparameter.

    :meta private:

    Supported shortcut descriptions:

    * ``int``, ``float`` or ``str``: a constant.
    * ``tuple`` ``(low, high)`` or ``(low, high, prior)`` with ``prior`` in
      ``{"uniform", "log-uniform"}``: an integer range when both bounds are ``int``, a
      real range when at least one bound is a ``float``.
    * ``list``: a categorical when any element is a ``str`` or a boolean, an ordinal when
      all elements are ``int``/``float``.
    * ``dict`` holding ``"mu"`` and ``"sigma"``: a normal distribution, float or integer
      depending on the type of ``"mu"``. The dict entries are forwarded as keyword
      arguments to the ConfigSpace hyperparameter.

    Args:
        parameter (str|Hyperparameter): an instance of
            ``ConfigSpace.hyperparameters.Hyperparameter`` or a synthetic description
            (e.g., ``list``, ``tuple``).
        name (str): the name of the hyperparameter. Only required when the parameter is not a
            ``ConfigSpace.hyperparameters.Hyperparameter``.
        default_value: a default value for the hyperparameter. Ignored when ``None``.

    Raises:
        ValueError: if the description is of an unsupported type or shape, if ``name`` is
            not a string for a non-constant shortcut description, or if ``"mu"`` is
            neither a float nor an integer.
        AssertionError: if the prior of a 3-tuple is neither ``"uniform"`` nor
            ``"log-uniform"``.

    Returns:
        Hyperparameter: the ConfigSpace hyperparameter instance corresponding to the ``parameter``
        description.
    """
    if isinstance(parameter, csh.Hyperparameter):
        return parameter

    if not isinstance(parameter, (list, tuple, np.ndarray, dict)):
        if isinstance(parameter, (int, float, str)):
            return csh.Constant(name=name, value=parameter)

        raise ValueError(
            "Shortcut definition of an hyper-parameter has to be a type in [list, tuple, array, "
            "dict, float, int, str]."
        )

    if type(name) is not str:
        raise ValueError("The 'name' of an hyper-parameter should be a string!")

    kwargs = {}
    if default_value is not None:
        kwargs["default_value"] = default_value

    if type(parameter) is tuple:  # Range of reals or integers
        if len(parameter) == 2:
            prior = "uniform"
        elif len(parameter) == 3:
            prior = parameter[2]
            # Raised explicitly (instead of an ``assert`` statement) so that the check
            # is not stripped when Python runs with ``-O``; the exception type is kept.
            if prior not in ["uniform", "log-uniform"]:
                raise AssertionError(
                    f"Prior has to be 'uniform' or 'log-uniform' when {prior} "
                    f"was given for parameter '{name}'"
                )
            parameter = parameter[:2]
        else:
            # Any other length is not a valid range; fall through to the final
            # ValueError instead of failing on an unbound ``prior``.
            prior = None

        if prior is not None:
            log = prior == "log-uniform"

            if all(isinstance(p, int) for p in parameter):
                return csh.UniformIntegerHyperparameter(
                    name=name, lower=parameter[0], upper=parameter[1], log=log, **kwargs
                )
            elif any(isinstance(p, float) for p in parameter):
                return csh.UniformFloatHyperparameter(
                    name=name, lower=parameter[0], upper=parameter[1], log=log, **kwargs
                )

    elif type(parameter) is list:  # Categorical
        if any(isinstance(p, (str, bool, np.bool_)) for p in parameter):
            return csh.CategoricalHyperparameter(name, choices=parameter, **kwargs)
        elif all(isinstance(p, (int, float)) for p in parameter):
            return csh.OrdinalHyperparameter(name, sequence=parameter, **kwargs)

    elif type(parameter) is dict:  # Integer or Real distribution
        # Normal
        if "mu" in parameter and "sigma" in parameter:
            if type(parameter["mu"]) is float:
                return csh.NormalFloatHyperparameter(name=name, **parameter, **kwargs)
            elif type(parameter["mu"]) is int:
                return csh.NormalIntegerHyperparameter(name=name, **parameter, **kwargs)
            else:
                raise ValueError(
                    "Wrong hyperparameter definition! 'mu' should be either a float or an integer."
                )

    # Nothing matched (e.g. numpy array, tuple of strings, dict without mu/sigma).
    raise ValueError(
        f"Invalid dimension {name}: {parameter}. Read the documentation for supported types."
    )
# Public API: the HpProblem class.
class HpProblem:
"""Class to define an hyperparameter problem.
>>> from deephyper.hpo import HpProblem
>>> problem = HpProblem()
Args:
config_space (ConfigurationSpace, optional): In case the ``HpProblem`` is defined from a
`ConfigurationSpace`.
"""
def __init__(self, config_space=None, seed: int | None = None):
if config_space is not None and not (isinstance(config_space, cs.ConfigurationSpace)):
raise ValueError(
"Parameter 'config_space' should be an instance of ConfigurationSpace!"
)
if config_space:
self._space = copy.deepcopy(config_space)
else:
self._space = cs.ConfigurationSpace()
if seed is not None:
self._space.seed(seed)
self.rng = check_random_state(seed)
self.skopt_dims = {}
self.references = [] # starting points
self.constraint_fn = None
self.sampling_fn = None
def __str__(self):
return repr(self)
def __repr__(self):
prob = repr(self._space)
return prob
def __len__(self):
return len(self.hyperparameter_names)
def __getitem__(self, hyperparameter_name):
return self.space[hyperparameter_name]
[docs]
def add_hyperparameter(self, value, name: str = None, default_value=None) -> csh.Hyperparameter:
"""Add an hyperparameter to the ``HpProblem``.
Hyperparameters can be added to a ``HpProblem`` with a short syntax:
>>> problem.add_hyperparameter((0, 10), "discrete", default_value=5)
>>> problem.add_hyperparameter((0.0, 10.0), "real", default_value=5.0)
>>> problem.add_hyperparameter([0, 10], "categorical", default_value=0)
Sampling distributions can be provided:
>>> problem.add_hyperparameter((0.0, 10.0, "log-uniform"), "real", default_value=5.0)
It is also possible to use `ConfigSpace Hyperparameters
<https://automl.github.io/ConfigSpace/master/API-Doc.html#hyperparameters>`_:
>>> import ConfigSpace.hyperparameters as csh
>>> csh_hp = csh.UniformIntegerHyperparameter(
... name='uni_int', lower=10, upper=100, log=False)
>>> problem.add_hyperparameter(csh_hp)
Args:
value (tuple or list or ConfigSpace.Hyperparameter): a valid hyperparametr description.
name (str): The name of the hyperparameter to add.
default_value (float or int or str): A default value for the corresponding
hyperparameter.
Returns:
ConfigSpace.Hyperparameter: a ConfigSpace ``Hyperparameter`` object corresponding to
the ``(value, name, default_value)``.
"""
if not (type(name) is str or name is None):
raise TypeError(
f"Dimension name: '{name}' is of type == {type(name)} when should be 'str'!"
)
csh_parameter = check_hyperparameter(value, name, default_value=default_value)
self._space.add(csh_parameter)
if isinstance(csh_parameter, csh.Hyperparameter):
skopt_dim = convert_to_skopt_dim(csh_parameter, surrogate_model="ET")
self.skopt_dims[skopt_dim.name] = skopt_dim
return csh_parameter
[docs]
def add_hyperparameters(self, hp_list):
"""Add a list of hyperparameters.
It can be useful when a list of ``ConfigSpace.Hyperparameter`` are defined and we need to
add them to the ``HpProblem``.
Args:
hp_list (ConfigSpace.Hyperparameter): a list of ConfigSpace hyperparameters.
Returns:
list: The list of added hyperparameters.
"""
return [self.add_hyperparameter(hp) for hp in hp_list]
[docs]
def add_forbidden_clause(self, clause):
r"""Add a forbidden clause to the problem.
Add a `forbidden clause <https://automl.github.io/ConfigSpace/master/API-Doc.html#forbidden-clauses>`_
to the ``HpProblem``.
For example if we want to optimize :math:`\frac{1}{x}` where :math:`x` cannot be equal to 0:
>>> from deephyper.hpo import HpProblem
>>> import ConfigSpace as cs
>>> problem = HpProblem()
>>> x = problem.add_hyperparameter((0.0, 10.0), "x")
>>> problem.add_forbidden_clause(cs.ForbiddenEqualsClause(x, 0.0))
Args:
clause: a ConfigSpace forbidden clause.
"""
self._space.add(clause)
[docs]
def add_condition(self, condition):
"""Add a condition to the problem.
Add a `condition <https://automl.github.io/ConfigSpace/master/API-Doc.html#conditions>`_ to
the ``HpProblem``.
>>> from deephyper.hpo import HpProblem
>>> import ConfigSpace as cs
>>> problem = HpProblem()
>>> x = problem.add_hyperparameter((0.0, 10.0), "x")
>>> y = problem.add_hyperparameter((1e-4, 1.0), "y")
>>> problem.add_condition(cs.LessThanCondition(y, x, 1.0))
Args:
condition: A ConfigSpace condition.
"""
self._space.add(condition)
[docs]
def add_conditions(self, conditions: list) -> None:
"""Add a list of conditions to the problem.
Args:
conditions (list): A list of ConfigSpace conditions.
"""
self._space.add(*conditions)
[docs]
def add(self, value, name=None, default_value=None) -> None:
"""Add a component to the configuration space.
An added component can be an hyperparameter, a forbidden rule or a condition.
"""
if name is not None:
self.add_hyperparameter(value, name, default_value)
return
if isinstance(value, csh.Hyperparameter):
skopt_dim = convert_to_skopt_dim(value, surrogate_model="ET")
self.skopt_dims[skopt_dim.name] = skopt_dim
self._space.add(value)
[docs]
def sample(
self,
size: int = 1,
strict: bool = False,
max_trials: int = 5,
n_jobs: int = 1,
) -> list[dict]:
"""Sample a list of hyperparameter configuration.
Args:
size (int): The number of configurations to sample.
strict (bool): If the returned number of samples should be strictly equal to
``size``. Defaults to ``False``.
max_trials (int): The maximum number of sampling trials. Defaults to ``5``.
n_jobs (int): The number of concurrent threads for sampling flat search space.
Returns:
list[dict]: the list of sampled configurations.
"""
def _sample_dimension(dim, i, n_samples, random_state, out):
"""Wrapper to sample dimension for joblib parallelization."""
out[0][:, i] = dim.rvs(n_samples=n_samples, random_state=random_state)
def sample_fn(size: int) -> list[dict]:
if self.sampling_fn is None:
sample_with_config_space = (
len(self._space.conditions) > 0 or len(self._space.forbidden_clauses) > 0
)
if sample_with_config_space:
samples = self._space.sample_configuration(size=size)
samples = [dict(s) for s in samples]
else:
# Regular sampling without transfer learning from flat search space
# Joblib parallel optimization
# Draw
n_columns = len(self.hyperparameter_names)
columns = np.zeros((size, n_columns), dtype="O")
random_states = self.rng.randint(
low=0,
high=np.iinfo(np.int32).max,
size=n_columns,
)
Parallel(n_jobs=n_jobs, verbose=0, require="sharedmem")(
delayed(_sample_dimension)(
self.skopt_dims[dim_name],
i,
size,
np.random.RandomState(random_states[i]),
[columns],
)
for i, dim_name in enumerate(self.hyperparameter_names)
)
df = pd.DataFrame(
{k: columns[:, i] for i, k in enumerate(self.hyperparameter_names)}
)
samples = df.to_dict(orient="records")
else:
samples = self.sampling_fn(size)
return samples
if self.constraint_fn is None:
# Fast path: no constraint
return sample_fn(size)
accepted = []
trials = 0
batch_size = size
while len(accepted) < size and trials < max_trials:
# Sample a batch
batch = sample_fn(size)
# Convert batch into DataFrame only once
df = pd.DataFrame(batch)
# Apply constraint ---
accept_mask = self.constraint_fn(df)
df = df[accept_mask]
accepted.extend(df.to_dict(orient="records"))
trials += 1
ratio_accept = float(accept_mask.sum() / batch_size)
if ratio_accept <= 1e-3:
batch_size = 2 * batch_size
if batch_size > 100_000:
warnings.warn(
f"Constraint is hard to sample with {ratio_accept=}! "
"Consider setting a custom sampling_fn",
category=UserWarning,
)
else:
batch_size = int((size - len(accepted)) / ratio_accept + 0.5)
# If constraints are too strict, return what we have (or raise)
# You can choose to raise if you need strictly size samples
if strict:
accepted = accepted[:size]
if len(accepted) < size:
return RuntimeError(f"The number of samples is less than {size=}!")
return accepted
@property
def space(self) -> cs.ConfigurationSpace:
"""The wrapped ConfigurationSpace object."""
return self._space
@property
def hyperparameter_names(self):
"""The list of hyperparameters names."""
return list(self._space.keys())
[docs]
def check_configuration(self, parameters: dict, raise_if_not_valid: bool = True) -> bool:
"""Check if a configuration is valid.
Args:
parameters (dict): the configuration of parameters to test.
raise_if_not_valid (bool): indicate if an error is raised if the configuration of
parameters is invalid.
Raise:
ValueError: if the configuration is invalid.
"""
try:
# Check is included in the init of Configuration
cs.Configuration(self._space, parameters)
except ValueError as e:
if raise_if_not_valid:
raise ValueError(str(e))
return False
if self.constraint_fn:
if not self.constraint_fn(parameters):
if raise_if_not_valid:
raise ValueError(
f"The {parameters=} are not valid with respect to the defined constraint_fn"
)
else:
return False
return True
@property
def default_configuration(self):
"""The default configuration as a dictionnary."""
config = dict(self._space.get_default_configuration())
for hp_name, hp in self._space.items():
if hp_name not in config:
if isinstance(hp, csh.CategoricalHyperparameter):
config[hp_name] = hp.choices[0]
elif isinstance(hp, csh.OrdinalHyperparameter):
config[hp_name] = hp.sequence[0]
elif isinstance(hp, csh.Constant):
config[hp_name] = hp.value
elif isinstance(
hp,
(csh.UniformIntegerHyperparameter, csh.UniformFloatHyperparameter),
):
config[hp_name] = hp.lower
else:
config[hp_name] = hp.default_value
return config
[docs]
def to_json(self) -> dict:
"""Returns a dictionary of the space which can be saved as JSON."""
d = self._space.to_serialized_dict()
return d
[docs]
def set_seed(self, seed: int):
"""Set the random seed of the space."""
self._space.seed(seed)
self.rng = check_random_state(seed)
[docs]
def set_constraint_fn(self, fn: callable):
"""Set the constraint function.
Example:
.. code-block:: python
pb = HpProblem()
pb.add((0.0, 10.0), "x")
pb.add((0.0, 10.0), "y")
def constraint_fn(df: pd.DataFrame) -> pd.Series:
accept = df["x"] + df["y"] >= 10
return accept
pb.set_constraint_fn(constraint_fn)
samples = pb.sample(size=100)
df = pd.DataFrame(samples)
assert all(df["x"] + df["y"] >= 10)
"""
self.constraint_fn = fn
[docs]
def set_sampling_fn(self, fn: callable):
"""Set the sampling function."""
self.sampling_fn = fn