# Source code for deephyper.skopt.space.space

import numbers

import ConfigSpace as CS
import numpy as np
import scipy.stats as ss
import yaml
from ConfigSpace.util import deactivate_inactive_hyperparameters
from scipy.stats import gaussian_kde
from sklearn.utils import check_random_state

from deephyper.core.utils.joblib_utils import Parallel, delayed

from .transformers import (
    CategoricalEncoder,
    Identity,
    LabelEncoder,
    LogN,
    Normalize,
    Pipeline,
    StringEncoder,
    ToInteger,
)


# helper class to be able to print [1, ..., 4] instead of [1, '...', 4]
class _Ellipsis:
    """Placeholder whose ``repr`` is a bare ``...`` (no quotes).

    Putting an instance inside a list or tuple lets the container print as
    ``[1, ..., 4]`` rather than ``[1, '...', 4]``.
    """

    def __repr__(self):
        dots = "..."
        return dots


def _transpose_list_array(x):
    """Transpose a matrix stored as a list of columns.

    ``x`` holds ``n_dims`` sequences; the result holds one list per sample,
    each of length ``n_dims``. The number of samples is taken from the first
    column. An empty ``x`` trips the assertion.
    """
    column_count = len(x)
    assert column_count > 0
    sample_count = len(x[0])
    return [
        [x[col][row] for col in range(column_count)]
        for row in range(sample_count)
    ]


def check_dimension(dimension, transform=None):
    """Turn a provided dimension description into a dimension object.

    Checks that the provided dimension falls into one of the
    supported types. For a list of supported types, look at
    the documentation of ``dimension`` below.

    If ``dimension`` is already a ``Dimension`` instance, return it.

    Args:
        dimension (Dimension): Search space Dimension. Each search dimension
            can be defined either as

            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
              dimensions),
            - a `(lower_bound, upper_bound, "prior")` tuple where prior is
              "uniform" or "log-uniform" (`Integer` if one of the bounds is
              an ``int``, otherwise `Real`),
            - a `(lower_bound, upper_bound, "log-uniform", base)` tuple with
              an integer ``base`` (same `Integer`/`Real` rule),
            - as a list of categories (for `Categorical` dimensions), or
            - an instance of a `Dimension` object (`Real`, `Integer` or
              `Categorical`).

        transform (str): One of "identity", "normalize", "string", "label",
            "onehot" optional

            - For `Categorical` dimensions, the following transformations are
              supported.

              - "onehot" (default) one-hot transformation of the original
                space.
              - "label" integer transformation of the original space
              - "string" string transformation of the original space.
              - "identity" same as the original space.

            - For `Real` and `Integer` dimensions, the following
              transformations are supported.

              - "identity", (default) the transformed space is the same as
                the original space.
              - "normalize", the transformed space is scaled to be between
                0 and 1.

    Returns:
        dimension (Dimension): Dimension instance.

    Raises:
        ValueError: if ``dimension`` is not a list, tuple or ndarray, if it
            is empty, or if a 2-element description contains neither
            strings/booleans nor numbers.
    """
    if isinstance(dimension, Dimension):
        return dimension

    if not isinstance(dimension, (list, tuple, np.ndarray)):
        raise ValueError("Dimension has to be a list or tuple.")

    # A `Dimension` described by a single value is assumed to be
    # a `Categorical` dimension. This can be used in `BayesSearchCV`
    # to define subspaces that fix one value, e.g. to choose the
    # model type, see "sklearn-gridsearchcv-replacement.py"
    # for examples.
    if len(dimension) == 1:
        return Categorical(dimension, transform=transform)

    if len(dimension) == 2:
        if any(
            [isinstance(d, (str, bool)) or isinstance(d, np.bool_) for d in dimension]
        ):
            return Categorical(dimension, transform=transform)
        elif all([isinstance(dim, numbers.Integral) for dim in dimension]):
            return Integer(*dimension, transform=transform)
        elif any([isinstance(dim, numbers.Real) for dim in dimension]):
            return Real(*dimension, transform=transform)
        else:
            raise ValueError(
                "Invalid dimension {}. Read the documentation for"
                " supported types.".format(dimension)
            )

    if len(dimension) == 3:
        # (low, high, prior): anything else of length 3 is a category list.
        if any([isinstance(dim, int) for dim in dimension[:2]]) and dimension[2] in [
            "uniform",
            "log-uniform",
        ]:
            return Integer(*dimension, transform=transform)
        elif any(
            [isinstance(dim, (float, int)) for dim in dimension[:2]]
        ) and dimension[2] in ["uniform", "log-uniform"]:
            return Real(*dimension, transform=transform)
        else:
            return Categorical(dimension, transform=transform)

    if len(dimension) == 4:
        # (low, high, "log-uniform", base); falls through to the generic
        # "more than 3 items" categorical case when the pattern does not match.
        if (
            any([isinstance(dim, int) for dim in dimension[:2]])
            and dimension[2] == "log-uniform"
            and isinstance(dimension[3], int)
        ):
            return Integer(*dimension, transform=transform)
        elif (
            any([isinstance(dim, (float, int)) for dim in dimension[:2]])
            and dimension[2] == "log-uniform"
            and isinstance(dimension[3], int)
        ):
            return Real(*dimension, transform=transform)

    if len(dimension) > 3:
        return Categorical(dimension, transform=transform)

    # Only an empty description reaches this point.
    raise ValueError(
        "Invalid dimension {}. Read the documentation for "
        "supported types.".format(dimension)
    )
class Dimension:
    """Base class for search space dimensions.

    Subclasses are expected to provide ``self._rvs`` (a frozen distribution
    with an ``rvs(size=..., random_state=...)`` method) and
    ``self.transformer`` (an object with ``transform`` and
    ``inverse_transform``); both are used by the methods below.
    """

    prior = None

    def rvs(self, n_samples=1, random_state=None):
        """Draw random samples.

        Samples are drawn from ``self._rvs`` and mapped back through
        ``inverse_transform``.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to be drawn.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None
            for reproducible results.
        """
        rng = check_random_state(random_state)
        samples = self._rvs.rvs(size=n_samples, random_state=rng)
        return self.inverse_transform(samples)

    def transform(self, X):
        """Transform samples from the original space to a warped space."""
        return self.transformer.transform(X)

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.
        """
        return self.transformer.inverse_transform(Xt)

    def set_transformer(self):
        """Configure ``_rvs`` and ``transformer``; must be overridden."""
        raise NotImplementedError

    @property
    def size(self):
        """Number of columns this dimension occupies in the original space."""
        return 1

    @property
    def transformed_size(self):
        """Number of columns this dimension occupies in the warped space."""
        return 1

    @property
    def bounds(self):
        raise NotImplementedError

    @property
    def is_constant(self):
        raise NotImplementedError

    @property
    def transformed_bounds(self):
        raise NotImplementedError

    @property
    def name(self):
        """Name of the dimension (a string or None)."""
        return self._name

    @name.setter
    def name(self, value):
        if isinstance(value, str) or value is None:
            self._name = value
        else:
            raise ValueError("Dimension's name must be either string or None.")
def _uniform_inclusive(loc=0.0, scale=1.0):
    """Return a frozen uniform distribution on ``[loc, loc + scale]``.

    Like ``scipy.stats.uniform`` but inclusive of the upper edge: the scale
    is bumped to the next representable float above ``scale``.
    """
    # like scipy.stats.distributions but inclusive of `high`
    # XXX scale + 1. might not actually be a float after scale if
    # XXX scale is very large.
    return ss.uniform(loc=loc, scale=np.nextafter(scale, scale + 1.0))


def _normal_inclusive(loc=0.0, scale=1.0, lower=-2, upper=2):
    """Return a frozen normal distribution truncated to ``[lower, upper]``.

    ``lower`` and ``upper`` are given in the original units and converted to
    the standardized bounds that ``scipy.stats.truncnorm`` expects.
    """
    assert lower <= upper
    a, b = (lower - loc) / scale, (upper - loc) / scale
    return ss.truncnorm(a, b, loc=loc, scale=scale)
class Real(Dimension):
    """Search space dimension that can take on any real value.

    Parameters
    ----------
    low : float
        Lower bound (inclusive).

    high : float
        Upper bound (inclusive).

    prior : "uniform", "log-uniform" or "normal", default="uniform"
        Distribution to use when sampling random points for this dimension.

        - If `"uniform"`, points are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, points are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.
        - If `"normal"`, points are sampled from a normal distribution with
          mean `loc` and standard deviation `scale`, truncated to
          `[low, high]`. Only available with the "identity" transform.

    base : int
        The logarithmic base to use for a log-uniform prior.
        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with the dimension, e.g., "learning rate".

    dtype : str or dtype, default=float
        float type which will be used in inverse_transform,
        can be float.

    loc : float or None
        Mean of the "normal" prior; unused by the other priors.

    scale : float or None
        Standard deviation of the "normal" prior; unused by the other priors.
    """

    def __init__(
        self,
        low,
        high,
        prior="uniform",
        base=10,
        transform=None,
        name=None,
        dtype=float,
        loc=None,
        scale=None,
    ):
        if high <= low:
            raise ValueError(
                "the lower bound {} has to be less than the"
                " upper bound {}".format(low, high)
            )
        if prior not in ["uniform", "log-uniform", "normal"]:
            raise ValueError(
                "prior should be 'normal', 'uniform' or 'log-uniform'"
                " got {}".format(prior)
            )
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        # log10(base): dividing a log10 value by it converts to log_base.
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        self.loc = loc
        self.scale = scale
        self._rvs = None
        self.transformer = None
        self.transform_ = transform
        if isinstance(self.dtype, str) and self.dtype not in [
            "float",
            "float16",
            "float32",
            "float64",
        ]:
            raise ValueError(
                "dtype must be 'float', 'float16', 'float32'"
                "or 'float64'"
                " got {}".format(self.dtype)
            )
        elif isinstance(self.dtype, type) and not np.issubdtype(
            self.dtype, np.floating
        ):
            raise ValueError(
                "dtype must be a np.floating subtype;" " got {}".format(self.dtype)
            )

        if transform is None:
            transform = "identity"
        self.set_transformer(transform)

    def set_transformer(self, transform="identity"):
        """Define rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize' or 'identity'

        """
        self.transform_ = transform

        if self.transform_ not in ["normalize", "identity"]:
            raise ValueError(
                "transform should be 'normalize' or 'identity'"
                " got {}".format(self.transform_)
            )

        # XXX: The _rvs is for sampling in the transformed space.
        # The rvs on Dimension calls inverse_transform on the points sampled
        # using _rvs
        if self.transform_ == "normalize":
            # set upper bound to next float after 1. to make the numbers
            # inclusive of upper edge
            self._rvs = _uniform_inclusive(0.0, 1.0)
            # The "normal" prior has no normalized counterpart.
            assert self.prior in ["uniform", "log-uniform"]
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(self.low, self.high)]
                )
            else:
                self.transformer = Pipeline(
                    [
                        LogN(self.base),
                        Normalize(
                            np.log10(self.low) / self.log_base,
                            np.log10(self.high) / self.log_base,
                        ),
                    ]
                )
        else:
            if self.prior == "uniform":
                self._rvs = _uniform_inclusive(self.low, self.high - self.low)
                self.transformer = Identity()
            elif self.prior == "normal":
                self._rvs = _normal_inclusive(self.loc, self.scale, self.low, self.high)
                self.transformer = Identity()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base
                    - np.log10(self.low) / self.log_base,
                )
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        return (
            type(self) is type(other)
            and np.allclose([self.low], [other.low])
            and np.allclose([self.high], [other.high])
            and self.prior == other.prior
            and self.transform_ == other.transform_
            and self.loc == other.loc
            and self.scale == other.scale
        )

    def __repr__(self):
        return "Real(low={}, high={}, prior='{}', transform='{}', loc='{}', scale='{}')".format(
            self.low, self.high, self.prior, self.transform_, self.loc, self.scale
        )

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.
        """
        inv_transform = super(Real, self).inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        if self.dtype == float or self.dtype == "float":
            # necessary, otherwise the type is converted to a numpy type
            return inv_transform.tolist()
        else:
            return inv_transform

    @property
    def bounds(self):
        return (self.low, self.high)

    @property
    def is_constant(self):
        return self.low == self.high

    @staticmethod
    def _is_nan(point):
        """Return True when `point` is NaN (every entry NaN for an array)."""
        try:
            mask = np.isnan(point)
        except TypeError:
            # Non-numeric input: leave it to the bounds comparison.
            return False
        return np.size(mask) > 0 and bool(np.all(mask))

    def __contains__(self, point):
        if isinstance(point, list):
            point = np.array(point)
        # NaN is accepted as a member. The former `point == np.nan` test
        # could never succeed because NaN compares unequal to everything.
        if self._is_nan(point):
            return True
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        if self.transform_ == "normalize":
            return 0.0, 1.0
        else:
            if self.prior == "uniform":
                return self.low, self.high
            else:
                return (
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base,
                )

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : float
            First point.

        b : float
            Second point.

        Raises
        ------
        RuntimeError
            If either point is outside the dimension.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within "
                "the space, not %s and %s." % (a, b)
            )
        return abs(a - b)

    def update_prior(self, X, y, q=0.9):
        """Fit a Kernel Density Estimator to the data to increase density of
        samples around regions of interest instead of uniform random-sampling.

        Parameters
        ----------
        X : array-like
            Observed values of this dimension.

        y : array-like
            Objective values matching `X`.

        q : float, default=0.9
            Quantile of `y` used as threshold; only points with
            `y <= quantile` are used to fit the estimator.
        """
        X = np.array(X)
        y = np.array(y)

        y_ = np.quantile(y, q)  # threshold
        X_low = X[y <= y_]

        # It is possible that fitting the Gaussian Kernel Density Estimator
        # triggers an error, for example if all values of X_low are the same
        # (LinAlgError) or if fewer than two points are available
        # (ValueError). In this case, we fall back to uniform sampling or we
        # reuse the last fitted self._kde.
        try:
            kde = gaussian_kde(X_low)
        except (np.linalg.LinAlgError, ValueError):
            pass
        else:
            self._kde = kde

    def rvs(self, n_samples=1, random_state=None):
        """Draw random samples.

        Once `update_prior` has fitted a density estimator, samples are drawn
        from it and clipped to `[low, high]`; otherwise they come from the
        prior.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to be drawn.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None
            for reproducible results.
        """
        rng = check_random_state(random_state)
        if hasattr(self, "_kde"):
            samples = self._kde.resample(n_samples, rng).reshape(-1)
            samples = np.clip(samples, self.low, self.high)
            # NOTE(review): these samples are clipped with original-space
            # bounds yet still go through inverse_transform below; for a
            # non-identity transformer (log-uniform / normalize) that looks
            # like a double mapping -- confirm against callers.
        else:
            samples = self._rvs.rvs(size=n_samples, random_state=rng)
        return self.inverse_transform(samples)
class Integer(Dimension):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    low : int
        Lower bound (inclusive).

    high : int
        Upper bound (inclusive).

    prior : "uniform" or "log-uniform", default="uniform"
        Distribution to use when sampling random integers for
        this dimension.

        - If `"uniform"`, integers are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, integers are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    base : int
        The logarithmic base to use for a log-uniform prior.

        - Default 10, otherwise commonly 2.

    transform : "identity", "normalize", optional
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    name : str or None
        Name associated with dimension, e.g., "number of trees".

    dtype : str or dtype, default=np.int64
        integer type which will be used in inverse_transform,
        can be int, np.int16, np.uint32, np.int32, np.int64 (default).
        When set to int, `inverse_transform` returns a list instead of
        a numpy array

    loc, scale : float or None
        Stored on the instance. They are only read by the "normal" branch of
        `set_transformer`, which the constructor's prior check does not
        currently let through.
    """

    def __init__(
        self,
        low,
        high,
        prior="uniform",
        base=10,
        transform=None,
        name=None,
        dtype=np.int64,
        loc=None,
        scale=None,
    ):
        if high <= low:
            raise ValueError(
                "the lower bound {} has to be less than the"
                " upper bound {}".format(low, high)
            )
        if prior not in ["uniform", "log-uniform"]:
            raise ValueError(
                "prior should be 'uniform' or 'log-uniform'" " got {}".format(prior)
            )
        self.low = low
        self.high = high
        self.prior = prior
        self.base = base
        # log10(base): dividing a log10 value by it converts to log_base.
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype
        self.transform_ = transform
        self._rvs = None
        self.transformer = None
        self.loc = loc
        self.scale = scale

        if isinstance(self.dtype, str) and self.dtype not in [
            "int",
            "int8",
            "int16",
            "int32",
            "int64",
            "uint8",
            "uint16",
            "uint32",
            "uint64",
        ]:
            raise ValueError(
                "dtype must be 'int', 'int8', 'int16',"
                "'int32', 'int64', 'uint8',"
                "'uint16', 'uint32', or"
                "'uint64', but got {}".format(self.dtype)
            )
        elif isinstance(self.dtype, type) and self.dtype not in [
            int,
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
        ]:
            raise ValueError(
                "dtype must be 'int', 'np.int8', 'np.int16',"
                "'np.int32', 'np.int64', 'np.uint8',"
                "'np.uint16', 'np.uint32', or"
                "'np.uint64', but got {}".format(self.dtype)
            )

        if transform is None:
            transform = "identity"
        self.set_transformer(transform)

    def set_transformer(self, transform="identity"):
        """Define _rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize' or 'identity'

        """
        self.transform_ = transform

        if transform not in ["normalize", "identity"]:
            raise ValueError(
                "transform should be 'normalize' or 'identity'"
                " got {}".format(self.transform_)
            )

        if self.transform_ == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
            assert self.prior in ["uniform", "log-uniform"]
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(), Normalize(self.low, self.high, is_int=True)]
                )
            else:
                self.transformer = Pipeline(
                    [
                        LogN(self.base),
                        Normalize(
                            np.log10(self.low) / self.log_base,
                            np.log10(self.high) / self.log_base,
                        ),
                    ]
                )
        else:
            if self.prior == "uniform":
                # randint excludes its upper bound, hence the +1.
                self._rvs = ss.randint(self.low, self.high + 1)
                self.transformer = Identity()
            elif self.prior == "normal":
                self._rvs = _normal_inclusive(self.loc, self.scale, self.low, self.high)
                self.transformer = ToInteger()
            else:
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base
                    - np.log10(self.low) / self.log_base,
                )
                self.transformer = LogN(self.base)

    def __eq__(self, other):
        return (
            type(self) is type(other)
            and np.allclose([self.low], [other.low])
            and np.allclose([self.high], [other.high])
        )

    def __repr__(self):
        return "Integer(low={}, high={}, prior='{}', transform='{}')".format(
            self.low, self.high, self.prior, self.transform_
        )

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.

        Values are clipped to `[low, high]` and rounded. When the result
        contains NaN it is returned as a float array (NaN cannot be cast to
        an integer type); otherwise it is cast to `dtype`, and converted to a
        plain list when `dtype` is `int`.
        """
        # The concatenation of all transformed dimensions makes Xt to be
        # of type float, hence the required cast back to int.
        inv_transform = super(Integer, self).inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        inv_transform = np.clip(inv_transform, self.low, self.high)

        # NaN is a float and cannot be converted to int: keep the float
        # dtype, but still round the finite entries (np.round leaves NaN
        # untouched). np.any also copes with 0-d input.
        if np.any(np.isnan(inv_transform)):
            return np.round(inv_transform)

        rounded = np.round(inv_transform).astype(self.dtype)
        if self.dtype == int or self.dtype == "int":
            # necessary, otherwise the type is converted to a numpy type
            return rounded.tolist()
        else:
            return rounded

    @property
    def bounds(self):
        return (self.low, self.high)

    @property
    def is_constant(self):
        return self.low == self.high

    @staticmethod
    def _is_nan(point):
        """Return True when `point` is NaN (every entry NaN for an array)."""
        try:
            mask = np.isnan(point)
        except TypeError:
            # Non-numeric input: leave it to the bounds comparison.
            return False
        return np.size(mask) > 0 and bool(np.all(mask))

    def __contains__(self, point):
        if isinstance(point, list):
            point = np.array(point)
        # NaN is accepted as a member. The former `point == np.nan` test
        # could never succeed because NaN compares unequal to everything.
        if self._is_nan(point):
            return True
        return self.low <= point <= self.high

    @property
    def transformed_bounds(self):
        if self.transform_ == "normalize":
            return 0.0, 1.0
        else:
            if self.prior == "uniform":
                return self.low, self.high
            else:
                return (
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base,
                )

    def distance(self, a, b):
        """Compute distance between point `a` and `b`.

        Parameters
        ----------
        a : int
            First point.

        b : int
            Second point.

        Raises
        ------
        RuntimeError
            If either point is outside the dimension.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within "
                "the space, not %s and %s." % (a, b)
            )
        return abs(a - b)
class Categorical(Dimension):
    """Search space dimension that can take on categorical values.

    Parameters
    ----------
    categories : list, shape=(n_categories,)
        Sequence of possible categories.

    prior : list, shape=(categories,), default=None
        Prior probabilities for each category. By default all categories
        are equally likely.

    transform : "onehot", "string", "identity", "label", "normalize", \
            default="onehot"
        - "identity", the transformed space is the same as the original
          space.
        - "string",  the transformed space is a string encoded
          representation of the original space.
        - "label", the transformed space is a label encoded
          representation (integer) of the original space.
        - "onehot", the transformed space is a one-hot encoded
          representation of the original space.
        - "normalize", the categories are label encoded and the labels are
          scaled with `Normalize(0, n_categories - 1, is_int=True)`.

    name : str or None
        Name associated with dimension, e.g., "colors".
    """

    def __init__(self, categories, prior=None, transform=None, name=None):
        self.categories = tuple(categories)

        self.name = name

        if transform is None:
            transform = "onehot"
        self.transform_ = transform
        self.transformer = None
        self._rvs = None
        # `prior` keeps what the caller passed (possibly None); `prior_` is
        # the probability vector actually used for sampling.
        self.prior = prior

        if prior is None:
            self.prior_ = np.tile(1.0 / len(self.categories), len(self.categories))
        else:
            self.prior_ = prior
        self.set_transformer(transform)

    def set_transformer(self, transform="onehot"):
        """Define _rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize', 'onehot', 'string', 'label', or 'identity'

        """
        self.transform_ = transform
        if transform not in ["identity", "onehot", "string", "normalize", "label"]:
            raise ValueError(
                "Expected transform to be 'identity', 'string',"
                "'label' or 'onehot' got {}".format(transform)
            )
        if transform == "onehot":
            self.transformer = CategoricalEncoder()
            self.transformer.fit(self.categories)
        elif transform == "string":
            self.transformer = StringEncoder()
            self.transformer.fit(self.categories)
        elif transform == "label":
            self.transformer = LabelEncoder()
            self.transformer.fit(self.categories)
        elif transform == "normalize":
            self.transformer = Pipeline(
                [
                    LabelEncoder(list(self.categories)),
                    Normalize(0, len(self.categories) - 1, is_int=True),
                ]
            )
        else:
            # Integer-only categories are converted back to Python ints.
            if all(isinstance(x, (int, np.integer)) for x in self.categories):
                self.transformer = Identity(type_func=lambda x: int(x))
            else:
                self.transformer = Identity()
            self.transformer.fit(self.categories)
        if transform == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
        else:
            # XXX check that sum(prior) == 1
            self._rvs = ss.rv_discrete(
                values=(range(len(self.categories)), self.prior_)
            )

    def __eq__(self, other):
        return (
            type(self) is type(other)
            and self.categories == other.categories
            and np.allclose(self.prior_, other.prior_)
        )

    def __repr__(self):
        if len(self.categories) > 7:
            cats = self.categories[:3] + (_Ellipsis(),) + self.categories[-3:]
        else:
            cats = self.categories

        if self.prior is not None and len(self.prior) > 7:
            # The prior may be a list, tuple or array; go through lists so
            # the concatenation works for all of them.
            prior = list(self.prior[:3]) + [_Ellipsis()] + list(self.prior[-3:])
        else:
            prior = self.prior

        return "Categorical(categories={}, prior={}, transform={})".format(
            cats, prior, self.transform_
        )

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back into the
        original space.

        A list returned by the transformer is converted to a numpy array.
        """
        inv_transform = super(Categorical, self).inverse_transform(Xt)
        if isinstance(inv_transform, list):
            inv_transform = np.array(inv_transform)
        return inv_transform

    def rvs(self, n_samples=None, random_state=None):
        """Draw random categories.

        Parameters
        ----------
        n_samples : int or None
            The number of samples to be drawn. With None a single draw is
            made.

        random_state : int, RandomState instance, or None (default)
            Passed on to the underlying scipy distribution.
        """
        choices = self._rvs.rvs(size=n_samples, random_state=random_state)

        if isinstance(choices, numbers.Integral):
            # Single index drawn from the discrete prior.
            return self.categories[choices]
        elif self.transform_ == "normalize" and isinstance(choices, float):
            return self.inverse_transform([(choices)])
        elif self.transform_ == "normalize":
            return self.inverse_transform(list(choices))
        else:
            return [self.categories[c] for c in choices]

    @property
    def transformed_size(self):
        if self.transform_ == "onehot":
            size = len(self.categories)
            # when len(categories) == 2, CategoricalEncoder outputs a
            # single value
            return size if size != 2 else 1
        return 1

    @property
    def bounds(self):
        return self.categories

    @property
    def is_constant(self):
        return len(self.categories) <= 1

    def __contains__(self, point):
        return point in self.categories

    @property
    def transformed_bounds(self):
        if self.transformed_size == 1:
            N = len(self.categories)
            if self.transform_ == "label":
                return 0.0, float(N - 1)
            elif self.transform_ == "identity":
                return min(self.categories), max(self.categories)
            else:
                return 0.0, 1.0
        else:
            return [(0.0, 1.0) for i in range(self.transformed_size)]

    def distance(self, a, b):
        """Compute distance between category `a` and `b`.

        As categories have no order the distance between two points is one
        if a != b and zero otherwise.

        Parameters
        ----------
        a : category
            First category.

        b : category
            Second category.

        Raises
        ------
        RuntimeError
            If either category is not part of the dimension.
        """
        if not (a in self and b in self):
            raise RuntimeError(
                "Can only compute distance for values within"
                " the space, not {} and {}.".format(a, b)
            )
        return 1 if a != b else 0
def _sample_dimension(dim, i, n_samples, random_state, out):
    """Wrapper to sample dimension for joblib parallelization.

    Draws `n_samples` values from `dim` and writes them into column `i` of
    `out[0]`. The array is wrapped in a one-element list by the caller, and
    the function returns nothing: the result is the in-place write.
    """
    out[0][:, i] = dim.rvs(n_samples=n_samples, random_state=random_state)
class Space:
    """Initialize a search space from given specifications.

    Parameters
    ----------
    dimensions : list, shape=(n_dims,)
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    model_sdv : object or None
        Generative model used for sampling. `rvs` calls
        `model_sdv.sample(n_samples)` and reads `.columns` on the result.

    config_space : ConfigSpace.ConfigurationSpace or None
        Configuration space used for sampling in `rvs`.

    .. note::
        The upper and lower bounds are inclusive for `Integer`
        dimensions.
    """

    def __init__(self, dimensions, model_sdv=None, config_space=None):
        # attribute used when a generative model is used to sample
        self.model_sdv = model_sdv

        # attribute use when a config space is used to sample
        assert config_space is None or isinstance(config_space, CS.ConfigurationSpace)
        self.config_space = config_space

        self.dimensions = [check_dimension(dim) for dim in dimensions]

    def __eq__(self, other):
        # zip() stops at the shorter operand, so compare lengths first:
        # a space must not equal another one that merely shares a prefix.
        if len(self.dimensions) != len(other.dimensions):
            return False
        return all(a == b for a, b in zip(self.dimensions, other.dimensions))

    def __repr__(self):
        if len(self.dimensions) > 31:
            dims = self.dimensions[:15] + [_Ellipsis()] + self.dimensions[-15:]
        else:
            dims = self.dimensions
        return "Space([{}])".format(",\n ".join(map(str, dims)))

    def __iter__(self):
        return iter(self.dimensions)

    @property
    def dimension_names(self):
        """
        Names of all the dimensions in the search-space.

        Unnamed dimensions are reported as ``X_<index>``.
        """
        names = []
        for index, dim in enumerate(self.dimensions):
            if dim.name is None:
                names.append("X_%d" % index)
            else:
                names.append(dim.name)
        return names

    @property
    def is_real(self):
        """
        Returns true if all dimensions are Real
        """
        return all([isinstance(dim, Real) for dim in self.dimensions])

    @classmethod
    def from_yaml(cls, yml_path, namespace=None):
        """Create Space from yaml configuration file

        Parameters
        ----------
        yml_path : str
            Full path to yaml configuration file, example YaML below:

            Space:
              - Integer:
                  low: -5
                  high: 5
              - Categorical:
                  categories:
                    - a
                    - b
              - Real:
                  low: 1.0
                  high: 5.0
                  prior: log-uniform

        namespace : str, default=None
           Namespace within configuration file to use, will use first
           namespace if not provided

        Returns
        -------
        space : Space
           Instantiated Space object

        Raises
        ------
        TypeError
            If the YaML document is neither a list nor a dictionary.
        """
        with open(yml_path, "rb") as f:
            config = yaml.safe_load(f)

        dimension_classes = {
            "real": Real,
            "integer": Integer,
            "categorical": Categorical,
        }

        # Extract space options for configuration file
        if isinstance(config, dict):
            if namespace is None:
                options = next(iter(config.values()))
            else:
                options = config[namespace]
        elif isinstance(config, list):
            options = config
        else:
            raise TypeError("YaML does not specify a list or dictionary")

        # Populate list with Dimension objects
        dimensions = []
        for option in options:
            key = next(iter(option.keys()))
            # Make configuration case insensitive
            dimension_class = key.lower()
            values = {k.lower(): v for k, v in option[key].items()}
            # Entries with an unknown class name are skipped.
            if dimension_class in dimension_classes:
                # Instantiate Dimension subclass and add it to the list
                dimension = dimension_classes[dimension_class](**values)
                dimensions.append(dimension)

        space = cls(dimensions=dimensions)

        return space

    def rvs(self, n_samples=1, random_state=None, n_jobs=1):
        """Draw random samples.

        The samples are in the original space. They need to be transformed
        before being passed to a model or minimizer by `space.transform()`.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to be drawn from the space.

        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None
            for reproducible results.

        n_jobs : int, default=1
            Number of jobs given to `Parallel` when the dimensions are
            sampled independently (no config space, no generative model).

        Returns
        -------
        points : list of lists, shape=(n_points, n_dims)
           Points sampled from the space.
        """
        rng = check_random_state(random_state)

        # NOTE(review): the nesting of this method was reconstructed from a
        # whitespace-collapsed listing; the placement of the "records"
        # conversion inside the generative-model branch should be confirmed
        # against the upstream file.
        if self.config_space:
            req_points = []

            hps_names = self.config_space.get_hyperparameter_names()

            if self.model_sdv is None:
                confs = self.config_space.sample_configuration(n_samples)

                if n_samples == 1:
                    confs = [confs]
            else:
                confs = self.model_sdv.sample(n_samples)

                sdv_names = confs.columns

                new_hps_names = list(set(hps_names) - set(sdv_names))

                # randomly sample the new hyperparameters
                for name in new_hps_names:
                    hp = self.config_space.get_hyperparameter(name)
                    rvs = []
                    for i in range(n_samples):
                        v = hp._sample(rng)
                        rv = hp._transform(v)
                        rvs.append(rv)
                    confs[name] = rvs

                # reorder the column names
                confs = confs[hps_names]

                confs = confs.to_dict("records")
                for idx, conf in enumerate(confs):
                    cf = deactivate_inactive_hyperparameters(conf, self.config_space)
                    confs[idx] = cf.get_dictionary()

            for idx, conf in enumerate(confs):
                point = []
                for i, hps_name in enumerate(hps_names):
                    # If the parameter is inactive due to some conditions then we attribute the
                    # lower bound value to break symmetries and enforce the same representation.
                    if hps_name in conf.keys():
                        val = conf[hps_name]
                    else:
                        val = self.dimensions[i].bounds[0]
                    point.append(val)
                req_points.append(point)

            return req_points
        else:
            if self.model_sdv is None:
                # Regular sampling without transfer learning from flat search space
                # Joblib parallel optimization
                # Draw
                columns = np.zeros((n_samples, len(self.dimensions)), dtype="O")
                # One independent seed per dimension, derived from `rng`.
                random_states = rng.randint(
                    low=0, high=2**31, size=len(self.dimensions)
                )
                Parallel(n_jobs=n_jobs, verbose=0, require="sharedmem")(
                    delayed(_sample_dimension)(
                        dim,
                        i,
                        n_samples,
                        np.random.RandomState(random_states[i]),
                        [columns],
                    )
                    for i, dim in enumerate(self.dimensions)
                )
                return columns.tolist()
            else:
                confs = self.model_sdv.sample(n_samples)  # sample from SDV

                columns = []
                for dim in self.dimensions:
                    if dim.name in confs.columns:
                        columns.append(confs[dim.name].values.tolist())
                    else:
                        columns.append(dim.rvs(n_samples=n_samples, random_state=rng))

                # Transpose
                return _transpose_list_array(columns)

    def set_transformer(self, transform):
        """Sets the transformer of all dimension objects to `transform`

        Parameters
        ----------
        transform : str or list of str
           Sets all transformer,, when `transform`  is a string.
           Otherwise, transform must be a list with strings with
           the same length as `dimensions`
        """
        # Transform
        for j in range(self.n_dims):
            if isinstance(transform, list):
                self.dimensions[j].set_transformer(transform[j])
            else:
                self.dimensions[j].set_transformer(transform)

    def set_transformer_by_type(self, transform, dim_type):
        """Sets the transformer of `dim_type` objects to `transform`

        Parameters
        ----------
        transform : str
           Sets all transformer of type `dim_type` to `transform`
        dim_type : type
            Can be `deephyper.skopt.space.Real`, `deephyper.skopt.space.Integer` or
            `deephyper.skopt.space.Categorical`
        """
        # Transform
        for j in range(self.n_dims):
            if isinstance(self.dimensions[j], dim_type):
                self.dimensions[j].set_transformer(transform)

    def get_transformer(self):
        """Returns all transformers as list"""
        return [self.dimensions[j].transform_ for j in range(self.n_dims)]

    def transform(self, X):
        """Transform samples from the original space into a warped space.

        Note: this transformation is expected to be used to project samples
              into a suitable space for numerical optimization.

        Parameters
        ----------
        X : list of lists, shape=(n_samples, n_dims)
            The samples to transform.

        Returns
        -------
        Xt : array of floats, shape=(n_samples, transformed_n_dims)
            The transformed samples.
        """
        # Pack by dimension
        columns = [list() for _ in self.dimensions]

        for i in range(len(X)):
            for j in range(self.n_dims):
                columns[j].append(X[i][j])

        # Transform
        for j in range(self.n_dims):
            columns[j] = self.dimensions[j].transform(columns[j])

        # Repack as an array
        Xt = np.hstack([np.asarray(c).reshape((len(X), -1)) for c in columns])

        return Xt

    def inverse_transform(self, Xt):
        """Inverse transform samples from the warped space back to the
           original space.

        Parameters
        ----------
        Xt : array of floats, shape=(n_samples, transformed_n_dims)
            The samples to inverse transform.

        Returns
        -------
        X : list of lists, shape=(n_samples, n_dims)
            The original samples.
        """
        # Inverse transform
        columns = []
        start = 0
        Xt = np.asarray(Xt)
        for j in range(self.n_dims):
            dim = self.dimensions[j]
            offset = dim.transformed_size

            # A dimension spanning one column gets a 1-D slice, a wider one
            # (e.g. one-hot) gets its block of columns.
            if offset == 1:
                columns.append(dim.inverse_transform(Xt[:, start]))
            else:
                columns.append(dim.inverse_transform(Xt[:, start : start + offset]))

            start += offset

        # Transpose
        return _transpose_list_array(columns)

    @property
    def n_dims(self):
        """The dimensionality of the original space."""
        return len(self.dimensions)

    @property
    def transformed_n_dims(self):
        """The dimensionality of the warped space."""
        return sum([dim.transformed_size for dim in self.dimensions])

    @property
    def bounds(self):
        """The dimension bounds, in the original space."""
        b = []

        for dim in self.dimensions:
            if dim.size == 1:
                b.append(dim.bounds)
            else:
                b.extend(dim.bounds)

        return b

    def __contains__(self, point):
        """Check that `point` is within the bounds of the space."""
        for component, dim in zip(point, self.dimensions):
            if component not in dim:
                return False
        return True

    def __getitem__(self, dimension_names):
        """
        Lookup and return the search-space dimension with the given name.

        This allows for dict-like lookup of dimensions, for example:
        `space['foo']` returns the dimension named 'foo' if it exists,
        otherwise `None` is returned.

        It also allows for lookup of a list of dimension-names, for example:
        `space[['foo', 'bar']]` returns the two dimensions named
        'foo' and 'bar' if they exist.

        An integer is matched against the position of the dimension.

        Parameters
        ----------
        dimension_names : str or list(str)
            Name of a single search-space dimension (str).
            List of names for search-space dimensions (list(str)).

        Returns
        -------
        dims tuple (index, Dimension), list(tuple(index, Dimension)), \
                (None, None)
            A single search-space dimension with the given name,
            or a list of search-space dimensions with the given names.

        Raises
        ------
        ValueError
            If `dimension_names` is not a str, int, list or tuple.
        """

        def _get(dimension_name):
            """Helper-function for getting a single dimension."""
            # Get the index of the search-space dimension using its name.
            for index, dim in enumerate(self.dimensions):
                if dimension_name == dim.name:
                    return (index, dim)
                elif dimension_name == index:
                    return (index, dim)
            return (None, None)

        if isinstance(dimension_names, (str, int)):
            # Get a single search-space dimension.
            dims = _get(dimension_name=dimension_names)
        elif isinstance(dimension_names, (list, tuple)):
            # Get a list of search-space dimensions.
            # Note that we do not check whether the names are really strings.
            dims = [_get(dimension_name=name) for name in dimension_names]
        else:
            msg = (
                "Dimension name should be either string or"
                "list of strings, but got {}."
            )
            raise ValueError(msg.format(type(dimension_names)))

        return dims

    @property
    def transformed_bounds(self):
        """The dimension bounds, in the warped space."""
        b = []

        for dim in self.dimensions:
            if dim.transformed_size == 1:
                b.append(dim.transformed_bounds)
            else:
                b.extend(dim.transformed_bounds)

        return b

    @property
    def is_categorical(self):
        """Space contains exclusively categorical dimensions"""
        return all([isinstance(dim, Categorical) for dim in self.dimensions])

    @property
    def is_partly_categorical(self):
        """Space contains any categorical dimensions"""
        return any([isinstance(dim, Categorical) for dim in self.dimensions])

    @property
    def n_constant_dimensions(self):
        """Returns the number of constant dimensions which have zero degree of
        freedom, e.g. an Integer dimensions with (0., 0.) as bounds.
        """
        n = 0
        for dim in self.dimensions:
            if dim.is_constant:
                n += 1
        return n

    def distance(self, point_a, point_b):
        """Compute distance between two points in this space.

        The result is the sum of the per-dimension distances.

        Parameters
        ----------
        point_a : array
            First point.

        point_b : array
            Second point.
        """
        distance = 0.0
        for a, b, dim in zip(point_a, point_b, self.dimensions):
            distance += dim.distance(a, b)

        return distance

    def update_prior(self, X, y, q=0.9):
        """Update the prior of the dimensions. Instead of doing random-sampling,
        a kernel density estimation is fit on the region of interest and
        sampling is performed from this distribution.

        Dimensions without an `update_prior` method are left untouched.
        """
        y = np.array(y)

        for i, dim in enumerate(self.dimensions):
            Xi = [x[i] for x in X]
            if hasattr(dim, "update_prior"):
                dim.update_prior(Xi, y, q=q)