Source code for deephyper.skopt.learning.gbrt

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.utils import check_random_state

from deephyper.core.utils.joblib_utils import Parallel, delayed


def _parallel_fit(regressor, X, y):
    return regressor.fit(X, y)


class GradientBoostingQuantileRegressor(BaseEstimator, RegressorMixin):
    """Predict several quantiles with one estimator.

    This is a wrapper around `GradientBoostingRegressor`'s quantile regression
    that allows you to predict several `quantiles` in one go.

    Parameters
    ----------
    quantiles : array-like
        Quantiles to predict. By default the 16, 50 and 84% quantiles are
        predicted.

    base_estimator : GradientBoostingRegressor or HistGradientBoostingRegressor instance, or None (default)
        Quantile regressor used to make predictions. Only instances of
        `GradientBoostingRegressor` or `HistGradientBoostingRegressor` are
        supported. Use this to change the hyper-parameters of the estimator.

    n_jobs : int, default=1
        The number of jobs to run in parallel for `fit`.
        If -1, then the number of jobs is set to the number of cores.

    random_state : int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.
    """

    def __init__(
        self,
        quantiles=[0.16, 0.5, 0.84],
        base_estimator=None,
        n_jobs=1,
        random_state=None,
    ):
        self.quantiles = quantiles
        self.random_state = random_state
        self.base_estimator = base_estimator
        self.n_jobs = n_jobs
    def fit(self, X, y):
        """Fit one regressor for each quantile.

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like, shape=(n_samples,)
            Target values (real numbers in regression).
        """
        rng = check_random_state(self.random_state)

        if self.base_estimator is None:
            base_estimator = GradientBoostingRegressor(loss="quantile")
        else:
            base_estimator = self.base_estimator

            if not isinstance(
                base_estimator,
                (GradientBoostingRegressor, HistGradientBoostingRegressor),
            ):
                raise ValueError(
                    "base_estimator has to be of type"
                    " GradientBoostingRegressor or HistGradientBoostingRegressor."
                )

            if not base_estimator.loss == "quantile":
                raise ValueError(
                    "base_estimator has to use quantile"
                    " loss not %s" % base_estimator.loss
                )

        # The predictions for different quantiles should be sorted.
        # Therefore each of the regressors needs the same seed.
        base_estimator.set_params(random_state=rng)
        regressors = []
        for q in self.quantiles:
            regressor = clone(base_estimator)
            if isinstance(regressor, GradientBoostingRegressor):
                regressor.set_params(alpha=q)
            elif isinstance(regressor, HistGradientBoostingRegressor):
                regressor.set_params(quantile=q)
            regressors.append(regressor)

        self.regressors_ = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(_parallel_fit)(regressor, X, y) for regressor in regressors
        )

        return self
    def predict(self, X, return_std=False, return_quantiles=False):
        """Predict.

        Predict `X` at every quantile if `return_std` is set to False.
        If `return_std` is set to True, then return the mean and the
        predicted standard deviation, which is approximated as the
        (0.84th quantile - 0.16th quantile) divided by 2.0.
        If `return_quantiles` is set to True, then return the predictions
        for all quantiles, with shape (n_samples, n_quantiles).

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            where `n_samples` is the number of samples
            and `n_features` is the number of features.
        """
        predicted_quantiles = np.asarray([rgr.predict(X) for rgr in self.regressors_])
        if return_quantiles:
            return predicted_quantiles.T

        elif return_std:
            std_quantiles = [0.16, 0.5, 0.84]
            is_present_mask = np.in1d(std_quantiles, self.quantiles)
            if not np.all(is_present_mask):
                raise ValueError(
                    "return_std works only if the quantiles during "
                    "instantiation include 0.16, 0.5 and 0.84"
                )
            low = self.regressors_[self.quantiles.index(0.16)].predict(X)
            high = self.regressors_[self.quantiles.index(0.84)].predict(X)
            mean = self.regressors_[self.quantiles.index(0.5)].predict(X)
            std = (high - low) / 2.0

            # This avoids NaN when computing the Negative Log-likelihood
            std[std <= 0.01] = 0.01
            return mean, std

        # return the mean
        return self.regressors_[self.quantiles.index(0.5)].predict(X)
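
# ---------------------------------------------------------------------------
# Usage sketch (not part of the upstream module): a minimal illustration of
# how the wrapper fits one regressor per quantile and how `return_std`
# derives an uncertainty estimate from the 0.16/0.84 quantile spread.
# The toy data and variable names below are purely illustrative assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Toy 1D regression problem with homoscedastic noise.
    rng = np.random.RandomState(0)
    X = rng.uniform(-2.0, 2.0, size=(200, 1))
    y = np.sin(3 * X).ravel() + 0.3 * rng.randn(200)

    model = GradientBoostingQuantileRegressor(random_state=0)
    model.fit(X, y)

    # Median prediction only (the 0.5 quantile).
    y_mean = model.predict(X)

    # Mean and standard deviation, where std ~= (q84 - q16) / 2 and is
    # floored at 0.01 to avoid degenerate values.
    y_mean, y_std = model.predict(X, return_std=True)

    # All predicted quantiles, with shape (n_samples, n_quantiles).
    y_quantiles = model.predict(X, return_quantiles=True)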