# Source code for deephyper.skopt.learning.gbrt

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.utils import check_random_state

from deephyper.skopt.joblib import Parallel, delayed


def _parallel_fit(regressor, X, y):
    return regressor.fit(X, y)


class GradientBoostingQuantileRegressor(BaseEstimator, RegressorMixin):
    """Predict several quantiles with one estimator.

    This is a wrapper around the quantile regression of
    ``GradientBoostingRegressor`` / ``HistGradientBoostingRegressor`` that
    allows you to predict several quantiles in one go. One regressor is
    fitted per requested quantile.

    Args:
        quantiles (array-like): Quantiles to predict. By default, the 16%,
            50%, and 84% quantiles are predicted. ``predict`` needs the 0.5
            quantile (and 0.16 / 0.84 when ``return_std=True``) to be present.
        base_estimator (GradientBoostingRegressor | HistGradientBoostingRegressor | None):
            Quantile regressor used to make predictions. It must use the
            ``"quantile"`` loss. Use this to change the hyper-parameters of
            the estimator. If None, ``GradientBoostingRegressor(loss="quantile")``
            is used. Defaults to None.
        n_jobs (int): Number of jobs to run in parallel for ``fit``. If -1,
            then the number of jobs is set to the number of cores.
            Defaults to 1.
        random_state (int | RandomState | None): Controls the randomness of
            the estimator. Set this to something other than ``None`` for
            reproducible results. Defaults to None.
    """

    def __init__(
        self,
        quantiles=(0.16, 0.5, 0.84),
        base_estimator=None,
        n_jobs=1,
        random_state=None,
    ):
        # The default is an immutable tuple so it cannot be shared and
        # mutated across instances; lists and arrays are still accepted.
        self.quantiles = quantiles
        self.random_state = random_state
        self.base_estimator = base_estimator
        self.n_jobs = n_jobs

    def set_params(self, **params):
        """Forward hyper-parameters to the wrapped base estimator.

        Unlike the default scikit-learn behaviour, the given parameters are
        applied to ``base_estimator`` and not to this wrapper.

        Args:
            **params: Parameters passed to ``base_estimator.set_params``.

        Returns:
            self
        """
        if not params:
            return self
        if self.base_estimator is None:
            # No estimator was supplied: materialise the same default that
            # ``fit`` would use so there is something to configure.
            self.base_estimator = GradientBoostingRegressor(loss="quantile")
        self.base_estimator.set_params(**params)
        return self

    def fit(self, X, y):
        """Fit one regressor for each quantile.

        Args:
            X : array-like, shape=(n_samples, n_features)
                Training vectors, where `n_samples` is the number of samples
                and `n_features` is the number of features.
            y : array-like, shape=(n_samples,)
                Target values (real numbers in regression)

        Returns:
            self, with the fitted regressors stored in ``regressors_`` in the
            same order as ``quantiles``.

        Raises:
            ValueError: If ``base_estimator`` is not a
                ``GradientBoostingRegressor`` or
                ``HistGradientBoostingRegressor``, or does not use the
                quantile loss.
        """
        rng = check_random_state(self.random_state)

        if self.base_estimator is None:
            base_estimator = GradientBoostingRegressor(loss="quantile")
        else:
            if not isinstance(
                self.base_estimator,
                (GradientBoostingRegressor, HistGradientBoostingRegressor),
            ):
                raise ValueError(
                    "base_estimator has to be of type"
                    " GradientBoostingRegressor or HistGradientBoostingRegressor."
                )
            if self.base_estimator.loss != "quantile":
                raise ValueError(
                    "base_estimator has to use quantile"
                    " loss not %s" % self.base_estimator.loss
                )
            # Work on a copy so fitting never modifies the estimator the
            # caller passed in.
            base_estimator = clone(self.base_estimator)

        # The predictions for different quantiles should be sorted.
        # Therefore each of the regressors need the same seed: the seed is
        # set on the template *before* cloning so that every clone receives
        # its own identical copy of the random state.
        base_estimator.set_params(random_state=rng)

        regressors = []
        for q in self.quantiles:
            regressor = clone(base_estimator)
            # The two supported estimators name the target quantile
            # differently.
            if isinstance(regressor, GradientBoostingRegressor):
                regressor.set_params(alpha=q)
            elif isinstance(regressor, HistGradientBoostingRegressor):
                regressor.set_params(quantile=q)
            regressors.append(regressor)

        self.regressors_ = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(_parallel_fit)(regressor, X, y) for regressor in regressors
        )

        return self

    def predict(self, X, return_std=False, return_quantiles=False):
        """Predict.

        By default the prediction of the 0.5 quantile (the median) is
        returned. If `return_quantiles` is True, the predictions of every
        quantile are returned instead. Otherwise, if `return_std` is True,
        the median and a standard deviation are returned, the latter being
        approximated as (0.84th quantile - 0.16th quantile) divided by 2.0.

        Args:
            X : array-like, shape=(n_samples, n_features)
                where `n_samples` is the number of samples
                and `n_features` is the number of features.
            return_std (bool): Return ``(median, std)``. Ignored when
                `return_quantiles` is True. Defaults to False.
            return_quantiles (bool): Return the predictions of all quantiles
                as an array of shape (n_samples, n_quantiles).
                Defaults to False.

        Returns:
            The median prediction, the ``(median, std)`` tuple, or the array
            of all quantile predictions, depending on the flags above.

        Raises:
            ValueError: If the quantiles needed for the requested output
                (0.5, plus 0.16 and 0.84 for `return_std`) were not given at
                instantiation.
        """
        # One row per fitted regressor, in the order of ``self.quantiles``.
        predicted_quantiles = np.asarray([rgr.predict(X) for rgr in self.regressors_])

        if return_quantiles:
            return predicted_quantiles.T

        # A list copy makes ``.index`` available for any array-like input.
        quantiles = list(self.quantiles)

        if return_std:
            std_quantiles = [0.16, 0.5, 0.84]
            is_present_mask = np.isin(std_quantiles, quantiles)
            if not np.all(is_present_mask):
                raise ValueError(
                    "return_std works only if the quantiles during "
                    "instantiation include 0.16, 0.5 and 0.84"
                )
            # Reuse the predictions computed above instead of predicting again.
            low = predicted_quantiles[quantiles.index(0.16)]
            high = predicted_quantiles[quantiles.index(0.84)]
            mean = predicted_quantiles[quantiles.index(0.5)]

            std = (high - low) / 2.0

            # This avoids NaN when computing the Negative Log-likelihood
            std[std <= 0.01] = 0.01

            return mean, std

        if 0.5 not in quantiles:
            raise ValueError(
                "predict works only if the quantiles during instantiation include 0.5"
            )

        # Return the median prediction.
        return predicted_quantiles[quantiles.index(0.5)]