Source code for deephyper.ensemble._base_ensemble

import abc
import json
import os

import ray


class BaseEnsemble(abc.ABC):
    """Base class for ensembles; every new ensemble algorithm needs to extend it.

    Args:
        model_dir (str): Path to directory containing saved Keras models in .h5
            format.
        loss (callable): a callable taking (y_true, y_pred) as input.
        size (int, optional): Number of unique models used in the ensemble.
            Defaults to 5.
        verbose (bool, optional): Verbose mode. Defaults to True.
        ray_address (str, optional): Address of the Ray cluster. If "auto" it will
            try to connect to an existing cluster. If "" it will start a local Ray
            cluster. Defaults to "".
        num_cpus (int, optional): Number of CPUs allocated to load one model and
            predict. Defaults to 1.
        num_gpus (int, optional): Number of GPUs allocated to load one model and
            predict. Defaults to None.
        batch_size (int, optional): Batch size used to batchify the inference of
            loaded models. Defaults to 32.
    """

    def __init__(
        self,
        model_dir,
        loss,
        size=5,
        verbose=True,
        ray_address="",
        num_cpus=1,
        num_gpus=None,
        batch_size=32,
    ):
        # Stored as an absolute path so later changes of the working directory
        # do not change which directory is listed.
        self.model_dir = os.path.abspath(model_dir)
        self.loss = loss
        # File names of the selected members; empty until filled by a subclass
        # or by ``load_members_files``.
        self.members_files = []
        self.size = size
        self.verbose = verbose
        self.ray_address = ray_address
        self.num_cpus = num_cpus
        self.num_gpus = num_gpus
        self.batch_size = batch_size

        # Only initialise Ray when no session is active in this process.
        if not ray.is_initialized():
            ray.init(address=self.ray_address)

    def __repr__(self) -> str:
        """Return a multi-line summary of the model directory and members."""
        return (
            f"Model Dir: {self.model_dir}\n"
            f"Members files: {self.members_files}\n"
            f"Ensemble size: {len(self.members_files)}/{self.size}\n"
        )

    def _list_files_in_model_dir(self):
        """Return the names of the ``.h5`` files found in ``model_dir``.

        Returns:
            list: File names (not full paths), in the order given by
            ``os.listdir``.
        """
        # Match the real ".h5" extension rather than any name that merely ends
        # with the two characters "h5".
        return [f for f in os.listdir(self.model_dir) if f.endswith(".h5")]

    @abc.abstractmethod
    def fit(self, X, y):
        """Fit the current algorithm to the provided data.

        Args:
            X (array): The input data.
            y (array): The output data.

        Returns:
            BaseEnsemble: The current fitted instance.
        """

    @abc.abstractmethod
    def predict(self, X):
        """Execute an inference of the ensemble for the provided data.

        Args:
            X (array): An array of input data.

        Returns:
            array: The prediction.
        """

    @abc.abstractmethod
    def evaluate(self, X, y, metrics=None):
        """Compute metrics based on the provided data.

        Args:
            X (array): An array of input data.
            y (array): An array of true output data.
            metrics (callable, optional): A metric. Defaults to None.
        """

    def load_members_files(self, file: str = "ensemble.json") -> None:
        """Load the members composing an ensemble.

        Args:
            file (str, optional): Path of JSON file containing the ensemble
                members. All members need to be accessible in ``model_dir``.
                Defaults to "ensemble.json".
        """
        # Read as UTF-8 explicitly instead of the locale default.
        with open(file, "r", encoding="utf-8") as f:
            self.members_files = json.load(f)

    def save_members_files(self, file: str = "ensemble.json") -> None:
        """Save the list of file names of the members of the ensemble in a JSON file.

        Args:
            file (str, optional): Path of the JSON file where the file names are
                saved. Defaults to "ensemble.json".
        """
        with open(file, "w", encoding="utf-8") as f:
            json.dump(self.members_files, f)

    def load(self, file: str) -> None:
        """Load an ensemble from a save.

        Args:
            file (str): Path to the save of the ensemble.
        """
        self.load_members_files(file)

    def save(self, file: str = None) -> None:
        """Save an ensemble.

        Args:
            file (str, optional): Path to the save of the ensemble. When None,
                the default path of ``save_members_files`` is used.
        """
        # Forwarding None would end in ``open(None, "w")`` and raise TypeError,
        # so fall back to the default of ``save_members_files`` instead.
        if file is None:
            self.save_members_files()
        else:
            self.save_members_files(file)