Source code for deephyper.ensemble._base_ensemble
import abc
import json
import os
import ray
class BaseEnsemble(abc.ABC):
    """Base class for ensembles; every new ensemble algorithm needs to extend this class.

    Args:
        model_dir (str): Path to directory containing saved Keras models in .h5 format.
        loss (callable): a callable taking (y_true, y_pred) as input.
        size (int, optional): Number of unique models used in the ensemble. Defaults to 5.
        verbose (bool, optional): Verbose mode. Defaults to True.
        ray_address (str, optional): Address of the Ray cluster. If "auto" it will try to connect to an existing cluster. If "" it will start a local Ray cluster. Defaults to "".
        num_cpus (int, optional): Number of CPUs allocated to load one model and predict. Defaults to 1.
        num_gpus (int, optional): Number of GPUs allocated to load one model and predict. Defaults to None.
        batch_size (int, optional): Batch size used to batch the inference of loaded models. Defaults to 32.
    """

    def __init__(
        self,
        model_dir,
        loss,
        size=5,
        verbose=True,
        ray_address="",
        num_cpus=1,
        num_gpus=None,
        batch_size=32,
    ):
        # Stored as an absolute path so it stays valid if the working
        # directory changes later.
        self.model_dir = os.path.abspath(model_dir)
        self.loss = loss
        # File names of the selected members; empty until populated by a
        # subclass or by ``load_members_files``.
        self.members_files = []
        self.size = size
        self.verbose = verbose
        self.ray_address = ray_address
        self.num_cpus = num_cpus
        self.num_gpus = num_gpus
        self.batch_size = batch_size

        # Only initialize Ray when no session is already active in this process.
        if not ray.is_initialized():
            ray.init(address=self.ray_address)

    def __repr__(self) -> str:
        """Return a multi-line, human-readable summary of the ensemble."""
        return "".join(
            (
                f"Model Dir: {self.model_dir}\n",
                f"Members files: {self.members_files}\n",
                f"Ensemble size: {len(self.members_files)}/{self.size}\n",
            )
        )

    def _list_files_in_model_dir(self):
        """List the file names in ``model_dir`` that carry the ``.h5`` extension.

        Returns:
            list: File names (not full paths), in the order given by ``os.listdir``.
        """
        # Match the full ".h5" extension; comparing only the last two
        # characters would also accept names such as "graph5".
        return [f for f in os.listdir(self.model_dir) if f.endswith(".h5")]

    @abc.abstractmethod
    def fit(self, X, y):
        """Fit the current algorithm to the provided data.

        Args:
            X (array): The input data.
            y (array): The output data.

        Returns:
            BaseEnsemble: The current fitted instance.
        """

    @abc.abstractmethod
    def predict(self, X):
        """Execute an inference of the ensemble for the provided data.

        Args:
            X (array): An array of input data.

        Returns:
            array: The prediction.
        """

    @abc.abstractmethod
    def evaluate(self, X, y, metrics=None):
        """Compute metrics based on the provided data.

        Args:
            X (array): An array of input data.
            y (array): An array of true output data.
            metrics (callable, optional): A metric. Defaults to None.
        """

    def load_members_files(self, file: str = "ensemble.json") -> None:
        """Load the members composing an ensemble.

        Args:
            file (str, optional): Path of JSON file containing the ensemble members. All members need to be accessible in ``model_dir``. Defaults to "ensemble.json".
        """
        with open(file, "r", encoding="utf-8") as f:
            self.members_files = json.load(f)

    def save_members_files(self, file: str = "ensemble.json") -> None:
        """Save the list of file names of the members of the ensemble in a JSON file.

        Args:
            file (str, optional): Path of the JSON file where the file names are saved. Defaults to "ensemble.json".
        """
        with open(file, "w", encoding="utf-8") as f:
            json.dump(self.members_files, f)

    def load(self, file: str) -> None:
        """Load an ensemble from a save.

        Args:
            file (str): Path to the save of the ensemble.
        """
        self.load_members_files(file)

    def save(self, file: str = None) -> None:
        """Save an ensemble.

        Args:
            file (str, optional): Path to the save of the ensemble. Defaults to None, in which case the default path of ``save_members_files`` is used.
        """
        # Forwarding ``None`` would make ``open`` raise a TypeError, so fall
        # back to the default file name of ``save_members_files`` instead.
        if file is None:
            self.save_members_files()
        else:
            self.save_members_files(file)