from abc import ABC, abstractmethod
from typing import List
import numpy as np
from deepmol.datasets import Dataset
from deepmol.evaluator.evaluator import Evaluator
from deepmol.metrics.metrics import Metric
from deepmol.models.models import Model
class Ensemble(ABC):
    """
    Abstract class for ensembles of models.

    An ensemble keeps a list of models, fits each of them on the same dataset
    and leaves the way their predictions are combined to subclasses, which
    must implement `predict`.
    """

    def __init__(self, models: List[Model]):
        """
        Initializes an ensemble of models.

        Parameters
        ----------
        models: List[Model]
            List of models to be used in the ensemble. The list is stored
            as-is (not copied) in `self.models`.
        """
        self.models = models

    def fit(self, dataset: Dataset):
        """
        Fits the models to the specified dataset.

        Every model in `self.models` is fitted, in list order, by calling its
        `fit` method with the same dataset.

        Parameters
        ----------
        dataset: Dataset
            Dataset object handed unchanged to each model.
        """
        for model in self.models:
            model.fit(dataset)

    @abstractmethod
    def predict(self, dataset: Dataset):
        """
        Predicts the labels for the specified dataset.

        Abstract: concrete ensembles define how the predictions of the
        individual models are combined.

        Parameters
        ----------
        dataset: Dataset
            Dataset object to predict on.
        """

    def evaluate(self,
                 dataset: Dataset,
                 metrics: List[Metric],
                 per_task_metrics: bool = False,
                 n_classes: int = 2):
        """
        Evaluates the performance of this model on specified dataset.

        The ensemble itself is handed to an `Evaluator` as the model, and the
        result of `Evaluator.compute_model_performance` is returned unchanged.

        Parameters
        ----------
        dataset: Dataset
            Dataset object.
        metrics: List[Metric]
            The set of metrics provided.
        per_task_metrics: bool
            If true, return computed metric for each task on multitask dataset.
        n_classes: int
            If specified, will use `n_classes` as the number of unique classes.

        Returns
        -------
        multitask_scores: dict
            Dictionary mapping names of metrics to metric scores.
        all_task_scores: dict, optional
            If `per_task_metrics == True` is passed as a keyword argument, then returns a second dictionary of scores
            for each task separately.
        """
        evaluator = Evaluator(self, dataset)
        return evaluator.compute_model_performance(metrics,
                                                   per_task_metrics=per_task_metrics,
                                                   n_classes=n_classes)
class VotingClassifier(Ensemble):
    """
    VotingClassifier Ensemble.

    It uses a voting strategy to predict the labels of a dataset:

    - 'soft': the per-class scores of all models are averaged and the class
      with the highest average wins.
    - 'hard': every model votes for its own highest-scoring class and the
      class with the most votes wins.

    In both strategies ties are resolved in favour of the lowest class index.
    """

    def __init__(self, models: List[Model], voting: str = "soft"):
        """
        Initializes a VotingClassifier ensemble.

        Parameters
        ----------
        models: List[Model]
            List of models to be used in the ensemble.
        voting: str
            Voting strategy to use. Can be either 'soft' or 'hard'. The value
            is only validated when `predict` is called.
        """
        super().__init__(models)
        self.voting = voting

    def _stack_predictions(self, dataset: Dataset) -> np.ndarray:
        """
        Collects the class scores of every model in a single array.

        Parameters
        ----------
        dataset: Dataset
            Dataset object handed to each model's `predict`.

        Returns
        -------
        stacked: np.ndarray
            Array of shape (n_samples, n_classes, n_models).

        Raises
        ------
        ValueError
            If a model does not return a 2-D array, or if the models disagree
            on the shape of their predictions.
        """
        per_model_scores = []
        for position, model in enumerate(self.models):
            scores = np.asarray(model.predict(dataset), dtype=float)
            # The number of classes is taken from the predictions themselves,
            # so the dataset does not need labels (or every class) to be
            # present at prediction time.
            if scores.ndim != 2:
                raise ValueError(
                    f"Model {position} returned predictions with shape {scores.shape}; "
                    "expected a 2-D array of per-class scores (n_samples, n_classes)")
            if per_model_scores and scores.shape != per_model_scores[0].shape:
                raise ValueError(
                    f"Model {position} returned predictions with shape {scores.shape}, "
                    f"but model 0 returned {per_model_scores[0].shape}")
            per_model_scores.append(scores)
        return np.stack(per_model_scores, axis=-1)

    def predict(self, dataset: Dataset, proba: bool = False) -> np.ndarray:
        """
        Predicts the labels for the specified dataset.

        Parameters
        ----------
        dataset: Dataset
            Dataset object.
        proba: bool
            If true, returns the probabilities instead of class labels. With
            'soft' voting these are the per-class scores averaged over the
            models; with 'hard' voting they are the fraction of models that
            voted for each class.

        Returns
        -------
        final_result: np.ndarray
            Predicted labels, shape (n_samples,), stored as floats, or
            probabilities, shape (n_samples, n_classes).

        Raises
        ------
        ValueError
            If `self.voting` is neither 'soft' nor 'hard', if the ensemble has
            no models, or if the models' predictions cannot be combined.
        """
        # Validate the configuration before running any (possibly slow) model.
        if self.voting not in ("soft", "hard"):
            raise ValueError("Voting has to be either 'soft' or 'hard'")
        if not self.models:
            raise ValueError("VotingClassifier needs at least one model to predict")

        all_scores = self._stack_predictions(dataset)
        n_classes, n_models = all_scores.shape[1], all_scores.shape[2]

        if self.voting == "soft":
            class_scores = all_scores.mean(axis=2)
        else:
            # Each model votes for its best class. Using argmax instead of a
            # fixed 0.5 threshold guarantees one vote per model, also for
            # multiclass outputs where no single class exceeds 0.5.
            votes = all_scores.argmax(axis=1)  # (n_samples, n_models)
            ballots = votes[:, :, np.newaxis] == np.arange(n_classes)
            class_scores = ballots.sum(axis=1) / n_models

        if proba:
            return class_scores
        # Labels are returned as floats, the dtype this method has always
        # produced for class labels.
        return class_scores.argmax(axis=1).astype(float)