Source code for deepmol.evaluator.evaluator
import csv
from typing import Dict, Union, List, Tuple
import numpy as np
from deepmol.datasets import Dataset
from deepmol.metrics import Metric
from deepmol.utils.utils import normalize_labels_shape
[docs]class Evaluator:
"""
Class that evaluates a model on a given dataset.
The evaluator class is used to evaluate a `Model` class on a given `Dataset` object.
"""
def __init__(self, model: 'Model', dataset: Dataset) -> None:
"""
Initialize this evaluator.
Parameters
----------
model: Model
Model to evaluate. Note that this must be a regression or classification model.
dataset: Dataset
Dataset object to evaluate `model` on.
"""
self.model = model
self.dataset = dataset
[docs] @staticmethod
def output_statistics(scores: Dict[str, float], stats_out: str) -> None:
"""
Write computed stats to file.
Parameters
----------
scores: Score
Dictionary mapping names of metrics to scores.
stats_out: str
Name of file to write scores to.
"""
with open(stats_out, "w") as stats_file:
stats_file.write(str(scores) + "\n")
[docs] def output_predictions(self, y_preds: np.ndarray, csv_out: str) -> None:
"""
Writes predictions to file.
Writes predictions made on the dataset to a specified file.
Parameters
----------
y_preds: np.ndarray
Predictions to output
csv_out: str
Name of file to write predictions to.
"""
data_ids = self.dataset.ids
n_tasks = self.dataset.n_tasks
y_preds = np.reshape(y_preds, (len(y_preds), n_tasks))
assert len(y_preds) == len(data_ids)
with open(csv_out, "w") as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(["ID"] + list(self.dataset.label_names))
for mol_id, y_pred in zip(data_ids, y_preds):
csvwriter.writerow([mol_id] + list(y_pred))
[docs] def compute_model_performance(self,
metrics: Union[Metric, List[Metric]],
per_task_metrics: bool = False) -> Tuple[Dict, Dict]:
"""
Computes statistics of model on test data and saves results to csv.
Parameters
----------
metrics: Union[Metric, List[Metric]]
The set of metrics provided.
If a single `Metric` object is provided or a list is provided, it will evaluate `Model` on those metrics.
per_task_metrics: bool
If True, return computed metric for each task on multitask dataset.
Returns
-------
multitask_scores: dict
Dictionary mapping names of metrics to metric scores.
all_task_scores: dict
If `per_task_metrics == True`, then returns a second dictionary of scores for each task separately.
"""
n_tasks = self.dataset.n_tasks
y = self.dataset.y
y_pred = self.model.predict(self.dataset)
if not y.shape == np.array(y_pred).shape:
y_pred = normalize_labels_shape(y_pred, n_tasks)
multitask_scores = {}
all_task_scores = {}
# Compute multitask metrics
for metric in metrics:
results = metric.compute_metric(y, y_pred, per_task_metrics=per_task_metrics, n_tasks=n_tasks)
if per_task_metrics:
multitask_scores[metric.name], computed_metrics = results
all_task_scores[metric.name] = computed_metrics
else:
multitask_scores[metric.name], all_task_scores = results
if not per_task_metrics:
return multitask_scores, {}
else:
return multitask_scores, all_task_scores