Source code for deepmol.evaluator.evaluator

import csv
from typing import Dict, Union, List, Tuple

import numpy as np

from deepmol.datasets import Dataset
from deepmol.metrics import Metric
from deepmol.utils.utils import normalize_labels_shape


class Evaluator:
    """
    Evaluate a model on a given dataset.

    The evaluator holds a `Model` and a `Dataset` and offers helpers to score the
    model's predictions with one or more metrics and to write scores or
    predictions to disk.
    """

    def __init__(self, model: 'Model', dataset: Dataset) -> None:
        """
        Initialize this evaluator.

        Parameters
        ----------
        model: Model
            Model to evaluate. Note that this must be a regression or classification model.
        dataset: Dataset
            Dataset object to evaluate `model` on.
        """
        self.model = model
        self.dataset = dataset

    @staticmethod
    def output_statistics(scores: Dict[str, float], stats_out: str) -> None:
        """
        Write computed stats to file.

        The file is overwritten and receives the string representation of
        `scores` followed by a newline.

        Parameters
        ----------
        scores: Dict[str, float]
            Dictionary mapping names of metrics to scores.
        stats_out: str
            Name of file to write scores to.
        """
        with open(stats_out, "w") as stats_file:
            stats_file.write(str(scores) + "\n")

    def output_predictions(self, y_preds: np.ndarray, csv_out: str) -> None:
        """
        Write predictions made on the dataset to a CSV file.

        The file has a header row (``ID`` followed by the dataset's label names)
        and one row per sample holding the sample id and its per-task predictions.

        Parameters
        ----------
        y_preds: np.ndarray
            Predictions to output. Must be reshapeable to
            ``(len(y_preds), n_tasks)`` and hold one entry per dataset id.
        csv_out: str
            Name of file to write predictions to.

        Raises
        ------
        ValueError
            If `y_preds` cannot be reshaped to ``(len(y_preds), n_tasks)`` or if
            the number of predictions differs from the number of dataset ids.
        """
        data_ids = self.dataset.ids
        n_tasks = self.dataset.n_tasks
        y_preds = np.reshape(y_preds, (len(y_preds), n_tasks))
        # Explicit check instead of `assert`: asserts vanish under `python -O`,
        # and `zip` below would then silently truncate the output.
        if len(y_preds) != len(data_ids):
            raise ValueError(
                f"Number of predictions ({len(y_preds)}) does not match "
                f"number of dataset ids ({len(data_ids)})."
            )
        # newline="" lets the csv module control line endings (avoids blank
        # rows on platforms that translate "\n" on write).
        with open(csv_out, "w", newline="") as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(["ID"] + list(self.dataset.label_names))
            for mol_id, y_pred in zip(data_ids, y_preds):
                csvwriter.writerow([mol_id] + list(y_pred))

    def compute_model_performance(self,
                                  metrics: Union[Metric, List[Metric]],
                                  per_task_metrics: bool = False) -> Tuple[Dict, Dict]:
        """
        Compute metric scores of the model's predictions on the dataset.

        Parameters
        ----------
        metrics: Union[Metric, List[Metric]]
            A single `Metric` or a collection of `Metric` objects to evaluate the model with.
        per_task_metrics: bool
            If True, return computed metric for each task on multitask dataset.

        Returns
        -------
        multitask_scores: dict
            Dictionary mapping names of metrics to metric scores.
        all_task_scores: dict
            If `per_task_metrics == True`, a dictionary mapping names of metrics to the
            scores of each task separately; otherwise an empty dictionary.
        """
        n_tasks = self.dataset.n_tasks
        y = self.dataset.y
        y_pred = self.model.predict(self.dataset)
        # Only normalize when the prediction layout differs from the labels.
        if y.shape != np.asarray(y_pred).shape:
            y_pred = normalize_labels_shape(y_pred, n_tasks)

        # A lone metric is not iterable; wrap it so the loop below handles both
        # the single-metric and the multi-metric call forms.
        if hasattr(metrics, "compute_metric"):
            metrics = [metrics]

        multitask_scores = {}
        all_task_scores = {}

        for metric in metrics:
            score, task_scores = metric.compute_metric(y, y_pred,
                                                       per_task_metrics=per_task_metrics,
                                                       n_tasks=n_tasks)
            multitask_scores[metric.name] = score
            if per_task_metrics:
                all_task_scores[metric.name] = task_scores

        # `all_task_scores` stays empty unless per-task metrics were requested.
        return multitask_scores, all_task_scores