Source code for discopat.nn_training.evaluation.base

import numpy as np

from discopat.core import ComputingDevice, DataLoader, NeuralNet
from discopat.nn_training.evaluation.matching import (
    match_groundtruths_and_predictions,
)


def _get_image_id(target):
    """Return the image identifier stored under ``target["image_id"]``.

    Values exposing an ``item`` attribute (for instance array or tensor
    scalars) are unwrapped by calling ``item()``; any other value is returned
    unchanged.

    Args:
        target: mapping holding an ``"image_id"`` entry.

    Returns:
        The unwrapped image identifier.

    """
    image_id = target["image_id"]
    if hasattr(image_id, "item"):
        return image_id.item()
    return image_id



[docs]
def compute_ap(
    matching_dict: dict[str, dict[str, np.ndarray]], threshold: float
) -> float:
    """Compute the Average Precision (AP) for a given localization threshold.

    A prediction is a true positive when, for at least one groundtruth, it is
    the highest-scoring prediction whose matching-matrix entry reaches
    ``threshold``; every other prediction is a false positive. Predictions of
    all images are then ranked by decreasing score, cumulative precision and
    recall are computed, precision is replaced by its non-increasing envelope
    and the AP is the trapezoidal area under the resulting curve.

    Args:
        matching_dict: dictionary in the form:
            image_id: {
                "matching_matrix": array of shape (N_preds, N_gts),
                "scores": array of shape (N_preds,)
            }
        threshold: localization threshold; a (prediction, groundtruth) pair
            matches when its matching-matrix entry is >= threshold.

    Returns:
        The AP, or 0.0 when the dataset holds no groundtruth or no prediction.

    Note:
        Within an image, ties between equally-scored matching predictions are
        resolved in favour of the first row, so the predictions and scores
        should already be sorted by descending score.

        Images without predictions still contribute their groundtruths to the
        recall denominator, and images without groundtruths still contribute
        their predictions as false positives. The number of groundtruths is
        read from the second axis of the matching matrix, so it can only be
        recovered when the matrix is two-dimensional.

    """
    num_groundtruths = 0
    tp_vector_list = []
    score_vector_list = []

    for entry in matching_dict.values():
        matching_matrix = np.asarray(entry["matching_matrix"])
        scores = np.asarray(entry["scores"]).reshape(-1)

        num_preds = scores.shape[0]
        # A matrix that is not 2-D carries no groundtruth axis.
        num_gts = matching_matrix.shape[1] if matching_matrix.ndim == 2 else 0

        # Every groundtruth counts towards recall, detected or not.
        num_groundtruths += num_gts

        if num_preds == 0:
            continue

        if num_gts == 0:
            # Nothing to detect on this image: all predictions are FPs.
            tp_vector = np.zeros(num_preds)
        else:
            is_match = matching_matrix >= threshold

            # Hide non-matching pairs behind -inf so that a valid match wins
            # the argmax whatever its score (including zero or negative).
            masked_scores = np.where(
                is_match, scores.reshape(-1, 1), -np.inf
            )
            best_pred_per_gt = np.argmax(masked_scores, axis=0)

            is_best = np.zeros_like(is_match)
            is_best[best_pred_per_gt, np.arange(num_gts)] = True

            # Groundtruths without any match select row 0 in the argmax;
            # the conjunction with is_match discards those spurious picks.
            tp_vector = np.any(is_match & is_best, axis=1).astype(float)

        tp_vector_list.append(tp_vector)
        score_vector_list.append(scores)

    if num_groundtruths == 0 or not tp_vector_list:
        return 0.0

    big_tp_vector = np.concatenate(tp_vector_list)
    big_score_vector = np.concatenate(score_vector_list)

    # Sort the TP vector by decreasing prediction score over the whole
    # dataset; a stable sort keeps the result deterministic on score ties.
    ranking = np.argsort(-big_score_vector, kind="stable")
    big_tp_vector = big_tp_vector[ranking]

    # Cumulative counts, with a leading zero for the case score_threshold=1
    tp_cum = np.concatenate([[0.0], np.cumsum(big_tp_vector)])
    fp_cum = np.concatenate([[0.0], np.cumsum(1.0 - big_tp_vector)])

    recall = tp_cum / num_groundtruths
    # The epsilon only guards the leading 0 / 0 entry.
    precision = tp_cum / (tp_cum + fp_cum + 1e-10)

    # Ensure precision is non-increasing (running maximum from the right)
    precision = np.maximum.accumulate(precision[::-1])[::-1]

    # Compute area under curve (AP); np.trapezoid only exists in NumPy >= 2.0
    integrate = getattr(np, "trapezoid", None) or np.trapz
    return float(integrate(precision, recall))




[docs]
def evaluate(
    model: NeuralNet,
    data_loader: DataLoader,
    localization_criterion: str,
    device: ComputingDevice,
) -> dict[str, float]:
    """Evaluate a model on a data loader.

    The model is switched to evaluation mode, then the loader is traversed
    once: each batch is sent through the model and every prediction is
    matched against the groundtruth of the same sample. The AP is finally
    computed for localization thresholds 0.50, 0.55, ..., 0.95.

    Args:
        model: the neural network to be evaluated,
        data_loader: the evaluation dataloader, yielding (images, targets)
            batches where each target holds "image_id" and "boxes" entries,
        localization_criterion: metric used for GT-pred matching,
        device: computing device on which the model is stored.

    Returns:
        A dict containing the name and values of the following metrics:
        AP50, AP[50:95:05] (stored under the key "AP").

    """
    model.eval()

    # NOTE(review): no gradient-disabling context is entered here; if the
    # model is a torch module, callers may want to run this under
    # torch.no_grad() - confirm.

    # Predictions are matched within the batch that produced them, so the
    # loader is consumed only once and targets cannot drift away from their
    # predictions between two traversals (shuffling, random transforms).
    matching_dict = {}
    for images, targets in data_loader:
        predictions = model([img.to(device).float() for img in images])
        for pred, t in zip(predictions, targets):
            image_id = _get_image_id(t)
            matching_dict[image_id] = match_groundtruths_and_predictions(
                groundtruths=t["boxes"],
                predictions=pred["boxes"],
                scores=pred["scores"],
                localization_criterion=localization_criterion,
            )

    # Integer percentages keep both the metric names and the thresholds
    # exact, without accumulated floating-point error.
    ap_dict = {
        f"AP{percent}": compute_ap(matching_dict, percent / 100)
        for percent in range(50, 100, 5)
    }
    return {
        "AP50": ap_dict["AP50"],
        "AP": float(np.mean(list(ap_dict.values()))),
    }