Source code for discopat.nn_models.detr

import torch
import torch.nn.functional as F  # noqa: N812
from torch import nn

from discopat.core import Box, ComputingDevice, Frame, NNModel
from discopat.nn_models.torch_box_ops import box_cxcywh_to_xyxy



[docs]
class DETRModel(NNModel):
    """``NNModel`` subclass for a DETR-style detector.

    NOTE(review): the purpose is inferred from the class and module names;
    the pre-/post-processing hooks defined on this class currently have
    empty bodies.
    """

    # Declared as an annotation only; no value is assigned in this class body.
    _device: ComputingDevice


[docs]
    def pre_process(self, frame: Frame) -> torch.Tensor:
        """Prepare a frame for the network (placeholder).

        Not implemented yet: the body is empty, so calling this method
        returns ``None`` rather than the annotated tensor.

        Parameters
        ----------
            frame: the frame to convert; currently ignored

        """



[docs]
    def post_process(
        self, raw_predictions: list[dict[torch.Tensor]]
    ) -> list[Box]:
        """Turn raw network predictions into boxes (placeholder).

        Not implemented yet: the body is empty, so calling this method
        returns ``None`` rather than the annotated list of boxes.

        NOTE(review): ``dict[torch.Tensor]`` gives ``dict`` a single type
        argument; the intended key type (presumably ``str``) should be
        confirmed and added.

        Parameters
        ----------
            raw_predictions: raw outputs of the model; currently ignored

        """





[docs]
class PostProcess(nn.Module):
    """Convert the model's output into the format expected by the COCO API.

    The forward pass yields one dictionary per image with the keys
    ``"scores"``, ``"labels"`` and ``"boxes"``.
    """


[docs]
    @torch.no_grad()
    def forward(self, outputs, target_sizes):
        """Turn raw model outputs into one detection dictionary per image.

        Parameters
        ----------
            outputs: raw outputs of the model, read through the keys
                ``"pred_logits"`` and ``"pred_boxes"``
            target_sizes: tensor of dimension [batch_size x 2] whose rows are
                unpacked as (height, width), one row per image of the batch
                For evaluation, this must be the original image size (before any data augmentation)
                For visualization, this should be the image size after data augmentation, but before padding

        Returns
        -------
            A list with one dict per image, holding the entries ``"scores"``,
            ``"labels"`` and ``"boxes"`` (boxes scaled by the image size).

        """
        logits = outputs["pred_logits"]
        raw_boxes = outputs["pred_boxes"]

        # One row of target sizes per batch element, each with two entries.
        assert len(logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        # The last class slot is excluded before picking the best class
        # (presumably the "no object" class -- TODO confirm).
        class_probs = F.softmax(logits, -1)[..., :-1]
        scores, labels = class_probs.max(-1)

        # (cx, cy, w, h) -> (x0, y0, x1, y1), still in relative coordinates.
        corner_boxes = box_cxcywh_to_xyxy(raw_boxes)

        # Scale x by the image width and y by the image height; the extra
        # axis broadcasts the per-image factors over every box of that image.
        heights, widths = target_sizes.unbind(1)
        scale = torch.stack([widths, heights, widths, heights], dim=1)
        corner_boxes = corner_boxes * scale[:, None, :]

        results = []
        for score, label, box in zip(scores, labels, corner_boxes):
            results.append({"scores": score, "labels": label, "boxes": box})
        return results