# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

import copy
import json
import logging
from typing import Optional

import numpy as np
import pycocotools.mask as mask_util
from pycocotools.coco import COCO
from typing_extensions import override


class YTVIS(COCO):
    """
    Helper class for reading YT-VIS annotations
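
    A minimal usage sketch (the annotation path below is a hypothetical
    placeholder for a YT-VIS style JSON file):

        ytvis = YTVIS("path/to/ytvis_val.json", ignore_gt_cats=True)
        video_ids = ytvis.getImgIds()  # videos are exposed as COCO "images"
        ann_ids = ytvis.getAnnIds(imgIds=video_ids[:1])
        anns = ytvis.loadAnns(ann_ids)  # tracklet-level annotations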
    """

    @override
    def __init__(self, annotation_file: Optional[str] = None, ignore_gt_cats: bool = True):
        """
        Args:
            annotation_file: Path to the annotation file
            ignore_gt_cats: If True, we ignore the ground-truth categories and
                replace them with a dummy "object" category. This is useful for
                Phrase AP evaluation.
        """
        self.ignore_gt_cats = ignore_gt_cats
        super().__init__(annotation_file=annotation_file)

    @override
    def createIndex(self):
        # We rename some keys to match the COCO format before creating the index.
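        # For example (illustrative field values, not from a real file), an
        # annotation {"video_id": 3, "bboxes": [...], "areas": [...]} becomes
        # {"image_id": 3, "bboxes": [...], "area": <mean of areas>}, and the
        # top-level "videos" list is re-exposed as "images".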
        if "annotations" in self.dataset:
            for ann in self.dataset["annotations"]:
                if "video_id" in ann:
                    ann["image_id"] = int(ann.pop("video_id"))
                if self.ignore_gt_cats:
                    ann["category_id"] = -1
                else:
                    ann["category_id"] = int(ann["category_id"])
                if "bboxes" in ann:
                    # note that in some datasets we load under this YTVIS class,
                    # some "bboxes" could be None for when the GT object is invisible,
                    # so we replace them with [0, 0, 0, 0]
                    ann["bboxes"] = [
                        bbox if bbox is not None else [0, 0, 0, 0]
                        for bbox in ann["bboxes"]
                    ]
                if "areas" in ann:
                    # similar to "bboxes", some areas could be None for when the GT
                    # object is invisible, so we replace them with 0
                    areas = [a if a is not None else 0 for a in ann["areas"]]
                    # Compute average area of tracklet
                    ann["area"] = np.mean(areas)
        if "videos" in self.dataset:
            for vid in self.dataset["videos"]:
                vid["id"] = int(vid["id"])
            self.dataset["images"] = self.dataset.pop("videos")

        if self.ignore_gt_cats:
            self.dataset["categories"] = [
                {"supercategory": "object", "id": -1, "name": "object"}
            ]
        else:
            for cat in self.dataset["categories"]:
                cat["id"] = int(cat["id"])
        super().createIndex()

    @override
    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        if len(areaRng) > 0:
            logging.warning(
                "Note that we filter out objects based on their *average* area across the video, not their per-frame area"
            )

        return super().getAnnIds(
            imgIds=imgIds, catIds=catIds, areaRng=areaRng, iscrowd=iscrowd
        )

    @override
    def showAnns(self, anns, draw_bbox=False):
        raise NotImplementedError("Showing annotations is not supported")

    @override
    def loadRes(self, resFile):
        # Adapted from COCO.loadRes to support tracklets/masklets
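        # Each result entry is expected to carry an "image_id" (the video id)
        # plus per-frame lists: "bboxes" (xywh or None) and/or "segmentations"
        # (RLE dicts). A "score" and "category_id" are typically also present
        # for evaluation, though this method does not inspect them.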
        res = YTVIS(ignore_gt_cats=self.ignore_gt_cats)
        res.dataset["images"] = [img for img in self.dataset["images"]]

        if isinstance(resFile, str):
            with open(resFile) as f:
                anns = json.load(f)
        elif isinstance(resFile, np.ndarray):
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert isinstance(anns, list), "results is not an array of objects"
        annsImgIds = [ann["image_id"] for ann in anns]
        assert set(annsImgIds) == (
            set(annsImgIds) & set(self.getImgIds())
        ), "Results do not correspond to current coco set"
        if "bboxes" in anns[0] and not anns[0]["bboxes"] == []:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for id, ann in enumerate(anns):
                bbs = [(bb if bb is not None else [0, 0, 0, 0]) for bb in ann["bboxes"]]
                xxyy = [[bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] for bb in bbs]
                if not "segmentations" in ann:
                    ann["segmentations"] = [
                        [[x1, y1, x1, y2, x2, y2, x2, y1]] for (x1, x2, y1, y2) in xxyy
                    ]
                ann["areas"] = [bb[2] * bb[3] for bb in bbs]
                # NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = id + 1
                ann["iscrowd"] = 0
        elif "segmentations" in anns[0]:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for id, ann in enumerate(anns):
                ann["bboxes"] = [
                    mask_util.toBbox(segm) for segm in ann["segmentations"]
                ]
                if "areas" not in ann:
                    ann["areas"] = [
                        mask_util.area(segm) for segm in ann["segmentations"]
                    ]
                # NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = id + 1
                ann["iscrowd"] = 0

        res.dataset["annotations"] = anns
        res.createIndex()
        return res

    @override
    def download(self, tarDir=None, imgIds=[]):
        raise NotImplementedError("Downloading images is not supported")

    @override
    def loadNumpyAnnotations(self, data):
        raise NotImplementedError("We don't support numpy annotations for now")

    @override
    def annToRLE(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")

    @override
    def annToMask(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")
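

# Minimal usage sketch (illustrative only; the JSON paths below are
# hypothetical placeholders for a YT-VIS style ground-truth file and a
# results file with per-frame "bboxes"/"segmentations" per tracklet).
if __name__ == "__main__":
    gt = YTVIS("path/to/ytvis_gt.json", ignore_gt_cats=True)
    dt = gt.loadRes("path/to/ytvis_results.json")
    print(f"{len(gt.getImgIds())} videos, {len(dt.getAnnIds())} result tracklets")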