import copy
import json
import logging
from typing import Optional

import numpy as np
import pycocotools.mask as mask_util
from pycocotools.coco import COCO
from typing_extensions import override


class YTVIS(COCO):
    """
    Helper class for reading YT-VIS annotations.
    """

    @override
    def __init__(self, annotation_file: Optional[str] = None, ignore_gt_cats: bool = True):
        """
        Args:
            annotation_file: Path to the annotation file.
            ignore_gt_cats: If True, ignore the ground-truth categories and replace
                them with a single dummy "object" category. This is useful for
                Phrase AP evaluation.
        """
        self.ignore_gt_cats = ignore_gt_cats
        super().__init__(annotation_file=annotation_file)

    @override
    def createIndex(self):
        """Normalize YT-VIS fields to the COCO schema before building the index.

        Videos are exposed as "images", video-level annotations keep their
        per-frame "bboxes"/"areas" lists, and an "area" field holding the average
        area across the video is added so the standard COCO API keeps working.
        """
        if "annotations" in self.dataset:
            for ann in self.dataset["annotations"]:
                if "video_id" in ann:
                    ann["image_id"] = int(ann.pop("video_id"))
                if self.ignore_gt_cats:
                    ann["category_id"] = -1
                else:
                    ann["category_id"] = int(ann["category_id"])
                if "bboxes" in ann:
                    # Frames where the object is absent are stored as None;
                    # replace them with a dummy box.
                    ann["bboxes"] = [
                        bbox if bbox is not None else [0, 0, 0, 0]
                        for bbox in ann["bboxes"]
                    ]
                if "areas" in ann:
                    # Missing per-frame areas count as 0; "area" is the average
                    # area of the object across the whole video.
                    areas = [a if a is not None else 0 for a in ann["areas"]]
                    ann["area"] = np.mean(areas)
        if "videos" in self.dataset:
            for vid in self.dataset["videos"]:
                vid["id"] = int(vid["id"])
            self.dataset["images"] = self.dataset.pop("videos")

        if self.ignore_gt_cats:
            # Collapse all categories into a single dummy "object" category
            # (used for Phrase AP evaluation).
            self.dataset["categories"] = [
                {"supercategory": "object", "id": -1, "name": "object"}
            ]
        else:
            for cat in self.dataset["categories"]:
                cat["id"] = int(cat["id"])
        super().createIndex()

    @override
    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        if len(areaRng) > 0:
            logging.warning(
                "Note that we filter out objects based on their *average* area "
                "across the video, not their per-frame area"
            )
        # "area" holds the average per-frame area (see createIndex), so the
        # area-range filter operates on that value.
        return super().getAnnIds(
            imgIds=imgIds, catIds=catIds, areaRng=areaRng, iscrowd=iscrowd
        )

    @override
    def showAnns(self, anns, draw_bbox=False):
        raise NotImplementedError("Showing annotations is not supported")

    @override
    def loadRes(self, resFile):
        """Load result annotations and return a new YTVIS object holding them."""
        res = YTVIS(ignore_gt_cats=self.ignore_gt_cats)
        res.dataset["images"] = [img for img in self.dataset["images"]]

        if isinstance(resFile, str):
            with open(resFile) as f:
                anns = json.load(f)
        elif isinstance(resFile, np.ndarray):
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert isinstance(anns, list), "results is not an array of objects"
        annsImgIds = [ann["image_id"] for ann in anns]
        assert set(annsImgIds) == (
            set(annsImgIds) & set(self.getImgIds())
        ), "Results do not correspond to current coco set"
        if "bboxes" in anns[0] and anns[0]["bboxes"] != []:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for ann_id, ann in enumerate(anns):
                # Replace missing per-frame boxes with a dummy box, then derive
                # box-shaped polygon "segmentations" if none were provided.
                bbs = [(bb if bb is not None else [0, 0, 0, 0]) for bb in ann["bboxes"]]
                xxyy = [[bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] for bb in bbs]
                if "segmentations" not in ann:
                    ann["segmentations"] = [
                        [[x1, y1, x1, y2, x2, y2, x2, y1]] for (x1, x2, y1, y2) in xxyy
                    ]
                ann["areas"] = [bb[2] * bb[3] for bb in bbs]
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = ann_id + 1
                ann["iscrowd"] = 0
        elif "segmentations" in anns[0]:
            res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
            for ann_id, ann in enumerate(anns):
                # Derive per-frame boxes (and areas, if missing) from the masks.
                ann["bboxes"] = [
                    mask_util.toBbox(segm) for segm in ann["segmentations"]
                ]
                if "areas" not in ann:
                    ann["areas"] = [
                        mask_util.area(segm) for segm in ann["segmentations"]
                    ]
                ann["area"] = np.mean(ann["areas"])
                ann["id"] = ann_id + 1
                ann["iscrowd"] = 0

        res.dataset["annotations"] = anns
        res.createIndex()
        return res

    @override
    def download(self, tarDir=None, imgIds=[]):
        raise NotImplementedError

    @override
    def loadNumpyAnnotations(self, data):
        raise NotImplementedError("We don't support numpy annotations for now")

    @override
    def annToRLE(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")

    @override
    def annToMask(self, ann):
        raise NotImplementedError("We expect masks to be already in RLE format")
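

# The block below is a minimal usage sketch, not part of the original module: the
# file paths are hypothetical placeholders for a YT-VIS style ground-truth JSON and
# a matching results file. It only exercises the standard COCO query API, which
# works here because createIndex() remaps "videos" to "images".
if __name__ == "__main__":
    gt = YTVIS("annotations/ytvis_valid.json", ignore_gt_cats=True)
    dt = gt.loadRes("results/predictions.json")

    video_ids = gt.getImgIds()
    print(f"Loaded {len(video_ids)} videos")
    print(f"Ground-truth tracks: {len(gt.getAnnIds(imgIds=video_ids))}")
    print(f"Predicted tracks: {len(dt.getAnnIds(imgIds=video_ids))}")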