# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
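"""Helpers for reading YT-VIS-style video annotations with the pycocotools API.

Videos are exposed as COCO "images" and tracklets as COCO annotations, so the
standard COCO indexing and evaluation tooling can be reused for video tasks.
"""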
import copy
import json
import logging
from typing import Optional
import numpy as np
import pycocotools.mask as mask_util
from pycocotools.coco import COCO
from typing_extensions import override
class YTVIS(COCO):
"""
Helper class for reading YT-VIS annotations
"""
@override
    def __init__(
        self, annotation_file: Optional[str] = None, ignore_gt_cats: bool = True
    ):
"""
Args:
annotation_file: Path to the annotation file
            ignore_gt_cats: If True, ignore the ground-truth categories and
                replace them with a single dummy "object" category. This is
                useful for Phrase AP evaluation.
"""
self.ignore_gt_cats = ignore_gt_cats
super().__init__(annotation_file=annotation_file)
@override
def createIndex(self):
# We rename some keys to match the COCO format before creating the index.
if "annotations" in self.dataset:
for ann in self.dataset["annotations"]:
if "video_id" in ann:
ann["image_id"] = int(ann.pop("video_id"))
if self.ignore_gt_cats:
ann["category_id"] = -1
else:
ann["category_id"] = int(ann["category_id"])
if "bboxes" in ann:
                    # Note: in some datasets loaded through this YTVIS class,
                    # a "bboxes" entry can be None on frames where the GT
                    # object is invisible, so we replace it with [0, 0, 0, 0].
ann["bboxes"] = [
bbox if bbox is not None else [0, 0, 0, 0]
for bbox in ann["bboxes"]
]
if "areas" in ann:
# similar to "bboxes", some areas could be None for when the GT
# object is invisible, so we replace them with 0
areas = [a if a is not None else 0 for a in ann["areas"]]
# Compute average area of tracklet
ann["area"] = np.mean(areas)
if "videos" in self.dataset:
for vid in self.dataset["videos"]:
vid["id"] = int(vid["id"])
self.dataset["images"] = self.dataset.pop("videos")
if self.ignore_gt_cats:
self.dataset["categories"] = [
{"supercategory": "object", "id": -1, "name": "object"}
]
else:
for cat in self.dataset["categories"]:
cat["id"] = int(cat["id"])
super().createIndex()
@override
    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
        if len(areaRng) > 0:
            logging.warning(
                "Note that objects are filtered by their *average* area across "
                "the video, not by their per-frame area"
            )
        # Forward areaRng so the filter is applied to the average tracklet
        # area stored in ann["area"] (see createIndex and loadRes).
        return super().getAnnIds(
            imgIds=imgIds, catIds=catIds, areaRng=areaRng, iscrowd=iscrowd
        )
@override
def showAnns(self, anns, draw_bbox=False):
raise NotImplementedError("Showing annotations is not supported")
@override
def loadRes(self, resFile):
# Adapted from COCO.loadRes to support tracklets/masklets
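        # Expected input: a list of result dicts (or a path to a JSON file
        # containing one), where each dict carries an "image_id" matching a
        # video id in this dataset, a "category_id" (ignored and replaced with
        # -1 when ignore_gt_cats is True), and per-frame "segmentations" in
        # RLE format and/or "bboxes" in [x, y, w, h] format.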
res = YTVIS(ignore_gt_cats=self.ignore_gt_cats)
res.dataset["images"] = [img for img in self.dataset["images"]]
        if isinstance(resFile, str):
            with open(resFile) as f:
                anns = json.load(f)
        elif isinstance(resFile, np.ndarray):
            anns = self.loadNumpyAnnotations(resFile)
        else:
            anns = resFile
        assert isinstance(anns, list), "results is not an array of objects"
annsImgIds = [ann["image_id"] for ann in anns]
assert set(annsImgIds) == (
set(annsImgIds) & set(self.getImgIds())
), "Results do not correspond to current coco set"
if "bboxes" in anns[0] and not anns[0]["bboxes"] == []:
res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
for id, ann in enumerate(anns):
bbs = [(bb if bb is not None else [0, 0, 0, 0]) for bb in ann["bboxes"]]
xxyy = [[bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] for bb in bbs]
if not "segmentations" in ann:
ann["segmentations"] = [
[[x1, y1, x1, y2, x2, y2, x2, y1]] for (x1, x2, y1, y2) in xxyy
]
ann["areas"] = [bb[2] * bb[3] for bb in bbs]
# NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
ann["area"] = np.mean(ann["areas"])
ann["id"] = id + 1
ann["iscrowd"] = 0
elif "segmentations" in anns[0]:
res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
for id, ann in enumerate(anns):
ann["bboxes"] = [
mask_util.toBbox(segm) for segm in ann["segmentations"]
]
if "areas" not in ann:
ann["areas"] = [
mask_util.area(segm) for segm in ann["segmentations"]
]
# NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
ann["area"] = np.mean(ann["areas"])
ann["id"] = id + 1
ann["iscrowd"] = 0
res.dataset["annotations"] = anns
res.createIndex()
return res
@override
    def download(self, tarDir=None, imgIds=[]):
        raise NotImplementedError("Downloading images is not supported")
@override
def loadNumpyAnnotations(self, data):
raise NotImplementedError("We don't support numpy annotations for now")
@override
def annToRLE(self, ann):
raise NotImplementedError("We expect masks to be already in RLE format")
@override
def annToMask(self, ann):
raise NotImplementedError("We expect masks to be already in RLE format")