bellmake's picture
SAM3 Video Segmentation - Clean deployment
14114e8
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
"""
Self-contained COCO JSON re-indexing function that creates temporary files.
"""
import json
import os
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
def reindex_coco_to_temp(input_json_path: str) -> Optional[str]:
    """
    Convert a 0-indexed COCO JSON file to 1-indexed and save it to a temporary location.

    A section ("images", "categories", "annotations") is treated as 0-indexed
    when any of its entries has ``id == 0``. Every ``id`` in such a section is
    shifted by +1, and the ``image_id`` / ``category_id`` references held by
    annotations are updated to follow their targets.

    Args:
        input_json_path: Path to the input COCO JSON file

    Returns:
        Path to a JSON file inside a freshly created temporary directory. The
        file holds the 1-indexed data, or an unmodified copy of the input when
        no section was 0-indexed. The caller owns the file and its directory
        and is responsible for deleting them.

    Raises:
        FileNotFoundError: If input file doesn't exist
        json.JSONDecodeError: If input file is not valid JSON
        ValueError: If input file is not a valid COCO format
    """
    sections = ("annotations", "images", "categories")

    def is_coco_json(data: Any) -> bool:
        """Check if data appears to be a COCO format file."""
        # A COCO file should have at least one of these keys
        return isinstance(data, dict) and any(key in data for key in sections)

    def check_zero_indexed(data: Dict[str, Any]) -> Tuple[bool, bool, bool]:
        """
        Check if annotations, images, or categories start from index 0.

        Returns:
            Tuple of (annotations_zero_indexed, images_zero_indexed, categories_zero_indexed)
        """
        # `or []` makes a missing section and an explicit JSON null equivalent.
        annotations_zero, images_zero, categories_zero = (
            any(entry.get("id", -1) == 0 for entry in (data.get(section) or []))
            for section in sections
        )
        return annotations_zero, images_zero, categories_zero

    def shift_ids(entries: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], Dict[Any, Any]]:
        """Return copies of entries with every id shifted by +1, plus the old->new id mapping."""
        mapping: Dict[Any, Any] = {}
        shifted = []
        for entry in entries:
            entry = dict(entry)  # copy so the caller's entry is left untouched
            old_id = entry.get("id")
            # Entries without an id are kept as-is, mirroring the tolerance of
            # check_zero_indexed instead of failing with KeyError.
            if old_id is not None:
                mapping[old_id] = entry["id"] = old_id + 1
            shifted.append(entry)
        return shifted, mapping

    def reindex_coco_data(data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert 0-indexed COCO data to 1-indexed without mutating the input."""
        annotations_zero, images_zero, categories_zero = check_zero_indexed(data)
        # Only the top level is copied here; each section that changes is
        # rebuilt below from fresh entry dicts.
        modified_data = dict(data)

        # Images and categories go first because annotations reference their
        # ids. The mappings stay empty for sections that are not re-indexed.
        image_id_mapping: Dict[Any, Any] = {}
        category_id_mapping: Dict[Any, Any] = {}
        if images_zero:
            modified_data["images"], image_id_mapping = shift_ids(data["images"])
        if categories_zero:
            modified_data["categories"], category_id_mapping = shift_ids(
                data["categories"]
            )

        annotations = data.get("annotations")
        if annotations:
            if annotations_zero:
                new_annotations, _ = shift_ids(annotations)
            else:
                new_annotations = [dict(ann) for ann in annotations]
            for ann in new_annotations:
                # References to ids that were not re-indexed (for example a
                # dangling image_id) are left unchanged.
                image_id = ann.get("image_id")
                if image_id is not None and image_id in image_id_mapping:
                    ann["image_id"] = image_id_mapping[image_id]
                category_id = ann.get("category_id")
                if category_id is not None and category_id in category_id_mapping:
                    ann["category_id"] = category_id_mapping[category_id]
            modified_data["annotations"] = new_annotations

        return modified_data

    # Validate input path
    if not os.path.exists(input_json_path):
        raise FileNotFoundError(f"Input file not found: {input_json_path}")

    # Load and validate JSON data
    try:
        with open(input_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError requires (msg, doc, pos); passing only a message
        # raises TypeError and hides the real parse error.
        raise json.JSONDecodeError(
            f"Invalid JSON in {input_json_path}: {e.msg}", e.doc, e.pos
        ) from e

    # Validate COCO format
    if not is_coco_json(data):
        raise ValueError(
            f"File does not appear to be in COCO format: {input_json_path}"
        )

    # Reindex only when at least one section is 0-indexed; otherwise the data
    # is written out unchanged so callers always receive a usable temp path.
    if any(check_zero_indexed(data)):
        data = reindex_coco_data(data)

    # Write the result into its own temporary directory
    input_path = Path(input_json_path)
    temp_dir = tempfile.mkdtemp()
    temp_filename = f"{input_path.stem}_1_indexed{input_path.suffix}"
    temp_path = os.path.join(temp_dir, temp_filename)
    with open(temp_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    return temp_path
# Example usage and test function
def test_reindex_function():
    """
    Run reindex_coco_to_temp on a small 0-indexed sample and verify the result.

    Writes a two-image, two-category, two-annotation COCO file to a temporary
    location, converts it, prints the first ids of the converted file and
    asserts that they were shifted to 1. All temporary files are removed
    afterwards, even when the conversion or a check fails.
    """
    # Create a test COCO file
    test_data = {
        "info": {"description": "Test COCO dataset", "version": "1.0", "year": 2023},
        "images": [
            {"id": 0, "width": 640, "height": 480, "file_name": "test1.jpg"},
            {"id": 1, "width": 640, "height": 480, "file_name": "test2.jpg"},
        ],
        "categories": [
            {"id": 0, "name": "person", "supercategory": "person"},
            {"id": 1, "name": "car", "supercategory": "vehicle"},
        ],
        "annotations": [
            {
                "id": 0,
                "image_id": 0,
                "category_id": 0,
                "bbox": [100, 100, 50, 75],
                "area": 3750,
                "iscrowd": 0,
            },
            {
                "id": 1,
                "image_id": 1,
                "category_id": 1,
                "bbox": [200, 150, 120, 80],
                "area": 9600,
                "iscrowd": 0,
            },
        ],
    }

    # Create temporary test file
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    ) as f:
        json.dump(test_data, f, indent=2)
        test_file_path = f.name

    result_path = None
    try:
        # Test the function
        result_path = reindex_coco_to_temp(test_file_path)
        print(f"Original file: {test_file_path}")
        print(f"Converted file: {result_path}")

        # Load and display the result (the converter writes UTF-8)
        with open(result_path, "r", encoding="utf-8") as f:
            result_data = json.load(f)

        first_annotation = result_data["annotations"][0]
        print("\nConverted data sample:")
        print(f"First image ID: {result_data['images'][0]['id']}")
        print(f"First category ID: {result_data['categories'][0]['id']}")
        print(f"First annotation ID: {result_data['annotations'][0]['id']}")
        print(f"First annotation image_id: {result_data['annotations'][0]['image_id']}")
        print(
            f"First annotation category_id: {result_data['annotations'][0]['category_id']}"
        )

        # Every id that started at 0 must now start at 1, and the annotation
        # references must have followed their targets.
        assert result_data["images"][0]["id"] == 1
        assert result_data["categories"][0]["id"] == 1
        assert first_annotation["id"] == 1
        assert first_annotation["image_id"] == 1
        assert first_annotation["category_id"] == 1
    finally:
        try:
            # Clean up the converted file and its temporary directory; doing
            # it here means a failed check above does not leak them.
            if result_path is not None:
                if os.path.exists(result_path):
                    os.unlink(result_path)
                os.rmdir(os.path.dirname(result_path))
        finally:
            # Clean up test file
            os.unlink(test_file_path)


# Run the sample conversion when this file is executed as a script.
if __name__ == "__main__":
    test_reindex_function()