# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved

"""
Self-contained COCO JSON re-indexing function that creates temporary files.
"""

import json
import os
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


def reindex_coco_to_temp(input_json_path: str) -> Optional[str]:
    """
    Convert 0-indexed COCO JSON file to 1-indexed and save to temporary location.

    A section (images, categories, annotations) is treated as 0-indexed when
    any of its entries has ``id == 0``; every id in such a section is shifted
    by +1, and the ``image_id`` / ``category_id`` references held by
    annotations are shifted to match.

    Args:
        input_json_path: Path to the input COCO JSON file

    Returns:
        Path to a JSON file named ``<stem>_1_indexed<suffix>`` inside a freshly
        created temporary directory. The file is written even when no
        conversion is needed (it is then an unchanged copy of the input), so
        ``None`` is not returned in practice; the ``Optional`` annotation is
        kept for interface compatibility. The caller is responsible for
        deleting the file and its directory.

    Raises:
        FileNotFoundError: If input file doesn't exist
        json.JSONDecodeError: If input file is not valid JSON
        ValueError: If input file is not a valid COCO format
    """

    def is_coco_json(data: Dict[str, Any]) -> bool:
        """Check if data appears to be a COCO format file."""
        if not isinstance(data, dict):
            return False
        # A COCO file should have at least one of these keys
        return any(key in data for key in ("images", "annotations", "categories"))

    def has_zero_id(entries: Optional[List[Dict[str, Any]]]) -> bool:
        """Return True if any entry of a (possibly missing/empty) section has id 0."""
        return bool(entries) and any(entry.get("id", -1) == 0 for entry in entries)

    def check_zero_indexed(data: Dict[str, Any]) -> Tuple[bool, bool, bool]:
        """
        Check if annotations, images, or categories start from index 0.

        Returns:
            Tuple of (annotations_zero_indexed, images_zero_indexed,
            categories_zero_indexed)
        """
        return (
            has_zero_id(data.get("annotations", [])),
            has_zero_id(data.get("images", [])),
            has_zero_id(data.get("categories", [])),
        )

    def shift_ids(entries: List[Dict[str, Any]]) -> Dict[Any, Any]:
        """Add 1 to each entry's id in place and return the old -> new id mapping."""
        mapping = {}
        for entry in entries:
            # Entries without an id are tolerated by the zero-index check,
            # so they are skipped here instead of raising KeyError.
            if "id" not in entry:
                continue
            old_id = entry["id"]
            new_id = old_id + 1
            mapping[old_id] = new_id
            entry["id"] = new_id
        return mapping

    def reindex_coco_data(data: Dict[str, Any]) -> Dict[str, Any]:
        """Convert 0-indexed COCO data to 1-indexed without mutating the input."""
        annotations_zero, images_zero, categories_zero = check_zero_indexed(data)

        # Copy the top-level dict and every entry that may be edited, so the
        # caller's structure is left untouched (a plain dict.copy() would
        # share the nested entry dicts with the input).
        modified_data = dict(data)
        for key in ("images", "categories", "annotations"):
            if isinstance(modified_data.get(key), list):
                modified_data[key] = [dict(entry) for entry in modified_data[key]]

        # Images and categories go first: annotations reference their ids.
        image_id_mapping = (
            shift_ids(modified_data.get("images") or []) if images_zero else {}
        )
        category_id_mapping = (
            shift_ids(modified_data.get("categories") or [])
            if categories_zero
            else {}
        )

        for ann in modified_data.get("annotations") or []:
            if annotations_zero and "id" in ann:
                ann["id"] = ann["id"] + 1

            # Only rewrite references that point at an id we actually shifted;
            # dangling or missing references are left as they were.
            image_id = ann.get("image_id")
            if image_id is not None and image_id in image_id_mapping:
                ann["image_id"] = image_id_mapping[image_id]

            category_id = ann.get("category_id")
            if category_id is not None and category_id in category_id_mapping:
                ann["category_id"] = category_id_mapping[category_id]

        return modified_data

    def write_to_temp(payload: Dict[str, Any]) -> str:
        """Write payload as JSON into a new temporary directory and return its path."""
        input_path = Path(input_json_path)
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(
            temp_dir, f"{input_path.stem}_1_indexed{input_path.suffix}"
        )
        with open(temp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)
        return temp_path

    # Validate input path
    if not os.path.exists(input_json_path):
        raise FileNotFoundError(f"Input file not found: {input_json_path}")

    # Load and validate JSON data
    try:
        with open(input_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError requires (msg, doc, pos); forwarding the original
        # document and position keeps line/column information intact.
        raise json.JSONDecodeError(
            f"Invalid JSON in {input_json_path}: {e.msg}", e.doc, e.pos
        ) from e

    # Validate COCO format
    if not is_coco_json(data):
        raise ValueError(
            f"File does not appear to be in COCO format: {input_json_path}"
        )

    # Reindex only when needed; otherwise the data is copied out unchanged.
    if any(check_zero_indexed(data)):
        data = reindex_coco_data(data)

    return write_to_temp(data)


# Example usage and test function
def test_reindex_function():
    """Test the reindex function with a sample COCO file."""
    # Create a test COCO file
    test_data = {
        "info": {"description": "Test COCO dataset", "version": "1.0", "year": 2023},
        "images": [
            {"id": 0, "width": 640, "height": 480, "file_name": "test1.jpg"},
            {"id": 1, "width": 640, "height": 480, "file_name": "test2.jpg"},
        ],
        "categories": [
            {"id": 0, "name": "person", "supercategory": "person"},
            {"id": 1, "name": "car", "supercategory": "vehicle"},
        ],
        "annotations": [
            {
                "id": 0,
                "image_id": 0,
                "category_id": 0,
                "bbox": [100, 100, 50, 75],
                "area": 3750,
                "iscrowd": 0,
            },
            {
                "id": 1,
                "image_id": 1,
                "category_id": 1,
                "bbox": [200, 150, 120, 80],
                "area": 9600,
                "iscrowd": 0,
            },
        ],
    }

    # Create temporary test file
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    ) as f:
        json.dump(test_data, f, indent=2)
        test_file_path = f.name

    result_path = None
    try:
        # Test the function
        result_path = reindex_coco_to_temp(test_file_path)
        print(f"Original file: {test_file_path}")
        print(f"Converted file: {result_path}")

        # Load and display the result
        with open(result_path, "r", encoding="utf-8") as f:
            result_data = json.load(f)

        print("\nConverted data sample:")
        print(f"First image ID: {result_data['images'][0]['id']}")
        print(f"First category ID: {result_data['categories'][0]['id']}")
        print(f"First annotation ID: {result_data['annotations'][0]['id']}")
        print(f"First annotation image_id: {result_data['annotations'][0]['image_id']}")
        print(
            f"First annotation category_id: {result_data['annotations'][0]['category_id']}"
        )
    finally:
        # Clean up the converted file (if one was produced) and the test file,
        # even when the checks above fail part-way through.
        try:
            if result_path is not None:
                os.unlink(result_path)
                os.rmdir(os.path.dirname(result_path))
        finally:
            os.unlink(test_file_path)


if __name__ == "__main__":
    test_reindex_function()