import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import pandas as pd

# File extensions (lowercase) that count as images during validation.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')

# Columns a single uploaded CSV must provide, in the order they are checked.
_REQUIRED_CSV_COLUMNS = ('Ground Truth', 'Image Name')


def extract_file_dict(folder_path: List[Path]) -> Dict[str, Path]:
    """
    Extract file dictionary from folder path.

    Args:
        folder_path: List of Path objects from Gradio file upload. Entries
            may also be plain path strings or objects whose ``name``
            attribute holds the path.

    Returns:
        Dictionary mapping filename to the original entry. If two entries
        share the same base filename, the later one replaces the earlier one.
    """
    file_dict = {}
    for entry in folder_path:
        # Path.name is already the base name; upload wrappers may expose the
        # full path in ``name``; plain strings have no ``name`` at all.
        raw_name = getattr(entry, "name", entry)
        file_dict[os.path.basename(str(raw_name))] = entry
    return file_dict


def validate_data(file_dict: Dict[str, Path]) -> Tuple[Union[bool, str], str]:
    """
    Validate the uploaded data structure.

    Args:
        file_dict: Dictionary of filename to path mappings

    Returns:
        Tuple of (validation_result, message)
        validation_result can be:
        - True: Valid data with CSV
        - False: Invalid data
        - "no_csv": Valid but no CSV file
        - "multiple_csv": Valid but multiple CSV files
    """
    csv_files = [name for name in file_dict if name.lower().endswith('.csv')]
    has_images = any(
        name.lower().endswith(_IMAGE_EXTENSIONS) for name in file_dict
    )

    if not has_images:
        return False, "No image files found in the folder or subfolders"

    # Without exactly one CSV, callers fall back to file-based processing.
    if not csv_files:
        return "no_csv", "No CSV file found. Will extract data from file paths and names."
    if len(csv_files) > 1:
        return "multiple_csv", "Multiple CSV files found. Will extract data from file paths and names."

    # Exactly one CSV: it must be readable and carry the required columns.
    try:
        df = pd.read_csv(file_dict[csv_files[0]])
    except Exception as e:
        # Any read/parse failure is reported as a validation message
        # rather than propagated to the caller.
        return False, f"Error reading CSV file: {str(e)}"

    for column in _REQUIRED_CSV_COLUMNS:
        if column not in df.columns:
            return False, f"CSV file does not contain '{column}' column"

    return True, "Data validation successful"


def extract_binary_output(
    model_output: str,
    ground_truth: str = "",
    all_ground_truths: Optional[List[str]] = None
) -> str:
    """
    Extract binary output from model response based on unique ground truth keywords.

    Only the first line of the model output is searched, case-insensitively.
    When several keywords occur in it, a keyword that is merely a fragment of
    a longer matching keyword is ignored (so "no cancer" wins over "cancer");
    among the remaining matches the alphabetically first one is returned.

    Args:
        model_output: The model's text response. ``None`` is treated as empty.
        ground_truth: Current item's ground truth. Currently unused; kept for
            interface compatibility.
        all_ground_truths: All ground truth values; their distinct, stripped,
            lowercased string forms are the candidate keywords. ``None``, NaN
            and blank values are skipped.

    Returns:
        The matching lowercase keyword, or "Enter the output manually" when
        no keyword occurs in the first line.
    """
    keywords = set()
    for gt in all_ground_truths or []:
        # Skip missing values explicitly instead of by truthiness so that a
        # legitimate label such as 0 is kept. NaN is the only float != itself.
        if gt is None or (isinstance(gt, float) and gt != gt):
            continue
        keyword = str(gt).strip().lower()
        # An empty keyword would be "found" in every string.
        if keyword:
            keywords.add(keyword)
    unique_keywords = sorted(keywords)

    text = "" if model_output is None else str(model_output)
    # Take only the first line of model output
    first_line = text.split("\n", 1)[0].lower()

    print(f"DEBUG: Unique keywords extracted: {unique_keywords}")
    print(f"DEBUG: Model output: {text[:100]}...")  # First 100 chars

    matches = [keyword for keyword in unique_keywords if keyword in first_line]
    for keyword in matches:
        # Prefer the most specific match: drop keywords contained in another
        # keyword that also matched.
        if not any(keyword != other and keyword in other for other in matches):
            return keyword

    return "Enter the output manually"