Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import os | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple, Union, Any | |
def extract_file_dict(folder_path: List[Path]) -> Dict[str, Path]:
    """
    Build a lookup from bare filename to the uploaded file entry.

    Args:
        folder_path: Iterable of uploaded file entries. Each entry may be a
            ``pathlib.Path`` (or any ``os.PathLike``), a plain string path,
            or an object exposing the path through a ``.name`` attribute
            (presumably what some upload widgets hand over -- verify against
            the caller).

    Returns:
        Dictionary mapping the final path component (the filename) to the
        original entry, unchanged. When two entries share the same filename
        the later one overwrites the earlier one.
    """
    file_dict = {}
    for entry in folder_path:
        if isinstance(entry, (str, os.PathLike)):
            # Plain string paths have no ``.name``; normalise them (and Path
            # objects) to a string path first.
            raw_path = os.fspath(entry)
        else:
            # Fall back to the ``.name`` attribute for wrapper objects.
            raw_path = str(entry.name)
        # Use the standard basename helper instead of splitting on "/".
        file_dict[os.path.basename(raw_path)] = entry
    return file_dict
def validate_data(file_dict: Dict[str, Path]) -> Tuple[Union[bool, str], str]:
    """
    Check that an uploaded set of files is usable.

    Args:
        file_dict: Mapping of filename to the path (or readable source) of
            each uploaded file.

    Returns:
        A ``(status, message)`` pair where ``status`` is one of:
            - True: at least one image and exactly one CSV holding both the
              'Ground Truth' and 'Image Name' columns
            - False: no images, an unreadable CSV, or a missing column
            - "no_csv": images present but no CSV file
            - "multiple_csv": images present but more than one CSV file
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')

    # Single pass over the names: collect CSVs and note whether any image exists.
    csv_names = []
    has_image = False
    for name in file_dict:
        lowered = name.lower()
        if lowered.endswith('.csv'):
            csv_names.append(name)
        if lowered.endswith(image_suffixes):
            has_image = True

    if not has_image:
        return False, "No image files found in the folder or subfolders"

    # Without exactly one CSV the caller falls back to file-based processing.
    if not csv_names:
        return "no_csv", "No CSV file found. Will extract data from file paths and names."
    if len(csv_names) > 1:
        return "multiple_csv", "Multiple CSV files found. Will extract data from file paths and names."

    # Exactly one CSV: it has to load and carry both required columns.
    try:
        columns = pd.read_csv(file_dict[csv_names[0]]).columns
    except Exception as exc:
        return False, f"Error reading CSV file: {str(exc)}"

    required_columns = (
        ('Ground Truth', "CSV file does not contain 'Ground Truth' column"),
        ('Image Name', "CSV file does not contain 'Image Name' column"),
    )
    for column, failure_message in required_columns:
        if column not in columns:
            return False, failure_message

    return True, "Data validation successful"
def extract_binary_output(
    model_output: str,
    ground_truth: str = "",
    all_ground_truths: Union[List[str], None] = None
) -> str:
    """
    Pick the ground-truth keyword that appears in the first line of a model response.

    Args:
        model_output: The model's text response. ``None`` is treated as an
            empty response.
        ground_truth: Current item's ground truth. Accepted for interface
            compatibility; it is not used by the matching logic.
        all_ground_truths: All ground truth values; their unique, stripped,
            lower-cased string forms are the candidate keywords.

    Returns:
        The first keyword (in alphabetical order) that occurs as a substring
        of the lower-cased first line of the response, or the literal
        "Enter the output manually" when no keyword matches.
    """
    if all_ground_truths is None:
        all_ground_truths = []

    keywords = set()
    for gt in all_ground_truths:
        # Skip missing values only. Falsy-but-real labels such as 0 or False
        # must survive, so a plain truthiness test is not used here.
        if gt is None or (isinstance(gt, float) and gt != gt):  # gt != gt -> NaN
            continue
        label = str(gt).strip().lower()
        # An empty keyword is a substring of every string and would match
        # any response, so blank labels are dropped.
        if label:
            keywords.add(label)
    unique_keywords = sorted(keywords)

    text = "" if model_output is None else str(model_output)
    # Only the first line of the response is inspected.
    first_line = text.split("\n", 1)[0].lower()

    print(f"DEBUG: Unique keywords extracted: {unique_keywords}")
    print(f"DEBUG: Model output: {text[:100]}...")  # First 100 chars

    for keyword in unique_keywords:
        if keyword in first_line:
            return keyword
    return "Enter the output manually"