Spaces:

detect-tech
/

Test-Prompt

Runtime error

App Files Files Community

Test-Prompt / backend /utils /data_processing.py

abhiman181025

First commit

1314bf5 about 1 month ago

raw

history blame contribute delete

3.38 kB

	import pandas as pd
	import os
	from pathlib import Path
	from typing import Dict, List, Tuple, Union, Any

	def extract_file_dict(folder_path: List[Path]) -> Dict[str, Path]:
	    """
	    Extract file dictionary from folder path.

	    Each entry may be a ``Path``, a plain path string, or any object that
	    exposes its path through a ``name`` attribute. The dictionary key is the
	    final path component; the value is the entry exactly as it was received.
	    When two entries share the same base filename, the later one wins.

	    Args:
	        folder_path: List of Path objects from Gradio file upload.
	            ``None`` or an empty list yields an empty dictionary.

	    Returns:
	        Dictionary mapping filename to full path
	    """
	    file_dict = {}
	    # `or []` lets a missing upload (None) behave like an empty one.
	    for file in folder_path or []:
	        # Path objects already expose the bare filename via `.name`; other
	        # objects may carry a full path there, and plain strings have no
	        # `.name` at all, so fall back to the entry itself.
	        raw_name = getattr(file, "name", file)
	        # basename() also understands the platform's native separator,
	        # which a literal split on "/" does not.
	        filename = os.path.basename(str(raw_name))
	        file_dict[filename] = file
	    return file_dict


	def validate_data(file_dict: Dict[str, Path]) -> Tuple[Union[bool, str], str]:
	    """
	    Validate the uploaded data structure.

	    Files are classified by filename extension only (case-insensitive).
	    At least one image file is required. When exactly one CSV is present it
	    is read and checked for the required columns; with zero or several CSVs
	    a status string is returned instead of a boolean.

	    Args:
	        file_dict: Dictionary of filename to path mappings

	    Returns:
	        Tuple of (validation_result, message)
	        validation_result can be:
	        - True: Valid data with CSV
	        - False: Invalid data
	        - "no_csv": Valid but no CSV file
	        - "multiple_csv": Valid but multiple CSV files
	    """
	    # str.endswith accepts a tuple, so one call covers every extension.
	    image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')

	    # Find CSV file
	    csv_files = [fname for fname in file_dict if fname.lower().endswith('.csv')]

	    # Images are only checked for presence, so stop at the first hit.
	    has_images = any(fname.lower().endswith(image_exts) for fname in file_dict)

	    if not has_images:
	        return False, "No image files found in the folder or subfolders"

	    # If no CSV or multiple CSVs, we'll proceed with file-based processing
	    if not csv_files:
	        return "no_csv", "No CSV file found. Will extract data from file paths and names."
	    if len(csv_files) > 1:
	        return "multiple_csv", "Multiple CSV files found. Will extract data from file paths and names."

	    # Only the read itself can fail; keep the try body to that one call.
	    # Any read/parse failure is reported to the caller as a message rather
	    # than raised, so the broad catch is deliberate here.
	    try:
	        df = pd.read_csv(file_dict[csv_files[0]])
	    except Exception as e:
	        return False, f"Error reading CSV file: {e}"

	    # Check if single CSV has required columns (same order as reported before).
	    for column in ('Ground Truth', 'Image Name'):
	        if column not in df.columns:
	            return False, f"CSV file does not contain '{column}' column"

	    return True, "Data validation successful"


	def extract_binary_output(
	    model_output: str,
	    ground_truth: str = "",
	    all_ground_truths: Union[List[str], None] = None
	) -> str:
	    """
	    Extract binary output from model response based on unique ground truth keywords.

	    The keywords are the distinct, stripped, lowercased ground-truth values.
	    Only the first non-blank line of the model output is searched. A keyword
	    that appears as a whole word is preferred; if none does, a plain
	    substring match is accepted. Within each pass keywords are tried in
	    alphabetical order and the first hit is returned.

	    Args:
	        model_output: The model's text response. ``None`` is treated as an
	            empty response.
	        ground_truth: Current item's ground truth. Not read by this
	            function; kept so existing callers keep working.
	        all_ground_truths: List of all ground truth values to extract unique keywords

	    Returns:
	        The matching lowercase keyword, or the literal string
	        "Enter the output manually" when no keyword is found.
	    """
	    # Local import: the module's top-level imports do not include `re`.
	    import re

	    text = "" if model_output is None else str(model_output)

	    # Unique lowercase keywords. Values that are empty after stripping are
	    # dropped: an empty keyword is a substring of every string and would
	    # always be returned first.
	    keywords = {str(gt).strip().lower() for gt in (all_ground_truths or []) if gt}
	    keywords.discard("")
	    unique_keywords = sorted(keywords)

	    # Take only the first line of model output, skipping leading blank
	    # lines so a response that starts with a newline is still inspected.
	    first_line = text.lstrip().split("\n", 1)[0].lower()

	    print(f"DEBUG: Unique keywords extracted: {unique_keywords}")
	    print(f"DEBUG: Model output: {text[:100]}...")  # First 100 chars

	    # Pass 1: whole-word matches, so that "real" is not found inside
	    # "unreal" and "no" is not found inside "cannot". Lookarounds are used
	    # instead of \b so keywords that start or end with punctuation work.
	    for keyword in unique_keywords:
	        if re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", first_line):
	            return keyword

	    # Pass 2: substring fallback (e.g. keyword "positive" in "positives").
	    for keyword in unique_keywords:
	        if keyword in first_line:
	            return keyword

	    return "Enter the output manually"