Spaces:

rohitium
/

chest-xray-classification

Sleeping

chest-xray-classification / src /calculate_threshold.py

Deploy Chest X-Ray App (LFS)

b412062 5 days ago

1.75 kB

	import pandas as pd
	import numpy as np
	from sklearn.metrics import roc_curve
	import logging
	import os

	# Configure logging
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
	logger = logging.getLogger(__name__)


	def calculate_optimal_threshold(results_path):
	    """Return the score threshold that maximises Youden's J statistic.

	    The CSV at ``results_path`` is read with pandas and must provide two
	    columns: ``pneumothorax_score`` (the model score) and ``true_label``.
	    Rows whose score is missing are dropped; a row counts as positive when
	    its ``true_label`` equals ``'Pneumothorax'`` and as negative otherwise.

	    Args:
	        results_path: Path to the predictions CSV file.

	    Returns:
	        The threshold taken from the ROC curve at the point of maximal
	        ``TPR - FPR``, or ``None`` when the file is missing, no row has a
	        score, only one class is present, or any other error occurs (the
	        reason is logged).
	    """
	    if not os.path.exists(results_path):
	        logger.error(f"Results file not found: {results_path}")
	        return None

	    try:
	        df = pd.read_csv(results_path)
	        logger.info(f"Loaded {len(df)} predictions from {results_path}")

	        df = df.dropna(subset=['pneumothorax_score'])
	        if df.empty:
	            logger.error("No valid predictions found.")
	            return None

	        # Binary labels: 1 for Pneumothorax, 0 for everything else.
	        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
	        y_scores = df['pneumothorax_score']

	        # With a single class either TPR or FPR is undefined (NaN), and the
	        # argmax below would silently pick a meaningless threshold.
	        positives = int(y_true.sum())
	        if positives == 0 or positives == len(y_true):
	            logger.error(
	                "Cannot calculate threshold: both Pneumothorax and "
	                "non-Pneumothorax cases are required."
	            )
	            return None

	        fpr, tpr, thresholds = roc_curve(y_true, y_scores)

	        # Youden's J = Sensitivity + Specificity - 1
	        # Sensitivity = TPR
	        # Specificity = 1 - FPR
	        # J = TPR + (1 - FPR) - 1 = TPR - FPR
	        j_scores = tpr - fpr

	        # Index 0 is the synthetic (FPR=0, TPR=0) point that roc_curve
	        # prepends; its threshold is a sentinel rather than an observed
	        # score, so it is excluded from the search.
	        best_idx = int(np.argmax(j_scores[1:])) + 1
	        best_threshold = thresholds[best_idx]

	        if j_scores[best_idx] <= 0:
	            logger.warning("No threshold separates the classes better than chance.")

	        logger.info(f"Optimal Threshold (Youden's J): {best_threshold:.4f}")
	        logger.info(f"Sensitivity: {tpr[best_idx]:.4f}")
	        logger.info(f"Specificity: {1 - fpr[best_idx]:.4f}")

	        return best_threshold

	    except Exception as e:
	        # Top-level boundary for the script: log with traceback and signal
	        # failure through the None return value instead of propagating.
	        logger.exception(f"Failed to calculate threshold: {e}")
	        return None

	if __name__ == "__main__":
	    # Script entry point: run the threshold search on the default
	    # predictions CSV; the result is reported through the log output.
	    calculate_optimal_threshold("results/kaggle_predictions.csv")