# Source: chest-xray-classification/src/calculate_threshold.py
# Uploaded by rohitium, commit b412062 ("Deploy Chest X-Ray App (LFS)")
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve
import logging
import os
# Set up root logging (timestamp - level - message) and create the logger
# used by this module.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def calculate_optimal_threshold(results_path):
    """
    Calculate the decision threshold that maximises Youden's J statistic.

    Reads a CSV of predictions, treats rows whose ``true_label`` equals
    ``'Pneumothorax'`` as positives and every other labelled row as a
    negative, and returns the ``pneumothorax_score`` cut-off at which
    ``J = sensitivity + specificity - 1`` is largest.

    Args:
        results_path: Path to a CSV file containing the columns
            ``true_label`` and ``pneumothorax_score``.

    Returns:
        The optimal threshold as a float, or ``None`` when the file does
        not exist, no usable rows remain, only one class is present, or
        any other error occurs. Errors are logged rather than raised.
    """
    if not os.path.exists(results_path):
        logger.error(f"Results file not found: {results_path}")
        return None

    try:
        df = pd.read_csv(results_path)
        logger.info(f"Loaded {len(df)} predictions from {results_path}")

        # A row is only usable when both the score and the ground truth are
        # known; a missing label would otherwise be counted as a negative.
        df = df.dropna(subset=['pneumothorax_score', 'true_label'])
        if df.empty:
            logger.error("No valid predictions found.")
            return None

        # Binary labels: 1 = Pneumothorax, 0 = anything else.
        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
        y_scores = df['pneumothorax_score']

        # With a single class, TPR or FPR is undefined (NaN) and argmax over
        # NaN would select a meaningless threshold.
        if y_true.nunique() < 2:
            logger.error(
                "Cannot compute ROC curve: labels must contain both "
                "Pneumothorax and non-Pneumothorax samples."
            )
            return None

        fpr, tpr, thresholds = roc_curve(y_true, y_scores)

        # Youden's J = Sensitivity + Specificity - 1
        #            = TPR + (1 - FPR) - 1 = TPR - FPR
        # roc_curve prepends a sentinel threshold above every score (nothing
        # predicted positive, J = 0). It is not a usable cut-off, so only
        # thresholds that some sample can actually reach are considered.
        attainable = thresholds <= y_scores.max()
        j_scores = np.where(attainable, tpr - fpr, -np.inf)
        best_idx = int(np.argmax(j_scores))
        best_threshold = float(thresholds[best_idx])

        logger.info(f"Optimal Threshold (Youden's J): {best_threshold:.4f}")
        logger.info(f"Sensitivity: {tpr[best_idx]:.4f}")
        logger.info(f"Specificity: {1 - fpr[best_idx]:.4f}")
        return best_threshold
    except Exception as e:
        # Best-effort boundary: report the failure (with traceback) and
        # signal it to the caller via None instead of crashing.
        logger.exception(f"Failed to calculate threshold: {e}")
        return None
if __name__ == "__main__":
    # Script entry point: run the calculation on the predictions CSV at its
    # default path, relative to the current working directory.
    calculate_optimal_threshold("results/kaggle_predictions.csv")