Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics import roc_curve | |
| import logging | |
| import os | |
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def calculate_optimal_threshold(results_path):
    """Return the score threshold that maximises Youden's J statistic.

    Reads a CSV of predictions from ``results_path``. The file must provide
    a ``true_label`` column (rows equal to ``'Pneumothorax'`` are treated as
    positives, every other value as negative) and a ``pneumothorax_score``
    column. Rows with a missing score are dropped before the ROC curve is
    computed.

    Args:
        results_path: Path to the predictions CSV file.

    Returns:
        The selected threshold as a float, or ``None`` when the file does
        not exist, no scored rows remain, the labels contain only one
        class, or any other error occurs. Failures are logged, not raised.
    """
    if not os.path.exists(results_path):
        logger.error(f"Results file not found: {results_path}")
        return None

    try:
        df = pd.read_csv(results_path)
        logger.info(f"Loaded {len(df)} predictions from {results_path}")

        df = df.dropna(subset=['pneumothorax_score'])
        if df.empty:
            logger.error("No valid predictions found.")
            return None

        # Binary labels: 1 for 'Pneumothorax', 0 for anything else.
        # NOTE(review): rows with a missing true_label compare unequal and
        # are therefore counted as negatives - confirm this is intended.
        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
        y_scores = df['pneumothorax_score']

        # With a single class, roc_curve only warns and returns NaN rates,
        # which would silently produce a meaningless threshold.
        if y_true.nunique() < 2:
            logger.error(
                "Cannot compute ROC curve: both positive and negative "
                "samples are required."
            )
            return None

        fpr, tpr, thresholds = roc_curve(y_true, y_scores)

        # Youden's J = Sensitivity + Specificity - 1
        # Sensitivity = TPR
        # Specificity = 1 - FPR
        # J = TPR + (1 - FPR) - 1 = TPR - FPR
        j_scores = tpr - fpr

        # thresholds[0] is a synthetic "predict nothing" point added by
        # roc_curve (not an observed score), so it is excluded from the
        # search to avoid returning an unusable threshold.
        best_idx = int(np.argmax(j_scores[1:])) + 1
        best_threshold = float(thresholds[best_idx])

        logger.info(f"Optimal Threshold (Youden's J): {best_threshold:.4f}")
        logger.info(f"Sensitivity: {tpr[best_idx]:.4f}")
        logger.info(f"Specificity: {1 - fpr[best_idx]:.4f}")
        return best_threshold
    except Exception as e:
        # Top-level boundary for this script: keep the traceback in the log
        # and preserve the "return None on failure" contract.
        logger.exception(f"Failed to calculate threshold: {e}")
        return None
if __name__ == "__main__":
    # Script entry point: evaluate the default Kaggle predictions file.
    calculate_optimal_threshold("results/kaggle_predictions.csv")