import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import logging
import os

# Root logging setup: show INFO and above, each record prefixed with its
# timestamp and severity. The module-level logger is named after this module.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

def plot_roc_curve(results_path, output_image_path):
    """
    Read a predictions CSV, compute the ROC curve and AUC, and save the plot.

    The CSV must contain a ``true_label`` column (rows equal to
    ``'Pneumothorax'`` form the positive class, every other value is treated
    as negative) and a ``pneumothorax_score`` column, which is passed to
    ``roc_curve`` as the score of the positive class. Rows with a missing
    score are dropped before evaluation.

    This is a best-effort helper: every failure is logged and the function
    returns normally instead of raising.

    Args:
        results_path: Path of the predictions CSV to read.
        output_image_path: Path the ROC figure is written to. Its parent
            directory is created if it does not exist yet.

    Returns:
        None. The only outputs are the saved image and log records.
    """
    if not os.path.exists(results_path):
        logger.error("Results file not found: %s", results_path)
        return

    fig = None  # tracked so the figure can be released on every exit path
    try:
        df = pd.read_csv(results_path)
        logger.info("Loaded %d predictions from %s", len(df), results_path)

        # Fail with a precise message rather than an opaque KeyError later on.
        missing_columns = [
            column
            for column in ('true_label', 'pneumothorax_score')
            if column not in df.columns
        ]
        if missing_columns:
            logger.error(
                "Results file is missing required column(s): %s",
                ", ".join(missing_columns),
            )
            return

        # Filter out errors (rows for which no score was produced)
        df = df.dropna(subset=['pneumothorax_score'])
        if df.empty:
            logger.error("No valid predictions found.")
            return

        # Prepare True Labels (Binary)
        # Kaggle Labels: 'Pneumothorax' vs 'No Pneumothorax'
        y_true = (df['true_label'] == 'Pneumothorax').astype(int)
        y_scores = df['pneumothorax_score']

        # With a single class present the ROC curve is undefined and the AUC
        # comes out as NaN, so refuse to produce a misleading plot.
        if y_true.nunique() < 2:
            logger.error(
                "Cannot compute ROC curve: true labels contain only one class."
            )
            return

        # Calculate ROC and AUC (the thresholds are not needed for the plot)
        fpr, tpr, _ = roc_curve(y_true, y_scores)
        roc_auc = auc(fpr, tpr)
        logger.info("Calculated AUC: %.4f", roc_auc)

        # Plot
        fig = plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        # Diagonal reference line (TPR == FPR)
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve - Zero-Shot Pneumothorax Classification (Kaggle)')
        plt.legend(loc="lower right")
        plt.grid(True, alpha=0.3)

        # Make sure the destination directory exists before writing the image.
        output_dir = os.path.dirname(output_image_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        fig.savefig(output_image_path)
        logger.info("ROC curve saved to %s", output_image_path)

    except Exception as e:
        # Keep the best-effort contract, but record the traceback as well.
        logger.exception("Failed to plot ROC curve: %s", e)
    finally:
        # Release the figure even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)

if __name__ == "__main__":
    # Script entry point: plot the ROC curve for the Kaggle prediction results
    # and store the image next to the predictions file.
    plot_roc_curve(
        results_path="results/kaggle_predictions.csv",
        output_image_path="results/kaggle_roc_curve.png",
    )