import gradio as gr
import json
import pandas as pd
import plotly.graph_objects as go


def load_results():
    """Load all submitted evaluation results from the local results.json file."""
    with open('results.json', 'r') as f:
        return json.load(f)
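
# For reference, a sketch of the record shape that create_metrics_df (and the
# submission example in the "How to Submit" tab below) assumes each entry in
# results.json to have. The field values here are illustrative only, not taken
# from a real submission:
#
# {
#     "model_name": "example-model",
#     "timestamp": "2024-01-01T00:00:00",
#     "config": {
#         "embedding_model": "example-embedding-model",
#         "retriever_type": "dense",
#         "retrieval_config": {"top_k": 3}
#     },
#     "metrics": {
#         "retrieval": {"hit_rate": 0.8, "mrr": 0.6},
#         "generation": {"rouge1": 0.7, "rouge2": 0.5, "rougeL": 0.6}
#     }
# }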


def create_metrics_df(results):
    """Flatten the list of result records into a single leaderboard DataFrame."""
    rows = []
    for r in results:
        row = {
            'Model': r['model_name'],
            'Timestamp': r['timestamp'],
            'Embeddings': r['config']['embedding_model'],
            'Retriever': r['config']['retriever_type'],
            'Top-K': r['config']['retrieval_config'].get('top_k', 'N/A')
        }
        # Add metrics, prefixed with their category (e.g. retrieval_hit_rate)
        metrics = r['metrics']
        for category in ['retrieval', 'generation']:
            if category in metrics:
                for metric_name, value in metrics[category].items():
                    row[f"{category}_{metric_name}"] = round(value, 4)
        rows.append(row)
    return pd.DataFrame(rows)


def create_comparison_plot(df, metric_category):
    """Build a grouped bar chart comparing one metric category across models."""
    metrics = [col for col in df.columns if col.startswith(f"{metric_category}_")]
    if not metrics:
        return None
    fig = go.Figure()
    for metric in metrics:
        fig.add_trace(go.Bar(
            # Strip only the category prefix so names like "hit_rate" stay intact
            name=metric.split('_', 1)[-1],
            x=df['Model'],
            y=df[metric],
            text=df[metric].round(3),
            textposition='auto',
        ))
    fig.update_layout(
        title=f"{metric_category.capitalize()} Metrics Comparison",
        xaxis_title="Model",
        yaxis_title="Score",
        barmode='group'
    )
    return fig


def create_interface():
    """Assemble the Gradio Blocks app from the loaded results."""
    results = load_results()
    df = create_metrics_df(results)
    with gr.Blocks() as demo:
        gr.Markdown("# RAG Evaluation Leaderboard")
        with gr.Tabs():
            with gr.Tab("Leaderboard"):
                gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    interactive=False
                )
            with gr.Tab("Retrieval Metrics"):
                gr.Plot(create_comparison_plot(df, 'retrieval'))
            with gr.Tab("Generation Metrics"):
                gr.Plot(create_comparison_plot(df, 'generation'))
            with gr.Tab("Configuration Details"):
                config_df = df[['Model', 'Embeddings', 'Retriever', 'Top-K', 'Timestamp']]
                gr.Dataframe(config_df)
        gr.Markdown('''
## How to Submit

To submit your results:

```python
from rag_leaderboard import RAGLeaderboard

# Initialize leaderboard
leaderboard = RAGLeaderboard(
    repo_id="your-username/repo-name",
    token="your-hf-token"
)

# Submit results
leaderboard.submit_results(
    model_name="Your Model Name",
    metrics={
        "retrieval": {"hit_rate": 0.8, "mrr": 0.6},
        "generation": {"rouge1": 0.7, "rouge2": 0.5, "rougeL": 0.6}
    },
    config={
        "embedding_model": "your-embedding-model",
        "retriever_type": "dense",
        "retrieval_config": {"top_k": 3}
    }
)
```
''')
    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()