"""Gradio leaderboard that aggregates evaluation-result JSON files.

Every ``*.json`` file found (recursively) under ``eval_results_dir`` is read,
its ``extractive_match`` scores are collected per model, and the combined
table is shown in a Gradio ``Dataframe`` with a manual refresh button.
"""

import json
import os
from glob import glob

import gradio as gr
import pandas as pd

eval_results_dir = "eval_results/"  # Directory containing evaluation results


def _extract_metrics(results):
    """Flatten the per-task ``results`` mapping into leaderboard columns.

    Args:
        results: Mapping of task name to that task's metric dict.

    Returns:
        A dict with a ``"<task> (Match)"`` entry for every task that reports
        ``extractive_match`` and a ``"<task> (StdErr)"`` entry when the task
        also reports ``extractive_match_stderr``.
    """
    metrics = {}
    for task, task_data in results.items():
        # Skip malformed (non-dict) entries and tasks without the metric
        # instead of raising, so one odd task cannot hide the others.
        if not isinstance(task_data, dict) or "extractive_match" not in task_data:
            continue
        metrics[f"{task} (Match)"] = task_data["extractive_match"]
        # The stderr is optional: a missing value leaves the cell empty
        # rather than aborting the remaining tasks of this file.
        if "extractive_match_stderr" in task_data:
            metrics[f"{task} (StdErr)"] = task_data["extractive_match_stderr"]
    return metrics


def load_results():
    """Collect all evaluation results into one DataFrame.

    Files are processed in sorted path order, so when several files report
    the same model and task the lexicographically last file wins
    deterministically. A file that cannot be read or parsed, or that lacks
    the ``config_general`` section, is reported on stdout and skipped.

    Returns:
        A ``pandas.DataFrame`` with a ``"Model"`` column followed by the
        metric columns gathered from the result files.
    """
    records = {}
    # Recursively find all JSON files in the eval_results directory.
    pattern = os.path.join(eval_results_dir, "**", "*.json")
    # glob() returns paths in arbitrary order; sort for reproducible output.
    for file_path in sorted(glob(pattern, recursive=True)):
        # Best-effort per file: a single bad file must not take down the
        # whole leaderboard, so any failure is reported and the file skipped.
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            model_name = data["config_general"].get("model_name", "Unknown")
            metrics = _extract_metrics(data.get("results", {}))
            records.setdefault(model_name, {}).update(metrics)
        except Exception as e:
            print(f"Error reading {file_path}: {e}")

    # Convert to DataFrame: one row per model, model name in its own column.
    df = pd.DataFrame.from_dict(records, orient="index").reset_index()
    df.rename(columns={"index": "Model"}, inplace=True)
    return df


def leaderboard():
    """Return a freshly loaded results table (callback for the Gradio UI)."""
    df = load_results()
    return df


with gr.Blocks() as demo:
    gr.Markdown("# 📊 Evaluation Leaderboard")
    gr.Markdown("This leaderboard displays evaluation results from JSON files in `eval_results/`.")
    # The table takes the callable itself as its value; the button calls the
    # same function again to refresh it.
    results_table = gr.Dataframe(leaderboard)
    refresh_button = gr.Button("🔄 Refresh")
    refresh_button.click(leaderboard, outputs=[results_table])

demo.launch()