|
import gradio as gr |
|
import json |
|
import os |
|
import pandas as pd |
|
from glob import glob |
|
|
|
# Root directory that is searched (recursively) for evaluation result JSON files.
eval_results_dir = "eval_results/"
|
|
|
def load_results(results_dir=None):
    """Collect extractive-match scores from evaluation JSON files into a table.

    Every ``*.json`` file found recursively under ``results_dir`` is read.
    The model name comes from ``data["config_general"]["model_name"]``
    (``"Unknown"`` when that key is absent), and each entry of
    ``data["results"]`` that contains ``"extractive_match"`` contributes two
    columns: ``"<task> (Match)"`` and ``"<task> (StdErr)"``.

    Args:
        results_dir: Directory to scan. Defaults to the module-level
            ``eval_results_dir`` when omitted or ``None``.

    Returns:
        A ``pandas.DataFrame`` with a ``"Model"`` column followed by one
        column per collected metric. A missing standard error is stored as
        ``None`` rather than aborting the file.

    Files that cannot be read or do not have the expected layout are reported
    on stdout and skipped; they never contribute partial data.
    """
    if results_dir is None:
        # Resolved at call time so the module-level setting stays the default.
        results_dir = eval_results_dir

    records = {}
    pattern = os.path.join(results_dir, "**", "*.json")

    # glob() returns paths in arbitrary order; sorting makes it deterministic
    # which file wins when several files report the same model and task.
    for file_path in sorted(glob(pattern, recursive=True)):
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            model_name = data["config_general"].get("model_name", "Unknown")
            results = data.get("results", {})

            # Gather this file's scores separately so that a failure part-way
            # through leaves `records` untouched.
            scores = {}
            for task, task_data in results.items():
                if not isinstance(task_data, dict):
                    continue
                if "extractive_match" not in task_data:
                    continue
                scores[f"{task} (Match)"] = task_data["extractive_match"]
                scores[f"{task} (StdErr)"] = task_data.get(
                    "extractive_match_stderr"
                )
        except Exception as e:
            # Deliberate best-effort boundary: one bad file must not take the
            # whole leaderboard down.
            print(f"Error reading {file_path}: {e}")
            continue

        records.setdefault(model_name, {}).update(scores)

    df = pd.DataFrame.from_dict(records, orient="index").reset_index()
    df.rename(columns={"index": "Model"}, inplace=True)
    return df
|
|
|
def leaderboard():
    """Return the current leaderboard table.

    Thin wrapper around ``load_results()``; the result files are re-read on
    every call, and the resulting DataFrame is returned unchanged.
    """
    return load_results()
|
|
|
# Build the Gradio UI: a title, a short description, the results table and a
# button that re-runs `leaderboard` to repopulate the table.
# NOTE(review): the leading "π" in the heading and button label looks like a
# mis-decoded emoji — confirm against the original file before changing it.
with gr.Blocks() as demo:
    gr.Markdown("# π Evaluation Leaderboard")
    gr.Markdown("This leaderboard displays evaluation results from JSON files in `eval_results/`.")

    # The callable is passed (not called) as the table's value.
    results_table = gr.Dataframe(leaderboard)
    refresh_button = gr.Button("π Refresh")
    refresh_button.click(leaderboard, outputs=[results_table])

# Start the app as soon as the module is executed.
demo.launch()
|
|