|
import gradio as gr |
|
import json |
|
import os |
|
import pandas as pd |
|
from glob import glob |
|
|
|
# Root directory that is searched recursively for evaluation result JSON files.
eval_results_dir = "eval_results/"
|
|
|
def load_results(results_dir=None) -> pd.DataFrame:
    """Collect extractive-match scores from evaluation result JSON files.

    Every ``*.json`` file found recursively under the results directory is
    parsed. For each entry of the file's ``"results"`` mapping that contains
    an ``"extractive_match"`` key, one row is produced.

    Args:
        results_dir: Directory to scan. Defaults to the module-level
            ``eval_results_dir`` when omitted or ``None``.

    Returns:
        A DataFrame with the columns ``Model``, ``Task``,
        ``Extractive Match`` and ``Std Err``, sorted by task name ascending
        and, within a task, by score descending. When nothing could be
        loaded the frame is empty but still has these four columns.

    Files that cannot be read or parsed are reported on stdout and skipped;
    they never abort the scan. A missing ``config_general`` / ``model_name``
    falls back to ``"Unknown"`` and a missing ``extractive_match_stderr``
    yields an empty ``Std Err`` cell instead of discarding the file.
    """
    columns = ["Model", "Task", "Extractive Match", "Std Err"]
    root = eval_results_dir if results_dir is None else results_dir
    records = []

    # glob() returns files in arbitrary order; sorting keeps the row order of
    # tied scores reproducible between runs.
    json_files = sorted(glob(os.path.join(root, "**", "*.json"), recursive=True))

    for file_path in json_files:
        # Only the I/O and parsing step is guarded, so one bad file is skipped
        # without hiding programming errors in the extraction logic below.
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:  # ValueError covers JSON/Unicode decode errors
            print(f"Error reading {file_path}: {e}")
            continue

        if not isinstance(data, dict):
            print(f"Error reading {file_path}: top-level JSON value is not an object")
            continue

        config = data.get("config_general")
        if isinstance(config, dict):
            model_name = config.get("model_name", "Unknown")
        else:
            model_name = "Unknown"

        results = data.get("results")
        if not isinstance(results, dict):
            continue

        for task, task_data in results.items():
            # Entries without an extractive-match score are not leaderboard rows.
            if not isinstance(task_data, dict) or "extractive_match" not in task_data:
                continue
            records.append({
                "Model": model_name,
                "Task": task,
                "Extractive Match": task_data["extractive_match"],
                "Std Err": task_data.get("extractive_match_stderr"),
            })

    # Explicit columns keep the sort below valid even when no rows were found.
    df = pd.DataFrame(records, columns=columns)
    return df.sort_values(by=["Task", "Extractive Match"], ascending=[True, False])
|
|
|
def leaderboard():
    """Return the leaderboard table produced by ``load_results``.

    Used as the callback for both the results table and the refresh button
    in this file.
    """
    return load_results()
|
|
|
# Build the leaderboard page: a title, a short description, the results table
# and a button that reloads the table.
# NOTE(review): the "π" in the two labels below looks like a mis-decoded
# emoji — confirm the intended character before changing the strings.
demo = gr.Blocks()
with demo:
    gr.Markdown("# π Evaluation Leaderboard")
    gr.Markdown("This leaderboard displays evaluation results from JSON files in `eval_results/`.")

    # The callable itself (not its result) is handed to the component as its value.
    results_table = gr.Dataframe(value=leaderboard)

    # Clicking the button re-runs the same callable and writes the result
    # into the table.
    refresh_button = gr.Button("π Refresh")
    refresh_button.click(fn=leaderboard, outputs=[results_table])

demo.launch()
|
|