File size: 4,885 Bytes
b414398 38210ee b414398 0fade42 0dcfe74 38210ee b414398 356fe31 38210ee 356fe31 363a6cb 356fe31 06e8c2c 356fe31 38210ee b414398 7c67606 0fade42 8300932 7c67606 0fade42 7c67606 59d0aa9 7c67606 59d0aa9 387cdcd b414398 0dcfe74 abd806c b414398 83d1755 356fe31 cc9ebdf 339ab1a 356fe31 6ab6442 e98b562 06e8c2c 38210ee 06e8c2c 38210ee e98b562 356fe31 97d8622 356fe31 97d8622 356fe31 b414398 7764a8b 2d13ea1 b414398 cc9ebdf b414398 38210ee b414398 0dcfe74 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import os
import pandas as pd
import requests
import huggingface_hub
import gradio as gr
# Leaderboard table, read once at import time from "data.csv" (path is relative to the working directory).
# Code further down in this file reads its "Name", "Type", "Architecture" and "License" columns.
data = pd.read_csv("data.csv")
# URL that model submissions are POSTed to; None when the WEBHOOK_URL environment variable is unset.
webhook_url = os.environ.get("WEBHOOK_URL")
def filter_table(cols, name, type, arch, license):
    """Build the leaderboard view for the current state of the filter widgets.

    Reads the module-level ``data`` frame and never modifies it.

    Args:
        cols: Column labels to hide from the result (``None`` hides nothing).
        name: Text to look for in the "Name" column. It is used as a regular
            expression when it compiles as one and as a literal substring
            otherwise, so input such as "(" no longer raises.
        type: "Type" values to keep.
        arch: "Architecture" values to keep.
        license: "License" values to keep.

    Returns:
        A new DataFrame holding the matching rows without the hidden columns.
        When still present, "Type" is shortened to its first character and
        renamed to "T", and "Name" is wrapped in an HTML link to the model's
        page on huggingface.co.
    """
    import re  # local import: only needed to validate the search text

    pattern = name or ""
    try:
        re.compile(pattern)
        as_regex = True
    except re.error:
        # Not a valid regular expression: fall back to a plain substring search.
        as_regex = False

    # filter (na=False drops rows whose name is missing instead of breaking the mask)
    keep = (
        data["Name"].str.contains(pattern, regex=as_regex, na=False)
        & data["Type"].isin(type or [])
        & data["Architecture"].isin(arch or [])
        & data["License"].isin(license or [])
    )
    # Explicit copy so the column assignments below can never reach the shared frame.
    view = data.loc[keep].copy()

    # show/hide
    view = view.drop(columns=cols or [])

    # prettify (each step is skipped when its column has been hidden)
    if "Type" in view.columns:
        # x[:1] instead of x[0] so an empty type string does not raise.
        view["Type"] = view["Type"].apply(lambda x: x[:1])
        view = view.rename(columns={"Type": "T"})
    if "Name" in view.columns:
        view["Name"] = view["Name"].apply(lambda x: f'<a target="_blank" href="https://huggingface.co/{x}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{x}</a>')
    return view
def submit_model(name):
    """Validate a Hugging Face model id and post it to the submission webhook.

    The model is accepted when its ``config.json`` can be downloaded from the
    Hub; the id is then sent as ``{"content": name}`` to ``webhook_url``.

    Args:
        name: Repo id typed by the user; surrounding whitespace is ignored.

    Returns:
        A Markdown string describing success or the reason for failure. The
        function reports every failure through its return value and does not
        raise.
    """
    name = (name or "").strip()
    if not name:
        return "# ERROR: Please enter a model name!"
    if not webhook_url:
        # Without a webhook the submission could never be recorded, so stop
        # before downloading anything.
        print("WEBHOOK_URL is not configured")
        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."

    # The exception classes are taken from the public `huggingface_hub.utils`
    # namespace rather than the private `_errors` module.
    hub_errors = huggingface_hub.utils
    try:
        huggingface_hub.hf_hub_download(repo_id=name, filename="config.json") # sanity check input
    except hub_errors.EntryNotFoundError:
        return "# ERROR: Model does not have a config.json file!"
    except hub_errors.RepositoryNotFoundError:
        return "# ERROR: Model could not be found on the Hugging Face Hub!"
    except (requests.exceptions.RequestException, hub_errors.HfHubHTTPError):
        # RequestException also covers connection failures and timeouts,
        # which are not HTTPError subclasses.
        return "# ERROR: Network error while validating model. Please try again later."
    except Exception as e:
        print(e)
        return "ERROR: Unexpected error. Please try again later."

    try:
        # The timeout (seconds) keeps an unresponsive webhook from hanging the request.
        result = requests.post(webhook_url, json={"content": name}, timeout=10)
        # requests only raises for 4xx/5xx responses when asked to.
        result.raise_for_status()
    except requests.exceptions.RequestException:
        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."
    except Exception as e:
        print(e)
        return "ERROR: Unexpected error. Please try again later."
    return "# SUCCESS: Please wait up to 24 hours for your model to be added to the queue."
# Gradio UI: a leaderboard tab with filters, an about tab and a submission tab.
# NOTE(review): the nesting below was reconstructed from the `with` statements of a
# whitespace-stripped source — confirm the layout against the deployed app.
with gr.Blocks() as demo:
    gr.HTML('<style>.tab-buttons button{font-size:1.3em}</style><h1 style="text-align:center"><span style="font-size:1.3em">Subquadratic LLM Leaderboard</span></h1>')
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("🏅 LLM Benchmark"):
            with gr.Row():
                with gr.Column():
                    namefilter = gr.Textbox(max_lines=1, placeholder="Search by model name and hit Enter...", show_label=False)
                    # Every type is preselected except "Pending".
                    typefilter = gr.CheckboxGroup(label="Filter by model type", choices=list(data["Type"].unique()), value=[n for n in data["Type"].unique() if n not in ["Pending"]])
                with gr.Column():
                    archfilter = gr.CheckboxGroup(label="Filter by model architecture", choices=list(data["Architecture"].unique()), value=list(data["Architecture"].unique()))
                    lcnsfilter = gr.CheckboxGroup(label="Filter by model license", choices=list(data["License"].unique()), value=list(data["License"].unique()))
                with gr.Column():
                    # The first two columns of `data` are not offered for hiding.
                    colfilter = gr.CheckboxGroup(label="Hide columns", choices=list(data.columns)[2:])
            table = gr.Dataframe(datatype=["str","markdown","number","number","number","number","number","number","number","number","number"])
            # actions
            namefilter.submit(filter_table, [colfilter,namefilter,typefilter,archfilter,lcnsfilter], table)
            # Loop variable is named `control` so the builtin `filter` is not rebound at module scope.
            for control in [colfilter,typefilter,archfilter,lcnsfilter]:
                control.input(filter_table, [colfilter,namefilter,typefilter,archfilter,lcnsfilter], table)
            # Fill the table once when the page loads.
            demo.load(fn=filter_table, inputs=[colfilter,namefilter,typefilter,archfilter,lcnsfilter], outputs=table)
        with gr.Tab("📝 About"):
            gr.Markdown("""
The **Subquadratic LLM Leaderboard** evaluates LLMs with subquadratic architectures (ie RWKV & Mamba) with the goal of providing open evaluation results while the architectures themselves are pending inclusion in 🤗 Transformers.
The metrics are the same as the Open LLM Leaderboard: ARC 25-shot, HellaSwag 10-shot, MMLU 5-shot, TruthfulQA zeroshot, Winogrande 5-shot, and GSM8K 5-shot.
This leaderboard is maintained by Devin Gulliver and is still under construction, check back regularly for further improvements!
""")
        with gr.Tab("🚀 Submit here!"):
            with gr.Group():
                with gr.Row():
                    model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
                    submit = gr.Button("Submit", variant="primary", scale=0)
            output = gr.Markdown("Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
            submit.click(fn=submit_model, inputs=model_name, outputs=output)
demo.launch()