Submit + FAQ
- app.py (+21, -3)
- src/about.py (+22, -3)
- src/display/css_html_js.py (+6, -0)
app.py

@@ -189,7 +189,7 @@ def get_model_info_blocks(chosen_model_name):
         with gr.Row():
             benchmark_score = gr.HTML(get_metric_html("Benchmark Score").format(filtered_df["Benchmark Score"][0]))
             rank = gr.HTML(get_metric_html("Benchmark Rank").format(filtered_df["Rank"][0]))
-            speed = gr.HTML(get_metric_html("Speed").format(filtered_df["Speed (words/sec)"][0]))
+            speed = gr.HTML(get_metric_html("Speed <br/>(words per second)").format(filtered_df["Speed (words/sec)"][0]))
             contamination = gr.HTML(get_metric_html("Contamination Score").format(filtered_df["Contamination Score"][0]))
             size = gr.HTML(get_metric_html("Size Category").format(filtered_df["Category"][0]))

@@ -318,12 +318,30 @@ with demo:
         with gr.TabItem("π Submit here", elem_id="llm-benchmark-tab-submit", id=5):
             with gr.Row():
                 gr.Markdown("# Submit your model", elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("### Please confirm that you understand and accept the conditions below before submitting your model.")
+                prereqs_checkboxes = gr.CheckboxGroup(["I have successfully run the ABB benchmark script on my model using my own infrastructure and I should NOT use the leaderboard for testing purposes",
+                                                       "I understand that my account/org have only one submission per month",
+                                                       "I understand that I can't submit models more than 15B parameters (learn more in the FAQ)",
+                                                       "I understand that submitting contaminated models or models to test the contamination score will lead to action from our side including banning and negative PR"],
+                                                      label=None, info=None,
+                                                      elem_classes="submit_prereq_checkboxes_container",
+                                                      container=False)
+
+

             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")

-                    submit_button = gr.Button("Submit Eval", variant="huggingface" )
+                    submit_button = gr.Button("Submit Eval", variant="huggingface", interactive=False )
+
+                    prereqs_checkboxes.change(
+                        fn=lambda choices: gr.update(interactive=len(choices) == 4),
+                        inputs=prereqs_checkboxes,
+                        outputs=submit_button
+                    )
+
             submission_result = gr.Markdown()
             submit_button.click(
                 add_new_eval,

@@ -372,7 +390,7 @@ with demo:
                 row_count=5,
             )

-        with gr.TabItem("π
+        with gr.TabItem("π FAQ", elem_id="llm-benchmark-tab-about", id=6):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

         with gr.Row():
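For context, the new submit flow gates the "Submit Eval" button behind the prerequisite checkboxes: the button starts with `interactive=False`, and the `CheckboxGroup.change` event re-enables it only once every box is ticked. Below is a minimal, self-contained sketch of the same pattern; the component names, condition labels, and the placeholder click handler are illustrative, not taken from the repository.

```python
import gradio as gr

# Minimal sketch of the checkbox-gated submit pattern used in the diff above.
# Labels and handler are placeholders, not the leaderboard's actual strings.
CONDITIONS = ["Condition A", "Condition B", "Condition C", "Condition D"]

with gr.Blocks() as demo:
    prereqs = gr.CheckboxGroup(CONDITIONS, container=False)
    submit = gr.Button("Submit Eval", interactive=False)  # disabled until all boxes are ticked
    result = gr.Markdown()

    # Re-enable the button only when every condition has been checked.
    prereqs.change(
        fn=lambda choices: gr.update(interactive=len(choices) == len(CONDITIONS)),
        inputs=prereqs,
        outputs=submit,
    )
    submit.click(fn=lambda: "Submission received.", outputs=result)

if __name__ == "__main__":
    demo.launch()
```

Note that the diff hard-codes `len(choices) == 4`, so if the number of prerequisite checkboxes ever changes, that constant has to change with it; the sketch compares against `len(CONDITIONS)` instead to avoid that coupling.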
src/about.py

@@ -67,10 +67,29 @@ Find more details in the about Tab.
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-##
+## What is the difference between ABL and ABB?
+
+ABL is the Leaderboard, which uses the ABB benchmarking dataset and code in the backend to produce the results you see here.
+
+
+## Where can I learn more about ABL and ABB?
+
+Feel free to read the following resources:
+ABB Page:
+ABL blog post:
+
+## How can I reproduce the results?
+
+You can easily run the ABB benchmarking code using the following command on Google Colab or your own infrastructure.
+
+## What is the Benchmark Score?
+
+## What is the Contamination Score?
+
+## What is the Speed?
+
+## Why am I not allowed to submit models with more than 15B parameters?
 
-## Reproducibility
-To reproduce our results, here is the commands you can run:
 
 """
 
src/display/css_html_js.py

@@ -143,6 +143,12 @@ border-radius: 10px;
 margin: auto;
 width: 80%;
 }
+
+
+.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
+    display: flex;
+    flex-direction: column !important;
+}
 """
 
 get_window_url_params = """
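The new CSS rule only takes effect if two things line up: the stylesheet string has to be handed to the app (this repo's CSS presumably reaches `gr.Blocks(css=...)` somewhere in app.py, though that wiring is not part of this diff), and the `elem_classes` value on the `CheckboxGroup` must match the selector. A minimal sketch under those assumptions:

```python
import gradio as gr

# Sketch of how the custom CSS and elem_classes fit together.
# The variable name `custom_css` and the Blocks wiring are assumptions;
# only the CSS rule itself comes from the diff above.
custom_css = """
.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
    display: flex;
    flex-direction: column !important;
}
"""

with gr.Blocks(css=custom_css) as demo:
    # The class name must match the CSS selector for the rule to apply,
    # forcing the checkboxes to stack vertically instead of flowing in a row.
    gr.CheckboxGroup(
        ["Condition A", "Condition B"],
        elem_classes="submit_prereq_checkboxes_container",
        container=False,
    )

demo.launch()
```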