karimouda committed on
Commit
cb2c8cb
·
1 Parent(s): 1cda853

Submit + FAQ

Files changed (3)
  1. app.py +21 -3
  2. src/about.py +22 -3
  3. src/display/css_html_js.py +6 -0
app.py CHANGED
@@ -189,7 +189,7 @@ def get_model_info_blocks(chosen_model_name):
     with gr.Row():
         benchmark_score = gr.HTML(get_metric_html("Benchmark Score").format(filtered_df["Benchmark Score"][0]))
         rank = gr.HTML(get_metric_html("Benchmark Rank").format(filtered_df["Rank"][0]))
-        speed = gr.HTML(get_metric_html("Speed").format(filtered_df["Speed (words/sec)"][0]))
+        speed = gr.HTML(get_metric_html("Speed <br/>(words per second)").format(filtered_df["Speed (words/sec)"][0]))
         contamination = gr.HTML(get_metric_html("Contamination Score").format(filtered_df["Contamination Score"][0]))
         size = gr.HTML(get_metric_html("Size Category").format(filtered_df["Category"][0]))
 
@@ -318,12 +318,30 @@ with demo:
         with gr.TabItem("🚀 Submit here", elem_id="llm-benchmark-tab-submit", id=5):
             with gr.Row():
                 gr.Markdown("# Submit your model", elem_classes="markdown-text")
+                with gr.Column():
+                    gr.Markdown("### Please confirm that you understand and accept the conditions below before submitting your model.")
+                    prereqs_checkboxes = gr.CheckboxGroup(
+                        ["I have successfully run the ABB benchmark script on my model using my own infrastructure and I should NOT use the leaderboard for testing purposes",
+                         "I understand that my account/org has only one submission per month",
+                         "I understand that I can't submit models with more than 15B parameters (learn more in the FAQ)",
+                         "I understand that submitting contaminated models, or models meant to test the contamination score, will lead to action from our side, including banning and negative PR"],
+                        label=None, info=None,
+                        elem_classes="submit_prereq_checkboxes_container",
+                        container=False)
+
 
             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
 
-            submit_button = gr.Button("Submit Eval", variant="huggingface")
+            submit_button = gr.Button("Submit Eval", variant="huggingface", interactive=False)
+
+            prereqs_checkboxes.change(
+                fn=lambda choices: gr.update(interactive=len(choices) == 4),
+                inputs=prereqs_checkboxes,
+                outputs=submit_button,
+            )
+
             submission_result = gr.Markdown()
             submit_button.click(
                 add_new_eval,
@@ -372,7 +390,7 @@ with demo:
             row_count=5,
         )
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=6):
+        with gr.TabItem("📝 FAQ", elem_id="llm-benchmark-tab-about", id=6):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
        with gr.Row():
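The key change above is the gating logic: the submit button now starts disabled (interactive=False), and the CheckboxGroup's change event re-enables it only once all four prerequisites are ticked. Below is a minimal standalone sketch of the same pattern, using placeholder condition labels rather than the leaderboard's real prerequisites or its add_new_eval wiring:

# Minimal sketch of the checkbox-gated submit pattern (assumptions: standalone
# Gradio demo, placeholder labels; the real app gates on the four prerequisites
# shown above and wires the button to add_new_eval).
import gradio as gr

CONDITIONS = ["Condition A", "Condition B", "Condition C"]

with gr.Blocks() as demo:
    checks = gr.CheckboxGroup(CONDITIONS, container=False)
    submit = gr.Button("Submit Eval", interactive=False)  # disabled until every box is ticked

    # Each change to the selection re-evaluates the gate; gr.update toggles interactivity.
    checks.change(
        fn=lambda chosen: gr.update(interactive=len(chosen) == len(CONDITIONS)),
        inputs=checks,
        outputs=submit,
    )

if __name__ == "__main__":
    demo.launch()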
src/about.py CHANGED
@@ -67,10 +67,29 @@ Find more details in the about Tab.
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-## How it works
-
-## Reproducibility
-To reproduce our results, here is the commands you can run:
+## What is the difference between ABL and ABB?
+
+ABL is the leaderboard, which uses the ABB benchmarking dataset and code in the backend to produce the results you see here.
+
+
+## Where can I learn more about ABL and ABB?
+
+Feel free to read the following resources:
+ABB Page:
+ABL blog post:
+
+## How can I reproduce the results?
+
+You can easily run the ABB benchmarking code with the following command, on Google Colab or on your own infrastructure.
+
+## What is the Benchmark Score?
+
+## What is the Contamination Score?
+
+## What is the Speed?
+
+## Why am I not allowed to submit models with more than 15B parameters?
+
 
 """
src/display/css_html_js.py CHANGED
@@ -143,6 +143,12 @@ border-radius: 10px;
 margin: auto;
 width: 80%;
 }
+
+
+.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
+    display: flex;
+    flex-direction: column !important;
+}
 """
 
 get_window_url_params = """
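This rule only reaches the component because the CheckboxGroup added in app.py carries elem_classes="submit_prereq_checkboxes_container", matching the class selector above, and because the app hands its stylesheet to gr.Blocks(css=...). A rough sketch of that wiring, with placeholder labels and the assumption that custom_css is passed to gr.Blocks the same way the existing rules already are:

# Rough sketch of how the added CSS is expected to reach the component
# (assumed wiring; the real app builds custom_css in src/display/css_html_js.py).
import gradio as gr

custom_css = """
.submit_prereq_checkboxes_container div[data-testid=checkbox-group]{
    display: flex;
    flex-direction: column !important;  /* stack the checkboxes vertically */
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.CheckboxGroup(
        ["Condition A", "Condition B"],  # placeholder labels
        elem_classes="submit_prereq_checkboxes_container",
        container=False,
    )

if __name__ == "__main__":
    demo.launch()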