snyz committed
Commit 0e11bd2 · 1 Parent(s): 4ff8fb1

Update space

Files changed (3):
  1. app.py +22 -5
  2. src/about.py +3 -3
  3. src/populate.py +6 -6
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

+ # Local module imports
from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
@@ -28,13 +29,14 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval

-
+ # Space restart function
def restart_space():
    API.restart_space(repo_id=REPO_ID)

- ### Space initialisation
+ ### Space initialization
try:
    print(EVAL_REQUESTS_PATH)
+     # Download the evaluation requests dataset
    snapshot_download(
        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    )
@@ -42,21 +44,24 @@ except Exception:
    restart_space()
try:
    print(EVAL_RESULTS_PATH)
+     # Download the evaluation results dataset
    snapshot_download(
        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
    )
except Exception:
    restart_space()

-
+ # Fetch the leaderboard data
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

+ # Fetch the evaluation queue data
(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

+ # Initialize the leaderboard
def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -88,25 +93,30 @@ def init_leaderboard(dataframe):
        interactive=False,
    )

-
+ # Create the Gradio interface
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

+     # Create the tabs
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         # Leaderboard tab
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

+         # About tab
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

+         # Submission tab
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

                with gr.Column():
+                     # Finished evaluations section
                    with gr.Accordion(
                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                        open=False,
@@ -118,6 +128,7 @@ with demo:
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
+                     # Running evaluations section
                    with gr.Accordion(
                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
                        open=False,
@@ -129,7 +140,7 @@ with demo:
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
-
+                     # Pending evaluations section
                    with gr.Accordion(
                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                        open=False,
@@ -141,9 +152,11 @@ with demo:
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
+             # Submission heading
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

+             # Submission form
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
@@ -188,6 +201,7 @@ with demo:
                submission_result,
            )

+     # Citation section
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
@@ -198,7 +212,10 @@
                show_copy_button=True,
            )

+ # Create the scheduler
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
+
+ # Launch the Gradio app
demo.queue(default_concurrency_limit=40).launch()
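
The app.py changes above only add comments; the startup flow itself is unchanged. For readers skimming the diff, here is a minimal, self-contained sketch of that flow: snapshot the requests dataset, fall back to restarting the Space on failure, and schedule a restart every 30 minutes. The repo ids, paths, and token below are placeholder assumptions standing in for the values defined in src/envs.py, not the project's real configuration.

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi, snapshot_download

# Placeholder stand-ins for the constants imported from src.envs in the real app.
API = HfApi()                               # the real app passes its own token
REPO_ID = "user/demo-leaderboard"           # hypothetical Space id
QUEUE_REPO = "user/requests"                # hypothetical requests dataset
EVAL_REQUESTS_PATH = "./eval-queue"         # local clone of the requests dataset
TOKEN = None                                # the real app reads this from the environment

def restart_space():
    # Restarting the Space forces a fresh clone of the datasets on the next boot.
    API.restart_space(repo_id=REPO_ID)

try:
    # Mirror the requests dataset locally; etag_timeout keeps slow checks from hanging startup.
    snapshot_download(
        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset",
        tqdm_class=None, etag_timeout=30, token=TOKEN,
    )
except Exception:
    restart_space()

# Restart every 30 minutes so the leaderboard picks up newly pushed results.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()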
src/about.py CHANGED
@@ -20,12 +20,12 @@ NUM_FEWSHOT = 0 # Change with your few shot



- # Your leaderboard name
- TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
+ # Leaderboard name
+ TITLE = """<h1 align="center" id="space-title">Echo BenchMark</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
- Intro text
+ The Chinese University of Hong Kong, Shenzhen
"""

# Which evaluations are you running? how can people reproduce what you have?
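
TITLE and INTRODUCTION_TEXT are only display strings; as the app.py hunks above show, they are rendered with gr.HTML and gr.Markdown at the top of the page. A minimal sketch of that usage follows (the empty custom_css is a placeholder assumption, not the app's real stylesheet):

import gradio as gr

from src.about import INTRODUCTION_TEXT, TITLE  # the "Echo BenchMark" heading and intro line

custom_css = ""  # placeholder; the real app passes its own CSS

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)                                                # renders the <h1 id="space-title"> heading
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")  # renders the intro text

demo.launch()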
src/populate.py CHANGED
@@ -7,9 +7,9 @@ from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn
from src.leaderboard.read_evals import get_raw_eval_results

-
+ # Builds the leaderboard DataFrame
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
-     """Creates a dataframe from all the individual experiment results"""
+     """Creates a DataFrame containing all the individual experiment results"""
    raw_data = get_raw_eval_results(results_path, requests_path)
    all_data_json = [v.to_dict() for v in raw_data]

@@ -17,13 +17,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
    df = df[cols].round(decimals=2)

-     # filter out if any of the benchmarks have not been produced
+     # Filter out rows where any benchmark has not been produced
    df = df[has_no_nan_values(df, benchmark_cols)]
    return df

-
+ # Builds the evaluation-queue DataFrames (finished, running, pending, etc.)
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
-     """Creates the different dataframes for the evaluation queues requestes"""
+     """Creates the different DataFrames for the evaluation queue requests"""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

@@ -38,7 +38,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:

            all_evals.append(data)
        elif ".md" not in entry:
-             # this is a folder
+             # This entry is a folder
            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
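
Both functions keep their signatures in this commit; only comments and docstrings change. The sketch below mirrors how app.py (the first file in this commit) consumes them; the column lists and paths are stand-in assumptions for the constants the real app imports from its configuration modules.

from src.populate import get_evaluation_queue_df, get_leaderboard_df

# Stand-in values; the real constants come from the app's display/config modules.
COLS = ["model", "average"]             # hypothetical leaderboard columns
BENCHMARK_COLS = ["average"]            # hypothetical benchmark score columns
EVAL_COLS = ["model", "status"]         # hypothetical queue-table columns
EVAL_RESULTS_PATH = "./eval-results"    # local clones filled by snapshot_download in app.py
EVAL_REQUESTS_PATH = "./eval-queue"

# One DataFrame for the leaderboard table, sorted by average and stripped of rows
# that are missing any benchmark score.
leaderboard_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Three DataFrames for the submission queues, returned in (finished, running, pending) order.
finished_df, running_df, pending_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)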