snyz committed · 0e11bd2 · parent 4ff8fb1
Update space

Files changed:
- app.py (+22 -5)
- src/about.py (+3 -3)
- src/populate.py (+6 -6)
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 
+# Local module imports
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
@@ -28,13 +29,14 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
-
+# Space restart function
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
-###
+### Space initialization
 try:
     print(EVAL_REQUESTS_PATH)
+    # Download the evaluation requests dataset
     snapshot_download(
         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
@@ -42,21 +44,24 @@ except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
+    # Download the evaluation results dataset
     snapshot_download(
         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
 except Exception:
     restart_space()
 
-
+# Fetch the leaderboard data
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
+# Fetch the evaluation queue data
 (
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
+# Initialize the leaderboard
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -88,25 +93,30 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-
+# Build the Gradio interface
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
+    # Create the tabs
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # Leaderboard tab
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
+        # About tab
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
+        # Submission tab
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
                 with gr.Column():
+                    # Finished evaluations accordion
                     with gr.Accordion(
                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                         open=False,
@@ -118,6 +128,7 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 row_count=5,
                             )
+                    # Running evaluation queue accordion
                     with gr.Accordion(
                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
                         open=False,
@@ -129,7 +140,7 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 row_count=5,
                             )
-
+                    # Pending evaluation queue accordion
                     with gr.Accordion(
                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                         open=False,
@@ -141,9 +152,11 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 row_count=5,
                             )
+            # Submission heading
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
+            # Submission form
             with gr.Row():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
@@ -188,6 +201,7 @@ with demo:
                 submission_result,
             )
 
+    # Citation accordion
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
@@ -198,7 +212,10 @@ with demo:
                 show_copy_button=True,
             )
 
+# Create the scheduler
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
+
+# Launch the Gradio app
 demo.queue(default_concurrency_limit=40).launch()
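For readers less familiar with this leaderboard template, the block below is a minimal, self-contained sketch of the startup pattern the new comments describe: sync a dataset from the Hub with snapshot_download, restart the Space if the sync fails, and schedule periodic restarts with APScheduler so the leaderboard refreshes. The repo ids, paths, and token handling here are placeholders for illustration, not the values defined in this Space's src/envs.py.

# Sketch only: placeholder constants stand in for src/envs.py values.
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi, snapshot_download

TOKEN = None                               # a real HF token is needed to restart the Space
API = HfApi(token=TOKEN)
REPO_ID = "user/some-leaderboard-space"    # hypothetical Space id
QUEUE_REPO = "user/requests"               # hypothetical requests dataset
EVAL_REQUESTS_PATH = "./eval-queue"


def restart_space():
    # Restarting the Space re-runs app.py, so a failed sync retries itself.
    API.restart_space(repo_id=REPO_ID)


def sync_dataset(repo_id: str, local_dir: str) -> None:
    # Pull the dataset snapshot locally; fall back to a restart on any failure.
    try:
        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type="dataset",
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception:
        restart_space()


sync_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)

# Matches the 1800-second interval used in the diff above.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

Restarting the whole Space on a failed download is blunt but simple: the next run starts from a clean attempt instead of serving a stale or partial snapshot.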
src/about.py CHANGED
@@ -20,12 +20,12 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 
-#
-TITLE = """<h1 align="center" id="space-title">
+# Leaderboard title
+TITLE = """<h1 align="center" id="space-title">Echo BenchMark</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
+香港中文大学(深圳)
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
src/populate.py CHANGED
@@ -7,9 +7,9 @@ from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
-
+# Builds the leaderboard DataFrame
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
-    """
+    """Creates a DataFrame containing all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
 
@@ -17,13 +17,13 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
 
-    #
+    # Filter out rows where any benchmark has not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
     return df
 
-
+# Builds the evaluation-queue DataFrames (finished, running, pending)
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
-    """
+    """Creates the different DataFrames for the evaluation queue requests"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
 
@@ -38,7 +38,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
 
             all_evals.append(data)
         elif ".md" not in entry:
-            #
+            # This entry is a folder
             sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
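To make the annotated queue logic concrete, here is a hedged sketch of how loaded request files can be bucketed into finished / running / pending DataFrames. The status values (FINISHED, RUNNING, PENDING, RERUN) and the helper names are assumptions for illustration; the hunks above only show the directory walk, not the bucketing.

# Sketch only: status values and helper names are assumptions.
import json
import os

import pandas as pd


def load_request_rows(save_path: str) -> list[dict]:
    # Walk save_path (including subfolders) and load every request JSON.
    rows = []
    for root, _dirs, files in os.walk(save_path):
        for name in files:
            if name.startswith(".") or not name.endswith(".json"):
                continue
            with open(os.path.join(root, name)) as fp:
                rows.append(json.load(fp))
    return rows


def split_queue(save_path: str, cols: list[str]) -> list[pd.DataFrame]:
    # Bucket the requests by their "status" field, then build one DataFrame per bucket.
    rows = load_request_rows(save_path)
    finished = [r for r in rows if str(r.get("status", "")).startswith("FINISHED")]
    running = [r for r in rows if r.get("status") == "RUNNING"]
    pending = [r for r in rows if r.get("status") in ("PENDING", "RERUN")]
    return [
        pd.DataFrame.from_records(bucket, columns=cols)
        for bucket in (finished, running, pending)
    ]

One difference worth noting: the sketch checks full paths via os.walk, whereas the context line in the hunk above calls os.path.isfile(e) on a bare filename, which resolves against the working directory and can silently skip files; checking the joined path is the safer variant.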