Jerrycool committed on
Commit
8104fa7
·
verified ·
1 Parent(s): 14cd4fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +554 -114
app.py CHANGED
@@ -1,141 +1,581 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
 
4
 
5
- """
6
- MLE‑Dojo Benchmark Leaderboard Dark Elegance v4
7
- =================================================
8
- * Removed "Asc. order" toggle (always sorted high→low).
9
- * Rank column back to plain numbers.
10
- * Category selector restyled via CSS.
11
- * Horizontal scrollbar hidden; vertical scrollbar retained.
12
- """
13
-
14
- # ---------------------------------------------------------------------------
15
- # Import copy or fallback placeholders
16
- # ---------------------------------------------------------------------------
17
  try:
18
  from src.about import (
19
  CITATION_BUTTON_LABEL,
20
  CITATION_BUTTON_TEXT,
 
21
  INTRODUCTION_TEXT,
22
  LLM_BENCHMARKS_TEXT,
23
- TITLE,
24
  )
25
- from src.display.css_html_js import custom_css
26
- from src.envs import REPO_ID
27
- from src.submission.submit import add_new_eval
 
 
 
 
 
 
 
28
  except ImportError:
29
- CITATION_BUTTON_LABEL = "Citation"
30
- CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark…"
31
- INTRODUCTION_TEXT = "Welcome to the **MLE‑Dojo Benchmark Leaderboard** compare LLM agents across realistic ML‑engineering tasks."
32
- LLM_BENCHMARKS_TEXT = "Further details about tasks, metrics, and evaluation pipelines."
33
- TITLE = (
34
- "<h1 class='hero-title gradient-text'>🏆 MLE‑Dojo Benchmark Leaderboard</h1>"
35
- "<p class='subtitle'>Interactive, reproducible &amp; community‑driven ML‑agent benchmarking</p>"
36
- )
37
- custom_css = ""
38
- REPO_ID = "your/space-id"
39
- def add_new_eval(*_):
40
- return "Submission placeholder."
41
 
42
- # ---------------------------------------------------------------------------
43
- # Data (unchanged)
44
- # ---------------------------------------------------------------------------
 
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  data = [
47
- {"model_name": "gpt-4o-mini", "url": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 753, "Tabular_Elo": 839, "NLP_Elo": 758, "CV_Elo": 754, "Overall": 778},
48
- {"model_name": "gpt-4o", "url": "https://openai.com/index/hello-gpt-4o/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 830, "Tabular_Elo": 861, "NLP_Elo": 903, "CV_Elo": 761, "Overall": 841},
49
- {"model_name": "o3-mini", "url": "https://openai.com/index/openai-o3-mini/", "organizer": "OpenAI", "license": "Proprietary", "MLE-Lite_Elo": 1108, "Tabular_Elo": 1019, "NLP_Elo": 1056, "CV_Elo": 1207, "Overall": 1096},
50
- {"model_name": "deepseek-v3", "url": "https://api-docs.deepseek.com/news/news1226", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1004, "Tabular_Elo": 1015, "NLP_Elo": 1028, "CV_Elo": 1067, "Overall": 1023},
51
- {"model_name": "deepseek-r1", "url": "https://api-docs.deepseek.com/news/news250120", "organizer": "DeepSeek", "license": "DeepSeek", "MLE-Lite_Elo": 1137, "Tabular_Elo": 1053, "NLP_Elo": 1103, "CV_Elo": 1083, "Overall": 1100},
52
- {"model_name": "gemini-2.0-flash", "url": "https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 847, "Tabular_Elo": 923, "NLP_Elo": 860, "CV_Elo": 978, "Overall": 895},
53
- {"model_name": "gemini-2.0-pro", "url": "https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1064, "Tabular_Elo": 1139, "NLP_Elo": 1028, "CV_Elo": 973, "Overall": 1054},
54
- {"model_name": "gemini-2.5-pro", "url": "https://deepmind.google/technologies/gemini/pro/", "organizer": "Google", "license": "Proprietary", "MLE-Lite_Elo": 1257, "Tabular_Elo": 1150, "NLP_Elo": 1266, "CV_Elo": 1177, "Overall": 1214},
55
  ]
 
 
56
  master_df = pd.DataFrame(data)
57
 
58
- # ---------------------------------------------------------------------------
59
- # Helpers
60
- # ---------------------------------------------------------------------------
61
- CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
62
- DEFAULT_CATEGORY = "Overall"
63
- CATEGORY_MAP = {
64
- "Overall": "Overall",
65
- "MLE-Lite": "MLE-Lite_Elo",
66
- "Tabular": "Tabular_Elo",
67
- "NLP": "NLP_Elo",
68
- "CV": "CV_Elo",
69
- }
70
-
71
def update_leaderboard(category: str):
    """Return the ranked leaderboard table for ``category``.

    The category name is translated to a score column through
    ``CATEGORY_MAP``; names that are not in the map use the column of
    ``DEFAULT_CATEGORY`` instead. Rows of ``master_df`` are ordered by that
    score from highest to lowest and numbered from 1.

    Returns:
        A DataFrame with the columns ``Rank``, ``Model`` (an HTML link built
        from the row's ``url`` and ``model_name``), ``Organizer``,
        ``License`` and ``Elo Score``.
    """
    fallback_column = CATEGORY_MAP[DEFAULT_CATEGORY]
    score_col = CATEGORY_MAP.get(category, fallback_column)

    # Narrow to the columns needed for display, then order by score.
    selected = master_df[["model_name", "url", "organizer", "license", score_col]]
    ranked = selected.sort_values(by=score_col, ascending=False)
    ranked = ranked.reset_index(drop=True)

    # After the index reset, position + 1 is the rank.
    ranked.insert(0, "Rank", ranked.index + 1)

    def _as_link(row):
        return f"<a href='{row.url}' target='_blank'>{row.model_name}</a>"

    ranked["Model"] = ranked.apply(_as_link, axis=1)

    display_names = {"organizer": "Organizer", "license": "License", score_col: "Elo Score"}
    ranked = ranked.rename(columns=display_names)
    return ranked[["Rank", "Model", "Organizer", "License", "Elo Score"]]
82
-
83
- # ---------------------------------------------------------------------------
84
- # CSS tweaks — category radio beautify & horizontal scroll hidden
85
- # ---------------------------------------------------------------------------
86
- custom_css += """
87
- /* Category radio stylish pill */
88
- .gr-radio input+label{
89
- background:#1e293b; color:#e2e8f0; padding:6px 14px; border-radius:9999px; margin-right:8px; cursor:pointer;
90
- transition:background .2s, color .2s;
91
- }
92
- .gr-radio input:checked+label{background:#38bdf8;color:#0f172a;font-weight:600;}
93
-
94
- /* Remove horizontal scrollbar; keep vertical */
95
- #leaderboard-table tbody{overflow-y:auto;overflow-x:hidden;max-height:520px;display:block;}
96
- #leaderboard-table thead, #leaderboard-table tbody tr{display:table;width:100%;table-layout:fixed;}
97
-
98
- #leaderboard-table td{padding:.7em;font-size:1.05rem;border-top:1px solid #334155;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  """
 
 
100
 
101
- # ---------------------------------------------------------------------------
102
- # Gradio App
103
- # ---------------------------------------------------------------------------
104
- app = gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="sky", neutral_hue="slate", font=["Inter",]))
 
 
 
 
105
 
106
- with app:
 
107
  gr.HTML(TITLE)
 
 
108
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
109
- with gr.Tabs():
110
- with gr.TabItem("🏅 Leaderboard"):
111
- gr.HTML("<h3 class='section-title'><span class='icon'>📊</span>Model Elo Rankings by Category</h3>")
112
- category_radio = gr.Radio(CATEGORIES, value=DEFAULT_CATEGORY, label="Category")
113
- board = gr.Dataframe(
114
- value=update_leaderboard(DEFAULT_CATEGORY),
115
- headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
116
- datatype=["number", "html", "str", "str", "number"],
117
- row_count=(len(master_df), "fixed"),
118
- col_count=(5, "fixed"),
119
- interactive=False,
120
- elem_id="leaderboard-table",
121
- )
122
- category_radio.change(update_leaderboard, category_radio, board)
123
- with gr.TabItem("ℹ️ About"):
124
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
125
- with gr.Accordion("📖 Citation", open=False):
126
- gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=10, show_copy_button=True)
127
-
128
- # ---------------------------------------------------------------------------
129
- # Optional scheduler
130
- # ---------------------------------------------------------------------------
131
 
132
def restart_space():
    """Print a notice that the Space named by ``REPO_ID`` is being restarted.

    Only a message is printed here; no restart call is made.
    """
    message = f"Restarting space {REPO_ID}"
    print(message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
 
 
135
  if __name__ == "__main__":
136
- if REPO_ID != "your/space-id":
137
- scheduler = BackgroundScheduler()
138
- scheduler.add_job(restart_space, "interval", seconds=1800)
139
- scheduler.start()
140
- print("Launching app…")
141
- app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
+ # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
5
 
6
+ # --- Make sure these imports work relative to your file structure ---
7
+ # Option 1: If src is a directory in the same folder as your script:
 
 
 
 
 
 
 
 
 
 
8
  try:
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
11
  CITATION_BUTTON_TEXT,
12
+ EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
13
  INTRODUCTION_TEXT,
14
  LLM_BENCHMARKS_TEXT,
15
+ TITLE as ORIGINAL_TITLE, # Rename original import
16
  )
17
+ from src.display.css_html_js import custom_css as original_css # Rename original import
18
+ from src.envs import REPO_ID # Keep if needed for restart_space or other functions
19
+ from src.submission.submit import add_new_eval # Keep if using the submit tab
20
+ print("Successfully imported from src module.")
21
+ # Start with original CSS if available
22
+ custom_css = original_css if isinstance(original_css, str) else ""
23
+ # Use original title if available, otherwise create a default
24
+ TITLE = ORIGINAL_TITLE if isinstance(ORIGINAL_TITLE, str) else "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
25
+
26
+ # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
27
  except ImportError:
28
+ print("Warning: Using placeholder values because src module imports failed.")
29
+ CITATION_BUTTON_LABEL="Citation"
30
+ CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
31
+ EVALUATION_QUEUE_TEXT="Current evaluation queue:"
32
+ INTRODUCTION_TEXT="Welcome to the **MLE-Dojo Benchmark Leaderboard**, showcasing the performance of various models across different machine learning tasks. Select a category below to see the rankings based on Elo scores."
33
+ LLM_BENCHMARKS_TEXT="""
34
+ ## About the Benchmarks
35
+
36
+ This leaderboard tracks model performance using Elo ratings across several key areas:
 
 
 
37
 
38
+ * **Overall:** A combined score reflecting performance across all categories.
39
+ * **MLE-Lite:** Benchmarks focusing on lightweight machine learning engineering tasks.
40
+ * **Tabular:** Performance on tasks involving structured, tabular data.
41
+ * **NLP:** Natural Language Processing capabilities.
42
+ * **CV:** Computer Vision tasks.
43
 
44
+ Models are ranked based on their Elo score within each category. Higher scores indicate better relative performance. Click on a model name to visit its associated page (if available).
45
+ """
46
+ # Define an enhanced TITLE with an icon
47
+ TITLE = """
48
+ <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 20px;">
49
+ <span style="font-size: 2.5em; margin-right: 15px;">🏆</span>
50
+ <h1 style="font-size: 2.8em; font-weight: 600; color: #333; margin: 0; line-height: 1.2;">
51
+ MLE-Dojo Benchmark Leaderboard
52
+ </h1>
53
+ </div>
54
+ """
55
+ custom_css="" # Start with empty CSS if not imported
56
+ REPO_ID="your/space-id" # Replace with actual ID if needed
57
+ def add_new_eval(*args): return "Submission placeholder."
58
+ # --- End Placeholder Definitions ---
59
+
60
+
61
+ # --- Elo Leaderboard Configuration ---
62
+ # Enhanced data with Rank (placeholder), Organizer, License, and URL
63
+ # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
64
+ # Verify organizer and license information for accuracy.
65
  data = [
66
+ {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
67
+ {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
68
+ {'model_name': 'o3-mini', 'url': 'https://openai.com/index/openai-o3-mini/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
69
+ {'model_name': 'deepseek-v3', 'url': 'https://api-docs.deepseek.com/news/news1226', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
70
+ {'model_name': 'deepseek-r1', 'url': 'https://api-docs.deepseek.com/news/news250120', 'organizer': 'DeepSeek', 'license': 'DeepSeek', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
71
+ {'model_name': 'gemini-2.0-flash', 'url': 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
72
+ {'model_name': 'gemini-2.0-pro', 'url': 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
73
+ {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/pro/', 'organizer': 'Google', 'license': 'Proprietary', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
74
  ]
75
+
76
+ # Create a master DataFrame
77
  master_df = pd.DataFrame(data)
78
 
79
+ # Define categories for selection (user-facing) - Emojis added
80
+ CATEGORIES = ["🏆 Overall", "💡 MLE-Lite", "📊 Tabular", "💬 NLP", "👁️ CV"]
81
+ DEFAULT_CATEGORY = "🏆 Overall" # Set a default category
82
+
83
+ # Map user-facing categories (WITHOUT emojis) to DataFrame column names
84
+ category_to_column = {
85
+ "Overall": "Overall", # Mapped from "🏆 Overall"
86
+ "MLE-Lite": "MLE-Lite_Elo", # Mapped from "💡 MLE-Lite"
87
+ "Tabular": "Tabular_Elo", # Mapped from "📊 Tabular"
88
+ "NLP": "NLP_Elo", # Mapped from "💬 NLP"
89
+ "CV": "CV_Elo", # Mapped from "👁️ CV"
90
+ }
91
+
92
+ # --- Helper function to update leaderboard ---
93
def _format_model_link(url, model_name):
    """Render one model as an HTML anchor, escaping the URL and the name."""
    from html import escape  # function-scope import: the file does not import html

    # A missing URL degrades to an inert '#' link rather than "nan"/"None".
    href = escape(str(url), quote=True) if pd.notna(url) else '#'
    return (
        f"<a href='{href}' target='_blank' "
        f"style='color: #0056b3; text-decoration: none; font-weight: 500;'>"
        f"{escape(str(model_name))}</a>"
    )


def update_leaderboard(category_with_emoji):
    """Build the leaderboard table for one category.

    Takes the identifying columns plus the selected category's Elo column
    from ``master_df``, sorts by that score from highest to lowest, numbers
    the rows from 1 in a ``Rank`` column, renders each model name as an HTML
    link, and returns the columns in display order.

    Args:
        category_with_emoji: A label such as ``"🏆 Overall"``. Everything up
            to and including the first space is dropped; a label without a
            space is used unchanged. A non-string value (e.g. ``None``) or an
            unknown category falls back to ``DEFAULT_CATEGORY`` after
            printing a warning.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Model``,
        ``Organizer``, ``License`` and ``Elo Score``, in that order. If even
        the default category's column is absent from ``master_df``, an empty
        frame with those same columns is returned.

    Raises:
        KeyError: If ``master_df`` lacks one of the ``model_name``, ``url``,
            ``organizer`` or ``license`` columns.
    """
    display_columns = ["Rank", "Model", "Organizer", "License", "Elo Score"]

    # Strip the "<emoji> " prefix. Non-string input yields no category and
    # therefore takes the fallback path instead of crashing on .split().
    if isinstance(category_with_emoji, str):
        base_category = category_with_emoji.split(" ", 1)[-1]
    else:
        base_category = None
    score_column = category_to_column.get(base_category)

    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{base_category}' or column '{score_column}'. Falling back to default.")
        default_base_category = DEFAULT_CATEGORY.split(" ", 1)[-1]
        score_column = category_to_column[default_base_category]
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            # Same column order as the normal result so the table headers
            # line up with the data.
            return pd.DataFrame({name: [] for name in display_columns})

    # mergesort is stable: models with equal scores keep their order from
    # master_df, so the ranking of ties is deterministic.
    table = (
        master_df[["model_name", "url", "organizer", "license", score_column]]
        .sort_values(by=score_column, ascending=False, kind="mergesort")
        .reset_index(drop=True)
    )

    # After the index reset, position + 1 is the rank.
    table.insert(0, "Rank", table.index + 1)

    # Built as a list rather than with DataFrame.apply(axis=1), which returns
    # a DataFrame (not a Series) when the table has no rows.
    table["Model"] = [
        _format_model_link(url, name)
        for url, name in zip(table["url"], table["model_name"])
    ]

    table = table.rename(
        columns={
            score_column: "Elo Score",
            "organizer": "Organizer",
            "license": "License",
        }
    )
    return table[display_columns]
149
+
150
+ # --- Mock/Placeholder functions/data for other tabs ---
151
+ # (If the Submit tab is used, ensure these variables are appropriately populated or handled)
152
+ print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
153
+ finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
154
+ running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
155
+ pending_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
156
+ EVAL_COLS = ["Model", "Status", "Requested", "Started"] # Define for the dataframe headers
157
+ EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types
158
+
159
+ # --- Keep restart function if relevant ---
160
def restart_space():
    """Announce a restart attempt for the Space identified by ``REPO_ID``.

    This is a stub that only prints. ``REPO_ID`` must be defined or imported
    for it to work; replace the body with the real restart mechanism if one
    is needed (e.g. ``HfApi().restart_space(REPO_ID)``).
    """
    notice = f"Attempting to restart space: {REPO_ID}"
    print(notice)
164
+
165
+
166
+ # --- Enhanced CSS ---
167
+ enhanced_css = """
168
+ /* --- Overall Body and Font --- */
169
+ body, .gradio-container {
170
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; /* Modern Font Stack */
171
+ font-size: 16px !important; /* Base font size */
172
+ color: #333;
173
+ background-color: #f8f9fa; /* Light background */
174
+ line-height: 1.6;
175
+ }
176
+
177
+ /* --- Headings --- */
178
+ h1, .markdown-text h1 { /* Target Gradio's Markdown output too */
179
+ font-size: 2.6em !important; /* Larger main title */
180
+ font-weight: 700 !important;
181
+ color: #212529;
182
+ margin-bottom: 0.7em !important;
183
+ line-height: 1.3 !important;
184
+ text-align: center; /* Center main title if not using custom HTML */
185
+ }
186
+ h2, .markdown-text h2 {
187
+ font-size: 1.8em !important;
188
+ font-weight: 600 !important;
189
+ color: #343a40;
190
+ margin-top: 1.5em !important;
191
+ margin-bottom: 0.8em !important;
192
+ border-bottom: 2px solid #dee2e6;
193
+ padding-bottom: 0.3em;
194
+ }
195
+ h3, .markdown-text h3 {
196
+ font-size: 1.4em !important;
197
+ font-weight: 600 !important;
198
+ color: #495057;
199
+ margin-top: 1.2em !important;
200
+ margin-bottom: 0.6em !important;
201
+ }
202
+
203
+ /* --- Markdown & Text --- */
204
+ .markdown-text p {
205
+ margin-bottom: 1.2em !important;
206
+ font-size: 1.05em !important;
207
+ color: #454545;
208
+ }
209
+ .markdown-text ul, .markdown-text ol {
210
+ padding-left: 1.8em !important;
211
+ margin-bottom: 1.2em !important;
212
+ font-size: 1.05em !important;
213
+ }
214
+ .markdown-text li {
215
+ margin-bottom: 0.6em !important;
216
+ }
217
+ .markdown-text strong {
218
+ font-weight: 600;
219
+ color: #0056b3; /* Highlight strong text */
220
+ }
221
+ .markdown-text a {
222
+ color: #0056b3;
223
+ text-decoration: none;
224
+ }
225
+ .markdown-text a:hover {
226
+ text-decoration: underline;
227
+ }
228
+
229
+ /* --- Gradio Components Styling --- */
230
+ .gradio-container {
231
+ max-width: 1200px; /* Limit max width for better readability on wide screens */
232
+ margin: 20px auto !important; /* Center container with margin */
233
+ padding: 25px !important;
234
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.08); /* Softer shadow */
235
+ border-radius: 12px; /* Rounded corners */
236
+ background-color: #ffffff; /* White background for content area */
237
+ }
238
+
239
+ /* --- Tabs --- */
240
+ .tab-buttons button { /* Target tab buttons */
241
+ font-size: 1.1em !important; /* Slightly larger tab text */
242
+ padding: 12px 20px !important;
243
+ border-radius: 8px 8px 0 0 !important; /* Rounded top corners */
244
+ border: 1px solid #dee2e6 !important;
245
+ border-bottom: none !important;
246
+ background-color: #f8f9fa !important; /* Default tab background */
247
+ color: #495057 !important;
248
+ margin-right: 4px !important;
249
+ transition: background-color 0.3s ease, color 0.3s ease;
250
+ }
251
+ .tab-buttons button.selected { /* Selected tab style */
252
+ background-color: #ffffff !important;
253
+ color: #0056b3 !important; /* Highlight selected tab */
254
+ font-weight: 600 !important;
255
+ border-color: #dee2e6 #dee2e6 #ffffff !important; /* Hide bottom border */
256
+ position: relative;
257
+ top: 1px; /* Align with content area */
258
+ }
259
+
260
+ /* --- Radio Buttons (Category Selector) --- */
261
+ #category-selector-radio .gr-form { /* Target the form containing radio buttons */
262
+ display: flex;
263
+ flex-wrap: wrap; /* Allow wrapping */
264
+ gap: 12px; /* Space between buttons */
265
+ border: 1px solid #ced4da;
266
+ padding: 15px;
267
+ border-radius: 8px;
268
+ background-color: #f8f9fa;
269
+ margin-bottom: 20px;
270
+ }
271
+ #category-selector-radio .gr-form .gr-input-label { /* Hide the main "Select Category:" label if needed */
272
+ display: none; /* Optional: Hide label if context is clear */
273
+ }
274
+ #category-selector-radio .gr-form > div { /* Target individual radio button containers */
275
+ flex-grow: 1; /* Allow buttons to grow */
276
+ min-width: 150px; /* Minimum width */
277
+ }
278
+ #category-selector-radio .gr-form label { /* Style individual radio labels */
279
+ display: block; /* Make label take full width */
280
+ padding: 10px 15px;
281
+ border: 1px solid #ced4da;
282
+ border-radius: 6px;
283
+ background-color: #ffffff;
284
+ cursor: pointer;
285
+ text-align: center;
286
+ transition: background-color 0.3s ease, border-color 0.3s ease, color 0.3s ease;
287
+ font-size: 1.05em; /* Make radio text slightly larger */
288
+ }
289
+ #category-selector-radio .gr-form input[type="radio"] { /* Hide the actual radio button */
290
+ display: none;
291
+ }
292
+ #category-selector-radio .gr-form input[type="radio"]:checked + label { /* Style for selected radio label */
293
+ background-color: #0056b3; /* Use primary color for selected */
294
+ color: #ffffff;
295
+ border-color: #004494;
296
+ font-weight: 600;
297
+ }
298
+ #category-selector-radio .gr-form label:hover {
299
+ background-color: #e9ecef;
300
+ border-color: #adb5bd;
301
+ }
302
+ #category-selector-radio .gr-form input[type="radio"]:checked + label:hover {
303
+ background-color: #004a9e; /* Slightly darker hover for selected */
304
+ }
305
+
306
+
307
+ /* --- Dataframe (Leaderboard Table) --- */
308
+ #leaderboard-table {
309
+ margin-top: 20px;
310
+ border: 1px solid #dee2e6;
311
+ border-radius: 8px; /* Rounded corners for the table wrapper */
312
+ overflow: hidden; /* Clip content to rounded corners */
313
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
314
+ }
315
+ #leaderboard-table .gr-dataframe { /* Target the inner dataframe structure */
316
+ border: none !important; /* Remove default border if needed */
317
+ border-radius: 0 !important; /* Reset inner radius if container has one */
318
+ }
319
+
320
+ #leaderboard-table table {
321
+ width: 100%;
322
+ border-collapse: collapse; /* Ensure borders touch */
323
+ }
324
+ #leaderboard-table th, #leaderboard-table td {
325
+ padding: 14px 18px !important; /* More padding */
326
+ border: none !important; /* Remove individual cell borders */
327
+ border-bottom: 1px solid #e9ecef !important; /* Use bottom borders as separators */
328
+ text-align: left;
329
+ vertical-align: middle; /* Center content vertically */
330
+ font-size: 1em !important; /* Table text size */
331
+ white-space: normal; /* Allow text wrapping */
332
+ }
333
+ #leaderboard-table th {
334
+ background-color: #f8f9fa !important; /* Lighter header background */
335
+ font-weight: 600 !important;
336
+ color: #495057;
337
+ font-size: 1.05em !important;
338
+ border-bottom-width: 2px !important; /* Thicker border under header */
339
+ border-color: #dee2e6 !important;
340
+ }
341
+ #leaderboard-table tr:last-child td {
342
+ border-bottom: none !important; /* Remove border from last row */
343
+ }
344
+ #leaderboard-table tr:nth-child(even) {
345
+ background-color: #fdfdfe !important; /* Very subtle striping */
346
+ }
347
+ #leaderboard-table tr:hover {
348
+ background-color: #e9f5ff !important; /* Light blue hover */
349
+ }
350
+ #leaderboard-table td a {
351
+ color: #0056b3;
352
+ text-decoration: none;
353
+ font-weight: 500; /* Make links slightly bolder */
354
+ }
355
+ #leaderboard-table td a:hover {
356
+ text-decoration: underline;
357
+ color: #003d80;
358
+ }
359
+ #leaderboard-table td:first-child, /* Rank column */
360
+ #leaderboard-table th:first-child {
361
+ text-align: center;
362
+ font-weight: 600;
363
+ width: 60px; /* Fixed width for Rank */
364
+ }
365
+ #leaderboard-table td:last-child, /* Elo Score column */
366
+ #leaderboard-table th:last-child {
367
+ text-align: right;
368
+ font-weight: 600;
369
+ width: 120px; /* Fixed width for Score */
370
+ }
371
+
372
+ /* --- Accordion --- */
373
+ .gradio-accordion { /* Custom class if needed, else target default */
374
+ border: 1px solid #dee2e6 !important;
375
+ border-radius: 8px !important;
376
+ margin-bottom: 15px !important;
377
+ overflow: hidden;
378
+ }
379
+ .gradio-accordion > button, /* Target accordion button */
380
+ .gradio-accordion > .gr-panel > button { /* Alternative selector if needed */
381
+ background-color: #f8f9fa !important;
382
+ border: none !important;
383
+ border-bottom: 1px solid #dee2e6 !important;
384
+ padding: 12px 18px !important;
385
+ font-size: 1.1em !important;
386
+ font-weight: 600 !important;
387
+ color: #343a40 !important;
388
+ width: 100%;
389
+ text-align: left;
390
+ cursor: pointer;
391
+ }
392
+ .gradio-accordion > button:hover,
393
+ .gradio-accordion > .gr-panel > button:hover {
394
+ background-color: #e9ecef !important;
395
+ }
396
+ .gradio-accordion > div { /* Accordion content */
397
+ padding: 15px 18px !important;
398
+ background-color: #ffffff;
399
+ }
400
+ #citation-button textarea { /* Style citation textbox */
401
+ font-family: 'Courier New', Courier, monospace;
402
+ font-size: 0.95em !important;
403
+ background-color: #e9ecef;
404
+ border-radius: 6px;
405
+ padding: 12px;
406
+ }
407
+ #citation-button label {
408
+ font-weight: 600;
409
+ color: #343a40;
410
+ }
411
+
412
+ /* --- Buttons (General / Submit) --- */
413
+ .gr-button {
414
+ font-size: 1.05em !important;
415
+ padding: 10px 20px !important;
416
+ border-radius: 6px !important;
417
+ font-weight: 500 !important;
418
+ transition: background-color 0.2s ease, border-color 0.2s ease;
419
+ }
420
+ /* Style specific buttons if needed */
421
+ #submit_button { /* Example if you add an ID to the submit button */
422
+ background-color: #0069d9 !important;
423
+ color: white !important;
424
+ border: none !important;
425
+ }
426
+ #submit_button:hover {
427
+ background-color: #0056b3 !important;
428
+ }
429
+
430
+ /* --- Textbox / Dropdown --- */
431
+ .gr-input, .gr-dropdown, .gr-textbox textarea {
432
+ font-size: 1em !important;
433
+ border-radius: 6px !important;
434
+ border: 1px solid #ced4da !important;
435
+ padding: 10px 12px !important;
436
+ }
437
+ .gr-input:focus, .gr-dropdown:focus, .gr-textbox textarea:focus {
438
+ border-color: #80bdff !important;
439
+ box-shadow: 0 0 0 0.2rem rgba(0, 123, 255, 0.25) !important;
440
+ }
441
+ .gr-input-label > span, /* Label text */
442
+ .gr-checkbox-label > span {
443
+ font-size: 1em !important;
444
+ font-weight: 500 !important;
445
+ color: #495057 !important;
446
+ margin-bottom: 5px !important;
447
+ }
448
+
449
+
450
  """
451
+ # Combine original CSS (if any) with new enhancements
452
+ final_css = custom_css + enhanced_css
453
 
454
+ # Use a theme for better default styling
455
+ demo = gr.Blocks(css=final_css, theme=gr.themes.Soft(
456
+ primary_hue=gr.themes.colors.blue,
457
+ secondary_hue=gr.themes.colors.sky,
458
+ neutral_hue=gr.themes.colors.gray,
459
+ font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], # Explicitly define font stack
460
+ radius_size=gr.themes.sizes.radius_md,
461
+ ))
462
 
463
+ with demo:
464
+ # Use the enhanced TITLE HTML
465
  gr.HTML(TITLE)
466
+
467
+ # Use the INTRODUCTION_TEXT variable
468
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
 
470
+ with gr.Tabs(elem_classes="tab-buttons") as tabs:
471
+ # Added Emojis to Tab Titles
472
+ with gr.TabItem("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
473
+ with gr.Column():
474
+ gr.Markdown("## Select Category to View Rankings", elem_classes="markdown-text") # More descriptive title
475
+ category_selector = gr.Radio(
476
+ choices=CATEGORIES,
477
+ label=None, # Hide the default label, rely on section title and button styling
478
+ value=DEFAULT_CATEGORY,
479
+ interactive=True,
480
+ elem_id="category-selector-radio" # ID for specific CSS styling
481
+ )
482
+ leaderboard_df_component = gr.Dataframe(
483
+ value=update_leaderboard(DEFAULT_CATEGORY),
484
+ headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
485
+ # Ensure datatypes match the formatted output
486
+ datatype=["number", "html", "str", "str", "number"],
487
+ interactive=False,
488
+ row_count=(len(master_df), "fixed"), # Display all rows
489
+ col_count=(5, "fixed"),
490
+ wrap=True, # Allow text wrapping in cells
491
+ elem_id="leaderboard-table" # CSS hook for table styling
492
+ )
493
+ # Link the radio button change to the update function
494
+ category_selector.change(
495
+ fn=update_leaderboard,
496
+ inputs=category_selector,
497
+ outputs=leaderboard_df_component
498
+ )
499
+
500
+ with gr.TabItem("ℹ️ About", elem_id="llm-benchmark-tab-about", id=1):
501
+ with gr.Column(scale=2, min_width=600): # Give about tab more space
502
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
503
+
504
+ # --- Submit Tab (Commented out as in original request) ---
505
+ # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented
506
+ # with gr.TabItem("🚀 Submit Model", elem_id="llm-benchmark-tab-submit", id=2):
507
+ # with gr.Column():
508
+ # with gr.Row():
509
+ # gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import/definition
510
+ # with gr.Column():
511
+ # with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False, elem_classes="gradio-accordion"):
512
+ # finished_eval_table = gr.components.Dataframe(
513
+ # value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
514
+ # )
515
+ # with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=True, elem_classes="gradio-accordion"): # Open running by default
516
+ # running_eval_table = gr.components.Dataframe(
517
+ # value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
518
+ # )
519
+ # with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=True, elem_classes="gradio-accordion"): # Open pending by default
520
+ # pending_eval_table = gr.components.Dataframe(
521
+ # value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
522
+ # )
523
+ # gr.Markdown("--- \n # ✨ Submit Your Model for Evaluation!", elem_classes="markdown-text") # Enhanced submit section title
524
+ # with gr.Row():
525
+ # with gr.Column(scale=1):
526
+ # model_name_textbox = gr.Textbox(label="Model Name (Hugging Face Hub ID)", placeholder="e.g., openai-community/gpt2")
527
+ # revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
528
+ # model_type = gr.Dropdown(choices=["Encoder-Decoder", "Decoder Only", "Encoder Only", "Other"], label="Model Architecture Type", multiselect=False, value=None, interactive=True)
529
+ # with gr.Column(scale=1):
530
+ # precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
531
+ # weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights Type", multiselect=False, value="Original", interactive=True)
532
+ # base_model_name_textbox = gr.Textbox(label="Base Model HF ID (if using Adapter/Delta weights)", placeholder="e.g., meta-llama/Llama-2-7b-hf")
533
+ # with gr.Row():
534
+ # submit_button = gr.Button("🚀 Submit for Evaluation", variant="primary", elem_id="submit_button") # Added variant and ID
535
+ # with gr.Row():
536
+ # submission_result = gr.Markdown()
537
+ # # Ensure add_new_eval is correctly imported/defined and handles these inputs
538
+ # submit_button.click(
539
+ # add_new_eval, # Requires import/definition
540
+ # [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
541
+ # submission_result,
542
+ # )
543
+
544
+ # --- Citation Accordion (at the bottom, outside Tabs) ---
545
+ with gr.Accordion("📙 Citation", open=False, elem_classes="gradio-accordion"):
546
+ # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables
547
+ citation_button = gr.Textbox(
548
+ value=CITATION_BUTTON_TEXT,
549
+ label=CITATION_BUTTON_LABEL,
550
+ lines=8, # Adjusted lines slightly
551
+ elem_id="citation-button",
552
+ show_copy_button=True,
553
+ # interactive=False # Make it non-editable if it's just for display/copy
554
+ )
555
+
556
+ # --- Keep scheduler if relevant ---
557
+ # Only start scheduler if the script is run directly
558
if __name__ == "__main__":
    # Schedule a periodic Space restart, but only when run as a script.
    try:
        # Run the scheduler as a daemon thread.
        scheduler = BackgroundScheduler(daemon=True)

        # Resolve the restart hook by name instead of referencing it directly:
        # if `restart_space` was never defined, a bare reference would raise
        # NameError and be reported by the generic handler below instead of
        # by the dedicated "not available" warning.
        restart_job = globals().get("restart_space")
        restart_available = callable(restart_job)
        # The placeholder id means no real Space has been configured.
        repo_configured = bool(REPO_ID) and REPO_ID != "your/space-id"

        if restart_available and repo_configured:
            print(f"Scheduling space restart for {REPO_ID} every 30 minutes.")
            # 1800 seconds == 30 minutes.
            scheduler.add_job(restart_job, "interval", seconds=1800)
            scheduler.start()
        else:
            # Both conditions can fail at once, so report them independently.
            if not restart_available:
                print("Warning: restart_space function not available; space restart job not scheduled.")
            if not repo_configured:
                print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")
    except Exception as e:
        # Best-effort: a scheduler failure must not prevent the app from launching.
        print(f"Failed to initialize or start scheduler: {e}")
573
+
574
 
575
+ # --- Launch the app ---
576
+ # Ensures the app launches only when the script is run directly
577
if __name__ == "__main__":
    # Entry point: start the Gradio UI only when this file is executed directly.
    # Prerequisites: `pip install gradio pandas apscheduler`, and either the
    # `src` package (about.py etc.) on the import path or the placeholder
    # definitions used as a fallback.
    print("Launching Gradio App...")
    demo.launch()