File size: 16,613 Bytes
922b418
22cc60c
 
 
be52959
e4014fe
 
839c9e4
922b418
839c9e4
 
 
 
 
 
922b418
839c9e4
922b418
 
 
 
 
 
 
839c9e4
 
 
922b418
839c9e4
 
 
 
 
922b418
 
 
 
 
 
 
 
 
 
 
 
 
839c9e4
 
e4014fe
 
be52959
 
922b418
3aab004
922b418
 
 
 
 
 
 
 
b41aa3c
3aab004
922b418
 
8104fa7
922b418
 
8104fa7
 
be52959
 
 
e4014fe
 
be52959
 
 
 
 
839c9e4
 
922b418
e4014fe
922b418
e4014fe
 
 
 
 
 
 
1117820
922b418
e4014fe
922b418
e4014fe
 
8104fa7
922b418
 
839c9e4
e4014fe
be52959
6e3d36f
be52959
 
 
 
 
922b418
 
1117820
be52959
 
 
922b418
839c9e4
922b418
 
 
839c9e4
922b418
 
839c9e4
922b418
eac1e3a
922b418
 
 
839c9e4
922b418
 
 
 
 
 
839c9e4
922b418
 
 
 
abae2f2
922b418
 
 
 
 
 
 
 
 
 
 
f5cc724
b4d3663
922b418
 
 
 
 
 
 
 
5b5d4cf
 
922b418
 
 
 
 
 
 
b4d3663
922b418
 
 
 
 
 
 
b4d3663
922b418
 
 
 
eac1e3a
922b418
 
 
 
eac1e3a
922b418
 
 
 
 
b4d3663
eba7c03
922b418
 
 
 
 
 
eba7c03
922b418
 
 
 
 
5b5d4cf
abae2f2
eba7c03
922b418
 
 
 
 
041df29
abae2f2
922b418
 
 
 
7e585be
922b418
 
 
7e585be
922b418
 
 
 
 
7e585be
b4d3663
922b418
 
 
7e585be
922b418
 
 
 
 
 
b4d3663
922b418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
839c9e4
e4014fe
922b418
 
 
 
 
 
 
 
 
 
 
 
0f4f58e
8104fa7
922b418
ffb569a
839c9e4
922b418
 
 
 
3aab004
be52959
e4014fe
be52959
922b418
 
be52959
 
922b418
e4014fe
be52959
922b418
be52959
 
 
e4014fe
 
be52959
922b418
e4014fe
922b418
 
be52959
 
 
 
 
 
 
e4014fe
922b418
 
be52959
922b418
e4014fe
922b418
 
 
e4014fe
 
922b418
839c9e4
 
 
922b418
 
839c9e4
 
 
922b418
839c9e4
 
 
 
922b418
 
 
 
 
 
839c9e4
922b418
839c9e4
 
 
 
922b418
e4014fe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# -*- coding: utf-8 -*-
import html

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
# Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard

# --- Make sure these imports work relative to your file structure ---
# Load the real page content from the project's ``src`` package. If any of the
# required imports fails, every name below is replaced by a placeholder so the
# app can still start.
try:
    # Assume these contain the *content* without excessive inline styling
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE, # Expected to have an ID like #main-leaderboard-title
    )
    # custom_css is optional: a missing module only means "no extra CSS",
    # it does not trigger the placeholder fallback below.
    try:
        from src.display.css_html_js import custom_css
    except ImportError:
        print("Warning: src.display.css_html_js not found. Starting with empty custom_css.")
        custom_css = "" # Start fresh if not found

    from src.envs import REPO_ID # Keep if needed for restart_space or other functions
    from src.submission.submit import add_new_eval # Keep if using the submit tab
    print("Successfully imported from src module.")
except ImportError:
    # Placeholder definitions used when the src package (or part of it) is missing.
    print("Warning: Using placeholder values because src module imports failed.")
    CITATION_BUTTON_LABEL="Citation"
    CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
    EVALUATION_QUEUE_TEXT="Current evaluation queue:"
    # Example placeholders with structure for CSS
    TITLE="""<h1 id="main-leaderboard-title" align="center">🏆 MLE-Dojo Benchmark Leaderboard (Placeholder)</h1>"""
    INTRODUCTION_TEXT="""
    <div class="introduction-section">
     <p>Welcome to the MLE-Dojo Benchmark Leaderboard (Placeholder Content).</p>
     <p>Edit <code>src/about.py</code> to set your actual title and introduction text.</p>
    </div>
    """
    LLM_BENCHMARKS_TEXT="""
    ## About Section (Placeholder)
    Information about the benchmarks will go here. Edit <code>src/about.py</code>.
    """
    custom_css="" # Start with empty CSS
    REPO_ID="your/space-id" # Replace with actual ID if needed

    def add_new_eval(*args, **kwargs):
        """Stand-in for the real submission handler; ignores all arguments."""
        return "Submission placeholder."
# --- End Placeholder Definitions ---


# --- Elo Leaderboard Configuration ---
# Static results table: one record per model, one Elo column per category
# plus the aggregate "Overall" column.
_RESULT_FIELDS = (
    'model_name', 'url', 'organizer', 'license',
    'MLE-Lite_Elo', 'Tabular_Elo', 'NLP_Elo', 'CV_Elo', 'Overall',
)
_RESULT_ROWS = (
    ('gpt-4o-mini', 'https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/', 'OpenAI', 'Proprietary', 753, 839, 758, 754, 778),
    ('gpt-4o', 'https://openai.com/index/hello-gpt-4o/', 'OpenAI', 'Proprietary', 830, 861, 903, 761, 841),
    ('o3-mini', 'https://openai.com/index/openai-o3-mini/', 'OpenAI', 'Proprietary', 1108, 1019, 1056, 1207, 1096),
    ('deepseek-v3', 'https://api-docs.deepseek.com/news/news1226', 'DeepSeek', 'DeepSeek', 1004, 1015, 1028, 1067, 1023),
    ('deepseek-r1', 'https://api-docs.deepseek.com/news/news250120', 'DeepSeek', 'DeepSeek', 1137, 1053, 1103, 1083, 1100),
    ('gemini-2.0-flash', 'https://ai.google.dev/gemini-api/docs/models#gemini-2.0-flash', 'Google', 'Proprietary', 847, 923, 860, 978, 895),
    ('gemini-2.0-pro', 'https://blog.google/technology/google-deepmind/gemini-model-updates-february-2025/', 'Google', 'Proprietary', 1064, 1139, 1028, 973, 1054),
    ('gemini-2.5-pro', 'https://deepmind.google/technologies/gemini/pro/', 'Google', 'Proprietary', 1257, 1150, 1266, 1177, 1214),
)

# Records keep the field order above, which is also the DataFrame column order.
data = [dict(zip(_RESULT_FIELDS, row)) for row in _RESULT_ROWS]
master_df = pd.DataFrame(data)

# Category names offered in the UI, and the master_df column each one ranks by.
CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"]
DEFAULT_CATEGORY = "Overall"
category_to_column = {
    name: f"{name}_Elo" for name in CATEGORIES if name != "Overall"
}
category_to_column["Overall"] = "Overall"  # the aggregate column has no _Elo suffix

# --- Helper function to update leaderboard ---
def _model_link(name, url):
    """Return an HTML anchor for one model, escaping both the URL and the name."""
    # Escape so quotes, '&' or '<' in the data cannot break out of the
    # single-quoted href attribute or inject markup into the table cell.
    href = html.escape(str(url), quote=True) if pd.notna(url) else '#'
    return f"<a href='{href}' target='_blank' class='model-link'>{html.escape(str(name))}</a>"


def update_leaderboard(category):
    """Build the ranked leaderboard table for one Elo category.

    Args:
        category: Category display name, looked up in ``category_to_column``.
            An unknown name, or one whose column is missing from
            ``master_df``, falls back to ``DEFAULT_CATEGORY``.

    Returns:
        A new DataFrame with the columns Rank, Model, Organizer, License and
        Elo Score, sorted by descending score with Rank starting at 1. Model
        holds an HTML link (class ``model-link``) to the model's URL, or to
        ``#`` when the URL is missing. If the default category's column is
        also missing, an empty frame with those five columns is returned.
    """
    score_column = category_to_column.get(category)
    if score_column is None or score_column not in master_df.columns:
        print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
        score_column = category_to_column[DEFAULT_CATEGORY]
        if score_column not in master_df.columns:
            print(f"Error: Default column '{score_column}' also not found.")
            # Return empty df with desired display columns
            return pd.DataFrame({
                "Rank": [], "Model": [], "Organizer": [], "License": [], "Elo Score": []
            })

    # Work on a fresh frame so master_df is never modified.
    ranked = (
        master_df[['model_name', 'url', 'organizer', 'license', score_column]]
        .sort_values(by=score_column, ascending=False)
        .reset_index(drop=True)
    )
    ranked.insert(0, 'Rank', ranked.index + 1)

    # Format Model Name as HTML Hyperlink - use a CSS class for styling
    ranked['Model'] = [
        _model_link(name, url)
        for name, url in zip(ranked['model_name'], ranked['url'])
    ]

    # Rename columns for final display
    ranked = ranked.rename(
        columns={score_column: 'Elo Score', 'organizer': 'Organizer', 'license': 'License'}
    )
    return ranked[["Rank", "Model", "Organizer", "License", "Elo Score"]]

# --- Mock/Placeholder functions/data for other tabs ---
print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")

# Column layout shared by the three (empty) evaluation-queue tables, with one
# type name per column.
EVAL_COLS = ["Model", "Status", "Requested", "Started"]
EVAL_TYPES = ["str"] * len(EVAL_COLS)

# Three independent empty frames, one per queue state.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = (
    pd.DataFrame(columns=EVAL_COLS) for _ in range(3)
)

# --- Keep restart function if relevant ---
def restart_space():
    """Announce a restart attempt for the Space identified by ``REPO_ID``.

    This is a stub: it only prints a message and performs no restart.
    """
    message = f"Attempting to restart space: {REPO_ID}"
    print(message)
    # Replace with your actual space restart mechanism if needed

# --- Enhanced CSS Definition ---
# Define all styles here. Assumes TITLE has id="main-leaderboard-title"
# and INTRODUCTION_TEXT is wrapped in class="introduction-section" (or rendered by gr.Markdown).

# Stylesheet for the whole app. The two introduction rules previously had an
# unterminated font-family value followed by "font-size: ...; !important;",
# which browsers discard; both declarations are now syntactically valid.
enhanced_css = """
/* Base and Theme Overrides */
body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
    font-size: 1.3em; /* Base font size */
    line-height: 1.6;
    background-color: #f8f9fa; /* Light background */
    color: #343a40; /* Default text color */
}

/* Container adjustments for better spacing */
.gradio-container {
    max-width: 1200px !important; /* Limit max width */
    margin: 0 auto !important; /* Center the container */
    padding: 2rem !important; /* Add padding around the whole app */
}

/* --- Title Styling --- */
/* Targets the h1 tag with the specific ID from src/about.py */
#main-leaderboard-title {
    font-size: 3.2em; /* Large title */
    font-weight: 700; /* Bolder */
    color: #212529; /* Darker color for title */
    text-align: center; /* Ensure centering */
    margin-bottom: 1.5rem; /* Space below title */
    padding-bottom: 0.5rem; /* Space within the element */
    border-bottom: 2px solid #dee2e6; /* Subtle underline */
}

/* --- Introduction Text Styling --- */
/* Targets the wrapper div or the markdown component */
.introduction-section p, .introduction-wrapper .prose p { /* Target paragraphs within the section */
    font-family: 'Georgia', serif;
    font-size: 1.5em !important; /* Slightly larger than base */
    color: #495057; /* Slightly lighter text color */
    margin-bottom: 1rem; /* Space between paragraphs */
    max-width: 900px; /* Limit width for readability */
    margin-left: auto;  /* Center the text block */
    margin-right: auto; /* Center the text block */
    text-align: center; /* Center align intro text */
}
.introduction-section, .introduction-wrapper {
    font-family: 'Georgia', serif;
    font-size: 1.5em !important; /* Slightly larger than base */
    margin-bottom: 2.5rem; /* Space below the intro block */
}


/* --- General Markdown and Header Styling --- */
.markdown-text h2, .tabitem .prose h2 { /* Target section headers */
    font-size: 1.8em;
    font-weight: 500;
    color: #343a40;
    margin-top: 2.5rem; /* More space above sections */
    margin-bottom: 1.2rem;
    padding-bottom: 0.4rem;
    border-bottom: 1px solid #e9ecef;
}
.markdown-text p, .tabitem .prose p {
    font-size: 1.5em; /* Standard paragraph size */
    margin-bottom: 1rem;
    color: #495057;
}
.markdown-text a, .tabitem .prose a { /* Style links within markdown */
    font-size: 1.3em;
    color: #007bff;
    text-decoration: none;
}
.markdown-text a:hover, .tabitem .prose a:hover {
    font-size: 1.3em;
    text-decoration: underline;
}

/* --- Tab Styling --- */
.tab-buttons button { /* Style tab buttons */
    font-size: 1.3em !important;
    padding: 5px 10px !important;
    font-weight: 500;
}

/* --- Leaderboard Table Styling --- */
#leaderboard-table {
    margin-top: 1.5rem; /* Space above table */
    font-size: 1.5em; /* Ensure table font size is consistent */
    border: 1px solid #dee2e6;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05); /* Subtle shadow */
}
#leaderboard-table th {
    background-color: #e9ecef; /* Header background */
    font-size: 1.3em;
    font-weight: 500; /* Header font weight */
    padding: 10px 12px; /* Header padding */
    text-align: left;
    color: #495057;
    white-space: nowrap; /* Prevent header text wrapping */
}
#leaderboard-table td {
    font-size: 1.1em;
    padding: 8px 12px; /* Cell padding */
    border-bottom: 1px solid #e9ecef; /* Horizontal lines */
    vertical-align: middle; /* Center cell content vertically */
}
#leaderboard-table tr:nth-child(even) td {
    font-size: 1.1em;
    background-color: #f8f9fa; /* Zebra striping */
}
#leaderboard-table tr:hover td {
    font-size: 1.1em;
    background-color: #e2e6ea; /* Hover effect */
}
/* Style for the model links within the table */
#leaderboard-table .model-link {
    color: #0056b3; /* Slightly darker blue for links */
    font-size: 1.1em;
    font-weight: 500;
    text-decoration: none;
}
#leaderboard-table .model-link:hover {
    font-size: 1.1em;
    text-decoration: underline;
    color: #003d80;
}

/* --- Radio Button / Category Selector Styling --- */
.gradio-radio label span { /* Target the label text */
   font-size: 1.3em !important;
   font-weight: 500;
   color: #343a40;
}
.gradio-radio fieldset { /* Adjust spacing around radio buttons */
   margin-top: 0.5rem;
   margin-bottom: 1.5rem;
}
.gradio-radio fieldset label { /* Style individual radio choices */
    padding: 8px 12px !important;
}


/* --- Accordion Styling --- */
.gradio-accordion > button { /* Accordion header */
    font-size: 1.2em !important;
    font-weight: 600;
    padding: 12px 15px !important;
    background-color: #f1f3f5 !important;
    border-bottom: 1px solid #dee2e6 !important;
}
.gradio-accordion > div { /* Accordion content area */
    padding: 15px !important;
    border: 1px solid #dee2e6 !important;
    border-top: none !important;
}

/* --- Textbox/Button Styling (e.g., Citation) --- */
#citation-button textarea {
    font-family: 'Courier New', Courier, monospace; /* Monospace for code/citation */
    font-size: 0.95em !important;
    background-color: #e9ecef;
    color: #343a40;
}
#citation-button label span {
    font-weight: 600;
}

"""

# Stylesheet handed to Gradio: whatever custom_css was loaded comes first and
# enhanced_css follows it, so at equal specificity the rules from this file win.
final_css = "\n".join((custom_css, enhanced_css))

# --- Gradio App Definition ---
# The Soft theme (default settings) provides the base look; final_css layers
# the overrides on top of it.
demo = gr.Blocks(theme=gr.themes.Soft(), css=final_css)

# Page layout: title, introduction, the tabs (leaderboard + about) and a
# collapsed citation box at the bottom.
with demo:
    # Render TITLE from src/about.py (expects <h1 id="main-leaderboard-title">...)
    gr.HTML(TITLE)

    # Render INTRODUCTION_TEXT from src/about.py
    # Add a wrapper class for CSS targeting if the text itself doesn't have one
    with gr.Row():
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="introduction-wrapper") # Use this class for CSS

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Column():
                # Use standard Markdown for the section header, CSS will style it
                gr.Markdown("## Model Elo Rankings by Category", elem_classes="markdown-text")
                category_selector = gr.Radio(
                    choices=CATEGORIES,
                    label="Select Category:", # Label is styled via CSS
                    value=DEFAULT_CATEGORY,
                    interactive=True,
                    elem_classes="gradio-radio" # Add class for styling
                )
                # Initial table shows the default category; the "html" datatype
                # lets the Model column render its link markup.
                leaderboard_df_component = gr.Dataframe(
                    value=update_leaderboard(DEFAULT_CATEGORY),
                    headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
                    datatype=["number", "html", "str", "str", "number"],
                    interactive=False,
                    row_count=(len(master_df), "fixed"),
                    col_count=(5, "fixed"),
                    wrap=True,
                    elem_id="leaderboard-table" # Used for specific table CSS
                )
                # Re-rank the table whenever a different category is selected.
                category_selector.change(
                    fn=update_leaderboard,
                    inputs=category_selector,
                    outputs=leaderboard_df_component
                )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
            # Render LLM_BENCHMARKS_TEXT using Markdown, styled by CSS
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") # Apply standard markdown styling

        # --- Submit Tab (Keep commented out or uncomment and ensure imports/variables are defined) ---
        # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
        #     # ... (Your submission form code - apply elem_classes="markdown-text" to gr.Markdown) ...
        #     pass # Placeholder


    # --- Citation Row (at the bottom, outside Tabs) ---
    with gr.Accordion("📙 Citation", open=False, elem_classes="gradio-accordion"): # Add class
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=8, # Adjusted lines slightly
            elem_id="citation-button", # Used for specific CSS
            show_copy_button=True,
        )

# --- Scheduler and Launch ---
if __name__ == "__main__":
    # restart_space is defined unconditionally in this file, so the only thing
    # deciding whether the periodic job is scheduled is whether REPO_ID is a
    # real value rather than empty or the placeholder.
    if REPO_ID and REPO_ID != "your/space-id":
        try:
            scheduler = BackgroundScheduler()
            scheduler.add_job(restart_space, "interval", seconds=1800)  # every 30 minutes
            scheduler.start()
        except Exception as e:
            # Best effort: a scheduler failure must not prevent the app from launching.
            print(f"Failed to initialize or start scheduler: {e}")
        else:
            print("Scheduler started for space restart.")
    else:
        print("Warning: REPO_ID not set or is placeholder; space restart job not scheduled.")

    print("Launching Gradio App...")
    # demo.queue() # Consider adding queue() for better handling under load
    demo.launch()