import html
import os
from io import StringIO

import gradio as gr
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# Leaderboard data
# ---------------------------------------------------------------------------
# Read the results from the local tab-separated file. If anything goes wrong
# while loading it, report the error and fall back to a small built-in sample
# so the interface can still be rendered.
try:
    df = pd.read_csv("FACTS.tsv", sep='\t')
    print(f"Successfully loaded {len(df)} models from local file")
except Exception as load_error:
    print(f"Error loading data from local file: {load_error}")

    # One tuple per model: (model, size, grounding, quality, combined).
    _sample_rows = [
        ('deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', 14, 0.817797, 0.542373, 0.457627),
        ('meta-llama/Llama-3.3-70B-Instruct', 70, 0.842553, 0.510638, 0.425532),
        ('Qwen/Qwen3-30B-A3B', 30, 0.812766, 0.540426, 0.425532),
        ('google/gemma-3-27b-it', 27, 0.936, 0.391, 0.378),
    ]
    df = pd.DataFrame(
        _sample_rows,
        columns=[
            'model',
            'size',
            'Separate Grounding Score',
            'Separate Quality Score',
            'Combined Score',
        ],
    )
    print("Showing sample data (file read failed)")


# Drop rows with any missing value, trim stray whitespace from the header
# names, then switch the two raw column names to their display names.
df = df.dropna()
df.columns = df.columns.map(str.strip)
df = df.rename(columns={'model': 'Model Name', 'size': 'Size'})


def _format_size(billions):
    """Return the size as text with a ``B`` suffix, e.g. ``14B`` or ``1.7B``.

    Whole-number sizes are printed without a decimal part.
    """
    whole = int(billions)
    if billions == whole:
        return f"{whole}B"
    return f"{billions}B"


# Human-readable size shown in the table; the numeric "Size" column is kept
# for bucketing.
df["Size_Display"] = df["Size"].map(_format_size)


def get_size_category(size):
    """Return the size-filter bucket label for a model size.

    Args:
        size: Model size as a number (the UI labels these values with a
            ``B`` suffix).

    Returns:
        One of ``"0-5B"``, ``"5-10B"``, ``"10-20B"``, ``"20-40B"``,
        ``"40-80B"`` or ``">80B"``. Each bucket includes its upper bound,
        so a size of exactly 10 lands in ``"5-10B"``.
    """
    # (inclusive upper bound, label), checked from smallest to largest.
    buckets = (
        (5, "0-5B"),
        (10, "5-10B"),
        (20, "10-20B"),
        (40, "20-40B"),
        (80, "40-80B"),
    )
    for upper_bound, label in buckets:
        if size <= upper_bound:
            return label
    # Anything that did not fit a bounded bucket.
    return ">80B"


# Pre-compute each model's size bucket once so the size filter can use a
# plain membership test on this column.
df["Size_Category"] = df["Size"].map(get_size_category)


def filter_and_search_models(
    search_query, size_ranges, sort_by, architecture_filters=None
):
    """Filter, sort and rank the leaderboard for display.

    Works on a copy of the module-level ``df``; the global frame is never
    modified.

    Args:
        search_query: Text to look for in the model name. It is matched as a
            literal, case-insensitive substring (not as a regular
            expression), so characters such as ``(`` or ``+`` typed into the
            search box cannot raise a pattern error. Empty/None disables the
            search.
        size_ranges: Iterable of ``Size_Category`` labels to keep. Empty/None
            disables the size filter.
        sort_by: Column to sort by, highest value first. Ignored when it is
            not a column of the data.
        architecture_filters: Iterable of family keys (``"llama"``,
            ``"deepseek"``, ``"qwen"``, ``"google"``, ``"mistral"``,
            ``"others"``). A model is kept when it matches at least one
            selected key; unknown keys are ignored. Empty/None disables the
            architecture filter.

    Returns:
        A new DataFrame with the columns ``Rank``, ``Model Name``, ``Size``
        (the display string), and the three score columns rounded to three
        decimals. ``Rank`` is the 1-based position after filtering/sorting.
    """
    # Family key -> substring of the model name that identifies the family.
    # Matching is case-insensitive. "others" is every model that matches
    # none of these.
    family_patterns = {
        "llama": "meta-llama",
        "deepseek": "deepseek",
        "qwen": "Qwen",
        "google": "google",
        "mistral": "mistralai",
    }
    score_columns = [
        "Separate Grounding Score",
        "Separate Quality Score",
        "Combined Score",
    ]

    filtered_df = df.copy()

    if search_query:
        # regex=False: the query is user input and must be taken literally.
        name_matches = filtered_df["Model Name"].str.contains(
            search_query, case=False, na=False, regex=False
        )
        filtered_df = filtered_df[name_matches]

    if size_ranges:
        filtered_df = filtered_df[filtered_df["Size_Category"].isin(size_ranges)]

    if architecture_filters:
        names = filtered_df["Model Name"]
        family_masks = {
            key: names.str.contains(
                pattern, case=False, na=False, regex=False
            ).astype(bool)
            for key, pattern in family_patterns.items()
        }

        # True for every model that belongs to one of the known families.
        known_family = pd.Series(False, index=filtered_df.index, dtype=bool)
        for family_mask in family_masks.values():
            known_family |= family_mask

        architecture_mask = pd.Series(False, index=filtered_df.index, dtype=bool)
        for arch in architecture_filters:
            if arch == "others":
                architecture_mask |= ~known_family
            elif arch in family_masks:
                architecture_mask |= family_masks[arch]

        filtered_df = filtered_df[architecture_mask]

    if sort_by in filtered_df.columns:
        filtered_df = filtered_df.sort_values(sort_by, ascending=False)

    # Rank reflects the position within the filtered, sorted view.
    filtered_df = filtered_df.reset_index(drop=True)
    filtered_df["Rank"] = range(1, len(filtered_df) + 1)

    display_df = filtered_df[
        ["Rank", "Model Name", "Size_Display", *score_columns]
    ].rename(columns={"Size_Display": "Size"})

    # round() with a per-column mapping returns a new frame and leaves the
    # non-score columns untouched.
    return display_df.round({column: 3 for column in score_columns})


def create_html_table(df):
    """Render a leaderboard dataframe as an HTML table.

    Args:
        df: DataFrame to render. It must contain a ``"Model Name"`` column;
            the column order determines the cell styling (1st column gets
            ``rank-cell``, 2nd ``model-cell``, 3rd ``size-cell``, every later
            column ``score-cell``).

    Returns:
        A single HTML string: a ``div.leaderboard-container`` wrapping a
        ``table.leaderboard-table`` with one header row and one body row per
        dataframe row. Each row carries a family CSS class (``llama-row``,
        ``deepseek-row``, ``qwen-row``, ``google-row``, ``mistral-row`` or
        ``others-row``) and the model name is rendered as a link to
        ``https://huggingface.co/<model name>``.

    All header and cell text, including the link target, is HTML-escaped so
    that values coming from the data file cannot break or inject markup.
    """
    # (lower-case substring, row class); the first match wins. Matching is
    # case-insensitive so the row colour agrees with the case-insensitive
    # architecture filter.
    family_row_classes = (
        ("meta-llama", "llama-row"),
        ("deepseek", "deepseek-row"),
        ("qwen", "qwen-row"),
        ("google", "google-row"),
        ("mistralai", "mistral-row"),
    )
    # Cell class by column position; later columns are all score cells.
    positional_cell_classes = ("rank-cell", "model-cell", "size-cell")

    columns = list(df.columns)

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        '<div class="leaderboard-container">',
        '<table class="leaderboard-table">',
        "<thead><tr>",
    ]
    parts.extend(f"<th>{html.escape(str(col))}</th>" for col in columns)
    parts.append("</tr></thead>")
    parts.append("<tbody>")

    for _, row in df.iterrows():
        model_name = str(row["Model Name"])
        lowered_name = model_name.lower()
        row_class = next(
            (
                css_class
                for needle, css_class in family_row_classes
                if needle in lowered_name
            ),
            "others-row",
        )

        parts.append(f'<tr class="{row_class}">')
        for position, col in enumerate(columns):
            if position < len(positional_cell_classes):
                cell_class = positional_cell_classes[position]
            else:
                cell_class = "score-cell"

            if col == "Model Name":
                safe_name = html.escape(model_name, quote=True)
                hf_url = f"https://huggingface.co/{safe_name}"
                cell_content = (
                    f'<a href="{hf_url}" target="_blank" '
                    f'class="model-link">{safe_name}</a>'
                )
            else:
                cell_content = html.escape(str(row[col]))

            parts.append(f'<td class="{cell_class}">{cell_content}</td>')
        parts.append("</tr>")

    parts.append("</tbody>")
    parts.append("</table>")
    parts.append("</div>")

    return "".join(parts)


# Default selections: every size bucket and every architecture family.
# Copies are handed to the widgets and to the initial table render.
_ALL_SIZE_BUCKETS = ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"]
_ALL_ARCHITECTURES = ["llama", "deepseek", "qwen", "google", "mistral", "others"]

with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as app:
    gr.Markdown("# 🏆 FACTS Grounding Leaderboard")
    gr.Markdown(
        "### FACTS Medical Grounding is a benchmark designed to evaluate Open Models over medical domain."
    )

    with gr.Tabs():
        # ------------------------------------------------------------------
        # Tab 1: interactive leaderboard
        # ------------------------------------------------------------------
        with gr.TabItem("Leaderboard"):
            with gr.Row():
                # Left column: sorting plus the size/architecture filters.
                with gr.Column(scale=1):
                    gr.Markdown("### 🎛️ **Filter & Sort Options**")

                    with gr.Row():
                        sort_dropdown = gr.Dropdown(
                            label="Sort by Metric",
                            # (label shown to the user, dataframe column)
                            choices=[
                                ("🏆 Combined Score", "Combined Score"),
                                ("🎯 Grounding Score", "Separate Grounding Score"),
                                ("📊 Quality Score", "Separate Quality Score"),
                            ],
                            value="Combined Score",
                            elem_classes="sort-dropdown-modern",
                            container=True,
                        )

                    gr.Markdown("**📏 Filter by Model Size:**")
                    size_checkboxes = gr.CheckboxGroup(
                        label="",
                        choices=list(_ALL_SIZE_BUCKETS),
                        value=list(_ALL_SIZE_BUCKETS),
                        elem_classes="size-filter",
                        container=False,
                    )

                    gr.Markdown("**🏗️ Filter by Model Architecture:**")
                    architecture_checkboxes = gr.CheckboxGroup(
                        label="",
                        # (label shown to the user, family key understood by
                        # filter_and_search_models)
                        choices=[
                            ("🤖 DeepSeek", "deepseek"),
                            ("🐧 Qwen", "qwen"),
                            ("🦙 Llama", "llama"),
                            ("🔷 Gemma", "google"),
                            ("🌟 Mistral", "mistral"),
                            ("🔧 Others", "others"),
                        ],
                        value=list(_ALL_ARCHITECTURES),
                        elem_classes="architecture-filter",
                        container=False,
                    )

                # Right column: free-text search.
                with gr.Column(scale=1):
                    gr.Markdown("### 🔍 **Search Models**")
                    search_box = gr.Textbox(
                        label="",
                        value="",
                        placeholder="Search for a model name (e.g., Llama, Qwen, DeepSeek)...",
                        elem_classes="search-input",
                    )

            # Counter above the table; refreshed by update_table.
            total_models = gr.Markdown(f"**Showing {len(df)} models**")

            # First render: no search text, everything selected, sorted by
            # the combined score.
            _initial_view = filter_and_search_models(
                "",
                list(_ALL_SIZE_BUCKETS),
                "Combined Score",
                list(_ALL_ARCHITECTURES),
            )
            results_table = gr.HTML(
                value=create_html_table(_initial_view),
                elem_id="leaderboard-table",
            )

            with gr.Accordion("Metric Explanations", open=False):
                gr.Markdown(
                    """
                    - **Grounding Score**: Percentage of responses where all claims are supported by the context
                    - **Quality Score**: Percentage of responses that adequately address the user's request
                    - **Combined Score**: Percentage of responses that pass both quality and grounding checks
                    """
                )

        # ------------------------------------------------------------------
        # Tab 2: background information
        # ------------------------------------------------------------------
        with gr.TabItem("About"):
            gr.Markdown(
                """
                # About This Evaluation

                ## FACTS Grounding Leaderboard

                The FACTS Grounding Leaderboard is a benchmark developed by Google DeepMind to evaluate how well Large Language Models (LLMs) can generate factually accurate responses that are fully grounded in provided context documents.

                ### How It Works:
                1. **Input**: Each example contains a system instruction, a context document (up to 32k tokens), and a user request
                2. **Task**: Models must generate responses that answer the user's request using ONLY information from the provided context
                3. **Evaluation**: Responses are evaluated in two phases:
                   - **Quality Check**: Does the response adequately address the user's request?
                   - **Grounding Check**: Is every claim in the response supported by the context document?

                ## Medical Domain Variation

                This implementation focuses specifically on medical domain examples from the FACTS benchmark to evaluate smaller, open-source models in healthcare contexts.

                ### Key Modifications:
                - **Domain-Specific**: Uses only the 236 medical examples from the original 860-example dataset
                - **Single Judge Model**: Employs Gemini 1.5 Flash as the sole evaluator (vs. the original's ensemble of 3 models)
                - **Focus on Accessibility**: Tests Qwen 3 1.7B, demonstrating that smaller models can be benchmarked on this important task
                - **Streamlined Process**: Simplified evaluation pipeline suitable for resource-constrained environments

                ### Why Medical Domain?
                Medical information requires exceptional accuracy and grounding. By focusing on this domain, we can assess how well smaller models handle critical healthcare information while strictly adhering to provided sources—a crucial capability for safe medical AI applications.

                ### Evaluation Metrics:
                - **Grounding Score**: Percentage of responses where all claims are supported by the context
                - **Quality Score**: Percentage of responses that adequately address the user's request
                - **Combined Score**: Percentage of responses that pass both quality and grounding checks

                This focused approach enables rapid iteration and testing of smaller models on domain-specific factual grounding tasks.

                ---

                ## References

                - **Original Leaderboard by Google**: [FACTS Grounding Benchmark Leaderboard](https://www.kaggle.com/benchmarks/google/facts-grounding/leaderboard)
                - **Public Dataset**: [FACTS Grounding Examples Dataset](https://www.kaggle.com/datasets/deepmind/facts-grounding-examples/data)
                - **Technical Documentation**: [FACTS Grounding Benchmark Starter Code](https://www.kaggle.com/code/andrewmingwang/facts-grounding-benchmark-starter-code/notebook)

                ---
                """
            )

    def update_table(search, sizes, sort_by, arch_filters):
        """Recompute the table HTML and the model counter from the controls.

        Returns a ``(table_html, counter_markdown)`` pair matching the
        ``[results_table, total_models]`` outputs.
        """
        view = filter_and_search_models(search, sizes, sort_by, arch_filters)
        return create_html_table(view), f"**Showing {len(view)} models**"

    # Every control triggers the same refresh with the same inputs/outputs.
    _controls = [search_box, size_checkboxes, sort_dropdown, architecture_checkboxes]
    for _control in _controls:
        _control.change(
            fn=update_table,
            inputs=list(_controls),
            outputs=[results_table, total_models],
        )


# Custom stylesheet for the leaderboard UI.
#
# NOTE: the `.architecture-filter label:nth-child(n)` rules are positional.
# They must follow the order of the `choices` of `architecture_checkboxes`
# (DeepSeek, Qwen, Llama, Gemma/Google, Mistral, Others) so that each filter
# button uses the same colour family as that model family's table rows.
app.css = """
.leaderboard-container {
    margin-top: 20px;
    max-height: 600px;
    overflow-y: auto;
    border-radius: 8px;
    border: 1px solid #e9ecef;
}

.leaderboard-table {
    width: 100%;
    border-collapse: collapse;
    font-size: 14px;
    background: white;
}

.leaderboard-table th {
    background-color: #f8f9fa;
    font-weight: 600;
    padding: 12px 8px;
    text-align: center;
    border-bottom: 2px solid #dee2e6;
    position: sticky;
    top: 0;
    z-index: 10;
}

.leaderboard-table th:first-child {
    width: 60px;
}

.leaderboard-table td {
    padding: 10px 8px;
    border-bottom: 1px solid #f1f3f4;
}

.leaderboard-table tbody tr:hover {
    background-color: #f8f9fa;
}

.rank-cell {
    text-align: center;
    font-weight: 600;
    color: #444;
    background-color: #f8f9fa;
    width: 60px;
}

.model-cell {
    font-weight: 500;
    max-width: 400px;
    word-wrap: break-word;
}

.model-link {
    color: #0066cc !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: all 0.2s ease !important;
    border-bottom: 1px solid transparent !important;
}

.model-link:hover {
    color: #0052a3 !important;
    border-bottom: 1px solid #0066cc !important;
    background-color: rgba(0, 102, 204, 0.05) !important;
    padding: 2px 4px !important;
    border-radius: 4px !important;
    margin: -2px -4px !important;
}

.size-cell {
    text-align: center;
    font-weight: 500;
    color: #666;
    min-width: 60px;
}

.score-cell {
    text-align: center;
    font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
    font-size: 13px;
}

/* Model family row styling */
.llama-row {
    background-color: #fffbf0;
}

.llama-row:hover {
    background-color: #fef7e0;
}

.deepseek-row {
    background-color: #f0f8ff;
}

.deepseek-row:hover {
    background-color: #e6f3ff;
}

.qwen-row {
    background-color: #f5fff5;
}

.qwen-row:hover {
    background-color: #eaffea;
}

.google-row {
    background-color: #fff0f5;
}

.google-row:hover {
    background-color: #ffe6f0;
}

.mistral-row {
    background-color: #faf5ff;
}

.mistral-row:hover {
    background-color: #f3e8ff;
}

.others-row {
    background-color: #f8fafc;
}

.others-row:hover {
    background-color: #f1f5f9;
}

.size-filter {
    margin-top: 10px;
}

.size-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
}

.size-filter label {
    display: flex !important;
    align-items: center !important;
    background: #f8f9fa !important;
    border: 2px solid #e9ecef !important;
    border-radius: 8px !important;
    padding: 8px 12px !important;
    margin: 0 !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    font-weight: 500 !important;
    font-size: 14px !important;
    color: #495057 !important;
    min-width: 70px !important;
    justify-content: center !important;
}

.size-filter label:hover {
    background: #e9ecef !important;
    border-color: #6c757d !important;
}

.size-filter input[type="checkbox"] {
    display: none !important;
}

.size-filter input[type="checkbox"]:checked + span {
    background: #0d6efd !important;
    color: white !important;
    border-color: #0d6efd !important;
}

.size-filter label:has(input[type="checkbox"]:checked) {
    background: #0d6efd !important;
    color: white !important;
    border-color: #0d6efd !important;
    box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important;
}

.architecture-filter {
    margin-top: 10px;
}

.architecture-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
}

.architecture-filter label {
    display: flex !important;
    align-items: center !important;
    border-radius: 8px !important;
    padding: 8px 12px !important;
    margin: 0 !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    font-weight: 500 !important;
    font-size: 14px !important;
    min-width: 140px !important;
    justify-content: center !important;
    border: 2px solid !important;
}

.architecture-filter label:hover {
    transform: translateY(-1px);
    box-shadow: 0 2px 8px rgba(0,0,0,0.1) !important;
}

.architecture-filter input[type="checkbox"] {
    display: none !important;
}

/* DeepSeek styling (1st checkbox) */
.architecture-filter label:nth-child(1) {
    background: #f0f8ff !important;
    border-color: #b3d9ff !important;
    color: #1e40af !important;
}

.architecture-filter label:nth-child(1):has(input[type="checkbox"]:checked) {
    background: #3b82f6 !important;
    border-color: #3b82f6 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3) !important;
}

/* Qwen styling (2nd checkbox) */
.architecture-filter label:nth-child(2) {
    background: #f5fff5 !important;
    border-color: #b3ffb3 !important;
    color: #15803d !important;
}

.architecture-filter label:nth-child(2):has(input[type="checkbox"]:checked) {
    background: #22c55e !important;
    border-color: #22c55e !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(34, 197, 94, 0.3) !important;
}

/* Llama styling (3rd checkbox) */
.architecture-filter label:nth-child(3) {
    background: #fffbf0 !important;
    border-color: #f7e6a3 !important;
    color: #8b4513 !important;
}

.architecture-filter label:nth-child(3):has(input[type="checkbox"]:checked) {
    background: #f4a261 !important;
    border-color: #f4a261 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(244, 162, 97, 0.3) !important;
}

/* Google styling (4th checkbox) */
.architecture-filter label:nth-child(4) {
    background: #fff0f5 !important;
    border-color: #ffb3d9 !important;
    color: #be185d !important;
}

.architecture-filter label:nth-child(4):has(input[type="checkbox"]:checked) {
    background: #ec4899 !important;
    border-color: #ec4899 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
}

/* Mistral styling (5th checkbox) */
.architecture-filter label:nth-child(5) {
    background: #faf5ff !important;
    border-color: #d8b4fe !important;
    color: #7c3aed !important;
}

.architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) {
    background: #8b5cf6 !important;
    border-color: #8b5cf6 !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important;
}

/* Others styling (6th checkbox) */
.architecture-filter label:nth-child(6) {
    background: #f8fafc !important;
    border-color: #cbd5e1 !important;
    color: #475569 !important;
}

.architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) {
    background: #64748b !important;
    border-color: #64748b !important;
    color: white !important;
    box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important;
}

/* Search and Filter Section Styling */
.search-input input {
    border: 2px solid #e9ecef !important;
    border-radius: 12px !important;
    padding: 12px 16px !important;
    font-size: 14px !important;
    transition: all 0.3s ease !important;
    background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%) !important;
}

.search-input input:focus {
    border-color: #6366f1 !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1) !important;
    background: white !important;
}

.search-input input::placeholder {
    color: #6b7280 !important;
    font-style: italic !important;
}

/* Modern Sort Dropdown Styling */
.sort-dropdown-modern label {
    font-weight: 600 !important;
    color: #374151 !important;
    margin-bottom: 8px !important;
}

.sort-dropdown-modern .wrap {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border-radius: 12px !important;
    padding: 2px !important;
    border: none !important;
}

.sort-dropdown-modern select {
    background: white !important;
    border: none !important;
    border-radius: 10px !important;
    padding: 12px 16px !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: #374151 !important;
    cursor: pointer !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
}

.sort-dropdown-modern select:hover {
    box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
    transform: translateY(-1px) !important;
}

.sort-dropdown-modern select:focus {
    outline: none !important;
    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important;
}

/* Section Headers */
h3 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    margin-bottom: 12px !important;
}

/* Centered Architecture Section */
.centered-title {
    text-align: center !important;
}

.centered-filter > div {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    align-items: center !important;
    justify-content: center !important;
}

/* Dark Mode Specific Styles */
@media (prefers-color-scheme: dark) {
    .leaderboard-table {
        background: #1f2937 !important;
        color: #f9fafb !important;
    }

    .leaderboard-table th {
        background-color: #374151 !important;
        color: #f9fafb !important;
        border-bottom: 2px solid #4b5563 !important;
    }

    .leaderboard-table td {
        color: #f9fafb !important;
        border-bottom: 1px solid #374151 !important;
    }

    .leaderboard-table tbody tr:hover {
        background-color: #374151 !important;
    }

    .rank-cell {
        background-color: #374151 !important;
        color: #f9fafb !important;
    }

    .model-cell {
        color: #f9fafb !important;
    }

    .size-cell {
        color: #d1d5db !important;
    }

    .score-cell {
        color: #f9fafb !important;
    }

    /* Dark mode row colors with better contrast */
    .llama-row {
        background-color: rgba(245, 158, 11, 0.1) !important;
    }

    .llama-row:hover {
        background-color: rgba(245, 158, 11, 0.2) !important;
    }

    .deepseek-row {
        background-color: rgba(59, 130, 246, 0.1) !important;
    }

    .deepseek-row:hover {
        background-color: rgba(59, 130, 246, 0.2) !important;
    }

    .qwen-row {
        background-color: rgba(34, 197, 94, 0.1) !important;
    }

    .qwen-row:hover {
        background-color: rgba(34, 197, 94, 0.2) !important;
    }

    .google-row {
        background-color: rgba(236, 72, 153, 0.1) !important;
    }

    .google-row:hover {
        background-color: rgba(236, 72, 153, 0.2) !important;
    }

    .mistral-row {
        background-color: rgba(139, 92, 246, 0.1) !important;
    }

    .mistral-row:hover {
        background-color: rgba(139, 92, 246, 0.2) !important;
    }

    .others-row {
        background-color: rgba(107, 114, 128, 0.1) !important;
    }

    .others-row:hover {
        background-color: rgba(107, 114, 128, 0.2) !important;
    }

    .leaderboard-container {
        border: 1px solid #4b5563 !important;
    }

    .model-link {
        color: #60a5fa !important;
    }

    .model-link:hover {
        color: #93c5fd !important;
        border-bottom: 1px solid #60a5fa !important;
        background-color: rgba(96, 165, 250, 0.1) !important;
    }
}
"""


# Start the Gradio server only when this file is executed as a script, so
# importing the module (e.g. to reuse `app`) does not launch anything.
if __name__ == "__main__":
    app.launch()