# Hugging Face Space source (Space status when this page was captured: Sleeping)
import json | |
import random | |
import html | |
import markdown | |
from typing import List, Dict, Any, Tuple | |
import gradio as gr | |
import pandas as pd | |
from datasets import load_dataset | |
# Load the arena battles once at import time.
# An earlier revision read the frame from a local file
# (pd.read_json("selected_battles.json")) and bound it to `df`; the helpers
# below (get_sorted_options, filter_df) still read that module-level name,
# so it must be bound here or they raise NameError.
ds = load_dataset("lmarena-ai/arena-human-preference-100k")
df = ds["train"].to_pandas()
# Keep the previous name as an alias for any code that refers to `battles`.
battles = df
# Expected columns in this dataset family:
# ['question_id','model_a','model_b','winner','conversation_a','conversation_b',
# 'turn','anony','language','tstamp','conv_metadata','is_code','is_refusal',
# 'dedup_tag','category_tag','judge_hash', ...]
# See the HF dataset card: winner ∈ {model_a, model_b, tie, both_bad}; conversations are full threads.
# https://huggingface.co/datasets/lmarena-ai/arena-human-preference-100k/blob/c9fe392b54cd08a0fd27777455318bac2e7b495c/README.md
# Dropdown options - sorted by frequency
def get_sorted_options(column_name):
    """Return the dropdown choices for *column_name*.

    The list always starts with the "(Any)" wildcard, followed by the
    column's distinct non-null values in the order ``value_counts()``
    yields them (most frequent first by default). A column that is absent
    from ``df`` yields just the wildcard.
    """
    choices = ["(Any)"]
    if column_name in df.columns:
        counts = df[column_name].dropna().value_counts()
        choices.extend(counts.index.tolist())
    return choices
# Choice lists for the three filter dropdowns, computed once at import time.
models_a, models_b, languages = (
    get_sorted_options(column) for column in ("model_a", "model_b", "language")
)
def _ensure_messages(x: Any) -> List[Dict[str, Any]]: | |
""" | |
conversation_a / conversation_b can be: | |
- a Python list of {role, content} dicts | |
- a JSON string encoding that list | |
Normalize to a list of dicts with 'role' and 'content'. | |
""" | |
if isinstance(x, list): | |
return x | |
if isinstance(x, str): | |
try: | |
val = json.loads(x) | |
if isinstance(val, list): | |
return val | |
except Exception: | |
pass | |
# Last resort: wrap as a single assistant message | |
return [{"role": "assistant", "content": str(x)}] | |
def _winner_text(row: pd.Series) -> str: | |
w = str(row.get("winner", "")).strip().lower() | |
mapping = { | |
"model_a": "Preference: Model A", | |
"model_b": "Preference: Model B", | |
"tie": "Preference: Tie", | |
"both_bad": "Preference: Tie (both bad)", | |
} | |
return mapping.get(w, "Preference: (unknown)") | |
def _bubble_html(messages: List[Dict[str, Any]], side_label: str) -> str: | |
""" | |
Make a chat-like interface with proper user/assistant bubbles. | |
User messages are on the left, assistant messages on the right. | |
""" | |
# Tailwind-like inline styles (no external CSS) | |
css = """ | |
<style> | |
.chat-container {padding:12px; border-radius:16px; background:#fafafa; box-shadow:0 1px 3px rgba(0,0,0,.08);} | |
.model-label {font-weight:600; font-size:14px; margin-bottom:12px; opacity:.8; text-align:center;} | |
.message {margin:12px 0; display:flex; align-items:flex-start;} | |
.message.user {justify-content:flex-start;} | |
.message.assistant {justify-content:flex-end;} | |
.bubble {max-width:70%; padding:10px 14px; border-radius:18px; word-wrap:break-word;} | |
.bubble.user {background:#e9eef7; color:#2c3e50; margin-right:auto;} | |
.bubble.assistant {background:#eaf7ea; color:#2c3e50; margin-left:auto;} | |
.role-label {font-size:11px; font-weight:500; margin-bottom:4px; opacity:.7;} | |
.role-label.assistant {text-align:right;} | |
.bubble pre {background:#f5f5f5; padding:8px; border-radius:4px; overflow-x:auto; margin:8px 0;} | |
.bubble code {background:#f0f0f0; padding:2px 4px; border-radius:3px; font-family:monospace;} | |
.bubble p {margin:8px 0;} | |
.bubble ul, .bubble ol {margin:8px 0; padding-left:20px;} | |
.bubble blockquote {border-left:3px solid #ddd; padding-left:12px; margin:8px 0; color:#666;} | |
</style> | |
""" | |
body = [f'<div class="chat-container">'] | |
# Only show model label at top for User side | |
if side_label != "Assistant": | |
body.append(f'<div class="model-label">{side_label}</div>') | |
first_assistant_message = True | |
for m in messages: | |
role = (m.get("role") or "").lower() | |
content = str(m.get("content", "")).strip() | |
if not content: | |
continue | |
# Convert markdown to HTML | |
try: | |
rendered_content = markdown.markdown(content, extensions=['fenced_code', 'codehilite', 'tables']) | |
except: | |
# Fallback to escaped content if markdown rendering fails | |
rendered_content = html.escape(content) | |
if role in ("user", "system"): | |
role_display = "User" if role == "user" else "System" | |
body.append(f''' | |
<div class="message user"> | |
<div> | |
<div class="role-label">{role_display}</div> | |
<div class="bubble user">{rendered_content}</div> | |
</div> | |
</div> | |
''') | |
else: | |
# For assistant messages, include the model name in the first message | |
if first_assistant_message and side_label == "Assistant": | |
content = f"{side_label}: {content}" | |
try: | |
rendered_content = markdown.markdown(content, extensions=['fenced_code', 'codehilite', 'tables']) | |
except: | |
rendered_content = html.escape(content) | |
first_assistant_message = False | |
body.append(f''' | |
<div class="message assistant"> | |
<div> | |
<div class="role-label assistant">Assistant</div> | |
<div class="bubble assistant">{rendered_content}</div> | |
</div> | |
</div> | |
''') | |
body.append("</div>") | |
return css + "\n".join(body) | |
def filter_df(model_a_sel: str, model_b_sel: str, lang_sel: str) -> pd.DataFrame:
    """Return the rows of ``df`` matching the dropdown selections, re-indexed from 0.

    A selection of "(Any)" leaves that column unfiltered. The language
    filter compares against the column's string form and is skipped when
    the frame has no ``language`` column.
    """
    any_choice = "(Any)"
    matched = df
    for column, wanted in (("model_a", model_a_sel), ("model_b", model_b_sel)):
        if wanted == any_choice:
            continue
        matched = matched[matched[column] == wanted]
    if lang_sel != any_choice and "language" in matched.columns:
        as_text = matched["language"].astype(str)
        matched = matched[as_text == lang_sel]
    return matched.reset_index(drop=True)
def format_row(row: pd.Series) -> Tuple[str, str, str, str, str]:
    """Render one battle row into the five values shown by the UI.

    Returns:
        ``(header, left_html, right_html, footer_html, meta)`` where
        ``left_html`` / ``right_html`` are the chat views of conversation A
        and B, and ``footer_html`` is the preference banner. ``header`` and
        ``meta`` are always empty strings.
    """
    msgs_a = _ensure_messages(row["conversation_a"])
    msgs_b = _ensure_messages(row["conversation_b"])
    left = _bubble_html(msgs_a, f"Model A: {row['model_a']}")
    right = _bubble_html(msgs_b, f"Model B: {row['model_b']}")
    # Create a subtle preference footer with soft yellow background
    preference_text = _winner_text(row)
    footer_html = f"""
    <div style="
        background: #fff8e1;
        color: #5d4037;
        padding: 10px 16px;
        margin: 12px 0;
        border-radius: 6px;
        font-weight: 600;
        font-size: 14px;
        text-align: center;
        box-shadow: 0 1px 3px rgba(0,0,0,0.08);
        border: 1px solid #ffcc02;
    ">
        {preference_text}
    </div>
    """
    return "", left, right, footer_html, ""
# Build the viewer UI: three filter dropdowns, navigation buttons, and the
# side-by-side conversation panes, plus the callbacks that drive them.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# Chatbot Arena Battle Viewer (100k)")
    gr.Markdown(
        "Filter by **Model A**, **Model B**, and **Language**, then browse side-by-side conversations. "
        "Data: `lmarena-ai/arena-human-preference-100k`."
    )
    with gr.Row():
        dd_a = gr.Dropdown(models_a, label="Model A", value="(Any)")
        dd_b = gr.Dropdown(models_b, label="Model B", value="(Any)")
        dd_l = gr.Dropdown(languages, label="Language", value=languages[0])
    with gr.Row():
        btn_rand = gr.Button("Random match")
        btn_prev = gr.Button("◀ Prev")
        btn_next = gr.Button("Next ▶")
    # Per-session state: positions within the filtered frame and the current one.
    st_indices = gr.State([])
    st_ptr = gr.State(0)
    header_md = gr.Markdown()
    with gr.Row():
        left_html = gr.HTML()
        right_html = gr.HTML()
    footer_md = gr.HTML()
    meta_md = gr.Markdown()

    def _no_match():
        """Outputs for an empty selection: the notice goes in the header only.

        The notice is Markdown, so it is not placed in the HTML panes, where
        it would show up as literal underscores.
        """
        return [], -1, "_No rows match your filters._", "", "", "", ""

    def apply_filters(a, b, lang):
        """Re-filter on a dropdown change (or page load) and show the first match."""
        sub = filter_df(a, b, lang)
        if len(sub) == 0:
            return _no_match()
        head, left, right, foot, meta = format_row(sub.iloc[0])
        return list(range(len(sub))), 0, head, left, right, foot, meta

    def nav(a, b, lang, indices, ptr, direction):
        """Step to the next/previous match, wrapping around at either end.

        ``indices`` is accepted because it is wired as an input, but the
        positions are recomputed from the current filters.
        """
        sub = filter_df(a, b, lang)
        if len(sub) == 0:
            return _no_match()
        # A stale or invalid pointer (e.g. after "no match") restarts at 0.
        if ptr is None or ptr < 0 or ptr >= len(sub):
            ptr = 0
        if direction == "next":
            ptr = (ptr + 1) % len(sub)
        elif direction == "prev":
            ptr = (ptr - 1) % len(sub)
        head, left, right, foot, meta = format_row(sub.iloc[ptr])
        return list(range(len(sub))), ptr, head, left, right, foot, meta

    def rand(a, b, lang):
        """Jump to a uniformly random match for the current filters."""
        sub = filter_df(a, b, lang)
        if len(sub) == 0:
            return _no_match()
        r = random.randrange(len(sub))
        head, left, right, foot, meta = format_row(sub.iloc[r])
        return list(range(len(sub))), r, head, left, right, foot, meta

    # Every callback reads the same filters and writes the same seven outputs.
    filter_inputs = [dd_a, dd_b, dd_l]
    view_outputs = [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md]

    # Auto-update when dropdowns change
    for dropdown in (dd_a, dd_b, dd_l):
        dropdown.change(apply_filters, filter_inputs, view_outputs)
    btn_next.click(nav, filter_inputs + [st_indices, st_ptr, gr.State("next")], view_outputs)
    btn_prev.click(nav, filter_inputs + [st_indices, st_ptr, gr.State("prev")], view_outputs)
    btn_rand.click(rand, filter_inputs, view_outputs)
    # Populate the view with the first battle as soon as the page loads.
    gr.on([demo.load], apply_filters, filter_inputs, view_outputs)

if __name__ == "__main__":
    demo.launch()