import json
import random
import html
import markdown
from typing import List, Dict, Any, Tuple
import gradio as gr
import pandas as pd
from datasets import load_dataset
# Battle records are loaded once, at import time, from a JSON file whose
# relative path is resolved against the current working directory.
df = pd.read_json("selected_battles.json")
# Alternative source: load the arena battles from the Hugging Face dataset instead of the local file.
# ds = load_dataset("lmarena-ai/arena-human-preference-100k")
# df = ds['train'].to_pandas()
# Expected columns in this dataset family:
# ['question_id','model_a','model_b','winner','conversation_a','conversation_b',
# 'turn','anony','language','tstamp','conv_metadata','is_code','is_refusal',
# 'dedup_tag','category_tag','judge_hash', ...]
# See the HF dataset card: winner ∈ {model_a, model_b, tie, both_bad}; conversations are full threads.
# https://huggingface.co/datasets/lmarena-ai/arena-human-preference-100k/blob/c9fe392b54cd08a0fd27777455318bac2e7b495c/README.md
# Dropdown options - sorted by frequency
def get_sorted_options(column_name):
    """Return the dropdown choices for ``column_name``.

    The list always starts with the ``"(Any)"`` wildcard. When the column
    exists in the module-level ``df``, its distinct non-null values follow,
    ordered from most to least frequent. A missing column yields only the
    wildcard.
    """
    choices = ["(Any)"]
    if column_name in df.columns:
        # value_counts() orders its index by descending count.
        frequencies = df[column_name].dropna().value_counts()
        choices.extend(frequencies.index.tolist())
    return choices
# Choice lists for the three filter dropdowns, computed once at import time.
models_a, models_b, languages = (
    get_sorted_options(column) for column in ("model_a", "model_b", "language")
)
def _ensure_messages(x: Any) -> List[Dict[str, Any]]:
    """Normalize a raw conversation value to a list of message dicts.

    ``conversation_a`` / ``conversation_b`` can be:
      - a Python list of ``{role, content}`` dicts (returned as-is),
      - a tuple, or an array-like exposing ``tolist()`` (e.g. a NumPy object
        array), holding those dicts,
      - a JSON string encoding that list.

    Anything else, including a string that is not JSON or whose JSON value is
    not a list, is wrapped as a single assistant message carrying ``str(x)``.
    """
    if isinstance(x, list):
        return x
    if isinstance(x, tuple):
        return list(x)
    if isinstance(x, str):
        try:
            decoded = json.loads(x)
        except (ValueError, RecursionError):
            # Not JSON (JSONDecodeError is a ValueError) or too deeply nested:
            # fall through to the plain-text last resort below.
            decoded = None
        if isinstance(decoded, list):
            return decoded
    elif callable(getattr(x, "tolist", None)):
        # Array-like containers convert to a plain list; scalars that also
        # expose tolist() return a non-list and fall through.
        converted = x.tolist()
        if isinstance(converted, list):
            return converted
    # Last resort: wrap as a single assistant message
    return [{"role": "assistant", "content": str(x)}]
def _winner_text(row: pd.Series) -> str:
    """Translate the row's ``winner`` value into a human-readable label.

    The value is stringified, stripped and lower-cased before matching;
    anything unrecognized (including a missing value) maps to the
    "(unknown)" label.
    """
    verdict = str(row.get("winner", "")).strip().lower()
    if verdict == "model_a":
        return "Preference: Model A"
    if verdict == "model_b":
        return "Preference: Model B"
    if verdict == "tie":
        return "Preference: Tie"
    if verdict == "both_bad":
        return "Preference: Tie (both bad)"
    return "Preference: (unknown)"
def _render_markdown(text: str) -> str:
    """Render ``text`` as Markdown, falling back to HTML-escaped plain text."""
    try:
        return markdown.markdown(
            text, extensions=["fenced_code", "codehilite", "tables"]
        )
    except Exception:
        # Best-effort display: a message the renderer cannot handle is shown
        # verbatim (escaped) instead of failing the whole conversation view.
        return html.escape(text)


def _bubble_html(messages: List[Dict[str, Any]], side_label: str) -> str:
    """Render a conversation as one HTML fragment, one entry per message.

    Args:
        messages: Message dicts with ``role`` and ``content`` keys. Messages
            whose content is missing, ``None`` or blank are skipped.
        side_label: Heading for this side. It is emitted (HTML-escaped) at the
            top unless it equals ``"Assistant"``; in that case it is prefixed
            to the first non-user/system message instead.

    Returns:
        The fragment as a string: a role label followed by the rendered
        Markdown for each message, in order.

    NOTE(review): the string literals in this function carry no tags or
    inline styles, so the left/right bubble layout is not produced here.
    The wrapper markup appears to have been lost from this file — restore it
    if available.
    NOTE(review): Python-Markdown passes raw HTML in its input through, so
    message content is not sanitized by this function.
    """
    # NOTE(review): style block is empty in this file (see docstring).
    css = "\n"
    body = ["\n"]
    # Only show the label at the top when it is not the generic "Assistant".
    if side_label != "Assistant":
        body.append(f"\n{html.escape(side_label)}\n")
    first_assistant_message = True
    for m in messages:
        role = (m.get("role") or "").lower()
        raw_content = m.get("content")
        # A None content would otherwise be displayed as the text "None".
        content = "" if raw_content is None else str(raw_content).strip()
        if not content:
            continue
        if role in ("user", "system"):
            role_display = "User" if role == "user" else "System"
            body.append(f"\n{role_display}\n{_render_markdown(content)}\n")
            continue
        # Any other role is shown as an assistant message. Prefix the label
        # before rendering so each message is rendered exactly once.
        if first_assistant_message and side_label == "Assistant":
            content = f"{side_label}: {content}"
        first_assistant_message = False
        body.append(f"\nAssistant\n{_render_markdown(content)}\n")
    body.append("\n")
    return css + "\n".join(body)
def filter_df(model_a_sel: str, model_b_sel: str, lang_sel: str) -> pd.DataFrame:
    """Return the rows of the module-level ``df`` matching the selections.

    A selection of ``"(Any)"`` disables that filter. The language filter is
    applied only when a ``language`` column exists, and compares the column's
    string form with ``lang_sel``. The result is re-indexed from zero.
    """
    wildcard = "(Any)"
    matched = df
    for column, wanted in (("model_a", model_a_sel), ("model_b", model_b_sel)):
        if wanted == wildcard:
            continue
        matched = matched[matched[column] == wanted]
    if lang_sel != wildcard and "language" in matched.columns:
        language_as_text = matched["language"].astype(str)
        matched = matched[language_as_text == lang_sel]
    return matched.reset_index(drop=True)
def format_row(row: pd.Series) -> Tuple[str, str, str, str, str]:
    """Build the display fragments for one battle row.

    Args:
        row: A record providing ``conversation_a``, ``conversation_b``,
            ``model_a`` and ``model_b``; ``winner`` is read via ``row.get``.

    Returns:
        ``(header, left_html, right_html, footer_html, meta)``. ``header`` and
        ``meta`` are always empty strings. ``left_html`` / ``right_html`` are
        the rendered conversations for model A / model B. ``footer_html``
        carries the preference label.
    """
    msgs_a = _ensure_messages(row["conversation_a"])
    msgs_b = _ensure_messages(row["conversation_b"])
    left = _bubble_html(msgs_a, f"Model A: {row['model_a']}")
    right = _bubble_html(msgs_b, f"Model B: {row['model_b']}")
    # The header slot is returned empty, so no prompt headline is computed.
    preference_text = _winner_text(row)
    footer_html = f"""
{preference_text}
"""
    return "", left, right, footer_html, ""
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# Chatbot Arena Battle Viewer (100k)")
    gr.Markdown(
        "Filter by **Model A**, **Model B**, and **Language**, then browse side-by-side conversations. "
        "Data: `lmarena-ai/arena-human-preference-100k`."
    )

    with gr.Row():
        dd_a = gr.Dropdown(models_a, label="Model A", value="(Any)")
        dd_b = gr.Dropdown(models_b, label="Model B", value="(Any)")
        dd_l = gr.Dropdown(languages, label="Language", value=languages[0])

    with gr.Row():
        btn_rand = gr.Button("Random match")
        btn_prev = gr.Button("◀ Prev")
        btn_next = gr.Button("Next ▶")

    # Per-session state: positions within the filtered frame, and the current one.
    st_indices = gr.State([])
    st_ptr = gr.State(0)

    header_md = gr.Markdown()
    with gr.Row():
        left_html = gr.HTML()
        right_html = gr.HTML()
    footer_md = gr.HTML()
    meta_md = gr.Markdown()

    def _no_match():
        """Return the outputs for an empty filter result.

        The message goes to the Markdown header only; the HTML panes are
        cleared so they never display raw Markdown syntax.
        """
        return [], -1, "_No rows match your filters._", "", "", "", ""

    def _show(sub, ptr):
        """Return the outputs (state first, then fragments) for row ``ptr`` of ``sub``."""
        head, left, right, foot, meta = format_row(sub.iloc[ptr])
        return list(range(len(sub))), ptr, head, left, right, foot, meta

    def apply_filters(a, b, l):
        """Re-filter and show the first matching battle (or the no-match message)."""
        sub = filter_df(a, b, l)
        if not len(sub):
            return _no_match()
        return _show(sub, 0)

    def nav(a, b, l, indices, ptr, direction):
        """Step to the next/previous matching battle, wrapping at both ends.

        ``indices`` is accepted because it is wired as an input, but the
        filtered frame is recomputed from the dropdown values on every call.
        """
        sub = filter_df(a, b, l)
        total = len(sub)
        if not total:
            return _no_match()
        # An unset or stale pointer restarts from the first row before stepping.
        if ptr is None or ptr < 0 or ptr >= total:
            ptr = 0
        if direction == "next":
            ptr = (ptr + 1) % total
        elif direction == "prev":
            ptr = (ptr - 1) % total
        return _show(sub, ptr)

    def rand(a, b, l):
        """Show a uniformly random battle among those matching the filters."""
        sub = filter_df(a, b, l)
        if not len(sub):
            return _no_match()
        return _show(sub, random.randrange(len(sub)))

    filter_inputs = [dd_a, dd_b, dd_l]
    view_outputs = [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md]

    # Auto-update when dropdowns change
    for dropdown in (dd_a, dd_b, dd_l):
        dropdown.change(apply_filters, filter_inputs, view_outputs)

    btn_next.click(nav, [dd_a, dd_b, dd_l, st_indices, st_ptr, gr.State("next")], view_outputs)
    btn_prev.click(nav, [dd_a, dd_b, dd_l, st_indices, st_ptr, gr.State("prev")], view_outputs)
    btn_rand.click(rand, filter_inputs, view_outputs)

    # Populate the view once when the page loads.
    gr.on([demo.load], apply_filters, filter_inputs, view_outputs)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()