lisabdunlap committed on
Commit
f68c8d9
·
verified ·
1 Parent(s): 75889f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +266 -0
app.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import random
3
+ import html
4
+ import markdown
5
+ from typing import List, Dict, Any, Tuple
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+ from datasets import load_dataset
10
+
11
+ # df = pd.read_json("selected_battles.json")
12
+
13
# Load the arena battles into a pandas DataFrame.
# With split="train", load_dataset returns the Dataset for that split directly
# (not a DatasetDict), so it is converted as-is instead of being indexed by
# "train" again.
ds = load_dataset("lmarena-ai/arena-human-preference-100k", split="train")
df = ds.to_pandas()
# The frame was originally bound to `battles`, but every function below reads
# `df`; keep the old name as an alias.
battles = df
16
+
17
+ # Expected columns in this dataset family:
18
+ # ['question_id','model_a','model_b','winner','conversation_a','conversation_b',
19
+ # 'turn','anony','language','tstamp','conv_metadata','is_code','is_refusal',
20
+ # 'dedup_tag','category_tag','judge_hash', ...]
21
+ # See the HF dataset card: winner ∈ {model_a, model_b, tie, both_bad}; conversations are full threads. https://huggingface.co/datasets/lmarena-ai/arena-human-preference-100k/blob/c9fe392b54cd08a0fd27777455318bac2e7b495c/README.md
22
+
23
+ # Dropdown options - sorted by frequency
24
def get_sorted_options(column_name):
    """Return the dropdown choices for ``column_name``.

    The result always starts with "(Any)". If the column exists in ``df``,
    its distinct non-null values follow, in the order produced by
    ``value_counts()``.
    """
    choices = ["(Any)"]
    if column_name in df.columns:
        by_frequency = df[column_name].dropna().value_counts()
        choices += by_frequency.index.tolist()
    return choices
30
+
31
# Choices for the three filter dropdowns, one list per filtered column.
models_a, models_b, languages = (
    get_sorted_options(column) for column in ("model_a", "model_b", "language")
)
34
+
35
def _ensure_messages(x: Any) -> List[Dict[str, Any]]:
    """Normalize a conversation cell to a list of message dicts.

    ``conversation_a`` / ``conversation_b`` can arrive as:
      - a Python list or tuple of {role, content} dicts
      - an array-like exposing ``tolist()`` (e.g. a numpy object array or a
        pandas Series holding the messages)
      - a JSON string encoding such a list

    Items inside a sequence that are not dicts are wrapped as assistant
    messages so callers can always use ``m.get(...)``. Anything that cannot
    be interpreted as a sequence of messages is wrapped, as text, in a single
    assistant message.
    """
    original = x
    if isinstance(x, str):
        try:
            x = json.loads(x)
        except (ValueError, RecursionError):
            # Not JSON: fall through to the plain-text fallback below.
            x = None
    elif not isinstance(x, (list, tuple)) and hasattr(x, "tolist"):
        # Array-likes fail the list check; unpack them instead of
        # stringifying the whole conversation.
        x = x.tolist()

    if isinstance(x, (list, tuple)):
        return [
            m if isinstance(m, dict) else {"role": "assistant", "content": str(m)}
            for m in x
        ]

    # Last resort: wrap the original value as a single assistant message.
    return [{"role": "assistant", "content": str(original)}]
53
+
54
def _winner_text(row: pd.Series) -> str:
    """Return the human-readable preference label for a battle row.

    Reads ``row["winner"]`` (missing key treated as empty), compares it
    case-insensitively with surrounding whitespace ignored, and maps it to a
    "Preference: ..." string. Unrecognized values map to
    "Preference: (unknown)".
    """
    both_bad = "Preference: Tie (both bad)"
    labels = {
        "model_a": "Preference: Model A",
        "model_b": "Preference: Model B",
        "tie": "Preference: Tie",
        # Accept the alternative spellings of the "both bad" outcome so they
        # do not fall through to "(unknown)".
        "both_bad": both_bad,
        "bothbad": both_bad,
        "tie (bothbad)": both_bad,
    }
    key = str(row.get("winner", "")).strip().lower()
    return labels.get(key, "Preference: (unknown)")
63
+
64
def _render_markdown(text: str) -> str:
    """Render markdown ``text`` to HTML; fall back to escaped plain text on failure."""
    try:
        return markdown.markdown(text, extensions=['fenced_code', 'codehilite', 'tables'])
    except Exception:
        # Best-effort rendering: one bad message must not break the whole panel.
        return html.escape(text)


def _bubble_html(messages: List[Dict[str, Any]], side_label: str) -> str:
    """Build the HTML for one conversation panel as chat bubbles.

    User and system messages are left-aligned; every other role is rendered
    as a right-aligned assistant bubble. Messages whose content is missing or
    blank are skipped.

    Args:
        messages: Message dicts with ``role`` and ``content`` keys.
        side_label: Panel heading, HTML-escaped and shown at the top. When it
            is exactly "Assistant" no heading is emitted and the label is
            prefixed to the first assistant message instead.

    Returns:
        An HTML string: an inline ``<style>`` block followed by the chat
        container.
    """
    # NOTE(review): message content goes through markdown, which passes raw
    # HTML in the source through to the output. The content comes from the
    # dataset; add a sanitizer here if it must be treated as untrusted.

    # Tailwind-like inline styles (no external CSS)
    css = """
    <style>
    .chat-container {padding:12px; border-radius:16px; background:#fafafa; box-shadow:0 1px 3px rgba(0,0,0,.08);}
    .model-label {font-weight:600; font-size:14px; margin-bottom:12px; opacity:.8; text-align:center;}
    .message {margin:12px 0; display:flex; align-items:flex-start;}
    .message.user {justify-content:flex-start;}
    .message.assistant {justify-content:flex-end;}
    .bubble {max-width:70%; padding:10px 14px; border-radius:18px; word-wrap:break-word;}
    .bubble.user {background:#e9eef7; color:#2c3e50; margin-right:auto;}
    .bubble.assistant {background:#eaf7ea; color:#2c3e50; margin-left:auto;}
    .role-label {font-size:11px; font-weight:500; margin-bottom:4px; opacity:.7;}
    .role-label.assistant {text-align:right;}
    .bubble pre {background:#f5f5f5; padding:8px; border-radius:4px; overflow-x:auto; margin:8px 0;}
    .bubble code {background:#f0f0f0; padding:2px 4px; border-radius:3px; font-family:monospace;}
    .bubble p {margin:8px 0;}
    .bubble ul, .bubble ol {margin:8px 0; padding-left:20px;}
    .bubble blockquote {border-left:3px solid #ddd; padding-left:12px; margin:8px 0; color:#666;}
    </style>
    """
    body = ['<div class="chat-container">']

    label_in_first_reply = side_label == "Assistant"
    if not label_in_first_reply:
        # The label is interpolated into HTML, so escape it.
        body.append(f'<div class="model-label">{html.escape(side_label)}</div>')

    first_assistant_message = True
    for m in messages:
        role = (m.get("role") or "").lower()
        raw_content = m.get("content")
        # A present-but-None content must not be rendered as the text "None".
        content = "" if raw_content is None else str(raw_content).strip()
        if not content:
            continue

        if role in ("user", "system"):
            role_display = "User" if role == "user" else "System"
            body.append(f'''
            <div class="message user">
            <div>
            <div class="role-label">{role_display}</div>
            <div class="bubble user">{_render_markdown(content)}</div>
            </div>
            </div>
            ''')
            continue

        if first_assistant_message and label_in_first_reply:
            # Prefix the label before rendering so the message is rendered once.
            content = f"{side_label}: {content}"
            first_assistant_message = False

        body.append(f'''
        <div class="message assistant">
        <div>
        <div class="role-label assistant">Assistant</div>
        <div class="bubble assistant">{_render_markdown(content)}</div>
        </div>
        </div>
        ''')

    body.append("</div>")
    return css + "\n".join(body)
140
+
141
def filter_df(model_a_sel: str, model_b_sel: str, lang_sel: str) -> pd.DataFrame:
    """Return the rows of ``df`` matching the selected filters.

    A selection of "(Any)" leaves that column unfiltered. The language
    filter is applied only when a ``language`` column exists, and compares
    the column's string form with ``lang_sel``. The result is re-indexed
    from 0.
    """
    any_choice = "(Any)"
    matches = df
    for column, wanted in (("model_a", model_a_sel), ("model_b", model_b_sel)):
        if wanted != any_choice:
            matches = matches[matches[column] == wanted]
    if "language" in matches.columns and lang_sel != any_choice:
        language_as_text = matches["language"].astype(str)
        matches = matches[language_as_text == lang_sel]
    return matches.reset_index(drop=True)
150
+
151
def format_row(row: pd.Series) -> Tuple[str, str, str, str, str]:
    """Render one battle row into the five display values.

    Returns:
        ``(header, left, right, footer, meta)``. ``left`` and ``right`` are
        the chat HTML for conversation A and B, labelled with their model
        names. ``footer`` is an HTML banner with the preference text.
        ``header`` and ``meta`` are always empty strings.
    """
    left = _bubble_html(
        _ensure_messages(row["conversation_a"]), f"Model A: {row['model_a']}"
    )
    right = _bubble_html(
        _ensure_messages(row["conversation_b"]), f"Model B: {row['model_b']}"
    )

    # Subtle preference footer with a soft yellow background.
    preference_text = _winner_text(row)
    footer_html = f"""
    <div style="
    background: #fff8e1;
    color: #5d4037;
    padding: 10px 16px;
    margin: 12px 0;
    border-radius: 6px;
    font-weight: 600;
    font-size: 14px;
    text-align: center;
    box-shadow: 0 1px 3px rgba(0,0,0,0.08);
    border: 1px solid #ffcc02;
    ">
    {preference_text}
    </div>
    """

    return "", left, right, footer_html, ""
184
+
185
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.Markdown("# Chatbot Arena Battle Viewer (100k)")
    gr.Markdown(
        "Filter by **Model A**, **Model B**, and **Language**, then browse side-by-side conversations. "
        "Data: `lmarena-ai/arena-human-preference-100k`."
    )

    with gr.Row():
        dd_a = gr.Dropdown(models_a, label="Model A", value="(Any)")
        dd_b = gr.Dropdown(models_b, label="Model B", value="(Any)")
        dd_l = gr.Dropdown(languages, label="Language", value=languages[0])

    with gr.Row():
        btn_rand = gr.Button("Random match")
        btn_prev = gr.Button("◀ Prev")
        btn_next = gr.Button("Next ▶")

    # Per-session state: positions in the current filtered view and the
    # position of the row being shown (-1 when nothing matches).
    st_indices = gr.State([])
    st_ptr = gr.State(0)

    header_md = gr.Markdown()
    with gr.Row():
        left_html = gr.HTML()
        right_html = gr.HTML()
    footer_md = gr.HTML()
    meta_md = gr.Markdown()

    no_match_text = "_No rows match your filters._"

    def _view(sub, ptr):
        """Return the seven output values for row ``ptr`` of ``sub``.

        When ``sub`` is empty the markdown placeholder goes to the header
        only; the HTML panels are cleared rather than shown raw markdown.
        """
        if not len(sub):
            return [], -1, no_match_text, "", "", "", ""
        head, left, right, foot, meta = format_row(sub.iloc[ptr])
        return list(range(len(sub))), ptr, head, left, right, foot, meta

    def apply_filters(model_a, model_b, lang):
        """Show the first row matching the current filters."""
        return _view(filter_df(model_a, model_b, lang), 0)

    def nav(model_a, model_b, lang, indices, ptr, direction):
        """Step to the next/previous matching row, wrapping at either end."""
        sub = filter_df(model_a, model_b, lang)
        total = len(sub)
        if total:
            # A stale or invalid pointer restarts from the first row.
            if ptr is None or ptr < 0 or ptr >= total:
                ptr = 0
            if direction == "next":
                ptr = (ptr + 1) % total
            elif direction == "prev":
                ptr = (ptr - 1) % total
        return _view(sub, ptr)

    def rand(model_a, model_b, lang):
        """Show a uniformly random row among those matching the filters."""
        sub = filter_df(model_a, model_b, lang)
        pick = random.randrange(len(sub)) if len(sub) else -1
        return _view(sub, pick)

    filter_inputs = [dd_a, dd_b, dd_l]
    view_outputs = [st_indices, st_ptr, header_md, left_html, right_html, footer_md, meta_md]

    # Auto-update when dropdowns change
    for dropdown in filter_inputs:
        dropdown.change(apply_filters, filter_inputs, view_outputs)

    btn_next.click(nav, filter_inputs + [st_indices, st_ptr, gr.State("next")], view_outputs)
    btn_prev.click(nav, filter_inputs + [st_indices, st_ptr, gr.State("prev")], view_outputs)
    btn_rand.click(rand, filter_inputs, view_outputs)

    # Populate the view once when the page loads.
    gr.on([demo.load], apply_filters, filter_inputs, view_outputs)

if __name__ == "__main__":
    demo.launch()