import gradio as gr
import pandas as pd
from utils.visualizations import load_instance, get_instances, clean_text
from utils.interp_space_utils import cached_generate_style_embedding, instance_to_df, compute_g2v_features, compute_predicted_author
# ── Global CSS to be prepended to every block ─────────────────────────────────
GLOBAL_CSS = """
"""
# NOTE(review): the stylesheet is currently empty — styled_block() prepends this
# string verbatim to every HTML blob, so populating it would style all panels.
def styled_block(content: str) -> str:
    """Prepend the shared ``GLOBAL_CSS`` stylesheet to *content*.

    The result is a single HTML blob safe to pass into ``gr.HTML()``.
    """
    return "\n".join((GLOBAL_CSS, content))
def styled_html(html_content: str) -> str:
    """Wrap raw HTML content with the global CSS; pass the result to ``gr.HTML()``."""
    wrapped = styled_block(html_content)
    return wrapped
def instruction_callout(text: str) -> str:
    """Render *text* as a bold, full-width callout box.

    Returns a full HTML string (with the global CSS injected).

    Usage:
        gr.HTML(instruction_callout(
            "Run visualization to see which author cluster contains the mystery document."
        ))
    """
    box = f"""
    {text}
    """
    return styled_html(box)
def read_txt(f):
    """Return the stripped UTF-8 text content of an uploaded file.

    Accepts either a Gradio file object (exposing the temp-file path via
    ``.name``) or a plain path string. Returns ``""`` for a falsy input
    and the placeholder ``"(Could not read file)"`` when the file cannot
    be opened or decoded.
    """
    if not f:
        return ""
    # Gradio upload objects carry the on-disk path in `.name`.
    path = f.name if hasattr(f, 'name') else f
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            return fh.read().strip()
    except (OSError, UnicodeDecodeError):
        # Narrowed from a bare `except Exception` so programming errors are
        # no longer silently swallowed; I/O and decode failures still fall
        # back to the sentinel message shown in the UI.
        return "(Could not read file)"
# Toggle which input UI is visible
# Toggle which input UI is visible
def toggle_task(mode):
    """Toggle visibility of the two task-input panels based on *mode*.

    Returns a pair of ``gr.update()`` objects for the (predefined, upload)
    panels; exactly one is made visible for the two known modes.
    """
    # Removed a leftover debug `print(mode)` that cluttered server logs.
    return (
        gr.update(visible=(mode == "Predefined HRS Task")),
        gr.update(visible=(mode == "Upload Your Own Task")),
    )
# Update displayed texts based on mode
# Update displayed texts based on mode
def update_task_display(mode, iid, instances, background_df, mystery_file, cand1_file, cand2_file, cand3_file, true_author, model_radio, custom_model_input):
    """Load (or assemble) an attribution task, embed its texts, and render it.

    For ``mode == "Predefined HRS Task"`` the instance is looked up in
    *instances* using the numeric part of the ``"Task N"`` label in *iid*,
    along with its precomputed predicted/true author indices. Otherwise a
    custom task is built from the four uploaded files and the prediction is
    computed from pairwise embedding similarity.

    Style embeddings and Gram2Vec features are generated for both the task
    authors and the background corpus.

    Returns a 13-element list matching the Gradio output components:
    [header_html, mystery_html, cand1_html, cand2_html, cand3_html,
     mystery_txt, c1_txt, c2_txt, c3_txt,
     task_authors_df, background_df, predicted_author, ground_truth_author]
    """
    # "Other" means the user typed a custom model name into the textbox.
    model_name = model_radio if model_radio != "Other" else custom_model_input

    if mode == "Predefined HRS Task":
        iid = int(iid.replace('Task ', ''))
        data = instances[iid]
        predicted_author = data['latent_rank'][0]
        ground_truth_author = data['gt_idx']
        mystery_txt = data['Q_fullText']
        c1_txt = data['a0_fullText']
        c2_txt = data['a1_fullText']
        c3_txt = data['a2_fullText']
        candidate_texts = [c1_txt, c2_txt, c3_txt]
        # Create a dataframe of the task authors, carrying along the
        # precomputed predicted/ground-truth author indices.
        task_authors_df = instance_to_df(instances[iid], predicted_author=predicted_author, ground_truth_author=ground_truth_author)
        print(f"\n\n\n ----> Loaded task {iid} with {len(task_authors_df)} authors\n\n\n")
        print(task_authors_df)
    else:
        # Custom task: read the four uploaded text files.
        # (Removed a dead — and syntactically broken — `header_html`
        # assignment here; the header is produced by task_HTML() below.)
        mystery_txt = read_txt(mystery_file)
        c1_txt = read_txt(cand1_file)
        c2_txt = read_txt(cand2_file)
        c3_txt = read_txt(cand3_file)
        candidate_texts = [c1_txt, c2_txt, c3_txt]
        ground_truth_author = true_author
        print(f"Ground truth author: {ground_truth_author} ; {true_author}")
        custom_task_instance = {
            'Q_fullText': mystery_txt,
            'a0_fullText': c1_txt,
            'a1_fullText': c2_txt,
            'a2_fullText': c3_txt
        }
        task_authors_df = instance_to_df(custom_task_instance)
        print(task_authors_df)

    # Embed the task authors' texts with the selected style model.
    print(f"Generating embeddings for {model_name} on task authors")
    task_authors_df = cached_generate_style_embedding(task_authors_df, 'fullText', model_name)
    print("Task authors after embedding generation:")
    print(task_authors_df)

    # Generate the new embedding of all the background_df authors.
    print(f"Generating embeddings for {model_name} on background corpus")
    background_df = cached_generate_style_embedding(background_df, 'fullText', model_name)
    print(f"Generated embeddings for {len(background_df)} texts using model '{model_name}'")

    # Compute Gram2Vec (interpretable stylometric) features for both frames.
    print("Generating g2v features for on background corpus")
    background_g2v, task_authors_g2v = compute_g2v_features(background_df, task_authors_df)
    background_df['g2v_vector'] = background_g2v
    task_authors_df['g2v_vector'] = task_authors_g2v
    print(f"Gram2Vec feature generation complete")
    print(background_df.columns)

    if mode != "Predefined HRS Task":
        # For custom tasks, derive the prediction here by checking pairwise
        # cosine similarity over the freshly generated style embeddings.
        col_name = f'{model_name.split("/")[-1]}_style_embedding'
        predicted_author = compute_predicted_author(task_authors_df, col_name)

    # Generate the HTML panels for the task.
    header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
    return [
        header_html,
        mystery_html,
        candidate_htmls[0],
        candidate_htmls[1],
        candidate_htmls[2],
        mystery_txt,
        c1_txt,
        c2_txt,
        c3_txt,
        task_authors_df,
        background_df,
        predicted_author,
        ground_truth_author
    ]
def task_HTML(mystery_text, candidate_texts, predicted_author, ground_truth_author):
    """Build the header, mystery, and candidate HTML snippets for a task.

    Returns ``(header_html, mystery_html, candidate_htmls)`` where
    ``candidate_htmls`` holds one box per candidate. The predicted author's
    box is highlighted green; the true author's box is highlighted orange
    only when the prediction missed it (green wins when they coincide).
    """
    header_html = f"""
    Here’s the mystery passage alongside three candidate texts—look for the green highlight to see the predicted author.
    """
    mystery_html = f"""
    Mystery Author
    {clean_text(mystery_text)}
    """
    print(f"Predicted author: {predicted_author}, Ground truth author: {ground_truth_author}")

    candidate_htmls = []
    for idx, cand_text in enumerate(candidate_texts):
        label = f"Candidate {idx+1}"
        highlight = ""
        correct = (predicted_author == ground_truth_author)
        if ground_truth_author == idx and not correct:
            # Mark the true author in orange only when the model got it wrong.
            label += " (True Author)"
            highlight = (
                "border: 2px solid #ff5722; "
                "background: #fff3e0; "
                "padding:10px; "
            )
        if predicted_author == idx:
            label += " (Predicted and True Author)" if correct else " (Predicted Author)"
            highlight = (
                "border:2px solid #228B22; "  # dark green border
                "background-color: #e6ffe6; "  # light green fill
                "padding:10px; "
            )
        candidate_htmls.append(f"""
    {label}
    {clean_text(cand_text)}
    """)
    return header_html, mystery_html, candidate_htmls
def toggle_custom_model(choice):
    """Show the free-text model-name input only when "Other" is selected."""
    show_custom = choice == "Other"
    return gr.update(visible=show_custom)