import gradio as gr
import pandas as pd
from utils.visualizations import load_instance, get_instances, clean_text
from utils.interp_space_utils import cached_generate_style_embedding, instance_to_df, compute_g2v_features, compute_predicted_author
# ── Global CSS to be prepended to every block ─────────────────────────────────
GLOBAL_CSS = """
"""
# NOTE(review): the stylesheet is currently empty — styled_block() prepends this
# string verbatim to every HTML blob, so populating it would style all panels.
def styled_block(content: str) -> str:
    """Prepend the shared ``GLOBAL_CSS`` stylesheet to *content*.

    The result is a single HTML blob safe to pass into ``gr.HTML()``.
    """
    return "\n".join((GLOBAL_CSS, content))
def styled_html(html_content: str) -> str:
    """Wrap raw HTML content with the global CSS; pass the result to ``gr.HTML()``."""
    wrapped = styled_block(html_content)
    return wrapped
def instruction_callout(text: str) -> str:
    """Render *text* as a bold, full-width callout box.

    Returns a full HTML string (with the global CSS injected).

    Usage:
        gr.HTML(instruction_callout(
            "Run visualization to see which author cluster contains the mystery document."
        ))
    """
    box = f"""
    {text}
    """
    return styled_html(box)
def read_txt(f):
    """Return the stripped UTF-8 text content of an uploaded file.

    Accepts either a Gradio file object (exposing the temp-file path via
    ``.name``) or a plain path string. Returns ``""`` for a falsy input
    and the placeholder ``"(Could not read file)"`` when the file cannot
    be opened or decoded.
    """
    if not f:
        return ""
    # Gradio upload objects carry the on-disk path in `.name`.
    path = f.name if hasattr(f, 'name') else f
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            return fh.read().strip()
    except (OSError, UnicodeDecodeError):
        # Narrowed from a bare `except Exception` so programming errors are
        # no longer silently swallowed; I/O and decode failures still fall
        # back to the sentinel message shown in the UI.
        return "(Could not read file)"
# Toggle which input UI is visible
# Toggle which input UI is visible
def toggle_task(mode):
    """Toggle visibility of the two task-input panels based on *mode*.

    Returns a pair of ``gr.update()`` objects for the (predefined, upload)
    panels; exactly one is made visible for the two known modes.
    """
    # Removed a leftover debug `print(mode)` that cluttered server logs.
    return (
        gr.update(visible=(mode == "Predefined HRS Task")),
        gr.update(visible=(mode == "Upload Your Own Task")),
    )
# Update displayed texts based on mode
# Update displayed texts based on mode
def update_task_display(mode, iid, instances, background_df, mystery_file, cand1_file, cand2_file, cand3_file, true_author, model_radio, custom_model_input):
    """Load (or assemble) an attribution task, embed its texts, and render it.

    For ``mode == "Predefined HRS Task"`` the instance is looked up in
    *instances* using the numeric part of the ``"Task N"`` label in *iid*,
    along with its precomputed predicted/true author indices. Otherwise a
    custom task is built from the four uploaded files and the prediction is
    computed from pairwise embedding similarity.

    Style embeddings and Gram2Vec features are generated for both the task
    authors and the background corpus.

    Returns a 13-element list matching the Gradio output components:
    [header_html, mystery_html, cand1_html, cand2_html, cand3_html,
     mystery_txt, c1_txt, c2_txt, c3_txt,
     task_authors_df, background_df, predicted_author, ground_truth_author]
    """
    # "Other" means the user typed a custom model name into the textbox.
    model_name = model_radio if model_radio != "Other" else custom_model_input

    if mode == "Predefined HRS Task":
        iid = int(iid.replace('Task ', ''))
        data = instances[iid]
        predicted_author = data['latent_rank'][0]
        ground_truth_author = data['gt_idx']
        mystery_txt = data['Q_fullText']
        c1_txt = data['a0_fullText']
        c2_txt = data['a1_fullText']
        c3_txt = data['a2_fullText']
        candidate_texts = [c1_txt, c2_txt, c3_txt]
        # Create a dataframe of the task authors, carrying along the
        # precomputed predicted/ground-truth author indices.
        task_authors_df = instance_to_df(instances[iid], predicted_author=predicted_author, ground_truth_author=ground_truth_author)
        print(f"\n\n\n ----> Loaded task {iid} with {len(task_authors_df)} authors\n\n\n")
        print(task_authors_df)
    else:
        # Custom task: read the four uploaded text files.
        # (Removed a dead — and syntactically broken — `header_html`
        # assignment here; the header is produced by task_HTML() below.)
        mystery_txt = read_txt(mystery_file)
        c1_txt = read_txt(cand1_file)
        c2_txt = read_txt(cand2_file)
        c3_txt = read_txt(cand3_file)
        candidate_texts = [c1_txt, c2_txt, c3_txt]
        ground_truth_author = true_author
        print(f"Ground truth author: {ground_truth_author} ; {true_author}")
        custom_task_instance = {
            'Q_fullText': mystery_txt,
            'a0_fullText': c1_txt,
            'a1_fullText': c2_txt,
            'a2_fullText': c3_txt
        }
        task_authors_df = instance_to_df(custom_task_instance)
        print(task_authors_df)

    # Embed the task authors' texts with the selected style model.
    print(f"Generating embeddings for {model_name} on task authors")
    task_authors_df = cached_generate_style_embedding(task_authors_df, 'fullText', model_name)
    print("Task authors after embedding generation:")
    print(task_authors_df)

    # Generate the new embedding of all the background_df authors.
    print(f"Generating embeddings for {model_name} on background corpus")
    background_df = cached_generate_style_embedding(background_df, 'fullText', model_name)
    print(f"Generated embeddings for {len(background_df)} texts using model '{model_name}'")

    # Compute Gram2Vec (interpretable stylometric) features for both frames.
    print("Generating g2v features for on background corpus")
    background_g2v, task_authors_g2v = compute_g2v_features(background_df, task_authors_df)
    background_df['g2v_vector'] = background_g2v
    task_authors_df['g2v_vector'] = task_authors_g2v
    print(f"Gram2Vec feature generation complete")
    print(background_df.columns)

    if mode != "Predefined HRS Task":
        # For custom tasks, derive the prediction here by checking pairwise
        # cosine similarity over the freshly generated style embeddings.
        col_name = f'{model_name.split("/")[-1]}_style_embedding'
        predicted_author = compute_predicted_author(task_authors_df, col_name)

    # Generate the HTML panels for the task.
    header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
    return [
        header_html,
        mystery_html,
        candidate_htmls[0],
        candidate_htmls[1],
        candidate_htmls[2],
        mystery_txt,
        c1_txt,
        c2_txt,
        c3_txt,
        task_authors_df,
        background_df,
        predicted_author,
        ground_truth_author
    ]
def task_HTML(mystery_text, candidate_texts, predicted_author, ground_truth_author):
    """Build the header, mystery, and candidate HTML snippets for a task.

    Returns ``(header_html, mystery_html, candidate_htmls)`` where
    ``candidate_htmls`` holds one box per candidate. The predicted author's
    box is highlighted green; the true author's box is highlighted orange
    only when the prediction missed it (green wins when they coincide).
    """
    header_html = f"""
    Here’s the mystery passage alongside three candidate texts—look for the green highlight to see the predicted author.
    """
    mystery_html = f"""
    Mystery Author
    {clean_text(mystery_text)}
    """
    print(f"Predicted author: {predicted_author}, Ground truth author: {ground_truth_author}")

    candidate_htmls = []
    for idx, cand_text in enumerate(candidate_texts):
        label = f"Candidate {idx+1}"
        highlight = ""
        correct = (predicted_author == ground_truth_author)
        if ground_truth_author == idx and not correct:
            # Mark the true author in orange only when the model got it wrong.
            label += " (True Author)"
            highlight = (
                "border: 2px solid #ff5722; "
                "background: #fff3e0; "
                "padding:10px; "
            )
        if predicted_author == idx:
            label += " (Predicted and True Author)" if correct else " (Predicted Author)"
            highlight = (
                "border:2px solid #228B22; "  # dark green border
                "background-color: #e6ffe6; "  # light green fill
                "padding:10px; "
            )
        candidate_htmls.append(f"""
    {label}
    {clean_text(cand_text)}
    """)
    return header_html, mystery_html, candidate_htmls
def toggle_custom_model(choice):
    """Show the free-text model-name input only when "Other" is selected."""
    show_custom = choice == "Other"
    return gr.update(visible=show_custom)