"""Label It! Mini-NER: a small Gradio app for token-level labelling.

Load a CSV with a ``text`` column (or the built-in sample sentences), edit the
per-token labels in a table, then export the tokens as-is or with IOB tags.
"""

from pathlib import Path

import gradio as gr
import pandas as pd

# Column layout shared by the token table, the in-memory store and the exports.
TOKEN_COLUMNS = ["sentence_id", "token", "label"]
# Default label given to every token; emitted unchanged in the IOB export.
OUTSIDE_LABEL = "O"

# Global store: one row per token, replaced by load_data() and save_edits().
token_df = pd.DataFrame()


def make_sample_data(n=100):
    """Return a DataFrame with ``n`` synthetic sentences in a ``text`` column.

    Sentence ``i`` combines the ``i``-th person, organisation and location,
    cycling through each list, so the output is deterministic.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]
    rows = []
    for i in range(n):
        person = people[i % len(people)]
        org = orgs[i % len(orgs)]
        loc = locs[i % len(locs)]
        rows.append({"text": f"{person} visited {org} in {loc}."})
    # Explicit columns keep the ``text`` column present even when n == 0.
    return pd.DataFrame(rows, columns=["text"])


# ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Read the uploaded CSV (or sample data) and tokenize it into the store.

    Each ``text`` cell is split on whitespace; every token becomes one row
    labelled ``"O"``. Missing (NaN) cells contribute no tokens but still
    consume a ``sentence_id`` so ids keep matching the CSV row order.

    Args:
        file: Value of the upload component: an object whose ``name``
            attribute is the CSV path, or a plain path. A falsy value
            selects :func:`make_sample_data` instead.

    Returns:
        A ``(table, status, actions_update)`` tuple: the token DataFrame
        (``None`` on failure), a status message, and a ``gr.update`` that
        shows or hides the action row.
    """
    global token_df
    if file:
        # Accept both a wrapper exposing ``.name`` and a bare path string.
        csv_path = getattr(file, "name", file)
        try:
            df = pd.read_csv(csv_path)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as exc:
            # Report unreadable files the same way as a missing column
            # instead of letting the exception escape the handler.
            return None, f"❌ Could not read CSV: {exc}", gr.update(visible=False)
    else:
        df = make_sample_data()

    if "text" not in df.columns:
        return None, "❌ Need a `text` column", gr.update(visible=False)

    records = []
    for sid, txt in enumerate(df["text"]):
        if pd.isna(txt):
            # Blank cells are read as NaN, which has no .split().
            continue
        # str() guards against cells pandas parsed as numbers.
        for tok in str(txt).split():
            records.append(
                {"sentence_id": sid, "token": tok, "label": OUTSIDE_LABEL}
            )
    # Explicit columns keep the table well-formed when no tokens were found.
    token_df = pd.DataFrame(records, columns=TOKEN_COLUMNS)
    return token_df, "✅ Loaded & tokenized", gr.update(visible=True)


def save_edits(tbl):
    """Replace the in-memory token store with the edited table contents.

    Args:
        tbl: Table data as delivered by the Dataframe component.

    Returns:
        A status message for the UI.
    """
    global token_df
    token_df = pd.DataFrame(tbl, columns=TOKEN_COLUMNS)
    return "💾 Saved"


def get_tokens_csv():
    """Write the token store to ``raw_tokens.csv`` and return its path."""
    path = Path("raw_tokens.csv")
    token_df.to_csv(path, index=False)
    return path


def _clean_label(value):
    """Return ``value`` as a stripped string; blank or missing becomes "O"."""
    if pd.isna(value):
        return OUTSIDE_LABEL
    return str(value).strip() or OUTSIDE_LABEL


def get_iob_csv():
    """Write the token store plus an ``iob`` column to ``ner_iob.csv``.

    Within a sentence, a labelled token gets ``I-<label>`` when the previous
    token of that sentence carried the same label and ``B-<label>``
    otherwise; ``"O"`` tokens stay ``"O"``. Cleared or blank label cells are
    treated as ``"O"`` so that an edited table cannot break the export.

    Returns:
        Path of the written CSV file.
    """
    iob_tags = []
    previous = {}  # sentence_id -> label of the last token seen, or None
    # .get() with an empty default keeps an unloaded (column-less) store safe.
    sentence_ids = token_df.get("sentence_id", ())
    labels = token_df.get("label", ())
    for sid, raw_label in zip(sentence_ids, labels):
        label = _clean_label(raw_label)
        if label == OUTSIDE_LABEL:
            iob_tags.append(OUTSIDE_LABEL)
            previous[sid] = None
        else:
            prefix = "I-" if previous.get(sid) == label else "B-"
            iob_tags.append(prefix + label)
            previous[sid] = label

    out = token_df.copy()
    out["iob"] = iob_tags
    path = Path("ner_iob.csv")
    out.to_csv(path, index=False)
    return path


# ────────────────────────── UI handlers ──────────────────────────
def _on_load(file):
    """Run load_data and reveal the token table only when loading succeeded."""
    table_value, message, actions_update = load_data(file)
    # The table starts hidden, so setting its value alone would leave it
    # invisible; toggle visibility together with the value.
    table_update = gr.update(value=table_value, visible=table_value is not None)
    return table_update, message, actions_update


def _download_tokens():
    """Export the raw tokens and reveal the matching file component."""
    return gr.update(value=str(get_tokens_csv()), visible=True)


def _download_iob():
    """Export the IOB-tagged tokens and reveal the matching file component."""
    return gr.update(value=str(get_iob_csv()), visible=True)


# ────────────────────────── UI ──────────────────────────
with gr.Blocks() as demo:
    # The title contains a line break after "Label It! ", written here as an
    # explicit escape so the string literal stays on one source line.
    gr.Markdown("# 🏷️ Label It! \nMini-NER")
    gr.Markdown("Step 1 – Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")

    status = gr.Textbox(label="Status", interactive=False)
    table = gr.Dataframe(
        headers=["sentence_id", "token", "label"],
        interactive=True,
        visible=False,
    )

    # action row
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("💾 Save Edits")
        dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
        dl_iob_btn = gr.Button("⬇️ Download IOB CSV")

    # Hidden until the corresponding export has been generated.
    hidden_tok = gr.File(label="Tokens CSV", visible=False)
    hidden_iob = gr.File(label="IOB CSV", visible=False)

    # Bindings
    load_btn.click(_on_load, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)
    dl_tok_btn.click(_download_tokens, outputs=hidden_tok)
    dl_iob_btn.click(_download_iob, outputs=hidden_iob)

    gr.Markdown("Step 2 – Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")

# Launch only when run as a script so importing this module has no side effect
# beyond building the UI.
if __name__ == "__main__":
    demo.launch()