Suzana's picture
Update app.py
6b99f03 verified
raw
history blame
3.05 kB
import gradio as gr
import pandas as pd
from pathlib import Path
# Global store: the token table shared by every callback in this module.
# `load_data` and `save_edits` rebind it; `get_tokens_csv` and `get_iob_csv`
# read it when exporting. It starts as an empty, column-less DataFrame until
# data has been loaded.
token_df = pd.DataFrame()
def make_sample_data(n=100):
    """Return a DataFrame with ``n`` synthetic sentences in a ``text`` column.

    The sentences cycle through five fixed (person, organisation, location)
    triples, so row ``i`` and row ``i + 5`` contain the same text.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]

    rows = []
    for idx in range(n):
        slot = idx % 5  # same slot in all three lists keeps the triples aligned
        sentence = f"{people[slot]} visited {orgs[slot]} in {locs[slot]}."
        rows.append({"text": sentence})
    return pd.DataFrame(rows)
# ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Load sentences, split them into tokens and reset the global token table.

    Args:
        file: The uploaded CSV, either an object exposing its path as
            ``.name`` or a plain path string. A falsy value loads the
            built-in sample sentences instead.

    Returns:
        A 3-tuple ``(table, status, actions_update)``:
        the token DataFrame (``None`` on failure), a status message, and a
        ``gr.update`` that shows the action controls on success and hides
        them on failure.
    """
    global token_df

    if file:
        # Accept both a tempfile-like wrapper (``.name``) and a bare path.
        df = pd.read_csv(getattr(file, "name", file))
    else:
        df = make_sample_data()

    if "text" not in df.columns:
        return None, "❌ Need a `text` column", gr.update(visible=False)

    records = []
    for sid, txt in enumerate(df["text"]):
        # Empty CSV cells arrive as NaN (a float), which has no ``split``.
        if pd.isna(txt):
            continue
        # ``str`` also covers purely numeric cells parsed as int/float.
        for tok in str(txt).split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema stable even when no token was found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
    return token_df, "✅ Loaded & tokenized", gr.update(visible=True)
def save_edits(tbl):
    """Replace the in-memory token table with the edited grid contents.

    Edits are kept in memory only; nothing is written to disk here.

    Args:
        tbl: The edited table contents: anything ``pd.DataFrame`` accepts
            (a DataFrame or a list of rows) holding the ``sentence_id``,
            ``token`` and ``label`` columns.

    Returns:
        A short status message for the UI.
    """
    global token_df
    token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
    # The original literal was a mis-encoded floppy-disk emoji.
    return "💾 Saved"
def get_tokens_csv():
    """Write the current token table to ``raw_tokens.csv`` and return its path.

    The file is written to the working directory without the index column.
    """
    destination = Path("raw_tokens.csv")
    token_df.to_csv(destination, index=False)
    return destination
def _iob_tags(sentence_ids, labels):
    """Turn flat per-token labels into IOB tags, restarting at each sentence."""
    tags = []
    previous = {}  # sentence id -> label of the preceding token (None after "O")
    for sid, raw in zip(sentence_ids, labels):
        # A cleared cell comes back as NaN/None/""; treat it as "outside"
        # instead of failing on ``"B-" + nan``.
        label = "" if pd.isna(raw) else str(raw).strip()
        if label in ("", "O"):
            tags.append("O")
            previous[sid] = None
            continue
        # Same label as the previous token of this sentence continues the
        # entity; anything else starts a new one.
        prefix = "I-" if previous.get(sid) == label else "B-"
        tags.append(prefix + label)
        previous[sid] = label
    return tags


def get_iob_csv():
    """Export the token table with an added ``iob`` column to ``ner_iob.csv``.

    Consecutive tokens of one sentence that share a label form one entity:
    the first gets ``B-<label>``, the following ones ``I-<label>``. Tokens
    labelled ``O`` (or with a blank/missing label) get ``O``. Labels are
    stripped of surrounding whitespace before comparison.

    Returns:
        ``Path`` of the CSV written to the working directory.
    """
    out = token_df.copy()
    # Before any data is loaded the table has no columns at all.
    out["iob"] = [] if out.empty else _iob_tags(out["sentence_id"], out["label"])
    path = "ner_iob.csv"
    out.to_csv(path, index=False)
    return Path(path)
# ────────────────────────── UI ──────────────────────────
# Gradio interface: load a CSV -> edit labels in the table -> save / download.
with gr.Blocks() as demo:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("Step 1 – Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")

    status = gr.Textbox(label="Status", interactive=False)

    # `load_data` only toggles the visibility of `actions`; returning a
    # DataFrame for `table` updates its value but not its visibility. The
    # table therefore lives inside the toggled container so that it actually
    # appears once data is loaded (and disappears again on a failed load).
    with gr.Column(visible=False) as actions:
        table = gr.Dataframe(
            headers=["sentence_id", "token", "label"],
            interactive=True,
        )
        # action row
        with gr.Row():
            save_btn = gr.Button("💾 Save Edits")
            dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
            dl_iob_btn = gr.Button("⬇️ Download IOB CSV")
        # Download targets stay hidden until a file has been generated.
        hidden_tok = gr.File(label="Tokens CSV", visible=False)
        hidden_iob = gr.File(label="IOB CSV", visible=False)

    # Bindings
    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)
    # Reveal the file component together with its new value; a permanently
    # hidden component would leave the generated CSV unreachable.
    dl_tok_btn.click(
        lambda: gr.update(value=str(get_tokens_csv()), visible=True),
        outputs=hidden_tok,
    )
    dl_iob_btn.click(
        lambda: gr.update(value=str(get_iob_csv()), visible=True),
        outputs=hidden_iob,
    )

    gr.Markdown("Step 2 – Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")

demo.launch()