Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from pathlib import Path | |
# Global store: the working token table that the load/save/export handlers
# read and rebind. It starts empty but already carries the final schema, so an
# export made before any data is loaded still has the expected column headers.
# NOTE(review): this is module-level state; presumably it is shared by every
# session served from this process - confirm whether per-session state is needed.
token_df = pd.DataFrame(columns=["sentence_id", "token", "label"])
def make_sample_data(n=100):
    """Return a demo DataFrame of ``n`` templated sentences.

    Each row has a single ``text`` field of the form
    "<person> visited <org> in <location>.", cycling through five fixed
    people, organisations and locations in lockstep.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]

    sentences = []
    for i in range(n):
        slot = i % 5  # the same index picks person, org and location
        sentences.append(
            {"text": f"{people[slot]} visited {orgs[slot]} in {locs[slot]}."}
        )
    return pd.DataFrame(sentences)
# ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Load sentences, split them into tokens and publish the token table.

    Reads the uploaded CSV (or falls back to ``make_sample_data()`` when no
    file is given), splits every ``text`` cell on whitespace and stores one
    row per token in the module-level ``token_df`` with the default label
    ``"O"``.

    Args:
        file: The value of the upload widget. May be falsy (no upload), a
            path string, or an object exposing the path as ``.name``.

    Returns:
        A 3-tuple matching the ``[table, status, actions]`` outputs of the
        Load button: an update for the table, a status message, and a
        visibility update for the action row.
    """
    global token_df

    if file:
        # Accept both a tempfile-like object (``.name``) and a plain path.
        csv_path = getattr(file, "name", file)
        try:
            df = pd.read_csv(csv_path)
        except (OSError, ValueError) as err:
            # pandas' parser/empty-data errors and decode errors are all
            # ValueError subclasses; report them instead of crashing the UI.
            return (
                gr.update(value=None, visible=False),
                f"Could not read CSV: {err}",
                gr.update(visible=False),
            )
    else:
        df = make_sample_data()

    if "text" not in df.columns:
        return (
            gr.update(value=None, visible=False),
            "β Need a `text` column",
            gr.update(visible=False),
        )

    records = []
    for sid, txt in enumerate(df["text"]):
        if pd.isna(txt):
            # Empty cells are read as NaN; they contribute no tokens.
            continue
        # str() guards against numeric cells, which have no .split().
        for tok in str(txt).split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema even when no tokens were produced.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])

    # The table component is created hidden, so it must be revealed here;
    # returning only a value would leave it invisible.
    return (
        gr.update(value=token_df, visible=True),
        "β Loaded & tokenized",
        gr.update(visible=True),
    )
def save_edits(tbl):
    """Keep the edited grid in memory by rebinding the global ``token_df``.

    ``tbl`` is the table content handed over by the UI; it is rebuilt as a
    DataFrame with the columns ``sentence_id``, ``token`` and ``label``.
    Returns the status message to display.
    """
    global token_df
    column_names = ["sentence_id", "token", "label"]
    edited = pd.DataFrame(tbl, columns=column_names)
    token_df = edited
    return "πΎ Saved"
def get_tokens_csv():
    """Dump the current ``token_df`` to ``raw_tokens.csv`` and return its path.

    The file is written relative to the working directory, without the
    DataFrame index.
    """
    csv_name = "raw_tokens.csv"
    token_df.to_csv(csv_name, index=False)
    return Path(csv_name)
def _iob_tags(sentence_ids, labels):
    """Turn per-token entity labels into IOB tags, tracked per sentence."""
    tags = []
    # sentence_id -> entity label of that sentence's previous token
    # (None when the previous token was outside any entity).
    last_label = {}
    for sid, raw in zip(sentence_ids, labels):
        # Cleared cells may come back as NaN/None or as blank strings; hand
        # edits may also carry stray whitespace around the label.
        label = "" if pd.isna(raw) else str(raw).strip()
        if not label or label.upper() == "O":
            tags.append("O")
            last_label[sid] = None
            continue
        # Same label as the previous token of this sentence continues the
        # entity; anything else starts a new one.
        prefix = "I-" if last_label.get(sid) == label else "B-"
        tags.append(prefix + label)
        last_label[sid] = label
    return tags


def get_iob_csv():
    """Write ``token_df`` plus an ``iob`` column to ``ner_iob.csv``.

    A token whose label is ``O`` (any case), blank or missing is tagged
    ``O``. Other labels are tagged ``B-<label>`` when they start an entity
    and ``I-<label>`` when the previous token of the same sentence carries
    the same label.

    Returns:
        ``Path`` of the written CSV (relative to the working directory).
    """
    out = token_df.copy()
    if out.empty:
        # Nothing loaded yet (possibly no columns at all): emit only headers.
        out["iob"] = []
    else:
        out["iob"] = _iob_tags(out["sentence_id"], out["label"])
    path = "ner_iob.csv"
    out.to_csv(path, index=False)
    return Path(path)
# ────────────────────────── UI ──────────────────────────
# Page layout and event wiring for the labelling app.
with gr.Blocks() as demo:
    gr.Markdown("# π·οΈ Label It! Mini-NER")
    gr.Markdown("Step 1 β Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        file_in = gr.File(label="π Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")

    status = gr.Textbox(label="Status", interactive=False)
    table = gr.Dataframe(headers=["sentence_id", "token", "label"], interactive=True, visible=False)

    # action row (hidden until the load handler reveals it)
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("πΎ Save Edits")
        dl_tok_btn = gr.Button("β¬οΈ Download Tokens CSV")
        dl_iob_btn = gr.Button("β¬οΈ Download IOB CSV")
        # Download targets: kept hidden until a file has actually been built.
        hidden_tok = gr.File(label="Tokens CSV", visible=False)
        hidden_iob = gr.File(label="IOB CSV", visible=False)

    def _offer_download(build_csv):
        """Wrap a CSV builder so its output lands in a visible File widget."""
        def handler():
            # A File component that stays invisible gives the user nothing to
            # click, so reveal it together with the new value. The path is
            # passed as a plain string.
            return gr.update(value=str(build_csv()), visible=True)
        return handler

    # Bindings
    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)
    dl_tok_btn.click(_offer_download(get_tokens_csv), outputs=hidden_tok)
    dl_iob_btn.click(_offer_download(get_iob_csv), outputs=hidden_iob)

    gr.Markdown("Step 2 β Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")

demo.launch()