Spaces:
Sleeping
Sleeping
File size: 3,045 Bytes
f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 9ed6d9a 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 f3b49b2 6b99f03 9ed6d9a f3b49b2 6b99f03 f3b49b2 6b99f03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import pandas as pd
from pathlib import Path
# Global in-memory store for the token table currently being labeled.
# It is created with its final schema so that code reading it before any
# data has been loaded still sees the expected columns instead of a
# column-less frame.
token_df = pd.DataFrame(columns=["sentence_id", "token", "label"])
def make_sample_data(n: int = 100) -> pd.DataFrame:
    """Build a small synthetic dataset for trying out the labeler.

    Args:
        n: Number of sentences to generate.

    Returns:
        A DataFrame with a single ``text`` column holding ``n`` sentences of
        the form ``"<person> visited <org> in <location>."``. The names cycle
        through fixed lists, so the sentences repeat. The ``text`` column is
        present even when ``n`` is 0.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]
    rows = [
        {
            "text": (
                # Cycle by each list's own length rather than a literal 5, so
                # editing one list cannot cause an IndexError.
                f"{people[i % len(people)]} visited "
                f"{orgs[i % len(orgs)]} in {locs[i % len(locs)]}."
            )
        }
        for i in range(n)
    ]
    # Naming the column explicitly keeps the schema stable for n == 0; an
    # empty list of dicts would otherwise produce a frame with no columns.
    return pd.DataFrame(rows, columns=["text"])
# ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Load sentences and split them into one-row-per-token records.

    Args:
        file: The uploaded CSV as delivered by the file input (an object
            exposing ``.name``, or a plain path), or a falsy value to fall
            back to the built-in sample data.

    Returns:
        A 3-tuple ``(table, status, actions_update)``: the token DataFrame
        (``None`` when the input has no ``text`` column), a status message,
        and a ``gr.update`` that shows or hides the action row.

    Side effects:
        On success, replaces the module-level ``token_df`` with the new
        token table, every token labeled ``"O"``.
    """
    global token_df
    if file:
        # Accept both a file wrapper exposing `.name` and a bare path string.
        df = pd.read_csv(getattr(file, "name", file))
    else:
        df = make_sample_data()
    if "text" not in df.columns:
        return None, "❌ Need a `text` column", gr.update(visible=False)

    records = []
    for sid, txt in enumerate(df["text"]):
        # Empty CSV cells are read as NaN (a float), which has no .split().
        # Such rows contribute no tokens; later rows keep their own index as
        # sentence_id.
        if pd.isna(txt):
            continue
        # str() also covers non-text cells such as numbers.
        for tok in str(txt).split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema intact when no tokens were produced.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
    return token_df, "✅ Loaded & tokenized", gr.update(visible=True)
def save_edits(tbl) -> str:
    """Keep the edited table in memory as the new module-level ``token_df``.

    Args:
        tbl: The edited table contents; anything ``pd.DataFrame`` accepts as
            data (for example a DataFrame or a list of rows) with the columns
            ``sentence_id``, ``token`` and ``label``.

    Returns:
        The status message to display.
    """
    global token_df
    token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
    return "💾 Saved"
def get_tokens_csv() -> Path:
    """Dump the current ``token_df`` to ``raw_tokens.csv`` and return its path.

    The file is written to the working directory without the index column.
    """
    csv_name = "raw_tokens.csv"
    token_df.to_csv(csv_name, index=False)
    return Path(csv_name)
def get_iob_csv() -> Path:
    """Write ``token_df`` plus an ``iob`` tag column to ``ner_iob.csv``.

    Each token's label is turned into an IOB tag: ``"O"`` stays ``"O"``; any
    other label becomes ``"I-<label>"`` when the previous token seen for the
    same ``sentence_id`` carried the same label, and ``"B-<label>"``
    otherwise. Missing, empty or whitespace-only labels are treated as
    ``"O"``, and surrounding whitespace is ignored when comparing labels.

    Returns:
        Path of the CSV written to the working directory (index column
        omitted). The original ``label`` column is written unchanged.
    """
    tags = []
    # sentence_id -> label of the previous token of that sentence, or None
    # when that token was outside any entity.
    last_label = {}
    for _, row in token_df.iterrows():
        sid, raw = row["sentence_id"], row["label"]
        # A cleared cell can arrive as NaN/None or as an empty string; without
        # this normalization it would yield a type-less "B-" tag or a
        # TypeError when concatenating.
        label = "" if pd.isna(raw) else str(raw).strip()
        if not label or label == "O":
            tags.append("O")
            last_label[sid] = None
        else:
            prefix = "I-" if last_label.get(sid) == label else "B-"
            tags.append(prefix + label)
            last_label[sid] = label

    out = token_df.copy()
    out["iob"] = tags
    path = "ner_iob.csv"
    out.to_csv(path, index=False)
    return Path(path)
# ────────────────────────── UI ──────────────────────────
def _load_and_reveal(file):
    """Run ``load_data`` and reveal the table only when it returned data.

    The table component starts hidden, and returning a bare value to it does
    not change its visibility, so the value is wrapped in an update that also
    sets ``visible``.
    """
    tokens, message, actions_update = load_data(file)
    table_update = gr.update(value=tokens, visible=tokens is not None)
    return table_update, message, actions_update


def _as_download(make_csv):
    """Wrap a CSV-writing function so its file is shown in a File output.

    The File outputs start hidden; the returned handler fills one with the
    generated file and makes it visible so the user can download it.
    """
    def handler():
        return gr.update(value=str(make_csv()), visible=True)

    return handler


with gr.Blocks() as demo:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("Step 1 — Upload a CSV with a `text` column (or leave blank for sample).")
    with gr.Row():
        file_in = gr.File(label="📂 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")
    status = gr.Textbox(label="Status", interactive=False)
    # Hidden until data is loaded; revealed by _load_and_reveal.
    table = gr.Dataframe(
        headers=["sentence_id", "token", "label"],
        interactive=True,
        visible=False,
    )
    # action row
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("💾 Save Edits")
        dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
        dl_iob_btn = gr.Button("⬇️ Download IOB CSV")
    # Download targets: hidden until the matching button produces a file.
    hidden_tok = gr.File(visible=False)
    hidden_iob = gr.File(visible=False)
    # Bindings
    load_btn.click(_load_and_reveal, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)
    dl_tok_btn.click(_as_download(get_tokens_csv), outputs=hidden_tok)
    dl_iob_btn.click(_as_download(get_iob_csv), outputs=hidden_iob)
    gr.Markdown("Step 2 — Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")
demo.launch()
|