File size: 3,045 Bytes
f3b49b2
 
6b99f03
f3b49b2
6b99f03
f3b49b2
 
 
 
9ed6d9a
 
6b99f03
f3b49b2
 
6b99f03
f3b49b2
 
6b99f03
f3b49b2
6b99f03
 
 
f3b49b2
6b99f03
 
 
f3b49b2
6b99f03
f3b49b2
6b99f03
 
f3b49b2
6b99f03
 
f3b49b2
6b99f03
 
 
 
 
 
 
 
f3b49b2
6b99f03
 
f3b49b2
6b99f03
f3b49b2
 
6b99f03
 
f3b49b2
 
6b99f03
f3b49b2
6b99f03
f3b49b2
6b99f03
 
 
f3b49b2
6b99f03
 
 
 
 
9ed6d9a
f3b49b2
6b99f03
 
 
 
f3b49b2
6b99f03
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import pandas as pd
from pathlib import Path

# Global store: the token table that the handlers in this module work on.
# It starts out empty; load_data and save_edits rebind it, while
# get_tokens_csv and get_iob_csv read it when exporting.
# NOTE(review): as a module-level global this is presumably shared by every
# session served by the process -- confirm that is acceptable.
token_df = pd.DataFrame()

def make_sample_data(n=100):
    """Build a small demo corpus of ``n`` templated sentences.

    Sentence ``i`` combines the person, organisation and location found at
    index ``i % 5`` of three fixed five-item lists, so the content repeats
    every five rows.

    Args:
        n: Number of sentences to generate.

    Returns:
        A DataFrame built from one ``{"text": sentence}`` record per sentence.
    """
    names = ("Alice", "Bob", "Charlie", "Diane", "Eve")
    companies = ("Acme", "Globex", "Initech", "Umbrella", "Stark")
    cities = ("Paris", "NYC", "London", "Tokyo", "Sydney")

    records = []
    for idx in range(n):
        slot = idx % 5
        sentence = f"{names[slot]} visited {companies[slot]} in {cities[slot]}."
        records.append({"text": sentence})
    return pd.DataFrame(records)

# ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Load sentences, split them into tokens and reset the global token table.

    Each sentence is split on whitespace; every token becomes one row with
    its sentence index and the default label ``"O"``.

    Args:
        file: The upload from the file input -- either an object exposing the
            path as ``.name`` or a plain path string. A falsy value (nothing
            uploaded) selects the built-in sample data.

    Returns:
        A 3-tuple for the ``[table, status, actions]`` outputs: the token
        DataFrame (``None`` on failure), a status message, and a visibility
        update for the action area (hidden on failure, shown on success).
    """
    global token_df
    if file:
        # Depending on the Gradio version/configuration the upload arrives as
        # a wrapper with the path in ``.name`` or as the path itself.
        csv_path = getattr(file, "name", file)
        try:
            df = pd.read_csv(csv_path)
        except (
            pd.errors.ParserError,
            pd.errors.EmptyDataError,
            UnicodeDecodeError,
            OSError,
        ) as err:
            # Report unreadable uploads through the status box, like the
            # missing-column case below, instead of raising in the handler.
            return None, f"❌ Could not read CSV: {err}", gr.update(visible=False)
    else:
        df = make_sample_data()

    if "text" not in df.columns:
        return None, "❌ Need a `text` column", gr.update(visible=False)

    # Blank cells are read as NaN and numeric cells as numbers; neither has
    # ``.split()``. Normalise to strings so such rows cannot crash the loop
    # (a blank cell simply contributes no tokens).
    sentences = df["text"].fillna("").astype(str)

    records = []
    for sid, txt in enumerate(sentences):
        for tok in txt.split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema stable even when no tokens were found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
    return token_df, "✅ Loaded & tokenized", gr.update(visible=True)

def save_edits(tbl):
    """Replace the global token table with the edited grid contents.

    Edits are kept in memory only; nothing is written to disk here.

    Args:
        tbl: Table data from the editable grid, in any form accepted by
            ``pd.DataFrame`` together with the three expected column names.

    Returns:
        A short status message for the status box.
    """
    global token_df
    token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
    # The original literal was a mis-encoded floppy-disk emoji.
    return "💾 Saved"

def get_tokens_csv():
    """Dump the current token table to ``raw_tokens.csv`` and return its path.

    The file name is relative, so the CSV is written to the current working
    directory. The index is not written.

    Returns:
        ``Path("raw_tokens.csv")``.
    """
    target = Path("raw_tokens.csv")
    token_df.to_csv(str(target), index=False)
    return target

def get_iob_csv():
    """Write the token table plus an IOB tag column to ``ner_iob.csv``.

    Rows are visited in table order. A token labelled ``"O"`` gets the tag
    ``"O"``. Any other label gets ``"I-<label>"`` when the previous token of
    the same sentence carried the same label, and ``"B-<label>"`` otherwise.
    Labels are stripped of surrounding whitespace first, and a missing or
    blank label is treated as ``"O"``.

    Returns:
        ``Path("ner_iob.csv")`` -- relative, so the file is written to the
        current working directory.
    """
    iob_tags = []
    # sentence_id -> label of the previous token in that sentence
    # (None once an "O" token has been seen).
    previous_label = {}

    for row in token_df.itertuples(index=False):
        sid = row.sentence_id
        raw = row.label
        # A cleared grid cell can arrive as None/NaN or "". Concatenating
        # "B-" with None/NaN raises TypeError and "" would yield a bare "B-",
        # so map all of them to the outside tag.
        if pd.isna(raw):
            label = "O"
        else:
            label = str(raw).strip() or "O"

        if label == "O":
            iob_tags.append("O")
            previous_label[sid] = None
        else:
            prefix = "I-" if previous_label.get(sid) == label else "B-"
            iob_tags.append(prefix + label)
            previous_label[sid] = label

    out = token_df.copy()
    out["iob"] = iob_tags
    path = "ner_iob.csv"
    out.to_csv(path, index=False)
    return Path(path)

# ────────────────────────── UI ──────────────────────────
# Page layout and event wiring for the labelling app.
with gr.Blocks() as demo:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("Step 1 – Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")

    status = gr.Textbox(label="Status", interactive=False)

    # Work area, hidden until data is loaded. load_data only toggles the
    # visibility of `actions`, so the table lives inside it: previously the
    # table was created with visible=False on its own and nothing ever
    # revealed it.
    with gr.Column(visible=False) as actions:
        table = gr.Dataframe(headers=["sentence_id", "token", "label"], interactive=True)

        # action row
        with gr.Row():
            save_btn = gr.Button("💾 Save Edits")
            dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
            dl_iob_btn = gr.Button("⬇️ Download IOB CSV")

    # Download slots: hidden while empty, revealed by the download buttons.
    hidden_tok = gr.File(label="Tokens CSV", visible=False)
    hidden_iob = gr.File(label="IOB CSV", visible=False)

    # Bindings
    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)

    # Setting only the value of a hidden File leaves it hidden, so the user
    # would never see a download link; update value and visibility together.
    dl_tok_btn.click(
        lambda: gr.update(value=str(get_tokens_csv()), visible=True),
        outputs=hidden_tok,
    )
    dl_iob_btn.click(
        lambda: gr.update(value=str(get_iob_csv()), visible=True),
        outputs=hidden_iob,
    )

    gr.Markdown("Step 2 – Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")

demo.launch()