Spaces:

Suzana
/

labelit-mini-ner

Sleeping

App Files Community

Suzana committed on May 28

Commit

6b99f03

verified ·

1 Parent(s): 9ed6d9a

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -73

app.py CHANGED Viewed

@@ -1,104 +1,75 @@
 import gradio as gr
 import pandas as pd
-# In-memory token DataFrame
 token_df = pd.DataFrame()
 def make_sample_data(n=100):
     people = ["Alice","Bob","Charlie","Diane","Eve"]
     orgs   = ["Acme","Globex","Initech","Umbrella","Stark"]
     locs   = ["Paris","NYC","London","Tokyo","Sydney"]
-    verbs  = ["visited","joined","founded","traveled to","met with"]
-    rows = []
-    for i in range(n):
-        p = people[i % len(people)]
-        v = verbs[i % len(verbs)]
-        o = orgs[i % len(orgs)]
-        l = locs[i % len(locs)]
-        rows.append({"text": f"{p} {v} {o} in {l}."})
     return pd.DataFrame(rows)
 def load_data(file):
     global token_df
-    # Load uploaded or sample
-    if file:
-        df = pd.read_csv(file.name)
-    else:
-        df = make_sample_data(100)
     if "text" not in df.columns:
-        return (
-            gr.update(visible=False),
-            "❌ CSV must contain a `text` column.",
-            gr.update(visible=False)
-        )
-    # Tokenize
-    records = []
-    for sid, txt in enumerate(df["text"]):
         for tok in txt.split():
-            records.append({"sentence_id": sid, "token": tok, "label": "O"})
-    token_df = pd.DataFrame(records)
-    return (
-        gr.update(value=token_df, visible=True),
-        f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
-        gr.update(visible=True),
-    )
-def save_edits(table):
     global token_df
-    token_df = pd.DataFrame(table, columns=["sentence_id","token","label"])
-    return "💾 Edits saved."
-def download_tokens():
-    token_df.to_csv("raw_tokens.csv", index=False)
-    return "raw_tokens.csv"
-def download_iob():
-    # Build IOB tags
-    iob, prev = [], {}
-    for _, r in token_df.iterrows():
-        sid, lbl = r["sentence_id"], r["label"]
-        if lbl == "O":
-            iob.append("O")
-            prev[sid] = None
-        else:
-            tag = ("I-" if prev.get(sid)==lbl else "B-") + lbl
-            iob.append(tag)
-            prev[sid] = lbl
-    out = token_df.copy()
-    out["iob"] = iob
-    out.to_csv("ner_iob.csv", index=False)
-    return "ner_iob.csv"
-with gr.Blocks() as app:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
-    gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")
     with gr.Row():
-        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
-        load_btn = gr.Button("Load Data")
     status = gr.Textbox(label="Status", interactive=False)
-    table  = gr.Dataframe(
-        headers=["sentence_id","token","label"],
-        interactive=True,
-        visible=False,
-        label="📝 Annotate Tokens"
-    )
-    # Action buttons: Save + Downloads
     with gr.Row(visible=False) as actions:
-        save_btn     = gr.Button("💾 Save Edits")
-        dl_tokens    = gr.DownloadButton(fn=download_tokens, file_name="raw_tokens.csv", label="⬇️ Download Tokens CSV")
-        dl_iob       = gr.DownloadButton(fn=download_iob,   file_name="ner_iob.csv",    label="⬇️ Download IOB CSV")
-    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
     save_btn.click(save_edits, inputs=table, outputs=status)
-    gr.Markdown("""
-    **Step 2:**
-    • Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
-    • Press **Save Edits** to lock your annotations.
-    • Download your **Tokens CSV** or **IOB CSV** with the buttons above.
-    """)
-app.launch()

 import gradio as gr
 import pandas as pd
+from pathlib import Path
+# Global store
 token_df = pd.DataFrame()
 def make_sample_data(n=100):
     people = ["Alice","Bob","Charlie","Diane","Eve"]
     orgs   = ["Acme","Globex","Initech","Umbrella","Stark"]
     locs   = ["Paris","NYC","London","Tokyo","Sydney"]
+    rows = [{"text": f"{people[i%5]} visited {orgs[i%5]} in {locs[i%5]}."} for i in range(n)]
     return pd.DataFrame(rows)
+# ────────────────────────── I/O helpers ──────────────────────────
 def load_data(file):
     global token_df
+    df = pd.read_csv(file.name) if file else make_sample_data()
     if "text" not in df.columns:
+        return None,"❌ Need a `text` column",gr.update(visible=False)
+    records=[]
+    for sid,txt in enumerate(df["text"]):
         for tok in txt.split():
+            records.append({"sentence_id":sid,"token":tok,"label":"O"})
+    token_df=pd.DataFrame(records)
+    return token_df,"✅ Loaded & tokenized",gr.update(visible=True)
+def save_edits(tbl):          # keep edits in memory
     global token_df
+    token_df=pd.DataFrame(tbl,columns=["sentence_id","token","label"])
+    return "💾 Saved"
+def get_tokens_csv():
+    path="raw_tokens.csv"; token_df.to_csv(path,index=False); return Path(path)
+def get_iob_csv():
+    iob,prev=[],{}
+    for _,r in token_df.iterrows():
+        sid,l=r["sentence_id"],r["label"]
+        if l=="O": iob.append("O"); prev[sid]=None
+        else: iob.append(("I-" if prev.get(sid)==l else "B-")+l); prev[sid]=l
+    out=token_df.copy(); out["iob"]=iob
+    path="ner_iob.csv"; out.to_csv(path,index=False); return Path(path)
+# ────────────────────────── UI ──────────────────────────
+with gr.Blocks() as demo:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
+    gr.Markdown("Step 1 – Upload a CSV with a `text` column (or leave blank for sample).")
     with gr.Row():
+        file_in  = gr.File(label="📁 Upload CSV", file_types=[".csv"])
+        load_btn = gr.Button("Load")
     status = gr.Textbox(label="Status", interactive=False)
+    table  = gr.Dataframe(headers=["sentence_id","token","label"], interactive=True, visible=False)
+    # action row
     with gr.Row(visible=False) as actions:
+        save_btn = gr.Button("💾 Save Edits")
+        dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
+        dl_iob_btn = gr.Button("⬇️ Download IOB CSV")
+    hidden_tok = gr.File(visible=False)
+    hidden_iob = gr.File(visible=False)
+    # Bindings
+    load_btn.click(load_data, inputs=file_in, outputs=[table,status,actions])
     save_btn.click(save_edits, inputs=table, outputs=status)
+    dl_tok_btn.click(lambda: get_tokens_csv(),  outputs=hidden_tok)
+    dl_iob_btn.click(lambda: get_iob_csv(),     outputs=hidden_iob)
+    gr.Markdown("Step 2 – Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")
+demo.launch()