Spaces:

Suzana
/

labelit-mini-ner

Sleeping

App Files Files Community

Suzana committed 26 days ago

Commit

359816c

verified ·

1 Parent(s): 11b95d7

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -6

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ token_df = pd.DataFrame()  # global store
 # ───────────────────────── token explode ───────────────────────
 def explode(df: pd.DataFrame) -> pd.DataFrame:
-    """Return DataFrame(sentence_id, token, label='O')."""
     if "text" in df.columns:
         lines = df["text"].astype(str)
     else:  # user / assistant dialogs
@@ -19,7 +19,7 @@ def explode(df: pd.DataFrame) -> pd.DataFrame:
     rows = []
     for sid, line in enumerate(lines, start=0):      # ensure unique 0,1,2,...
         for tok in line.split():
-            rows.append({"sentence_id": sid, "token": tok, "label": "O"})
     return pd.DataFrame(rows)
 # ───────────────────────── callbacks ───────────────────────────
@@ -42,7 +42,7 @@ def load_csv(file):
 def save_table(tbl):
     global token_df
-    token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
     bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
     return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
@@ -54,7 +54,7 @@ def export_tokens():
 def export_iob():
     iob, prev = [], {}
     for _, r in token_df.iterrows():
-        sid, lbl = r["sentence_id"], r["label"]
         if lbl == "O":
             iob.append("O"); prev[sid] = None
         else:
@@ -83,7 +83,7 @@ def push_to_hub(repo_id, token):
 with gr.Blocks() as demo:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
-    gr.Markdown("**Step 1** – upload CSV (`text` **or** `user`+`assistant`).")
     with gr.Row():
         csv_file = gr.File(file_types=[".csv"])
@@ -91,7 +91,7 @@ with gr.Blocks() as demo:
     status = gr.Textbox(interactive=False)
-    tok_table = gr.Dataframe(headers=["sentence_id", "token", "label"],
                              datatype=["number", "str", "str"],
                              visible=False)

 # ───────────────────────── token explode ───────────────────────
 def explode(df: pd.DataFrame) -> pd.DataFrame:
+    """Return DataFrame(example_id, token, label='O')."""
     if "text" in df.columns:
         lines = df["text"].astype(str)
     else:  # user / assistant dialogs
     rows = []
     for sid, line in enumerate(lines, start=0):      # ensure unique 0,1,2,...
         for tok in line.split():
+            rows.append({"example_id": sid, "token": tok, "label": "O"})
     return pd.DataFrame(rows)
 # ───────────────────────── callbacks ───────────────────────────
 def save_table(tbl):
     global token_df
+    token_df = pd.DataFrame(tbl, columns=["example_id", "token", "label"])
     bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
     return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
 def export_iob():
     iob, prev = [], {}
     for _, r in token_df.iterrows():
+        sid, lbl = r["example_id"], r["label"]
         if lbl == "O":
             iob.append("O"); prev[sid] = None
         else:
 with gr.Blocks() as demo:
     gr.Markdown("# 🏷️ Label It! Mini-NER")
+    gr.Markdown("**Step 1** – upload CSV (columns: `text` **or** `user`+`assistant`).")
     with gr.Row():
         csv_file = gr.File(file_types=[".csv"])
     status = gr.Textbox(interactive=False)
+    tok_table = gr.Dataframe(headers=["example_id", "token", "label"],
                              datatype=["number", "str", "str"],
                              visible=False)