Suzana committed on
Commit
359816c
·
verified ·
1 Parent(s): 11b95d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -10,7 +10,7 @@ token_df = pd.DataFrame() # global store
10
 
11
  # ───────────────────────── token explode ───────────────────────
12
  def explode(df: pd.DataFrame) -> pd.DataFrame:
13
- """Return DataFrame(sentence_id, token, label='O')."""
14
  if "text" in df.columns:
15
  lines = df["text"].astype(str)
16
  else: # user / assistant dialogs
@@ -19,7 +19,7 @@ def explode(df: pd.DataFrame) -> pd.DataFrame:
19
  rows = []
20
  for sid, line in enumerate(lines, start=0): # ensure unique 0,1,2,...
21
  for tok in line.split():
22
- rows.append({"sentence_id": sid, "token": tok, "label": "O"})
23
  return pd.DataFrame(rows)
24
 
25
  # ───────────────────────── callbacks ───────────────────────────
@@ -42,7 +42,7 @@ def load_csv(file):
42
 
43
  def save_table(tbl):
44
  global token_df
45
- token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
46
  bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
47
  return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
48
 
@@ -54,7 +54,7 @@ def export_tokens():
54
  def export_iob():
55
  iob, prev = [], {}
56
  for _, r in token_df.iterrows():
57
- sid, lbl = r["sentence_id"], r["label"]
58
  if lbl == "O":
59
  iob.append("O"); prev[sid] = None
60
  else:
@@ -83,7 +83,7 @@ def push_to_hub(repo_id, token):
83
  with gr.Blocks() as demo:
84
  gr.Markdown("# 🏷️ Label It! Mini-NER")
85
 
86
- gr.Markdown("**Step 1** – upload CSV (`text` **or** `user`+`assistant`).")
87
 
88
  with gr.Row():
89
  csv_file = gr.File(file_types=[".csv"])
@@ -91,7 +91,7 @@ with gr.Blocks() as demo:
91
 
92
  status = gr.Textbox(interactive=False)
93
 
94
- tok_table = gr.Dataframe(headers=["sentence_id", "token", "label"],
95
  datatype=["number", "str", "str"],
96
  visible=False)
97
 
 
10
 
11
  # ───────────────────────── token explode ───────────────────────
12
  def explode(df: pd.DataFrame) -> pd.DataFrame:
13
+ """Return DataFrame(example_id, token, label='O')."""
14
  if "text" in df.columns:
15
  lines = df["text"].astype(str)
16
  else: # user / assistant dialogs
 
19
  rows = []
20
  for sid, line in enumerate(lines, start=0): # ensure unique 0,1,2,...
21
  for tok in line.split():
22
+ rows.append({"example_id": sid, "token": tok, "label": "O"})
23
  return pd.DataFrame(rows)
24
 
25
  # ───────────────────────── callbacks ───────────────────────────
 
42
 
43
  def save_table(tbl):
44
  global token_df
45
+ token_df = pd.DataFrame(tbl, columns=["example_id", "token", "label"])
46
  bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
47
  return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
48
 
 
54
  def export_iob():
55
  iob, prev = [], {}
56
  for _, r in token_df.iterrows():
57
+ sid, lbl = r["example_id"], r["label"]
58
  if lbl == "O":
59
  iob.append("O"); prev[sid] = None
60
  else:
 
83
  with gr.Blocks() as demo:
84
  gr.Markdown("# 🏷️ Label It! Mini-NER")
85
 
86
+ gr.Markdown("**Step 1** – upload CSV (columns: `text` **or** `user`+`assistant`).")
87
 
88
  with gr.Row():
89
  csv_file = gr.File(file_types=[".csv"])
 
91
 
92
  status = gr.Textbox(interactive=False)
93
 
94
+ tok_table = gr.Dataframe(headers=["example_id", "token", "label"],
95
  datatype=["number", "str", "str"],
96
  visible=False)
97