Suzana committed on
Commit
6b99f03
·
verified ·
1 Parent(s): 9ed6d9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -73
app.py CHANGED
@@ -1,104 +1,75 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
- # In-memory token DataFrame
5
  token_df = pd.DataFrame()
6
 
7
  def make_sample_data(n=100):
8
  people = ["Alice","Bob","Charlie","Diane","Eve"]
9
  orgs = ["Acme","Globex","Initech","Umbrella","Stark"]
10
  locs = ["Paris","NYC","London","Tokyo","Sydney"]
11
- verbs = ["visited","joined","founded","traveled to","met with"]
12
- rows = []
13
- for i in range(n):
14
- p = people[i % len(people)]
15
- v = verbs[i % len(verbs)]
16
- o = orgs[i % len(orgs)]
17
- l = locs[i % len(locs)]
18
- rows.append({"text": f"{p} {v} {o} in {l}."})
19
  return pd.DataFrame(rows)
20
 
 
21
  def load_data(file):
22
  global token_df
23
- # Load uploaded or sample
24
- if file:
25
- df = pd.read_csv(file.name)
26
- else:
27
- df = make_sample_data(100)
28
  if "text" not in df.columns:
29
- return (
30
- gr.update(visible=False),
31
- "❌ CSV must contain a `text` column.",
32
- gr.update(visible=False)
33
- )
34
- # Tokenize
35
- records = []
36
- for sid, txt in enumerate(df["text"]):
37
  for tok in txt.split():
38
- records.append({"sentence_id": sid, "token": tok, "label": "O"})
39
- token_df = pd.DataFrame(records)
40
- return (
41
- gr.update(value=token_df, visible=True),
42
- f"βœ… Loaded {len(df)} sentences β†’ {len(token_df)} tokens.",
43
- gr.update(visible=True),
44
- )
45
 
46
- def save_edits(table):
47
  global token_df
48
- token_df = pd.DataFrame(table, columns=["sentence_id","token","label"])
49
- return "πŸ’Ύ Edits saved."
50
 
51
- def download_tokens():
52
- token_df.to_csv("raw_tokens.csv", index=False)
53
- return "raw_tokens.csv"
54
 
55
- def download_iob():
56
- # Build IOB tags
57
- iob, prev = [], {}
58
- for _, r in token_df.iterrows():
59
- sid, lbl = r["sentence_id"], r["label"]
60
- if lbl == "O":
61
- iob.append("O")
62
- prev[sid] = None
63
- else:
64
- tag = ("I-" if prev.get(sid)==lbl else "B-") + lbl
65
- iob.append(tag)
66
- prev[sid] = lbl
67
- out = token_df.copy()
68
- out["iob"] = iob
69
- out.to_csv("ner_iob.csv", index=False)
70
- return "ner_iob.csv"
71
 
72
- with gr.Blocks() as app:
 
73
  gr.Markdown("# 🏷️ Label It! Mini-NER")
74
- gr.Markdown("**Step 1:** Upload a CSV with a `text` column (or leave blank for sample).")
75
 
76
  with gr.Row():
77
- file_in = gr.File(label="πŸ“ Upload CSV", file_types=[".csv"])
78
- load_btn = gr.Button("Load Data")
79
 
80
  status = gr.Textbox(label="Status", interactive=False)
81
- table = gr.Dataframe(
82
- headers=["sentence_id","token","label"],
83
- interactive=True,
84
- visible=False,
85
- label="πŸ“ Annotate Tokens"
86
- )
87
 
88
- # Action buttons: Save + Downloads
89
  with gr.Row(visible=False) as actions:
90
- save_btn = gr.Button("πŸ’Ύ Save Edits")
91
- dl_tokens = gr.DownloadButton(fn=download_tokens, file_name="raw_tokens.csv", label="⬇️ Download Tokens CSV")
92
- dl_iob = gr.DownloadButton(fn=download_iob, file_name="ner_iob.csv", label="⬇️ Download IOB CSV")
93
 
94
- load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
 
 
 
 
95
  save_btn.click(save_edits, inputs=table, outputs=status)
96
 
97
- gr.Markdown("""
98
- **Step 2:**
99
- β€’ Click into the **label** column and type one of: `PER`, `ORG`, `LOC`, or leave as `O`.
100
- β€’ Press **Save Edits** to lock your annotations.
101
- β€’ Download your **Tokens CSV** or **IOB CSV** with the buttons above.
102
- """)
103
 
104
- app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ from pathlib import Path
4
 
5
# In-memory store for the current annotation table: one row per token.
# load_data() and save_edits() rebind it; the CSV export helpers read it.
# It starts with the final column schema so readers never see a frame
# without columns, even before any data has been loaded.
# NOTE(review): as a module global this is presumably shared by every
# session served by the same process — confirm that is acceptable.
token_df = pd.DataFrame(columns=["sentence_id", "token", "label"])
7
 
def make_sample_data(n=100):
    """Build a demo DataFrame holding ``n`` synthetic sentences.

    Every row has a single ``text`` column shaped like
    ``"<person> visited <org> in <location>."``.  Person, organisation and
    location are all selected with the same index (``i % 5``), so the output
    cycles through five distinct sentences.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme", "Globex", "Initech", "Umbrella", "Stark"]
    locs = ["Paris", "NYC", "London", "Tokyo", "Sydney"]

    rows = []
    for i in range(n):
        k = i % 5  # one shared index drives all three lists
        rows.append({"text": f"{people[k]} visited {orgs[k]} in {locs[k]}."})
    return pd.DataFrame(rows)
14
 
15
+ # ────────────────────────── I/O helpers ──────────────────────────
def load_data(file):
    """Load sentences, split them into tokens and publish the token table.

    Args:
        file: The value of the upload component, or a falsy value to fall
            back to ``make_sample_data()``.  Either an object exposing the
            path as ``.name`` or a plain path is accepted.

    Returns:
        A 3-tuple matching the ``[table, status, actions]`` outputs:
        an update for the table, a status message, and an update for the
        action row.  On success the table update carries the new value AND
        ``visible=True``; the table component is created hidden, so returning
        only the DataFrame would leave it invisible.

    Side effects:
        Rebinds the module-level ``token_df`` on success.
    """
    global token_df

    if file:
        # NOTE(review): depending on the Gradio version / `type` setting the
        # upload may arrive as a path string instead of an object with
        # `.name`; getattr covers both — confirm against the pinned version.
        df = pd.read_csv(getattr(file, "name", file))
    else:
        df = make_sample_data()

    if "text" not in df.columns:
        return gr.update(visible=False), "❌ Need a `text` column", gr.update(visible=False)

    records = []
    for sid, txt in enumerate(df["text"]):
        if pd.isna(txt):
            # Empty CSV cells are read as NaN; they contribute no tokens.
            continue
        # str() keeps purely numeric cells from crashing on .split().
        for tok in str(txt).split():
            records.append({"sentence_id": sid, "token": tok, "label": "O"})

    # Explicit columns keep the schema stable even when no token was found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
    return (
        gr.update(value=token_df, visible=True),
        "✅ Loaded & tokenized",
        gr.update(visible=True),
    )
 
 
 
 
27
 
28
def save_edits(tbl):
    """Keep the edited annotation table in memory.

    Args:
        tbl: The current table contents; anything ``pd.DataFrame`` accepts
            as data for the three columns ``sentence_id``, ``token`` and
            ``label``.

    Returns:
        A short status message for the status textbox.

    Side effects:
        Rebinds the module-level ``token_df`` to the new table.
    """
    global token_df
    token_df = pd.DataFrame(tbl, columns=["sentence_id", "token", "label"])
    # The floppy-disk emoji was stored mis-encoded ("πŸ’Ύ"); restored here.
    return "💾 Saved"
32
 
33
def get_tokens_csv():
    """Write the current ``token_df`` to ``raw_tokens.csv`` and return its path.

    The file is written relative to the current working directory, without
    the index column.  The return value is a ``pathlib.Path``.
    """
    csv_path = Path("raw_tokens.csv")
    token_df.to_csv(csv_path, index=False)
    return csv_path
 
35
 
36
def _iob_tags(sentence_ids, labels):
    """Turn per-token labels into IOB tags, tracking the last label per sentence."""
    tags = []
    last_label = {}  # sentence_id -> label of the previous token, None after "O"
    for sid, raw in zip(sentence_ids, labels):
        # A cleared cell (NaN/None/blank) means "outside"; stray whitespace
        # and lowercase input are normalised so "o" / " per " still work.
        label = "O" if pd.isna(raw) else str(raw).strip().upper()
        if label in ("", "O"):
            tags.append("O")
            last_label[sid] = None
        else:
            prefix = "I-" if last_label.get(sid) == label else "B-"
            tags.append(prefix + label)
            last_label[sid] = label
    return tags


def get_iob_csv():
    """Write ``token_df`` plus an ``iob`` column to ``ner_iob.csv``.

    A token gets ``I-<label>`` when the previous token of the same sentence
    carried the same label, ``B-<label>`` otherwise, and ``O`` when it is
    unlabelled.  The stored ``label`` column is written unchanged; only the
    derived ``iob`` column uses the normalised label.

    Returns:
        ``pathlib.Path`` of the written file (relative to the working
        directory).
    """
    out = token_df.copy()
    if out.empty:
        # Nothing to tag; also covers a frame that has no columns yet.
        out["iob"] = []
    else:
        out["iob"] = _iob_tags(out["sentence_id"], out["label"])
    path = Path("ner_iob.csv")
    out.to_csv(path, index=False)
    return path
 
 
 
 
 
 
 
 
44
 
45
+ # ────────────────────────── UI ──────────────────────────
46
# Layout: upload row -> status line -> editable token table -> action row.
# The table and the action row start hidden and are revealed by load_data's
# return values.
with gr.Blocks() as demo:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("Step 1 – Upload a CSV with a `text` column (or leave blank for sample).")

    with gr.Row():
        # NOTE(review): the emoji in this label was mis-encoded in the
        # source ("πŸ“"); the folder icon is assumed — confirm.
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load")

    status = gr.Textbox(label="Status", interactive=False)
    table = gr.Dataframe(headers=["sentence_id", "token", "label"], interactive=True, visible=False)

    # action row
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("💾 Save Edits")
        dl_tok_btn = gr.Button("⬇️ Download Tokens CSV")
        dl_iob_btn = gr.Button("⬇️ Download IOB CSV")

    # Download targets.  They start hidden and are revealed by the click
    # handlers below; left permanently hidden, the generated files could
    # never be reached from the page.
    hidden_tok = gr.File(label="Tokens CSV", visible=False)
    hidden_iob = gr.File(label="IOB CSV", visible=False)

    # Bindings
    load_btn.click(load_data, inputs=file_in, outputs=[table, status, actions])
    save_btn.click(save_edits, inputs=table, outputs=status)

    # Each handler writes the CSV, then hands its path to the File component
    # together with visible=True.  str() passes a plain path string.
    dl_tok_btn.click(
        lambda: gr.update(value=str(get_tokens_csv()), visible=True),
        outputs=hidden_tok,
    )
    dl_iob_btn.click(
        lambda: gr.update(value=str(get_iob_csv()), visible=True),
        outputs=hidden_iob,
    )

    gr.Markdown("Step 2 – Edit **label** cells (`PER`,`ORG`,`LOC`, or `O`), then Save/Download.")

demo.launch()