Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ token_df = pd.DataFrame() # global store
|
|
10 |
|
11 |
# ───────────────────────── token explode ───────────────────────
|
12 |
def explode(df: pd.DataFrame) -> pd.DataFrame:
|
13 |
-
"""Return DataFrame(
|
14 |
if "text" in df.columns:
|
15 |
lines = df["text"].astype(str)
|
16 |
else: # user / assistant dialogs
|
@@ -19,7 +19,7 @@ def explode(df: pd.DataFrame) -> pd.DataFrame:
|
|
19 |
rows = []
|
20 |
for sid, line in enumerate(lines, start=0): # ensure unique 0,1,2,...
|
21 |
for tok in line.split():
|
22 |
-
rows.append({"
|
23 |
return pd.DataFrame(rows)
|
24 |
|
25 |
# ───────────────────────── callbacks ───────────────────────────
|
@@ -42,7 +42,7 @@ def load_csv(file):
|
|
42 |
|
43 |
def save_table(tbl):
|
44 |
global token_df
|
45 |
-
token_df = pd.DataFrame(tbl, columns=["
|
46 |
bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
|
47 |
return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
|
48 |
|
@@ -54,7 +54,7 @@ def export_tokens():
|
|
54 |
def export_iob():
|
55 |
iob, prev = [], {}
|
56 |
for _, r in token_df.iterrows():
|
57 |
-
sid, lbl = r["
|
58 |
if lbl == "O":
|
59 |
iob.append("O"); prev[sid] = None
|
60 |
else:
|
@@ -83,7 +83,7 @@ def push_to_hub(repo_id, token):
|
|
83 |
with gr.Blocks() as demo:
|
84 |
gr.Markdown("# 🏷️ Label It! Mini-NER")
|
85 |
|
86 |
-
gr.Markdown("**Step 1** – upload CSV (`text` **or** `user`+`assistant`).")
|
87 |
|
88 |
with gr.Row():
|
89 |
csv_file = gr.File(file_types=[".csv"])
|
@@ -91,7 +91,7 @@ with gr.Blocks() as demo:
|
|
91 |
|
92 |
status = gr.Textbox(interactive=False)
|
93 |
|
94 |
-
tok_table = gr.Dataframe(headers=["
|
95 |
datatype=["number", "str", "str"],
|
96 |
visible=False)
|
97 |
|
|
|
10 |
|
11 |
# ───────────────────────── token explode ───────────────────────
|
12 |
def explode(df: pd.DataFrame) -> pd.DataFrame:
|
13 |
+
"""Return DataFrame(example_id, token, label='O')."""
|
14 |
if "text" in df.columns:
|
15 |
lines = df["text"].astype(str)
|
16 |
else: # user / assistant dialogs
|
|
|
19 |
rows = []
|
20 |
for sid, line in enumerate(lines, start=0): # ensure unique 0,1,2,...
|
21 |
for tok in line.split():
|
22 |
+
rows.append({"example_id": sid, "token": tok, "label": "O"})
|
23 |
return pd.DataFrame(rows)
|
24 |
|
25 |
# ───────────────────────── callbacks ───────────────────────────
|
|
|
42 |
|
43 |
def save_table(tbl):
|
44 |
global token_df
|
45 |
+
token_df = pd.DataFrame(tbl, columns=["example_id", "token", "label"])
|
46 |
bad = token_df.loc[~token_df["label"].isin(LABELS), "label"].unique()
|
47 |
return "💾 Saved." if bad.size == 0 else f"⚠️ Unknown label(s): {', '.join(bad)}"
|
48 |
|
|
|
54 |
def export_iob():
|
55 |
iob, prev = [], {}
|
56 |
for _, r in token_df.iterrows():
|
57 |
+
sid, lbl = r["example_id"], r["label"]
|
58 |
if lbl == "O":
|
59 |
iob.append("O"); prev[sid] = None
|
60 |
else:
|
|
|
83 |
with gr.Blocks() as demo:
|
84 |
gr.Markdown("# 🏷️ Label It! Mini-NER")
|
85 |
|
86 |
+
gr.Markdown("**Step 1** – upload CSV (columns: `text` **or** `user`+`assistant`).")
|
87 |
|
88 |
with gr.Row():
|
89 |
csv_file = gr.File(file_types=[".csv"])
|
|
|
91 |
|
92 |
status = gr.Textbox(interactive=False)
|
93 |
|
94 |
+
tok_table = gr.Dataframe(headers=["example_id", "token", "label"],
|
95 |
datatype=["number", "str", "str"],
|
96 |
visible=False)
|
97 |
|