Suzana committed on
Commit
a4cec6f
·
verified ·
1 Parent(s): e8b5cf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -40
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import io
4
- from huggingface_hub import HfApi, HfFolder, Repository
5
  import os
 
6
 
7
  # Global state
8
  df = pd.DataFrame()
@@ -10,35 +10,25 @@ df = pd.DataFrame()
10
  def upload_csv(file):
11
  global df
12
  df = pd.read_csv(file.name)
13
-
14
  if "text" not in df.columns or "label" not in df.columns:
15
- return gr.update(visible=False), "CSV must contain 'text' and 'label' columns."
16
-
17
- # Fill label column if empty
18
  df["label"] = df["label"].fillna("")
19
-
20
- # Return the editable table
21
- return gr.Dataframe(
22
- value=df,
23
- headers=["text", "label"],
24
- interactive=True,
25
- label="Edit labels below"
26
- ), "File uploaded successfully."
27
 
28
- def save_edits(updated_table):
29
  global df
30
  df = pd.DataFrame(updated_table, columns=["text", "label"])
31
- return "Changes saved."
32
 
33
  def download_csv():
34
- # Create a downloadable CSV
35
- csv_bytes = df.to_csv(index=False).encode()
36
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
37
 
38
  def push_to_hub(repo_name, hf_token):
39
- # Authenticate and push to Hugging Face Hub
40
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
41
- local_path = f"./{repo_name}"
42
 
43
  if os.path.exists(local_path):
44
  os.system(f"rm -rf {local_path}")
@@ -46,35 +36,42 @@ def push_to_hub(repo_name, hf_token):
46
  api = HfApi()
47
  api.create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
48
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
 
49
  df.to_csv(f"{local_path}/data.csv", index=False)
50
  repo.push_to_hub()
51
 
52
- return f"Pushed to Hugging Face: {repo_url}"
 
 
 
 
53
 
54
- with gr.Blocks() as demo:
55
- gr.Markdown("## 🏷️ Label it! Text Labeling Tool")
56
-
57
  with gr.Row():
58
- csv_input = gr.File(label="Upload CSV", file_types=[".csv"])
59
  upload_btn = gr.Button("Upload")
60
 
61
- df_output = gr.Dataframe(headers=["text", "label"], interactive=True, visible=False)
62
- upload_status = gr.Textbox(visible=True, interactive=False)
 
 
 
 
 
 
63
 
 
 
64
  with gr.Row():
65
- save_btn = gr.Button("Save Changes")
66
- download_btn = gr.Button("Download CSV")
67
- download_file = gr.File(label="Download", interactive=False)
68
-
69
  with gr.Row():
70
- hf_repo = gr.Textbox(label="HF Dataset Repo (e.g. your-username/my-dataset)")
71
- hf_token = gr.Textbox(label="Hugging Face Token", type="password")
72
- push_btn = gr.Button("Push to Hugging Face Hub")
73
- push_status = gr.Textbox(interactive=False)
74
-
75
- upload_btn.click(fn=upload_csv, inputs=csv_input, outputs=[df_output, upload_status])
76
- save_btn.click(fn=save_edits, inputs=df_output, outputs=upload_status)
77
- download_btn.click(fn=download_csv, outputs=download_file)
78
- push_btn.click(fn=push_to_hub, inputs=[hf_repo, hf_token], outputs=push_status)
79
 
80
- demo.launch()
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import io
 
4
  import os
5
+ from huggingface_hub import HfApi, Repository
6
 
7
  # Global state
8
  df = pd.DataFrame()
 
10
  def upload_csv(file):
11
  global df
12
  df = pd.read_csv(file.name)
 
13
  if "text" not in df.columns or "label" not in df.columns:
14
+ return None, "CSV must have 'text' and 'label' columns."
 
 
15
  df["label"] = df["label"].fillna("")
16
+ return df[["text", "label"]], "✅ File uploaded. You can now edit labels."
 
 
 
 
 
 
 
17
 
18
+ def save_changes(updated_table):
19
  global df
20
  df = pd.DataFrame(updated_table, columns=["text", "label"])
21
+ return "Changes saved."
22
 
23
  def download_csv():
24
+ global df
25
+ csv_bytes = df.to_csv(index=False).encode("utf-8")
26
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
27
 
28
  def push_to_hub(repo_name, hf_token):
29
+ global df
30
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
31
+ local_path = f"./{repo_name.replace('/', '_')}"
32
 
33
  if os.path.exists(local_path):
34
  os.system(f"rm -rf {local_path}")
 
36
  api = HfApi()
37
  api.create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
38
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
39
+
40
  df.to_csv(f"{local_path}/data.csv", index=False)
41
  repo.push_to_hub()
42
 
43
+ return f" Successfully pushed to: {repo_url}"
44
+
45
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
46
+ gr.Markdown("# 🏷️ Label it! Text Annotation Tool")
47
+ gr.Markdown("Upload your CSV with `text` and `label` columns. Edit labels, save your work, and export or publish.")
48
 
 
 
 
49
  with gr.Row():
50
+ file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
51
  upload_btn = gr.Button("Upload")
52
 
53
+ dataframe = gr.Dataframe(
54
+ headers=["text", "label"],
55
+ label="📝 Annotate Labels Below",
56
+ interactive=True,
57
+ visible=False,
58
+ row_count=10,
59
+ col_count=(2, "fixed")
60
+ )
61
 
62
+ status = gr.Textbox(visible=True, label="Status", interactive=False)
63
+
64
  with gr.Row():
65
+ save_btn = gr.Button("💾 Save")
66
+ download_btn = gr.Button("⬇️ Download CSV")
67
+ download_file = gr.File(label="📥 Downloaded File")
68
+
69
  with gr.Row():
70
+ repo_input = gr.Textbox(label="📦 Hugging Face Dataset Repo (e.g. username/my-dataset)")
71
+ token_input = gr.Textbox(label="🔑 Hugging Face Token", type="password")
72
+ push_btn = gr.Button("🚀 Push to HF Hub")
73
+ push_status = gr.Textbox(label="Push Status", interactive=False)
 
 
 
 
 
74
 
75
+ upload_btn.click(fn=upload_csv, inputs=file_input, outputs=[dataframe, status])
76
+ save_btn.click(fn=save_changes, inputs=dataframe, outputs=status)
77
+ download_btn.click(fn=download_csv, outputs=download_file)