File size: 3,217 Bytes
20e7095 d18e6c8 a4cec6f 20e7095 d18e6c8 20e7095 1d6c7cd d18e6c8 1d6c7cd 20e7095 1d6c7cd 20e7095 1d6c7cd 20e7095 a4cec6f d18e6c8 20e7095 d18e6c8 a4cec6f d18e6c8 c505c35 d18e6c8 a4cec6f 1d6c7cd 20e7095 a4cec6f 1d6c7cd 8277138 1d6c7cd d18e6c8 a4cec6f d18e6c8 a4cec6f 8277138 a4cec6f 20e7095 d18e6c8 a4cec6f 1d6c7cd a4cec6f 20e7095 d18e6c8 c505c35 1d6c7cd d18e6c8 8277138 d18e6c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository
# Working copy of the dataset being annotated. It starts empty and is replaced
# by the handlers in this file that declare `global df`.
# NOTE(review): being module-level, this is presumably shared by every browser
# session served by this process — confirm that is intended.
df = pd.DataFrame()
def upload_csv(file):
    """Load an uploaded CSV into the shared ``df`` and show it for editing.

    The shared ``df`` is replaced only after the file has been read and
    validated, so a rejected upload leaves previously loaded data intact.

    Args:
        file: Value delivered by the file-upload input. May be ``None`` when
            nothing was selected, a path (``str`` / ``os.PathLike``), or an
            object exposing the path as ``.name``.

    Returns:
        A ``(table_update, status_message)`` pair: the update for the editable
        table and the text for the status box.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # Accept both a bare path and a temp-file wrapper carrying `.name`, since
    # which one arrives depends on the Gradio version / File `type` setting.
    csv_path = file if isinstance(file, (str, os.PathLike)) else file.name

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as exc:
        # pandas' parser/empty-data errors and decode errors are ValueErrors.
        return gr.update(visible=False), f"❌ Could not read CSV: {exc}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so the table shows blank cells.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded

    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels."
    )
def save_changes(edited_table):
    """Store the table contents from the UI as the new shared ``df``.

    Args:
        edited_table: Table data coming from the editable grid; anything
            ``pd.DataFrame`` accepts as its data argument.

    Returns:
        The confirmation message shown in the status box.
    """
    global df
    kept_columns = ["text", "label"]
    # Rebuild the frame from the edited data, restricted to the two columns.
    rebuilt = pd.DataFrame(data=edited_table, columns=kept_columns)
    df = rebuilt
    return "💾 Changes saved."
def download_csv():
    """Write the shared ``df`` to ``annotated_data.csv`` and return that path.

    Returns:
        The relative path of the CSV file just written (without the index
        column), for use as the value of the download file output.
    """
    global df
    export_name = "annotated_data.csv"
    df.to_csv(path_or_buf=export_name, index=False)
    return export_name
def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared ``df`` as ``data.csv`` in a Hugging Face dataset repo.

    The repo is created if it does not exist, then the CSV is uploaded
    straight from memory in a single commit. No local git clone is made, so
    repeated pushes do not depend on cleaning up a previous working directory.

    Args:
        repo_name: Target dataset repo id, e.g. ``username/dataset-name``.
        hf_token: Hugging Face access token used for both API calls.

    Returns:
        A status message: the dataset URL on success, or a message starting
        with ``❌`` describing what went wrong. Errors are never raised.
    """
    global df

    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Please enter a repo name (username/dataset-name)."

    token = (hf_token or "").strip()
    if not token:
        # Require an explicit token rather than letting the library fall back
        # to whatever credentials the hosting machine happens to have.
        return "❌ Please enter a Hugging Face token."

    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        # With no path argument, to_csv returns the CSV text.
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        api.upload_file(
            path_or_fileobj=csv_bytes,
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"
    except Exception as e:
        # UI boundary: report any Hub/network failure in the status box
        # instead of crashing the event handler.
        return f"❌ Push failed: {e}"
# Build the annotation UI; the component nesting below follows the `with` blocks.
with gr.Blocks(theme=gr.themes.Default()) as app:
    # --- Header ---
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # --- Upload controls ---
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # --- Editable table (created hidden) and status line ---
    df_table = gr.Dataframe(
        headers=["text", "label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    # --- Save / export controls ---
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # --- Hub publishing, collapsed by default ---
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # --- Event wiring: each button calls one handler defined in this file ---
    upload_btn.click(
        fn=upload_csv,
        inputs=file_input,
        outputs=[df_table, status],
    )
    save_btn.click(
        fn=save_changes,
        inputs=df_table,
        outputs=status,
    )
    download_btn.click(
        fn=download_csv,
        outputs=download_out,
    )
    push_btn.click(
        fn=push_to_hub,
        inputs=[repo_input, token_input],
        outputs=push_status,
    )

app.launch()
|