|
import gradio as gr |
|
import pandas as pd |
|
import io |
|
import os |
|
from pathlib import Path |
|
from huggingface_hub import HfApi, Repository |
|
|
|
# Working table shared by every handler in this module: upload_csv and
# save_changes rebind it, download_csv and push_to_hub read it.
# It is a module-level global, so there is one instance per process.
df: pd.DataFrame = pd.DataFrame()
|
|
|
def upload_csv(file):
    """Load an uploaded CSV into the shared table and reveal it for editing.

    Args:
        file: Value delivered by the ``gr.File`` input. May be an object
            exposing the temp-file path as ``.name``, a plain path string,
            or ``None`` when nothing has been uploaded.

    Returns:
        A ``(table_update, status_message)`` pair. On success the update
        carries the ``text``/``label`` columns and makes the table visible;
        on any failure it hides the table and the message explains why.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # Accept both a file wrapper (path in ``.name``) and a bare path string.
    csv_path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as exc:
        # pandas' ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses; report them instead of crashing the handler.
        return gr.update(visible=False), f"❌ Could not read CSV: {exc}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return (
            gr.update(visible=False),
            "❌ CSV must contain ‘text’ and ‘label’ columns.",
        )

    loaded["label"] = loaded["label"].fillna("")

    # Commit to the shared table only after validation, so a rejected upload
    # never clobbers data that was loaded (and possibly edited) earlier.
    df = loaded

    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )
|
|
|
def save_changes(edited_table):
    """Replace the shared table with the current contents of the edit grid.

    Args:
        edited_table: Tabular data from the editable grid (anything
            ``pd.DataFrame`` accepts as ``data``).

    Returns:
        A confirmation message for the status box.
    """
    global df

    kept_columns = ["text", "label"]
    # Rebuild the frame so that exactly these two columns are stored.
    df = pd.DataFrame(data=edited_table, columns=kept_columns)
    return "💾 Changes saved."
|
|
|
def download_csv():
    """Write the shared table to ``annotated_data.csv`` and return that path.

    The path is relative, so the file lands in the process's current working
    directory. The index column is not written.

    Returns:
        The path of the CSV file that was written.
    """
    export_path = "annotated_data.csv"
    # Only reads the module-level table, so no ``global`` declaration is needed.
    df.to_csv(export_path, index=False)
    return export_path
|
|
|
def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared table as ``data.csv`` in a Hugging Face dataset repo.

    The CSV is serialised in memory and sent with ``HfApi.upload_file``, so
    no local git clone or working directory is involved. This avoids having
    to delete a stale clone (whose ``.git`` directory cannot be removed with
    ``Path.unlink``) before each push.

    Args:
        repo_name: Target dataset repo id, e.g. ``username/dataset-name``.
            Surrounding whitespace is ignored.
        hf_token: Access token with write permission on the target repo.

    Returns:
        A status message: the dataset URL on success, otherwise a string
        starting with ``❌ Push failed:`` describing the problem. Errors are
        never raised to the caller because this is a UI event handler.
    """
    repo_id = (repo_name or "").strip()
    token = (hf_token or "").strip()

    if not repo_id:
        return "❌ Push failed: repo name is required (username/dataset-name)."
    if not token:
        # Require an explicit token rather than passing an empty value on and
        # leaving credential resolution to the library.
        return "❌ Push failed: a Hugging Face token is required."

    try:
        if df.empty:
            return "❌ Push failed: no data to push — upload a CSV first."

        payload = df.to_csv(index=False).encode("utf-8")

        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        api.upload_file(
            path_or_fileobj=payload,
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
    except Exception as e:  # UI boundary: surface any failure as a message
        return f"❌ Push failed: {e}"

    return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"
|
|
|
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from how the statements were grouped — confirm the layout.
with gr.Blocks(theme=gr.themes.Default()) as app:
    # --- Header -----------------------------------------------------------
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # --- Upload -----------------------------------------------------------
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # --- Editing grid (hidden until a valid CSV has been loaded) ----------
    df_table = gr.Dataframe(
        headers=["text", "label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    # --- Save / export ----------------------------------------------------
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # --- Publish ----------------------------------------------------------
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # --- Event wiring -----------------------------------------------------
    # Download and push read the module-level table, which is only updated by
    # the upload and save handlers; grid edits are not stored until Save runs.
    upload_btn.click(fn=upload_csv, inputs=file_input, outputs=[df_table, status])
    save_btn.click(fn=save_changes, inputs=df_table, outputs=status)
    download_btn.click(fn=download_csv, outputs=download_out)
    push_btn.click(fn=push_to_hub, inputs=[repo_input, token_input], outputs=push_status)

app.launch()
|
|