File size: 3,217 Bytes
20e7095
 
 
 
d18e6c8
a4cec6f
20e7095
 
 
 
 
 
 
d18e6c8
20e7095
1d6c7cd
 
d18e6c8
1d6c7cd
20e7095
1d6c7cd
20e7095
1d6c7cd
 
20e7095
 
a4cec6f
d18e6c8
 
 
20e7095
d18e6c8
a4cec6f
d18e6c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c505c35
d18e6c8
 
 
 
 
 
 
 
 
 
 
a4cec6f
1d6c7cd
 
 
20e7095
 
a4cec6f
1d6c7cd
8277138
1d6c7cd
 
d18e6c8
a4cec6f
d18e6c8
a4cec6f
8277138
a4cec6f
20e7095
d18e6c8
 
 
a4cec6f
1d6c7cd
 
 
 
a4cec6f
20e7095
d18e6c8
c505c35
1d6c7cd
d18e6c8
8277138
d18e6c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository

# Working copy of the annotated data. The callbacks in this file read or
# rebind this module-level name via ``global df``; it starts out empty.
df = pd.DataFrame()

def upload_csv(file):
    """Load an uploaded CSV into the shared DataFrame and show it for editing.

    The module-level ``df`` is only replaced once the file has been read and
    validated, so a rejected upload never discards previously loaded data.

    Args:
        file: Value delivered by the ``gr.File`` input: an object exposing
            the file path as ``.name``, a plain path string, or ``None`` when
            nothing has been selected.

    Returns:
        A ``(table_update, status_message)`` pair for the ``df_table`` and
        ``status`` outputs. On failure the table is hidden and the message
        explains what went wrong.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please select a CSV file first."

    # Accept both file-like wrappers (path in ``.name``) and bare path strings.
    csv_path = getattr(file, "name", file)
    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as exc:
        # pandas' parser / empty-data / decoding errors all derive from ValueError.
        return gr.update(visible=False), f"❌ Could not read CSV: {exc}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so the editor shows blank cells.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels."
    )

def save_changes(edited_table):
    """Replace the shared DataFrame with the editor's current contents.

    Args:
        edited_table: Tabular data received from the editable table
            component; it is rebuilt as a DataFrame restricted to the
            ``text`` and ``label`` columns.

    Returns:
        The confirmation message shown in the status box.
    """
    global df
    column_names = ["text", "label"]
    df = pd.DataFrame(data=edited_table, columns=column_names)
    return "💾 Changes saved."

def download_csv():
    """Write the shared DataFrame to ``annotated_data.csv`` and return its path.

    The file is written relative to the current working directory, without
    the index column.

    Returns:
        The relative path of the CSV that was written.
    """
    target = "annotated_data.csv"
    df.to_csv(target, index=False)
    return target

def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared DataFrame as ``data.csv`` in a Hub dataset repo.

    The dataset repo is created when it does not exist yet. The CSV is
    serialised in memory and sent with ``HfApi.upload_file``, so no local git
    clone is created and repeated pushes do not depend on cleaning up a
    working directory.

    Args:
        repo_name: Target repo id in ``username/dataset-name`` form.
        hf_token: Hugging Face access token. A blank value is passed on as
            ``None`` rather than as an empty string.

    Returns:
        A status message: the dataset URL on success, or a message starting
        with ``❌`` describing the failure.
    """
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Please enter a repo name (username/dataset-name)."

    token = hf_token or None
    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        api.upload_file(
            path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"

    except Exception as e:
        # UI boundary: surface any failure in the status box instead of raising.
        return f"❌ Push failed: {e}"

# Build the UI. Components are rendered in the order they are created inside
# the Blocks context, so the statement order below is the page layout.
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # Upload row: file picker plus an explicit button that triggers loading.
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Editable table; created hidden and made visible by upload_csv's
    # returned update once a valid CSV has been loaded.
    df_table = gr.Dataframe(
        headers=["text","label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False
    )
    status = gr.Textbox(label="Status", interactive=False)

    # Save / export row.
    with gr.Row():
        save_btn     = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # Publishing controls, collapsed by default.
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input  = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn    = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # Event wiring. Only the Save handler receives the table contents;
    # Download takes no inputs and Push takes only the repo name and token, so
    # both work from the module-level ``df`` as last set by Upload or Save.
    upload_btn.click(upload_csv,     inputs=file_input,              outputs=[df_table, status])
    save_btn.click(  save_changes,   inputs=df_table,               outputs=status)
    download_btn.click(download_csv, outputs=download_out)
    push_btn.click(  push_to_hub,    inputs=[repo_input, token_input], outputs=push_status)

# Start the Gradio server; this runs at module level, i.e. whenever the file is executed or imported.
app.launch()