File size: 3,251 Bytes
20e7095
 
 
 
d18e6c8
a4cec6f
20e7095
1d6c7cd
20e7095
 
 
 
 
 
d18e6c8
20e7095
d18e6c8
1d6c7cd
 
d18e6c8
1d6c7cd
20e7095
1d6c7cd
20e7095
1d6c7cd
 
20e7095
 
a4cec6f
d18e6c8
 
 
20e7095
d18e6c8
a4cec6f
d18e6c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4cec6f
1d6c7cd
 
 
20e7095
 
a4cec6f
1d6c7cd
8277138
1d6c7cd
 
d18e6c8
a4cec6f
d18e6c8
a4cec6f
8277138
a4cec6f
20e7095
d18e6c8
 
 
a4cec6f
1d6c7cd
 
 
 
a4cec6f
20e7095
d18e6c8
 
 
1d6c7cd
d18e6c8
8277138
d18e6c8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository

# Module-level working copy of the annotation data. It is replaced by
# upload_csv() and save_changes(), and read by download_csv() and
# push_to_hub(). Starts out empty until a CSV is uploaded.
# NOTE(review): as a module global this looks like it is shared by every
# user of the running app — confirm whether per-session state is needed.
df = pd.DataFrame()

def upload_csv(file):
    """Load an uploaded CSV into the global working DataFrame.

    The file is read and validated *before* the global ``df`` is replaced,
    so a rejected upload never destroys data that was loaded earlier.

    Args:
        file: Value delivered by the ``gr.File`` input. May be ``None``
            (nothing selected), a filesystem path, or an object exposing
            the path as ``.name``.

    Returns:
        A ``(table_update, status_message)`` tuple: the update for the
        editable table (shown with the ``text``/``label`` columns on
        success, hidden on failure) and a status string for the UI.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # Accept both a plain path and a tempfile-like wrapper with ``.name``.
    csv_path = file if isinstance(file, (str, os.PathLike)) else file.name

    try:
        loaded = pd.read_csv(csv_path)
    except (
        OSError,
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as exc:
        return gr.update(visible=False), f"❌ Could not read CSV: {exc}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so the table shows blank cells.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded

    # Show the table and set status
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )

def save_changes(edited_table):
    """Replace the global working DataFrame with the edited table contents.

    Args:
        edited_table: Table data coming from the editable grid; anything
            ``pd.DataFrame`` accepts as its data argument.

    Returns:
        A status message for the UI.
    """
    global df
    column_names = ["text", "label"]
    df = pd.DataFrame(data=edited_table, columns=column_names)
    return "💾 Changes saved."

def download_csv():
    """Write the global working DataFrame to a CSV file and return its path.

    The file is written to ``annotated_data.csv`` relative to the current
    working directory, without the index column.

    Returns:
        The path of the written file, as a string.
    """
    # ``df`` is only read here, so no ``global`` declaration is required.
    target = "annotated_data.csv"
    df.to_csv(target, index=False)
    return target

def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the global working DataFrame as ``data.csv`` in a Hub dataset repo.

    The dataset repo is created if it does not exist yet, then the CSV is
    uploaded in a single commit over HTTP. No local git clone is made, so
    repeated pushes to the same repo do not depend on cleaning up a stale
    working directory, and the token is never written to disk.

    Args:
        repo_name: Target repo id in ``username/dataset-name`` form.
            Surrounding whitespace is ignored.
        hf_token: Hugging Face access token. A blank value is passed as
            ``None`` so the library can fall back to stored credentials.

    Returns:
        A status message for the UI: the dataset URL on success, or a
        message starting with ``❌`` describing what went wrong.
    """
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Please enter a repo name (username/dataset-name)."

    token = (hf_token or "").strip() or None

    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        # Upload the CSV from memory instead of cloning the repo locally.
        api.upload_file(
            path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
    except Exception as e:
        # UI boundary: report any failure as a status message, never crash.
        return f"❌ Push failed: {e}"

    return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"

# Build the annotation UI: upload row, editable table, status line,
# save/download row, and a collapsible "push to Hub" section.
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # Upload controls: file picker (restricted to .csv) plus an explicit
    # button that triggers loading.
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Editable text/label table. Created hidden; upload_csv() returns the
    # update that makes it visible once a valid file has been loaded.
    df_table = gr.Dataframe(
        headers=["text","label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False
    )
    # Read-only status line written by the upload and save handlers.
    status = gr.Textbox(label="Status", interactive=False)

    # Save / export controls; download_out receives the path returned by
    # download_csv().
    with gr.Row():
        save_btn     = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # Publishing controls, collapsed by default (open=False).
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input  = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn    = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # Event bindings: wire each button to its handler and to the
    # components the handler reads from (inputs) and writes to (outputs).
    upload_btn.click(upload_csv,     inputs=file_input,              outputs=[df_table, status])
    save_btn.click(  save_changes,   inputs=df_table,                outputs=status)
    download_btn.click(download_csv, outputs=download_out)
    push_btn.click(  push_to_hub,    inputs=[repo_input, token_input], outputs=push_status)

# Launch the app. This runs unconditionally at module level, i.e. also
# whenever this file is imported.
app.launch()