File size: 4,278 Bytes
20e7095
 
 
 
d18e6c8
a4cec6f
3ea3aae
20e7095
3ea3aae
20e7095
 
3ea3aae
 
 
 
 
 
 
 
20e7095
 
 
 
d18e6c8
3ea3aae
1d6c7cd
 
d18e6c8
1d6c7cd
20e7095
1d6c7cd
20e7095
1d6c7cd
 
20e7095
 
a4cec6f
d18e6c8
 
 
20e7095
3ea3aae
 
 
 
 
 
 
 
 
 
 
 
 
d18e6c8
a4cec6f
d18e6c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ea3aae
d18e6c8
 
 
 
 
 
 
 
 
 
a4cec6f
1d6c7cd
 
3ea3aae
20e7095
 
a4cec6f
1d6c7cd
8277138
1d6c7cd
 
d18e6c8
a4cec6f
d18e6c8
a4cec6f
8277138
a4cec6f
20e7095
d18e6c8
 
3ea3aae
d18e6c8
3ea3aae
 
 
 
 
 
 
 
a4cec6f
1d6c7cd
 
3ea3aae
1d6c7cd
a4cec6f
20e7095
3ea3aae
d18e6c8
3ea3aae
1d6c7cd
3ea3aae
d18e6c8
8277138
d18e6c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository
import matplotlib.pyplot as plt

# Global DataFrame
# Module-level working copy of the annotated data. The handler functions in
# this file read and rebind it via `global df`; it starts out empty.
df = pd.DataFrame()

# List of free, recommended models (for future auto-labeling)
# In the code visible here the list is only used to populate the model
# dropdown (choices and default value); no labeling logic consumes it.
DEFAULT_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.2",
    "HuggingFaceH4/zephyr-7b-beta",
    "tiiuae/falcon-rw-1b",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
]

def upload_csv(file):
    """Load an uploaded CSV into the shared DataFrame and show it for editing.

    The file is parsed and validated *before* the module-level ``df`` is
    replaced, so a rejected upload never clobbers data that is already loaded.

    Args:
        file: Value delivered by the file-upload component. Either an object
            exposing the path as ``.name`` or a plain path string; ``None``
            when nothing has been selected.

    Returns:
        A ``(table_update, status_message)`` pair: the update for the
        editable table and the text for the status box.
    """
    global df
    if file is None:
        return gr.update(visible=False), "❌ Please select a CSV file first."

    # Accept both file wrappers (path in `.name`) and bare path strings.
    csv_path = getattr(file, "name", file)
    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as err:
        # pandas' parser/empty-data errors and decode errors are ValueError
        # subclasses; unreadable paths surface as OSError.
        return gr.update(visible=False), f"❌ Could not read CSV: {err}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so every row shows an editable cell.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels."
    )

def save_changes(edited_table):
    """Replace the shared DataFrame with the current contents of the table.

    Args:
        edited_table: Table data handed over by the editable table component;
            anything ``pd.DataFrame`` accepts as its data argument.

    Returns:
        A confirmation message for the status box.
    """
    global df
    kept_columns = ["text", "label"]
    # Only the two annotation columns are retained in the stored frame.
    df = pd.DataFrame(data=edited_table, columns=kept_columns)
    return "💾 Changes saved."

def download_csv():
    """Write the shared DataFrame to ``annotated_data.csv`` and return its path.

    The file is written relative to the current working directory, without
    the index column, and overwrites any previous export of the same name.

    Returns:
        The path of the written CSV file.
    """
    global df
    destination = "annotated_data.csv"
    df.to_csv(path_or_buf=destination, index=False)
    return destination

def visualize_distribution():
    """Build a bar chart of how often each label occurs in the shared ``df``.

    Returns:
        A matplotlib ``Figure`` with one bar per distinct label, or ``None``
        when there is nothing to plot (no data loaded, no ``label`` column,
        or every label value is missing).
    """
    global df
    if df.empty or "label" not in df.columns:
        return None

    counts = df["label"].value_counts()
    if counts.empty:
        # value_counts() drops missing values, so a column containing only
        # missing labels leaves nothing to draw.
        return None

    # Create the figure directly instead of through pyplot: pyplot keeps a
    # global reference to every figure it creates until it is explicitly
    # closed, which leaks memory when this handler runs repeatedly in a
    # long-lived server process.
    from matplotlib.figure import Figure

    fig = Figure()
    ax = fig.subplots()
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Label Distribution")
    ax.set_xlabel("Label")
    ax.set_ylabel("Count")
    # Lay out this specific figure rather than pyplot's "current" one.
    fig.tight_layout()
    return fig

def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared DataFrame as ``data.csv`` in a Hub dataset repo.

    The repository is created if it does not exist yet, then the CSV is
    uploaded straight from memory in a single commit. No local git clone is
    made, so there is no working directory to clean up between pushes and no
    dependency on a local git installation.

    Args:
        repo_name: Target dataset repository, e.g. ``username/dataset-name``.
        hf_token: Access token used for both calls. A blank value is passed
            on as ``None``.

    Returns:
        A status message: the dataset URL on success, or a message starting
        with ``❌ Push failed:`` describing what went wrong. Exceptions are
        reported through the return value rather than raised, because the
        result is shown directly in the UI.
    """
    global df
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Push failed: please enter a repo name (username/dataset-name)."
    if df.empty:
        return "❌ Push failed: no data to push — upload a CSV first."

    token = (hf_token or "").strip() or None
    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True
        )
        # Serialise in memory and upload the bytes directly.
        api.upload_file(
            path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data"
        )
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"

    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"❌ Push failed: {e}"

# Build the UI. Components are created in the order they appear here, and the
# event handlers are wired up at the end of the `with` block.
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with **text** + **label** columns, annotate in-place, then export, visualize, or publish.")

    # Upload controls: file picker plus an explicit button that triggers loading.
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # The table is created hidden; upload_csv returns an update that sets its
    # value and makes it visible after a successful upload.
    df_table = gr.Dataframe(
        headers=["text","label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False
    )
    status = gr.Textbox(label="Status", interactive=False)

    # Action buttons share one row with the two output components they fill.
    with gr.Row():
        save_btn     = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        visualize_btn= gr.Button("📊 Visualize Distribution")
        download_out = gr.File(label="📥 Downloaded File")
        viz_out      = gr.Plot(label="Label Distribution")

    # Placeholder selector: it is not bound to any event handler below.
    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="🤖 (Future) Auto-Label Model",
            choices=DEFAULT_MODELS,
            value=DEFAULT_MODELS[0]
        )

    # Collapsed-by-default section for publishing the data to the Hub.
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input  = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="🔑 HF Token", type="password")
        push_btn    = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # Bind events
    # download_csv and visualize_distribution take no inputs; they operate on
    # the module-level DataFrame, so edits must be saved first to be included.
    upload_btn.click(upload_csv,     inputs=file_input,              outputs=[df_table, status])
    save_btn.click(  save_changes,   inputs=df_table,                outputs=status)
    download_btn.click(download_csv, outputs=download_out)
    visualize_btn.click(visualize_distribution, outputs=viz_out)
    push_btn.click(  push_to_hub,    inputs=[repo_input, token_input], outputs=push_status)

# Start the app when the module is executed (runs unconditionally at import).
app.launch()