File size: 4,278 Bytes
20e7095 d18e6c8 a4cec6f 3ea3aae 20e7095 3ea3aae 20e7095 3ea3aae 20e7095 d18e6c8 3ea3aae 1d6c7cd d18e6c8 1d6c7cd 20e7095 1d6c7cd 20e7095 1d6c7cd 20e7095 a4cec6f d18e6c8 20e7095 3ea3aae d18e6c8 a4cec6f d18e6c8 3ea3aae d18e6c8 a4cec6f 1d6c7cd 3ea3aae 20e7095 a4cec6f 1d6c7cd 8277138 1d6c7cd d18e6c8 a4cec6f d18e6c8 a4cec6f 8277138 a4cec6f 20e7095 d18e6c8 3ea3aae d18e6c8 3ea3aae a4cec6f 1d6c7cd 3ea3aae 1d6c7cd a4cec6f 20e7095 3ea3aae d18e6c8 3ea3aae 1d6c7cd 3ea3aae d18e6c8 8277138 d18e6c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository
import matplotlib.pyplot as plt
# Module-level table shared by every callback below; starts out empty.
df = pd.DataFrame()

# Free, recommended models (reserved for future auto-labeling).
DEFAULT_MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.2",
    "HuggingFaceH4/zephyr-7b-beta",
    "tiiuae/falcon-rw-1b",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
def upload_csv(file):
    """Load an uploaded CSV into the shared DataFrame and show it for editing.

    Args:
        file: Value delivered by the file-upload input. Handled shapes:
            ``None`` (nothing uploaded), an object exposing the stored path
            as ``.name``, or a plain path string.

    Returns:
        A ``(table_update, status_message)`` pair for the editable table
        and the status textbox.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # NOTE(review): depending on the Gradio version/config the upload may be
    # a tempfile-like object or a bare path string; accept both.
    csv_path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as exc:
        # pandas' parser, empty-data and decoding errors are ValueError subclasses.
        return gr.update(visible=False), f"❌ Could not read CSV: {exc}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    loaded["label"] = loaded["label"].fillna("")  # unlabeled rows become ""

    # Only replace the shared table once the new file is known to be valid,
    # so a bad upload does not wipe the annotations already in progress.
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )
def save_changes(edited_table):
    """Replace the shared DataFrame with the table as edited in the UI.

    Args:
        edited_table: Current contents of the editable table; anything
            ``pd.DataFrame`` accepts (a DataFrame, a list of rows, or
            ``None`` for an empty table).

    Returns:
        A status message for the status textbox.
    """
    global df
    updated = pd.DataFrame(edited_table, columns=["text", "label"])
    # Cleared cells can come back as None/NaN. Store them as "" exactly like
    # upload_csv does, so unlabeled rows are treated the same before and
    # after a save (distribution plot, CSV export).
    updated["label"] = updated["label"].fillna("")
    df = updated
    return "💾 Changes saved."
def download_csv():
    """Write the shared DataFrame to ``annotated_data.csv`` and return that path.

    The file is written to the current working directory without the index
    column; the returned path feeds the download file component.
    """
    # Read-only access to the module-level ``df``; no ``global`` needed.
    export_name = "annotated_data.csv"
    df.to_csv(export_name, index=False)
    return export_name
def visualize_distribution():
    """Build a bar chart of how often each label occurs in the shared table.

    Returns:
        A matplotlib ``Figure`` with one bar per distinct label, or ``None``
        when there is nothing to plot (no data, no ``label`` column, or
        every label is missing).
    """
    # Function-scope import keeps this block self-contained. The figure is
    # created directly instead of through ``plt.subplots()``: pyplot keeps a
    # reference to every figure it creates until it is explicitly closed, so
    # each click would otherwise leak a figure in this long-running app.
    from matplotlib.figure import Figure

    if df.empty or "label" not in df.columns:
        return None

    counts = df["label"].value_counts()
    if counts.empty:
        # value_counts() drops missing values; an all-missing column leaves
        # nothing to draw.
        return None

    fig = Figure()
    ax = fig.subplots()
    counts.plot(kind="bar", ax=ax)
    ax.set_title("Label Distribution")
    ax.set_xlabel("Label")
    ax.set_ylabel("Count")
    fig.tight_layout()
    return fig
def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared DataFrame as ``data.csv`` in a Hub dataset repo.

    The repo is created if it does not exist yet, and the CSV is uploaded
    straight from memory over the HTTP API. No local git clone is made, so
    nothing has to be cleaned up between pushes and the token is never
    written to disk.

    Args:
        repo_name: Target repo id in ``username/dataset-name`` form.
        hf_token: Access token with write permission. When blank, ``None``
            is passed on so ``huggingface_hub`` can fall back to whatever
            credentials it finds on its own (presumably a cached login).

    Returns:
        A human-readable status message; failures are reported in the
        message rather than raised, because it is shown in a textbox.
    """
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Push failed: enter a repo name like username/dataset-name."
    if df.empty:
        return "❌ Push failed: no data to push — upload a CSV first."

    token = (hf_token or "").strip() or None

    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )
        api.upload_file(
            path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
    except Exception as e:
        # UI boundary: surface any Hub/network error to the user as text.
        return f"❌ Push failed: {e}"
    return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened — confirm the layout against the running app.
with gr.Blocks(theme=gr.themes.Default()) as app:
    # --- Header -----------------------------------------------------------
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with **text** + **label** columns, annotate in-place, then export, visualize, or publish.")

    # --- Upload -----------------------------------------------------------
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Hidden until a valid CSV has been loaded by upload_csv.
    df_table = gr.Dataframe(
        headers=["text", "label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    # --- Save / export / visualize -----------------------------------------
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        visualize_btn = gr.Button("📊 Visualize Distribution")

    download_out = gr.File(label="📥 Downloaded File")
    viz_out = gr.Plot(label="Label Distribution")

    # --- Model picker (not wired to any callback yet) -----------------------
    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="🤖 (Future) Auto-Label Model",
            choices=DEFAULT_MODELS,
            value=DEFAULT_MODELS[0],
        )

    # --- Publish ------------------------------------------------------------
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="🔑 HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # --- Event wiring -------------------------------------------------------
    upload_btn.click(fn=upload_csv, inputs=file_input, outputs=[df_table, status])
    save_btn.click(fn=save_changes, inputs=df_table, outputs=status)
    download_btn.click(fn=download_csv, outputs=download_out)
    visualize_btn.click(fn=visualize_distribution, outputs=viz_out)
    push_btn.click(fn=push_to_hub, inputs=[repo_input, token_input], outputs=push_status)

app.launch()
|