|
import io
import os
import tempfile

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, HfFolder, Repository
|
|
|
|
|
# Shared in-memory table holding the rows currently being labelled; the
# handlers in this module rebind it through ``global df``.
df: pd.DataFrame = pd.DataFrame()
|
|
|
def upload_csv(file):
    """Load an uploaded CSV into the shared table and show it for editing.

    Args:
        file: Value delivered by the ``gr.File`` input. Handled as either a
            plain path string or an object exposing the path as ``.name``;
            ``None`` when nothing has been uploaded.

    Returns:
        A ``(dataframe_update, status_message)`` pair for the table and
        status outputs. On any failure the table is hidden and the shared
        ``df`` keeps its previous contents.
    """
    global df

    if file is None:
        return gr.update(visible=False), "Please choose a CSV file first."

    # Accept both a bare path and a tempfile-like wrapper with ``.name``.
    csv_path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as err:
        # pandas' empty-file / parser errors and decode errors are ValueErrors.
        return gr.update(visible=False), f"Could not read CSV: {err}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "CSV must contain 'text' and 'label' columns."

    # Unlabelled rows become empty strings so they are editable as text.
    loaded["label"] = loaded["label"].fillna("")

    # Only replace the shared table once the upload is known to be valid.
    df = loaded

    table_update = gr.update(
        value=df,
        headers=["text", "label"],
        interactive=True,
        label="Edit labels below",
        # The table component starts hidden; reveal it on success.
        visible=True,
    )
    return table_update, "File uploaded successfully."
|
|
|
def save_edits(updated_table):
    """Replace the shared table with the edited rows from the UI.

    Args:
        updated_table: Current contents of the editable table.

    Returns:
        A confirmation message for the status box.
    """
    global df

    column_names = ["text", "label"]
    edited_rows = pd.DataFrame(updated_table, columns=column_names)
    df = edited_rows

    return "Changes saved."
|
|
|
def download_csv():
    """Write the shared table to a CSV file and return its path for download.

    Returns:
        Path of a freshly written ``annotated_data.csv``, suitable as the
        value of a ``gr.File`` output.
    """
    # A dedicated temp directory lets the file keep a friendly, fixed name
    # without clashing with earlier exports.
    export_dir = tempfile.mkdtemp(prefix="label_it_")
    export_path = os.path.join(export_dir, "annotated_data.csv")

    df.to_csv(export_path, index=False)
    return export_path
|
|
|
def push_to_hub(repo_name, hf_token):
    """Upload the shared table as ``data.csv`` to a Hugging Face dataset repo.

    Args:
        repo_name: Target dataset repo id, e.g. ``your-username/my-dataset``.
        hf_token: Hugging Face access token used for the API calls.

    Returns:
        A status message: a prompt when no repo name was given, otherwise
        the URL the data was pushed to. Errors raised by the Hub client
        propagate to the caller.
    """
    repo_id = (repo_name or "").strip()
    if not repo_id:
        # A blank name must never reach the Hub call or any filesystem path.
        return "Please provide a dataset repo name (e.g. your-username/my-dataset)."

    repo_url = f"https://huggingface.co/datasets/{repo_id}"
    # Pass None rather than an empty string when the token box was left blank.
    token = hf_token or None

    api = HfApi()
    api.create_repo(repo_id=repo_id, token=token, repo_type="dataset", exist_ok=True)

    # Upload the CSV bytes directly: no local clone, no shell commands and
    # no deletion of user-derived paths.
    api.upload_file(
        path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
        path_in_repo="data.csv",
        repo_id=repo_id,
        repo_type="dataset",
        token=token,
        commit_message="Update data.csv",
    )

    return f"Pushed to Hugging Face: {repo_url}"
|
|
|
# Build the page: upload row, editable table + status, save/download row,
# Hub push row, then wire each button to its handler.
with gr.Blocks() as demo:
    gr.Markdown("## 🏷️ Label it! Text Labeling Tool")

    # --- Upload -----------------------------------------------------------
    with gr.Row():
        csv_input = gr.File(file_types=[".csv"], label="Upload CSV")
        upload_btn = gr.Button("Upload")

    # Table stays hidden until an upload handler reveals it.
    df_output = gr.Dataframe(
        headers=["text", "label"],
        interactive=True,
        visible=False,
    )
    upload_status = gr.Textbox(interactive=False, visible=True)

    # --- Save / download ---------------------------------------------------
    with gr.Row():
        save_btn = gr.Button("Save Changes")
        download_btn = gr.Button("Download CSV")
        download_file = gr.File(interactive=False, label="Download")

    # --- Push to the Hugging Face Hub --------------------------------------
    with gr.Row():
        hf_repo = gr.Textbox(label="HF Dataset Repo (e.g. your-username/my-dataset)")
        hf_token = gr.Textbox(type="password", label="Hugging Face Token")
        push_btn = gr.Button("Push to Hugging Face Hub")
        push_status = gr.Textbox(interactive=False)

    # --- Event wiring: (handler, inputs, outputs) --------------------------
    upload_btn.click(upload_csv, csv_input, [df_output, upload_status])
    save_btn.click(save_edits, df_output, upload_status)
    download_btn.click(download_csv, None, download_file)
    push_btn.click(push_to_hub, [hf_repo, hf_token], push_status)

demo.launch()
|
|