File size: 2,773 Bytes
20e7095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8b5cf1
20e7095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import pandas as pd
import io
from huggingface_hub import HfApi, HfFolder, Repository
import os

# Global state
df = pd.DataFrame()

def upload_csv(file):
    """Load an uploaded CSV into the shared DataFrame and show it for editing.

    Args:
        file: The value delivered by the ``gr.File`` input. It may be ``None``
            (nothing selected), an object exposing the path as ``.name``, or
            a plain path string; all three are handled.

    Returns:
        A ``(table_update, status_message)`` pair for the Dataframe and the
        status Textbox. On any failure the table is hidden, the message
        explains why, and the previously loaded data is left untouched.
    """
    global df

    if file is None:
        return gr.update(visible=False), "Please select a CSV file first."

    # Accept both an object carrying the path in ``.name`` and a bare path.
    path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(path)
    except (OSError, ValueError) as err:
        # pandas' ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses, so this covers malformed and empty files.
        return gr.update(visible=False), f"Could not read CSV: {err}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "CSV must contain 'text' and 'label' columns."

    # Fill label column if empty
    loaded["label"] = loaded["label"].fillna("")

    # Only replace the shared state once the upload is known to be valid.
    df = loaded

    # The table component is created hidden, so it must be made visible
    # explicitly when there is data to edit.
    return (
        gr.update(
            value=loaded,
            visible=True,
            interactive=True,
            label="Edit labels below",
        ),
        "File uploaded successfully.",
    )

def save_edits(updated_table):
    """Store the edited table from the UI as the shared DataFrame.

    Args:
        updated_table: Table contents received from the Dataframe component.

    Returns:
        A confirmation message for the status box.
    """
    global df
    kept_columns = ["text", "label"]
    df = pd.DataFrame(data=updated_table, columns=kept_columns)
    return "Changes saved."

def download_csv(data=None):
    """Write the annotated data to a CSV file on disk and return its path.

    The File output component is given a path to a real file rather than an
    in-memory buffer, and the file is named ``annotated_data.csv`` so the
    download carries that name.

    Args:
        data: Optional DataFrame to export. When omitted, the shared
            module-level ``df`` is exported.

    Returns:
        The path of the written ``annotated_data.csv`` file, located in a
        freshly created temporary directory.
    """
    import tempfile  # local import: not among the file's top-level imports

    frame = df if data is None else data

    # A dedicated directory per export keeps the desired file name without
    # colliding with earlier downloads.
    export_dir = tempfile.mkdtemp(prefix="label_it_")
    csv_path = os.path.join(export_dir, "annotated_data.csv")
    frame.to_csv(csv_path, index=False)
    return csv_path

def push_to_hub(repo_name, hf_token):
    """Upload the current annotations as ``data.csv`` to a Hub dataset repo.

    The CSV is sent directly through the Hub HTTP API, so no local clone is
    created and no user-supplied text is ever passed to a shell.

    Args:
        repo_name: Dataset repo id, e.g. ``your-username/my-dataset``.
        hf_token: Hugging Face access token used for both repo creation and
            the upload.

    Returns:
        A status message: either a request to fill in the missing fields or
        a confirmation containing the dataset URL.
    """
    repo_id = (repo_name or "").strip()
    token = (hf_token or "").strip()
    if not repo_id or not token:
        return "Please provide both a dataset repo name and a Hugging Face token."

    repo_url = f"https://huggingface.co/datasets/{repo_id}"

    api = HfApi()
    # exist_ok=True makes repeated pushes to the same repo succeed.
    api.create_repo(repo_id=repo_id, token=token, repo_type="dataset", exist_ok=True)
    api.upload_file(
        path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
        path_in_repo="data.csv",
        repo_id=repo_id,
        repo_type="dataset",
        token=token,
    )

    return f"Pushed to Hugging Face: {repo_url}"

# Build the UI: upload row, editable table, save/download row, Hub push row.
with gr.Blocks() as demo:
    gr.Markdown("## 🏷️ Label it! Text Labeling Tool")

    with gr.Row():
        csv_input = gr.File(label="Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # The table starts hidden; upload_csv's first output updates it.
    df_output = gr.Dataframe(headers=["text", "label"], interactive=True, visible=False)
    # Shared status line for both the upload and the save actions.
    upload_status = gr.Textbox(visible=True, interactive=False)

    with gr.Row():
        save_btn = gr.Button("Save Changes")
        download_btn = gr.Button("Download CSV")
        download_file = gr.File(label="Download", interactive=False)

    with gr.Row():
        hf_repo = gr.Textbox(label="HF Dataset Repo (e.g. your-username/my-dataset)")
        hf_token = gr.Textbox(label="Hugging Face Token", type="password")
        push_btn = gr.Button("Push to Hugging Face Hub")
        push_status = gr.Textbox(interactive=False)

    # Event wiring: each button calls one handler defined above.
    upload_btn.click(fn=upload_csv, inputs=csv_input, outputs=[df_output, upload_status])
    save_btn.click(fn=save_edits, inputs=df_output, outputs=upload_status)
    download_btn.click(fn=download_csv, outputs=download_file)
    push_btn.click(fn=push_to_hub, inputs=[hf_repo, hf_token], outputs=push_status)

# Only start the server when run as a script, so importing this module
# (e.g. to reuse the handlers) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()