# commit-labeling / app.py
# Petr Tsvetkov
# Add .gitignore file and main application script
# 1b24b13
# raw
# history blame
# 4.79 kB
import uuid
import gradio as gr
from datasets import load_dataset
# Name of the dataset configuration passed to load_dataset below.
configuration = "commitchronicle-py-long" # select a configuration
# Load the "test" split of JetBrains-Research/lca-cmg for the chosen
# configuration; cache_dir="data" is forwarded to load_dataset unchanged.
dataset = load_dataset("JetBrains-Research/lca-cmg",
                       configuration,
                       split="test",
                       cache_dir="data")
# Number of records in the loaded split; used below as the upper bound for
# sample indices and as the slider maximum.
n_samples = len(dataset)
# Gradio CSV logger; it is bound to the feedback components later in this
# file via saver.setup(...) and written to via saver.flag(...).
saver = gr.CSVLogger()
def get_github_link(repo, hash):
    """Build the GitHub URL of a single commit.

    Args:
        repo: Value placed directly after ``https://github.com/``.
        hash: Value placed directly after ``/commit/``.

    Returns:
        The string ``https://github.com/<repo>/commit/<hash>``.
    """
    return "https://github.com/{}/commit/{}".format(repo, hash)
def update_commit_view(sample_ind):
    """Return the values that populate the commit view for one sample.

    Args:
        sample_ind: Index of the record in ``dataset`` to display.

    Returns:
        A 5-tuple ``(github_link_md, diff_json, commit_msg, repo_val,
        hash_val)`` built from the record's ``repo``, ``hash``, ``mods`` and
        ``message`` fields. When ``sample_ind`` is at or past ``n_samples``
        the tuple holds five ``None`` placeholders instead.
    """
    if sample_ind >= n_samples:
        # Previously this returned a bare None, which made every caller that
        # does `(x,) + update_commit_view(...)` raise TypeError. Returning one
        # placeholder per view value keeps the return shape constant.
        return (None,) * 5

    record = dataset[sample_ind]
    repo_val = record['repo']
    hash_val = record['hash']
    github_link_md = f"[See the commit on GitHub]({get_github_link(repo_val, hash_val)})"
    diff_json = record['mods']
    commit_msg = record['message']
    return github_link_md, diff_json, commit_msg, repo_val, hash_val
def next_sample(current_sample_ind):
    """Advance to the next sample and return the new index plus its view.

    Args:
        current_sample_ind: Index of the sample currently shown.

    Returns:
        A 6-tuple: the (possibly incremented) sample index followed by the
        five commit-view values from ``update_commit_view``. Once the index
        has reached ``n_samples`` it is no longer incremented and the five
        view values are ``None``.
    """
    empty_view = (None,) * 5  # one placeholder per commit-view value

    # `>=` rather than `==`: an index that is already past the end must not
    # be incremented further.
    if current_sample_ind >= n_samples:
        return (current_sample_ind,) + empty_view

    current_sample_ind += 1
    updated_view = update_commit_view(current_sample_ind)
    if updated_view is None:
        # update_commit_view may answer None for an out-of-range index (this
        # happens when stepping off the last sample); concatenating None to a
        # tuple would raise TypeError, so substitute the empty view.
        updated_view = empty_view
    return (current_sample_ind,) + tuple(updated_view)
# Gradio UI for labeling commit messages.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source whose indentation had been flattened -- confirm the intended
# row/column placement (especially of submit_btn and session_val).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Top row: progress slider plus the skip button.
    with gr.Row():
        # Read-only slider used as a progress indicator / current-index store.
        current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
                                       value=0,
                                       interactive=False,
                                       label='sample_ind',
                                       info=f"Samples labeled/skipped (out of {n_samples})",
                                       show_label=False,
                                       container=False,
                                       scale=5)

        with gr.Column(scale=1):
            # Hidden fields: they carry the current record's repo and hash so
            # that both end up in the logged feedback row (see feedback_form).
            repo_val = gr.Textbox(interactive=False, container=False, label='repo', visible=False)
            hash_val = gr.Textbox(interactive=False, container=False, label='hash', visible=False)
            skip_btn = gr.Button("Skip the current sample")

    # Main row: commit link and diff on one side, message and questions on the other.
    with gr.Row():
        with gr.Column(scale=2):
            github_link = gr.Markdown()
            diff_view = gr.JSON()

        with gr.Column(scale=1):
            commit_msg = gr.Textbox(label="AI-generated commit message",
                                    interactive=False,
                                    )
            gr.Markdown("## Please, answer the questions below")
            verbosity_feedback = gr.Radio(info='How can you describe the length of the commit message above?',
                                          label='verbosity',
                                          show_label=False,
                                          choices=[
                                              ('Too short', 0),
                                              ('Just right', 1),
                                              ('Too verbose', 2)])
            correctness_feedback = gr.Radio(info='Is the commit message factually correct?',
                                            label='is_correct',
                                            show_label=False,
                                            choices=[
                                                ('Yes', True),
                                                ('No', False)])
            format_feedback = gr.Slider(info='Rate the commit message\'s format (1 - very bad, 5 - very good)',
                                        label='format_score',
                                        show_label=False,
                                        minimum=1,
                                        step=1,
                                        interactive=True,
                                        maximum=5)
            submit_btn = gr.Button("Submit and continue")
            session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
                                     label='session')

    # Components refreshed whenever a different sample is shown; the order
    # matches the tuple returned by update_commit_view.
    commit_view = [
        github_link,
        diff_view,
        commit_msg,
        repo_val,
        hash_val
    ]

    # Components whose values form one logged feedback row. The slider comes
    # first, so the submit handler receives the sample index as args[0].
    feedback_form = [
        current_sample_sld,
        session_val,
        repo_val,
        hash_val,
        verbosity_feedback,
        correctness_feedback,
        format_feedback
    ]

    saver.setup(feedback_form, "feedback")

    skip_btn.click(next_sample, inputs=[current_sample_sld], outputs=[current_sample_sld] + commit_view)

    def submit(*args):
        """Log the submitted feedback and move on to the next sample.

        ``args`` holds the values of ``feedback_form`` in order; ``args[0]``
        is the current sample index taken from the slider.
        """
        sample_ind = args[0]
        # Only log while a real sample is on screen: once the index has
        # reached n_samples there is no record behind the form, and flagging
        # would append a row with empty repo/hash to the feedback log.
        if sample_ind < n_samples:
            saver.flag(args)
        return next_sample(sample_ind)

    submit_btn.click(submit, inputs=feedback_form, outputs=[current_sample_sld] + commit_view)

    def init_session(*args):
        """Create a new session id and load the view for the initial sample.

        ``args`` holds the slider value and is forwarded to
        ``update_commit_view``. Returns the session id followed by one value
        per component in ``commit_view``.
        """
        session = str(uuid.uuid4())
        view = update_commit_view(*args)
        if view is None:
            # update_commit_view may answer None for an out-of-range index
            # (e.g. an empty dataset); `(session,) + None` would raise.
            view = (None,) * len(commit_view)
        return (session,) + tuple(view)

    # Runs on page load: assigns the session id and shows the first sample.
    demo.load(init_session, inputs=[current_sample_sld], outputs=[session_val] + commit_view)
# Start the Gradio app built in the `with gr.Blocks(...) as demo:` block.
demo.launch()