import json
import os
import random
import uuid
from datetime import datetime
from difflib import ndiff
import gradio as gr
from data_loader import load_data
from hf_dataset_saver_builder import get_dataset_saver
HF_TOKEN = os.environ.get('HF_REWRITING_TOKEN')
HF_DATASET = os.environ.get('HF_REWRITING_DATASET')
data = load_data()
n_samples = len(data)
saver = get_dataset_saver(HF_TOKEN, HF_DATASET, private=True)
def convert_diff_to_unified(diff_string):
diff = json.loads(diff_string)
result = "\n".join(
[
f'--- {modified_file["old_path"]}\n'
f'+++ {modified_file["new_path"]}\n'
f'{modified_file["diff"]}'
for modified_file in diff
]
)
return result
def get_diff2html_view(raw_diff):
html = f"""
"""
return html
def get_github_link_md(repo, hash):
return f'[See the commit on Github](https://github.com/{repo}/commit/{hash})'
def char_diff_obj(change_type, pos, character, timestamp):
return {"type": change_type, "pos": pos, "char": character, "timestamp": timestamp}
def update_commit_view(sample_ind):
if sample_ind >= n_samples:
return None
record = data[sample_ind]
diff_view = get_diff2html_view(convert_diff_to_unified(record['mods']))
repo_val = record['repo']
hash_val = record['hash']
github_link_md = get_github_link_md(repo_val, hash_val)
diff_loaded_timestamp = datetime.now().isoformat()
commit_message = record['prediction']
commit_message_start = commit_message
commit_message_prev = commit_message
commit_message_history = []
return (
github_link_md, diff_view, repo_val, hash_val, diff_loaded_timestamp,
commit_message_start, commit_message, commit_message_prev, commit_message_history)
def next_sample(current_sample_ind, shuffled_idx):
if current_sample_ind == n_samples:
return None
current_sample_ind += 1
updated_view = update_commit_view(shuffled_idx[current_sample_ind])
return (current_sample_ind,) + updated_view
with open("head.html") as head_file:
head_html = head_file.read()
with gr.Blocks(theme=gr.themes.Soft(), head=head_html, css="style_overrides.css") as application:
repo_val = gr.Textbox(interactive=False, label='repo', visible=False)
hash_val = gr.Textbox(interactive=False, label='hash', visible=False)
shuffled_idx_val = gr.JSON(visible=False)
with gr.Row():
with gr.Accordion("Help"):
with open("survey_guide.md") as content_file:
gr.Markdown(content_file.read())
with gr.Row():
current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
value=0,
interactive=False,
label='sample_ind',
info=f"Samples labeled/skipped (out of {n_samples})",
show_label=False,
container=False,
scale=5)
with gr.Column(scale=1):
skip_btn = gr.Button("Skip the current sample")
with gr.Row():
with gr.Column(scale=2):
github_link = gr.Markdown()
diff_view = gr.HTML()
with gr.Column(scale=1):
commit_msg_start = gr.TextArea(label="commit_msg_start", visible=False)
commit_msg = gr.TextArea(label="commit_msg_end", show_label=False,
info="Commit message (can be scrollable)")
commit_msg_prev = gr.TextArea(visible=False)
commit_msg_history = gr.JSON(label="commit_msg_history", visible=False)
submit_btn = gr.Button("Submit")
session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
label='session')
with gr.Row(visible=False):
sample_loaded_timestamp = gr.Textbox(info="Sample loaded", label='loaded_ts', interactive=False,
container=True, show_label=False)
now_timestamp = gr.Textbox(info="Current time",
interactive=False, container=True, show_label=False,
value=lambda: datetime.now().isoformat(), every=0.1,
label='submitted_ts')
commit_view = [
github_link,
diff_view,
repo_val,
hash_val,
sample_loaded_timestamp,
commit_msg_start,
commit_msg,
commit_msg_prev,
commit_msg_history
]
feedback_metadata = [
session_val,
repo_val,
hash_val,
sample_loaded_timestamp,
now_timestamp
]
feedback_form = [
commit_msg_start,
commit_msg,
commit_msg_history
]
saver.setup([current_sample_sld] + feedback_metadata + feedback_form, "feedback")
skip_btn.click(next_sample, inputs=[current_sample_sld, shuffled_idx_val],
outputs=[current_sample_sld] + commit_view)
def submit(current_sample, shuffled_idx, *args):
saver.flag((current_sample,) + args)
return next_sample(current_sample, shuffled_idx)
submit_btn.click(
submit,
inputs=[current_sample_sld, shuffled_idx_val] + feedback_metadata + feedback_form,
outputs=[current_sample_sld] + commit_view
)
def on_commit_msg_changed(message, prev_message, history, timestamp):
for i, s in enumerate(ndiff(prev_message, message)):
diff = char_diff_obj(s[0], i, s[-1], timestamp)
if diff['type'] in ('+', '-'):
print(diff)
history.append(diff)
return message, history
commit_msg.change(on_commit_msg_changed, inputs=[commit_msg, commit_msg_prev, commit_msg_history,
now_timestamp],
outputs=[commit_msg_prev, commit_msg_history])
def init_session(current_sample):
session = str(uuid.uuid4())
shuffled_idx = list(range(n_samples))
random.shuffle(shuffled_idx)
return (session, shuffled_idx) + update_commit_view(shuffled_idx[current_sample])
application.load(init_session,
inputs=[current_sample_sld],
outputs=[session_val, shuffled_idx_val] + commit_view, )
application.launch()