Spaces:

nlphuji
/

whoops-explorer-full

Running

App Files Files Community

whoops-explorer-full / app.py

nitzanguetta

Update app.py

82c0a05 verified 3 days ago

raw

history blame contribute delete

5.04 kB

	import math
	from datasets import load_dataset
	import gradio as gr
	import os
	import ast

	# Access token for the Hugging Face Hub; None when the environment variable is unset.
	auth_token = os.environ.get("auth_token")

	# Load the "test" split of WHOOPS! and shuffle it (no seed is passed to shuffle()).
	whoops = load_dataset(
	    "nlphuji/whoops",
	    token=auth_token,
	    trust_remote_code=True,
	)["test"].shuffle()
	dataset_size = len(whoops)

	# Names of the dataset columns referenced by the app.
	IMAGE = "image"
	IMAGE_DESIGNER = "image_designer"
	DESIGNER_EXPLANATION = "designer_explanation"
	CROWD_CAPTIONS = "crowd_captions"
	CROWD_EXPLANATIONS = "crowd_explanations"
	CROWD_UNDERSPECIFIED_CAPTIONS = "crowd_underspecified_captions"
	QA = "question_answering_pairs"
	IMAGE_ID = "image_id"
	SELECTED_CAPTION = "selected_caption"
	COMMONSENSE_CATEGORY = "commonsense_category"

	# Columns whose stored string is parsed into a list and rendered as a numbered list.
	enumerate_cols = [CROWD_CAPTIONS, CROWD_EXPLANATIONS, CROWD_UNDERSPECIFIED_CAPTIONS]

	# The image is shown on its own; every other feature except the raw URL
	# goes into the per-example details section.
	left_side_columns = [IMAGE]
	right_side_columns = [name for name in whoops.features if name not in left_side_columns]
	right_side_columns.remove("image_url")

	# Emoji suffix appended to each textbox label, keyed by column name.
	emoji_to_label = {
	    IMAGE_DESIGNER: "🎨, 🧑‍🎨, 💻",
	    DESIGNER_EXPLANATION: "💡, 🤔, 🧑‍🎨",
	    CROWD_CAPTIONS: "👥, 💬, 📝",
	    CROWD_EXPLANATIONS: "👥, 💡, 🤔",
	    CROWD_UNDERSPECIFIED_CAPTIONS: "👥, 💬, 👎",
	    QA: "❓, 🤔, 💡",
	    IMAGE_ID: "🔍, 📄, 💾",
	    COMMONSENSE_CATEGORY: "🤔, 📚, 💡",
	    SELECTED_CAPTION: "📝, 👌, 💬",
	}

	# Number of examples rendered per page, and the size every image is resized to.
	batch_size = 8
	target_size = (1024, 1024)


	def func(index):
	    """Return the component values for one page of examples.

	    Args:
	        index: Zero-based page number, as delivered by the page slider.

	    Returns:
	        A flat list of values ordered example by example and, within each
	        example, in ``left_side_columns + right_side_columns`` order. The
	        list always holds ``batch_size`` examples' worth of entries; slots
	        for which the dataset has no example are filled with ``None``.
	    """
	    start_index = max(int(index), 0) * batch_size
	    # The last page can be partial, and the slider may point past the data,
	    # so never index beyond the end of the dataset.
	    end_index = min(start_index + batch_size, dataset_size)

	    values_lst = []
	    for example_index in range(start_index, end_index):
	        values_lst += get_instance_values(whoops[example_index])

	    # The number of returned values must match the number of output
	    # components, so pad the missing examples. None is used as the "empty"
	    # value -- NOTE(review): confirm the installed Gradio version clears
	    # Image/Textbox components on None.
	    values_per_example = len(left_side_columns) + len(right_side_columns)
	    missing = batch_size * values_per_example - len(values_lst)
	    values_lst += [None] * missing
	    return values_lst


	def get_instance_values(example):
	    """Convert one dataset example into display-ready values.

	    Args:
	        example: A single record of the dataset, indexable by column name.

	    Returns:
	        A list with one value per column in
	        ``left_side_columns + right_side_columns`` order: the resized image,
	        numbered-list strings for the list-like columns and the QA pairs,
	        and the stored value unchanged for everything else.
	    """

	    def render(column):
	        # The image is the only non-textual column; scale it to the common size.
	        if column == IMAGE:
	            return example["image"].resize(target_size)
	        raw = example[column]
	        # These columns store a Python-literal list as a string.
	        if column in enumerate_cols:
	            return list_to_string(ast.literal_eval(raw))
	        if column == QA:
	            pairs = ast.literal_eval(raw)
	            return list_to_string([f"Q: {pair[0]} A: {pair[1]}" for pair in pairs])
	        return raw

	    return [render(column) for column in left_side_columns + right_side_columns]

	def list_to_string(lst):
	    """Render the items of ``lst`` as a numbered list, one ``N. item`` per line.

	    Numbering starts at 1; an empty input yields an empty string.
	    """
	    numbered = (f"{position}. {entry}" for position, entry in enumerate(lst, start=1))
	    return "\n".join(numbered)

	# Top-level Blocks container; the `with demo:` section of this file adds the UI to it.
	demo = gr.Blocks()


	def get_col(example):
	    """Create the Gradio components that display one example.

	    The image is placed in a column, followed by a collapsed accordion that
	    holds one textbox per remaining column. Components are created inside
	    ``gr.Column()`` / ``gr.Accordion()`` contexts, so this is meant to be
	    called while a Blocks layout is being built.

	    Args:
	        example: A single record of the dataset, indexable by column name.

	    Returns:
	        A tuple ``(inputs_left, text_inputs_right)``: the components created
	        for ``left_side_columns`` and for ``right_side_columns``, each in
	        column order.
	    """
	    chars_per_line = 50  # rough width used to size the textboxes

	    instance_values = get_instance_values(example)
	    n_left = len(left_side_columns)
	    # One value per configured column is required for the zips below.
	    assert len(instance_values) == n_left + len(right_side_columns)
	    left_values = instance_values[:n_left]
	    right_values = instance_values[n_left:]

	    with gr.Column():
	        inputs_left = []
	        for key, value in zip(left_side_columns, left_values):
	            if key == IMAGE:
	                # get_instance_values already returns the image resized to
	                # target_size, so it is not resized a second time here.
	                input_k = gr.Image(value=value)
	            else:
	                label = key.capitalize().replace("_", " ")
	                input_k = gr.Textbox(value=value, label=f"{label} {emoji_to_label[key]}")
	            inputs_left.append(input_k)

	        with gr.Accordion("Click for details", open=False):
	            text_inputs_right = []
	            for key, value in zip(right_side_columns, right_values):
	                label = key.capitalize().replace("_", " ")

	                if isinstance(value, str):
	                    # Ceiling division: a partially filled last line still
	                    # needs its own row.
	                    num_lines = max(1, math.ceil(len(value) / chars_per_line))
	                else:
	                    num_lines = 1

	                text_input_k = gr.Textbox(
	                    value=value,
	                    label=f"{label} {emoji_to_label[key]}",
	                    lines=num_lines,
	                )
	                text_inputs_right.append(text_input_k)
	    return inputs_left, text_inputs_right


	# Build the page: a title, a page slider, and one column of components per
	# example of a page. Moving the slider calls func() to refill the components.
	with demo:
	    gr.Markdown("# Slide to iterate WHOOPS!")

	    with gr.Column():
	        num_batches = math.ceil(dataset_size / batch_size)
	        # Pages are zero-based, so the last valid page is num_batches - 1;
	        # allowing num_batches itself would request examples past the data.
	        slider = gr.Slider(
	            minimum=0,
	            maximum=max(num_batches - 1, 0),
	            step=1,
	            label=f'Page (out of {num_batches})',
	        )
	        with gr.Row():
	            # The components are created once, populated with the first
	            # page; later pages only replace their values.
	            all_inputs_left_right = []
	            for example_index in range(batch_size):
	                inputs_left, text_inputs_right = get_col(whoops[example_index])
	                all_inputs_left_right += inputs_left + text_inputs_right

	    # func returns one value per component, in the same order they were created.
	    slider.change(func, inputs=[slider], outputs=all_inputs_left_right)

	demo.launch()