Spaces:

BAAI
/

Emu2

Runtime error

App Files Files Community

Emu2 / demo /chat_frontend.py

ryanzhangfan

add examples and license

6a25365 11 months ago

raw

history blame

9.33 kB

	# -*- coding: utf-8 -*-

	# ===================================================
	#
	# Author : Fan Zhang
	# Email : [email protected]
	# Institute : Beijing Academy of Artificial Intelligence (BAAI)
	# Create On : 2023-12-12 18:05
	# Last Modified : 2023-12-20 15:59
	# File Name : chat_frontend.py
	# Description :
	#
	# ===================================================

	import json
	import io
	import time
	from PIL import Image
	import requests

	import gradio as gr

	from .meta import ConvMeta, Role, DataMeta
	from .utils import extract_frames
	from .utils import frontend_logger as logging
	from .constants import LICENSE, TERM_OF_USE

	# Base URL of the controller service. Empty until build_chat() overwrites it
	# with args.controller_url; generate() posts to CONTROLLER_URL + "/v1/mmc".
	CONTROLLER_URL = ""

	def submit(
	    meta,
	    image,
	    video,
	    text,
	    num_frames,
	):
	    """Append exactly one user input (text, image or video) to the conversation.

	    Args:
	        meta: Conversation state, or ``None``; a fresh ``ConvMeta`` is
	            created when it is ``None``.
	        image: Value of the image input, or ``None`` when empty.
	        video: Value of the video input, or ``None`` when empty.
	        text: Value of the text input; ``""`` and ``None`` both count as empty.
	        num_frames: Forwarded to ``extract_frames`` when a video is given.

	    Returns:
	        ``(chatbot_messages, meta, None, None, "")``. The trailing three
	        values are wired to the image, video and text inputs and reset them.

	    Raises:
	        gr.Error: If zero or more than one modality is supplied.
	    """
	    if meta is None:
	        meta = ConvMeta()

	    meta.pop_error()

	    check_text = (text != "" and text is not None)
	    check_image = image is not None
	    check_video = video is not None

	    # Booleans sum as 0/1, so this counts how many modalities were supplied.
	    if check_text + check_image + check_video != 1:
	        logging.info(f"{meta.log_id}: invalid input: give multi madality simultaneously for single modality input")
	        # gr.Error is an exception: it only reaches the user when raised.
	        # Merely constructing it (as before) showed nothing and silently
	        # wiped the inputs.
	        raise gr.Error("Invalid input number, must give exactly one modality input at a time")

	    if check_text:
	        meta.append(Role.USER, DataMeta.build(text=text))
	    elif check_image:
	        meta.append(Role.USER, DataMeta.build(image=image))
	    else:
	        # Only the video case remains once exactly one modality is present.
	        frames = extract_frames(video, num_frames)
	        meta.append(Role.USER, DataMeta.build(frames=frames))

	    return meta.format_chatbot(), meta, None, None, ""


	def clear_history(meta):
	    """Empty the conversation and return the refreshed chatbot view.

	    Args:
	        meta: Conversation state, or ``None``; a fresh ``ConvMeta`` is
	            created when it is ``None``.

	    Returns:
	        ``(chatbot_messages, meta)`` after ``meta.clear()`` has run.
	    """
	    conversation = ConvMeta() if meta is None else meta
	    conversation.clear()
	    rendered = conversation.format_chatbot()
	    return rendered, conversation


	def generate(
	    meta,
	    do_sample,
	    max_new_tokens,
	    temperature,
	    top_k,
	    top_p,
	    length_penalty,
	    num_beams,
	    repetition_penalty,
	):
	    """Send the current conversation to the controller and append its reply.

	    The prompt returned by ``meta.format_chat()`` is split into text items
	    and PIL images. Images are PNG-encoded and uploaded as multipart files;
	    the JSON-encoded prompt refers to them by placeholder key.

	    Args:
	        meta: Conversation state, or ``None``; a fresh ``ConvMeta`` is
	            created when it is ``None``.
	        do_sample, max_new_tokens, temperature, top_k, top_p,
	        length_penalty, num_beams, repetition_penalty: Generation settings,
	            forwarded unchanged as form fields of the request.

	    Returns:
	        ``(chatbot_messages, meta)``. On any failure (transport error,
	        non-200 status, malformed body, non-zero ``code``) an assistant
	        turn flagged ``is_error=True`` is appended instead of raising.
	    """
	    if meta is None:
	        meta = ConvMeta()

	    meta.pop_error()
	    # NOTE(review): pop() semantics live in ConvMeta -- presumably it drops a
	    # previous assistant reply so "Generate" can be pressed again; confirm.
	    meta.pop()
	    prompt = meta.format_chat()

	    prompt_list, image_list = [], {}
	    for idx, p in enumerate(prompt):
	        if isinstance(p, Image.Image):
	            key = f"[<IMAGE{idx}>]"
	            prompt_list.append(["IMAGE", key])

	            buf = io.BytesIO()
	            p.save(buf, format="PNG")
	            # requests accepts raw bytes as file content; no second buffer needed.
	            image_list[key] = (key, buf.getvalue(), "image/png")
	        else:
	            prompt_list.append(["TEXT", p])

	    logging.info(f"{meta.log_id}: construct chat reqeust with prompt {prompt_list}")

	    t0 = time.time()
	    try:
	        rsp = requests.post(
	            CONTROLLER_URL + "/v1/mmc",
	            # An empty mapping becomes None so the request carries no files.
	            files=image_list or None,
	            data={
	                "log_id": meta.log_id,
	                "prompt": json.dumps(prompt_list),
	                "do_sample": do_sample,
	                "max_new_tokens": max_new_tokens,
	                "temperature": temperature,
	                "top_k": top_k,
	                "top_p": top_p,
	                "length_penalty": length_penalty,
	                "num_beams": num_beams,
	                "repetition_penalty": repetition_penalty,
	            },
	        )
	        status_code, body = rsp.status_code, rsp.text
	    except Exception as ex:
	        # Deliberate best-effort at the UI boundary: surface the failure in
	        # the chat instead of crashing the handler. 1099 is not an HTTP
	        # status; it is the sentinel this module uses for transport errors.
	        status_code, body = 1099, str(ex)
	    t1 = time.time()

	    logging.info(f"{meta.log_id}: get response with status code: {status_code}, time: {(t1-t0)*1000:.3f}ms")

	    if status_code == requests.codes.ok:
	        try:
	            content = json.loads(body)
	            code, data = content["code"], content["data"]
	        except (ValueError, KeyError, TypeError) as ex:
	            # Body was not JSON, or lacked the expected "code"/"data" fields.
	            meta.append(Role.ASSISTANT, DataMeta.build(text=f"GENERATE FAILED: malformed response: {ex!r}", is_error=True))
	        else:
	            if code == 0:
	                meta.append(Role.ASSISTANT, DataMeta.build(text=data))
	            else:
	                meta.append(Role.ASSISTANT, DataMeta.build(text=f"GENERATE FAILED: {data}", is_error=True))
	    else:
	        meta.append(Role.ASSISTANT, DataMeta.build(text=f"GENERATE FAILED: http failed with code {status_code}, msg: {body}", is_error=True))

	    return meta.format_chatbot(), meta


	def push_examples(examples, meta):
	    """Replace the conversation with a clicked example.

	    Args:
	        examples: Two-item sequence ``(image, prompt)``; ``image`` is passed
	            to ``Image.open`` and ``prompt`` is the accompanying text.
	        meta: Conversation state, or ``None``; a fresh ``ConvMeta`` is
	            created when it is ``None``.

	    Returns:
	        ``(chatbot_messages, meta)`` holding the example image followed by
	        the example prompt, both as user turns.
	    """
	    conversation = meta if meta is not None else ConvMeta()

	    # The existing history is discarded before the example is unpacked.
	    conversation.clear()

	    sample_image, sample_prompt = examples
	    opened = Image.open(sample_image)
	    conversation.append(Role.USER, DataMeta.build(image=opened))
	    conversation.append(Role.USER, DataMeta.build(text=sample_prompt))

	    rendered = conversation.format_chatbot()
	    return rendered, conversation


	def build_chat(args):
	"""Build the Gradio Blocks UI for the Emu chat demo.

	Stores ``args.controller_url`` in the module-level ``CONTROLLER_URL``
	(read by ``generate``), creates the image/video/text inputs, the
	generation-parameter controls, the chatbot view and the example dataset,
	then wires the widgets to ``clear_history``, ``submit``, ``generate`` and
	``push_examples``.

	Args:
	args: Object exposing a ``controller_url`` attribute.

	Returns:
	The constructed ``gr.Blocks`` instance; it is not launched here.
	"""
	global CONTROLLER_URL
	CONTROLLER_URL = args.controller_url

	with gr.Blocks(title="Emu", theme=gr.themes.Default(primary_hue="blue", secondary_hue="blue")) as demo:
	# Conversation state passed to and returned by every handler; the handlers
	# create a ConvMeta themselves when they receive None.
	state = gr.State()


	with gr.Row():
	with gr.Column(scale=2):
	# Media inputs; submit() accepts at most one of image / video / text per call.
	with gr.Row():
	imagebox = gr.Image(type="pil")
	with gr.Row():
	videobox = gr.Video()

	# Generation settings; all but num_frames are inputs of generate().
	with gr.Accordion("Parameters", open=True, visible=True) as parameter_row:
	do_sample = gr.Checkbox(value=False, label="Do Sample", interactive=True)
	max_new_tokens = gr.Slider(minimum=0, maximum=2048, value=512, step=1, interactive=True, label="Max Output Tokens")
	temperature = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.05, interactive=True, label="Temperature")
	top_k = gr.Slider(minimum=1, maximum=5, value=3, step=1, interactive=True, label="Top K")
	top_p = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.05, interactive=True, label="Top P")
	length_penalty = gr.Slider(minimum=0, maximum=5, value=2, step=0.1, interactive=True, label="Length Penalty")
	num_beams = gr.Slider(minimum=1, maximum=10, value=5, step=1, interactive=True, label="Beam Size")
	repetition_penalty = gr.Slider(minimum=1.0, maximum=10.0, value=1.0, step=0.5, interactive=True, label="Repetition Penalty")
	# Passed to submit(), which hands it to extract_frames() for video input.
	num_frames = gr.Number(interactive=True, value=8, maximum=12, label="Num Video Frames")

	with gr.Column(scale=6):
	# Conversation view; every handler returns its new content.
	chatbot = gr.Chatbot(
	elem_id="chatbot",
	label="Emu Chatbot",
	visible=True,
	height=1070,
	)

	with gr.Row():
	with gr.Column(scale=8):
	textbox = gr.Textbox(
	show_label=False,
	placeholder="Enter text and add to prompt",
	visible=True,
	container=False,
	)

	with gr.Column(scale=1, min_width=60):
	add_btn = gr.Button(value="Add")

	with gr.Row(visible=True) as button_row:
	# upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
	# downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
	# flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
	# regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
	clear_btn = gr.Button(value="🗑️ Clear History")
	generate_btn = gr.Button(value="Generate")

	with gr.Row():
	# Each sample is [image path, prompt text]; a click is handled by push_examples().
	examples = gr.Dataset(components=[gr.Image(type="pil", visible=False), gr.Textbox(visible=False)],
	label="Examples",
	samples=[
	["./examples/roadmap.jpg", "Imagine you are a guiding robot.\nHere is a photo I took. Please tell me how to get to the restroom."],
	["./examples/shapes.jpeg", "Look at this sequence of three shapes. What shape should come as the fourth shape? Explain your reasoning with detailed descriptions of the first shapes."],
	],
	)

	gr.Markdown(TERM_OF_USE)
	gr.Markdown(LICENSE)


	# --- Event wiring ---
	clear_btn.click(clear_history, inputs=state, outputs=[chatbot, state])
	# Pressing Enter in the textbox and clicking "Add" both call submit() with
	# the same inputs; its last three return values reset image, video and text.
	textbox.submit(
	submit,
	inputs=[
	state,
	imagebox,
	videobox,
	textbox,
	num_frames,
	],
	outputs=[
	chatbot,
	state,
	imagebox,
	videobox,
	textbox,
	],
	)
	add_btn.click(
	submit,
	inputs=[
	state,
	imagebox,
	videobox,
	textbox,
	num_frames,
	],
	outputs=[
	chatbot,
	state,
	imagebox,
	videobox,
	textbox,
	],
	)
	# The input order must match generate()'s positional parameters.
	generate_btn.click(
	generate,
	inputs=[
	state,
	do_sample,
	max_new_tokens,
	temperature,
	top_k,
	top_p,
	length_penalty,
	num_beams,
	repetition_penalty,
	],
	outputs=[
	chatbot,
	state,
	],
	)
	# Passes the clicked sample and the current state to push_examples().
	examples.click(
	push_examples,
	inputs=[
	examples,
	state,
	],
	outputs=[
	chatbot,
	state,
	]
	)

	return demo