Spaces:

Tonic
/

Prometheus-VL

Runtime error

App Files Files Community

Prometheus-VL / app.py

tonic

refactor

3263d5e 8 months ago

raw

history blame contribute delete

No virus

5.91 kB

	# Kept first, as in the original ordering.
	# NOTE(review): ZeroGPU setups generally want `spaces` imported ahead of torch - confirm.
	import spaces

	import argparse
	import json
	import math
	import os
	import shutil
	import tempfile

	import gradio as gr
	import shortuuid
	import torch
	from PIL import Image
	from tqdm import tqdm

	from prometheus.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
	from prometheus.conversation import conv_templates, SeparatorStyle
	from prometheus.model.builder import load_pretrained_model
	from prometheus.utils import disable_torch_init
	from prometheus.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria


	# Default checkpoint location used as the `model_path` default of eval_model and
	# gradio_wrapper (presumably a Hugging Face Hub repo id - confirm).
	model_path = 'kaist-ai/prometheus-vision-13b-v1.0'
	# Default `model_name`: passed to load_pretrained_model and written as "model_id"
	# into every answer record.
	model_name = 'llava-v1.5'

	def split_list(lst, n):
	    """Split ``lst`` into at most ``n`` contiguous chunks of near-equal size.

	    Every chunk holds ``ceil(len(lst) / n)`` items except possibly the last
	    one, so fewer than ``n`` chunks can come back (5 items with ``n=4``
	    produce 3 chunks).

	    Args:
	        lst: Sequence to partition; it is sliced, never mutated.
	        n: Requested number of chunks; must be at least 1.

	    Returns:
	        A list of slices of ``lst`` in original order, or ``[]`` when
	        ``lst`` is empty.

	    Raises:
	        ValueError: If ``n`` is smaller than 1.
	    """
	    if n < 1:
	        raise ValueError(f"n must be a positive integer, got {n!r}")
	    if len(lst) == 0:
	        # ceil(0 / n) == 0 would otherwise become a zero step for range().
	        return []
	    chunk_size = math.ceil(len(lst) / n)  # ceiling division
	    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]


	def get_chunk(lst, n, k):
	    """Return the ``k``-th of the ``n`` chunks that ``split_list`` makes of ``lst``.

	    ``split_list`` may produce fewer than ``n`` chunks (or none for an empty
	    list). A ``k`` that is valid for ``n`` but has no corresponding chunk
	    yields an empty list, so such a worker simply has nothing to process.

	    Args:
	        lst: Sequence to partition.
	        n: Total number of chunks requested.
	        k: Index of the wanted chunk.

	    Returns:
	        The selected chunk, or ``[]`` when ``0 <= k < n`` but the split
	        produced no chunk at that position.

	    Raises:
	        IndexError: If ``k`` addresses no chunk and is outside ``[0, n)``.
	    """
	    if len(lst) == 0 and 0 <= k < n:
	        # Nothing to split; avoid calling split_list on empty input at all.
	        return []
	    chunks = split_list(lst, n)
	    if len(chunks) <= k < n:
	        return []
	    return chunks[k]

	@spaces.GPU
	def eval_model(args, model_name=model_name, model_path=model_path):
	    """Answer every question of one chunk and write the results as JSON lines.

	    Loads the model, reads the JSONL question file, selects the chunk given
	    by ``args.num_chunks`` / ``args.chunk_idx``, generates one answer per
	    question and appends it to ``args.answers_file``. Tensors are moved with
	    ``.cuda()``, so a CUDA device is required.

	    Args:
	        args: Namespace providing ``model_base``, ``question_file``,
	            ``answers_file``, ``image_folder``, ``conv_mode``,
	            ``num_chunks``, ``chunk_idx``, ``temperature``, ``top_p`` and
	            ``num_beams``.
	        model_name: Name passed to ``load_pretrained_model`` and stored as
	            ``"model_id"`` in each answer record.
	        model_path: Checkpoint location passed to ``load_pretrained_model``.

	    Returns:
	        None. Each output line is a JSON object with the keys
	        ``question_id``, ``prompt``, ``text``, ``answer_id``, ``model_id``
	        and ``metadata``.
	    """
	    disable_torch_init()
	    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)

	    # Each non-blank line of the question file is one JSON record with
	    # "question_id", "image" and "text".
	    with open(os.path.expanduser(args.question_file), "r") as question_fh:
	        questions = [json.loads(q) for q in question_fh if q.strip()]
	    questions = get_chunk(questions, args.num_chunks, args.chunk_idx)

	    answers_file = os.path.expanduser(args.answers_file)
	    answers_dir = os.path.dirname(answers_file)
	    if answers_dir:
	        # A bare file name has an empty dirname, which os.makedirs rejects.
	        os.makedirs(answers_dir, exist_ok=True)

	    # The context manager closes the file even if a question fails midway.
	    with open(answers_file, "w") as ans_file:
	        for line in tqdm(questions):
	            idx = line["question_id"]
	            image_file = line["image"]
	            qs = line["text"]
	            cur_prompt = qs  # recorded without the image tokens added below
	            if model.config.mm_use_im_start_end:
	                qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
	            else:
	                qs = DEFAULT_IMAGE_TOKEN + '\n' + qs

	            conv = conv_templates[args.conv_mode].copy()
	            conv.append_message(conv.roles[0], qs)
	            conv.append_message(conv.roles[1], None)
	            prompt = conv.get_prompt()

	            input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()

	            image = Image.open(os.path.join(args.image_folder, image_file))
	            image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0]

	            stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
	            keywords = [stop_str]
	            stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)

	            with torch.inference_mode():
	                output_ids = model.generate(
	                    input_ids,
	                    images=image_tensor.unsqueeze(0).half().cuda(),
	                    do_sample=args.temperature > 0,
	                    temperature=args.temperature,
	                    top_p=args.top_p,
	                    num_beams=args.num_beams,
	                    # no_repeat_ngram_size=3,
	                    max_new_tokens=1024,
	                    use_cache=True)

	            # The generated sequence starts with the prompt; warn if that
	            # prefix differs, then decode only the newly produced tokens.
	            input_token_len = input_ids.shape[1]
	            n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
	            if n_diff_input_output > 0:
	                print(f'[Warning] {n_diff_input_output} output_ids are not the same as the input_ids')
	            outputs = tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
	            outputs = outputs.strip()
	            if outputs.endswith(stop_str):
	                outputs = outputs[:-len(stop_str)]
	            outputs = outputs.strip()

	            ans_id = shortuuid.uuid()
	            ans_file.write(json.dumps({"question_id": idx,
	                                       "prompt": cur_prompt,
	                                       "text": outputs,
	                                       "answer_id": ans_id,
	                                       "model_id": model_name,
	                                       "metadata": {}}) + "\n")
	            # Flush per answer so partial results survive a later failure.
	            ans_file.flush()

	def _stage_uploaded_image(image_file, target_dir):
	    """Place one uploaded image inside ``target_dir`` under its base file name."""
	    # NOTE(review): depending on the Gradio version an upload may arrive as a
	    # path string or as an object exposing the path via ``.name`` - confirm.
	    if isinstance(image_file, (str, os.PathLike)):
	        source_path = os.fspath(image_file)
	    else:
	        source_path = image_file.name
	    # Only the base name is kept so the file really lands in target_dir even
	    # when the upload reports an absolute path.
	    image_path = os.path.join(target_dir, os.path.basename(source_path))
	    if hasattr(image_file, "save"):
	        image_file.save(image_path)
	    else:
	        shutil.copy(source_path, image_path)
	    return image_path


	def gradio_wrapper(image_folder, question_file, answers_file, conv_mode, num_chunks, chunk_idx, temperature, top_p, num_beams, model_path=model_path, model_name=model_name, model_base=None):
	    """Adapt the Gradio form values to ``eval_model`` and return its answers.

	    Writes the questions to a temporary JSONL file, stages the uploaded
	    images in a temporary folder, runs ``eval_model`` and reads back the
	    answers file it produced.

	    Args:
	        image_folder: Iterable of uploaded images (may be ``None``).
	        question_file: Iterable of JSON-serialisable question records; each
	            is written as one line of the question file.
	        answers_file: Path for the answers JSONL file; a temporary file is
	            used when it is empty.
	        conv_mode: Key into ``conv_templates`` used by ``eval_model``.
	        num_chunks: Number of chunks to split the questions into.
	        chunk_idx: Index of the chunk to evaluate.
	        temperature: Sampling temperature.
	        top_p: Nucleus sampling value; may be a string, blank means ``None``.
	        num_beams: Beam count for generation.
	        model_path: Checkpoint location forwarded to ``eval_model``.
	        model_name: Model name forwarded to ``eval_model``.
	        model_base: Forwarded as ``args.model_base``; ``None`` by default.

	    Returns:
	        The list of lines read from the answers file.
	    """
	    question_file_path = os.path.join(tempfile.mkdtemp(), "question.jsonl")
	    with open(question_file_path, "w") as f:
	        for question in question_file:
	            f.write(json.dumps(question) + "\n")

	    temp_image_folder = tempfile.mkdtemp()
	    for image_file in image_folder or []:
	        _stage_uploaded_image(image_file, temp_image_folder)

	    if not answers_file:
	        answers_file = os.path.join(tempfile.mkdtemp(), "answers.jsonl")
	    # Expand once here so the path written by eval_model is the one read back.
	    answers_file = os.path.expanduser(answers_file)

	    # A text input delivers a string; blank means "no top-p filtering".
	    top_p = float(top_p) if top_p not in (None, "") else None

	    args = argparse.Namespace(
	        model_path=model_path,
	        model_base=model_base,
	        image_folder=temp_image_folder,
	        question_file=question_file_path,
	        answers_file=answers_file,
	        conv_mode=conv_mode,
	        # Chunk values index a list and num_beams is a count, so force ints.
	        num_chunks=int(num_chunks),
	        chunk_idx=int(chunk_idx),
	        temperature=temperature,
	        top_p=top_p,
	        num_beams=int(num_beams),
	    )

	    eval_model(args, model_name=model_name, model_path=model_path)

	    with open(answers_file, "r") as f:
	        answers = f.readlines()

	    return answers

	# Gradio form whose inputs map positionally onto the first nine parameters
	# of gradio_wrapper; the single output shows the returned answers.
	iface = gr.Interface(
	    fn=gradio_wrapper,
	    inputs=[
	        # `type` is left at the library default so both path-string and
	        # file-object uploads are possible, depending on the Gradio version.
	        gr.File(label="Image Folder", file_count="multiple"),
	        # NOTE(review): gr.JSON is normally a display component and may not
	        # accept typed input - confirm it works as an input here.
	        gr.JSON(label="Question File"),
	        gr.Textbox(label="Answers File"),
	        gr.Dropdown(label="Conversation Mode", choices=["llava_v1"], value="llava_v1"),
	        gr.Slider(label="Number of Chunks", minimum=1, maximum=10, step=1, value=1),
	        gr.Slider(label="Chunk Index", minimum=0, maximum=9, step=1, value=0),
	        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.01, value=0.2),
	        gr.Textbox(label="Top P", value=None),
	        gr.Slider(label="Number of Beams", minimum=1, maximum=10, step=1, value=1),
	    ],
	    outputs=[
	        gr.Textbox(label="Answers"),
	    ],
	    title="Model Evaluation Interface",
	    description="A Gradio interface for evaluating models.",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()