Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,984 Bytes
6a8ca1f 04fc1f1 6a8ca1f e9cc0b5 6a8ca1f de50a7e 04fc1f1 ee5e19e e27d897 db2ea29 ee5e19e e27d897 ee5e19e db2ea29 576d10c db2ea29 6a8ca1f e27d897 8a8a62b e27d897 8a8a62b db2ea29 6a8ca1f ee5e19e fefde70 6a8ca1f e9ecb71 6a8ca1f e9ecb71 6a8ca1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import spaces
import torch
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from PIL import Image
# Use half precision on CUDA when torch reports a usable device; otherwise
# fall back to full precision on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Checkpoint identifier and the pinned revision loaded for both the tokenizer
# and the model.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code=True is passed because the model is loaded through
# AutoModelForCausalLM with code shipped alongside the checkpoint.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=dtype,
)
moondream = moondream.to(device=device)
moondream.eval()  # inference only
@spaces.GPU
def answer_questions(image_tuples, prompt_text):
    """Ask every prompt about every uploaded image and tabulate the answers.

    Args:
        image_tuples: Sequence of gallery items; element 0 of each item is
            used as the image. Items whose element 0 is ``None`` are
            skipped. ``None`` or an empty sequence means "no images".
        prompt_text: Comma-separated prompts. Blank entries (for example the
            result of a trailing comma) are ignored.

    Returns:
        A dict with two keys:

        * ``'headers'``: ``["image", prompt_1, ..., prompt_k]``.
        * ``'data'``: one row per usable image, shaped
          ``["image<N>", answer_1, ..., answer_k]`` where ``N`` is the
          1-based position of that image in ``image_tuples``.

        ``'data'`` is empty when there are no usable images or no prompts;
        the model is not called in that case.
    """
    prompts = [p.strip() for p in (prompt_text or "").split(",")]
    prompts = [p for p in prompts if p]
    print(f"prompts\n{prompts}\n")

    # Keep each image's 1-based position next to the image itself so row
    # names stay correct when some entries are skipped. The RGB conversion is
    # done once here instead of once per prompt.
    numbered_images = [
        (position, item[0].convert("RGB"))
        for position, item in enumerate(image_tuples or [], start=1)
        if item[0] is not None
    ]

    # The first column of every row is the image name, so it needs its own
    # header in front of the prompt headers.
    headers = ["image"] + prompts

    if not prompts or not numbered_images:
        # Nothing to ask, or nothing to ask it about.
        return {'headers': headers, 'data': []}

    images = [image for _, image in numbered_images]

    # One batched call per prompt: answers_by_prompt[p][k] is the answer to
    # prompt p for the k-th usable image.
    answers_by_prompt = [
        moondream.batch_answer(
            images=images,
            prompts=[prompt] * len(images),
            tokenizer=tokenizer,
        )
        for prompt in prompts
    ]

    # Transpose prompt-major answers into one row per image.
    data = []
    for k, (position, _) in enumerate(numbered_images):
        row_answers = [answers[k] for answers in answers_by_prompt]
        print(f"image{position}_answers \n {row_answers} \n")
        data.append([f"image{position}"] + row_answers)

    return {'headers': headers, 'data': data}
# Gradio front end: a gallery of images plus a comma-separated prompt box feed
# answer_questions, whose return value is shown in the "Responses" area.
with gr.Blocks() as demo:
    gr.Markdown("# moondream2 unofficial batch processing demo")
    # answer_questions asks every prompt about every image, so the
    # instructions describe that instead of a one-prompt-per-image pairing.
    gr.Markdown(
        "1. Select images\n"
        "2. Enter one or more prompts separated by commas; every prompt is "
        "asked about every image. "
        "Ex: Describe this image, What is in this image?\n\n"
    )
    gr.Markdown("*Tested and Running on free CPU space tier currently so results may take a bit to process compared to using GPU space hardware*")
    gr.Markdown("## 🌔 moondream2\nA tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
    with gr.Row():
        img = gr.Gallery(label="Upload Images", type="pil")
        prompt = gr.Textbox(
            label="Input Prompts",
            placeholder=(
                "Enter one or more prompts separated by commas; every prompt "
                "is asked about every image. "
                "Ex: Describe this image, What is in this image?"
            ),
            lines=8,
        )
    submit = gr.Button("Submit")
    # NOTE(review): answer_questions returns a {'headers', 'data'} dict, which
    # looks shaped for a table component rather than a text area - confirm
    # the intended output widget.
    output = gr.TextArea(label="Responses", lines=8)
    submit.click(answer_questions, [img, prompt], output)

demo.queue().launch()
|