|
|
|
|
|
|
|
|
|
from transformers import BlipForQuestionAnswering |
|
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base") |
|
|
|
from transformers import AutoProcessor |
|
processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base") |
|
|
|
from PIL import Image |
|
|
|
import gradio as gr |
|
|
|
def answering(image, question):
    """Answer a free-form question about an image with the BLIP VQA model.

    Args:
        image: PIL image to be queried (Gradio supplies this via type="pil").
        question: Question text about the image.

    Returns:
        The model's decoded answer as a plain string.
    """
    # Tokenize/encode the (image, question) pair into model-ready tensors.
    encoded = processor(image, question, return_tensors="pt")
    # Autoregressively generate the answer token ids.
    generated_ids = model.generate(**encoded)
    # Decode the first (only) sequence, dropping special tokens like [SEP].
    answer = processor.decode(generated_ids[0], skip_special_tokens=True)
    return answer
|
|
|
# Shut down any Gradio servers left running from a previous session
# before launching a fresh one.
gr.close_all()

# Build the web UI: an image upload plus a question box in, one answer
# textbox out.  `type="pil"` hands `answering` a PIL image directly.
app = gr.Interface(
    fn=answering,
    inputs=[
        gr.Image(label="Picture here", type="pil"),
        gr.Textbox(label="Question about picture here"),
    ],
    outputs=[gr.Textbox(label="Answer")],
    # Fixed: stray trailing apostrophe in the title.
    title="Harza's application for answering questions about pictures",
    # Fixed: "picuture" typo and stray trailing apostrophe.
    description=(
        "Harza's miracle application that can answer questions "
        "about a given picture!"
    ),
    allow_flagging="never",
)

app.launch()

# Release the server once launch() returns (e.g. after Ctrl-C).
gr.close_all()
|
|