|
|
|
|
|
|
|
|
|
from transformers import BlipForConditionalGeneration |
|
# Load the pretrained BLIP captioning checkpoint once at module level so that
# every call to the captioning function reuses the same model object.
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
|
|
|
from transformers import AutoProcessor |
|
# Processor for the same checkpoint as the model; it is used both to turn the
# input image into tensors and to decode generated token ids back into text.
processor = AutoProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
|
|
|
from PIL import Image |
|
import gradio as gr |
|
|
|
def captioning(input):
    """Generate a text caption for an image using the module-level BLIP model.

    Args:
        input: The image to describe. The Gradio interface in this file is
            configured with ``type="pil"``, so this is expected to be a PIL
            image. ``None`` is accepted and treated as "no image supplied".
            (The name shadows the ``input`` builtin; it is kept unchanged so
            existing keyword callers keep working.)

    Returns:
        The decoded caption as a string with special tokens stripped, or an
        empty string when ``input`` is ``None``.
    """
    # An empty submission would otherwise be forwarded to the processor,
    # which cannot build tensors without an image.
    if input is None:
        return ""

    # Preprocess the image into PyTorch tensors ("pt") for the model.
    image_tensors = processor(input, return_tensors="pt")

    # Generate caption token ids from the preprocessed image.
    image_text_tensors = model.generate(**image_tensors)

    # Only the first generated sequence is decoded.
    return processor.decode(image_text_tensors[0], skip_special_tokens=True)
|
|
|
# Close any Gradio apps that are still running before starting a new one
# (presumably to free a port left busy by an earlier run — confirm if needed).
gr.close_all()

# Single-image-in, single-text-out UI around the captioning function.
# The labels, title and description are user-facing strings.
app = gr.Interface(
    fn=captioning,
    inputs=[gr.Image(label="Laita tähä joku kuva", type="pil")],
    outputs=[gr.Textbox(label="Mitä näkyy?")],
    title="Harzan kuvan selitys aplikaatio",
    description="Harzan ihme aplikaatio kertomaan mitä kuvassa on",
    allow_flagging="never",
)

app.launch()

# Runs once launch() has returned; closes whatever Gradio apps are still open.
gr.close_all()