# Image Captioning
# from: https://learn.deeplearning.ai/courses/open-source-models-hugging-face/lesson/12/image-captioning

from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
import gradio as gr

# Load the BLIP captioning model and its matching processor.
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")


def captioning(image):
    # Preprocess the PIL image into model-ready tensors.
    inputs = processor(image, return_tensors="pt")
    # Generate caption token ids, then decode them into text.
    output_ids = model.generate(**inputs)
    return processor.decode(output_ids[0], skip_special_tokens=True)


# Close any previously running Gradio apps before launching a new one.
gr.close_all()

app = gr.Interface(
    fn=captioning,
    inputs=[gr.Image(label="Put an image here", type="pil")],
    outputs=[gr.Textbox(label="What do you see?")],
    title="Harza's image captioning app",
    description="Harza's wonder app that tells you what is in an image",
    allow_flagging="never",
)
app.launch()
gr.close_all()
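
# --- Usage sketch (not part of the original lesson) ---
# A minimal example of calling captioning() directly on a local image,
# bypassing the Gradio UI; "example.jpg" is a hypothetical filename.
# Run this instead of app.launch() if you only want a one-off caption.
#
# image = Image.open("example.jpg")
# print(captioning(image))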