Spaces:

martinsinnona
/

visdecode

Running

martinsinnona committed on Jun 27

Commit

19dfe9f

•

1 Parent(s): 2ef02d0

a

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,7 +1,33 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch(share=True)

 import gradio as gr
+from transformers import AutoProcessor, Pix2StructForConditionalGeneration
+import torch
+from PIL import Image
+# Processor comes from the base MatCha checkpoint.
+processor = AutoProcessor.from_pretrained("google/matcha-base")
+# NOTE(review): presumably turns off the VQA header-text rendering in the
+# Pix2Struct image processor - confirm against the transformers docs.
+processor.image_processor.is_vqa = False
+# Load the visdecode_B weights, then move them to the GPU when one is
+# available (CPU otherwise).
+model = Pix2StructForConditionalGeneration.from_pretrained("martinsinnona/visdecode_B")
+model = model.to("cuda" if torch.cuda.is_available() else "cpu")
+# Evaluation mode: this app only runs generation, never training.
+model.eval()
+def generate_caption(image):
+    """Generate text for an uploaded image with the module-level model.
+
+    Args:
+        image: PIL image supplied by the Gradio input component, or None
+            when the form is submitted without an upload.
+
+    Returns:
+        The decoded text of the first generated sequence, with special
+        tokens stripped.
+
+    Raises:
+        gr.Error: If no image was provided.
+    """
+    # Gradio passes None for an empty image input; surface a readable
+    # message instead of an opaque failure inside the processor.
+    if image is None:
+        raise gr.Error("Please upload an image first.")
+    # Place the inputs on the device the model actually lives on rather
+    # than re-deriving it from CUDA availability on every call.
+    inputs = processor(images=image, return_tensors="pt", max_patches=1024).to(model.device)
+    generated_ids = model.generate(
+        flattened_patches=inputs.flattened_patches,
+        attention_mask=inputs.attention_mask,
+        max_length=600,
+    )
+    # A single image goes in, so only the first decoded sequence is returned.
+    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+# Build the Gradio UI: one PIL image input wired to generate_caption, with
+# the returned string shown in a text output.
+demo = gr.Interface(
+    fn=generate_caption,
+    inputs=gr.Image(type="pil"),
+    outputs="text",
+    title="Image to Text Generator",
+    description="Upload an image and get a generated caption."
+)
+# Start the server only when this file is run as a script, not on import.
+# NOTE(review): share=True asks for a public share link; presumably not
+# needed when the app is hosted on Spaces - confirm.
+if __name__ == "__main__":
+    demo.launch(share=True)

requirements.txt CHANGED Viewed

+transformers
+torch
+Pillow