martinsinnona committed on
Commit
19dfe9f
1 Parent(s): 2ef02d0
Files changed (2) hide show
  1. app.py +30 -4
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,7 +1,33 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, Pix2StructForConditionalGeneration
3
+ import torch
4
+ from PIL import Image
5
 
6
# The processor (image preprocessing + tokenizer) comes from the base MatCha
# checkpoint; the model weights are loaded from a separate checkpoint below.
processor = AutoProcessor.from_pretrained("google/matcha-base")
# This file only ever calls the processor with images (no text prompt),
# so VQA mode is switched off on the image processor.
processor.image_processor.is_vqa = False

# Load the weights, move them to the GPU when one is available (CPU
# otherwise), and switch the module to evaluation mode.
model = Pix2StructForConditionalGeneration.from_pretrained("martinsinnona/visdecode_B")
model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
11
 
12
def generate_caption(image: "Image.Image | None") -> str:
    """Generate text for an image with the module-level Pix2Struct model.

    Args:
        image: PIL image supplied by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an image.

    Returns:
        The decoded text of the first generated sequence with special tokens
        removed, or an empty string when no image was provided.
    """
    # Gradio hands over None when nothing was uploaded; the processor cannot
    # handle that, so answer with empty text instead of raising.
    if image is None:
        return ""

    # Place the inputs on whatever device the model actually lives on rather
    # than re-deriving it from CUDA availability on every call.
    inputs = processor(images=image, return_tensors="pt", max_patches=1024).to(model.device)

    generated_ids = model.generate(
        flattened_patches=inputs.flattened_patches,
        attention_mask=inputs.attention_mask,
        max_length=600,
    )

    # A single image goes in, so only the first decoded sequence is returned.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
21
+
22
# Gradio UI: one uploaded image (delivered to the callback as a PIL image)
# in, the generated text out.
demo = gr.Interface(
    generate_caption,
    gr.Image(type="pil"),
    "text",
    title="Image to Text Generator",
    description="Upload an image and get a generated caption.",
)

# Start the web server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(share=True)
requirements.txt CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ torch
3
+ Pillow