JunnanLi committed on
Commit 4214d1d
1 Parent(s): 01775c7

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -21,9 +21,9 @@ transform = transforms.Compose([
     transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
     ])
 
-model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth'
+model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_large_caption.pth'
 
-model = blip_decoder(pretrained=model_url, image_size=384, vit='base')
+model = blip_decoder(pretrained=model_url, image_size=384, vit='large')
 model.eval()
 model = model.to(device)
 
@@ -61,12 +61,12 @@ def inference(raw_image, model_n, question, strategy):
     answer = model_vq(image_vq, question, train=False, inference='generate')
     return 'answer: '+answer[0]
 
-inputs = [gr.inputs.Image(type='pil'),gr.inputs.Radio(choices=['Image Captioning',"Visual Question Answering"], type="value", default="Image Captioning", label="Model"),"textbox",gr.inputs.Radio(choices=['Beam search','Nucleus sampling'], type="value", default="Nucleus sampling", label="Strategy")]
+inputs = [gr.inputs.Image(type='pil'),gr.inputs.Radio(choices=['Image Captioning',"Visual Question Answering"], type="value", default="Image Captioning", label="Model"),"textbox",gr.inputs.Radio(choices=['Beam search','Nucleus sampling'], type="value", default="Nucleus sampling", label="Caption Decoding Strategy")]
 outputs = gr.outputs.Textbox(label="Output")
 
 title = "BLIP"
 
-description = "Gradio demo for BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation by Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
+description = "Gradio demo for BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation (Salesforce Research). To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
 
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation</a> | <a href='https://github.com/salesforce/BLIP' target='_blank'>Github Repo</a></p>"
 
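
For context, below is a minimal sketch of how the changed lines sit in app.py after this commit. It is not the full file: the VQA branch, the examples list, and the article string are elided; the Resize/ToTensor transforms and the generate() arguments are assumed from the BLIP repo's caption demo; and the gr.inputs/gr.outputs calls follow the legacy Gradio 2.x API that app.py already uses.

import torch
import gradio as gr
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from models.blip import blip_decoder  # import path assumed from the BLIP repo layout

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing: only the Normalize line appears in the diff context; Resize/ToTensor are assumed.
transform = transforms.Compose([
    transforms.Resize((384, 384), interpolation=InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
])

# This commit swaps the captioning checkpoint and backbone from ViT-B to ViT-L.
model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_large_caption.pth'
model = blip_decoder(pretrained=model_url, image_size=384, vit='large')
model.eval()
model = model.to(device)

def inference(raw_image, model_n, question, strategy):
    # Caption-only sketch; the real app.py also routes "Visual Question Answering" to a VQA model.
    image = transform(raw_image).unsqueeze(0).to(device)
    with torch.no_grad():
        if strategy == 'Beam search':
            caption = model.generate(image, sample=False, num_beams=3, max_length=20, min_length=5)
        else:
            caption = model.generate(image, sample=True, top_p=0.9, max_length=20, min_length=5)
    return 'caption: ' + caption[0]

inputs = [
    gr.inputs.Image(type='pil'),
    gr.inputs.Radio(choices=['Image Captioning', "Visual Question Answering"],
                    type="value", default="Image Captioning", label="Model"),
    "textbox",
    gr.inputs.Radio(choices=['Beam search', 'Nucleus sampling'],
                    type="value", default="Nucleus sampling",
                    label="Caption Decoding Strategy"),  # label renamed in this commit
]
outputs = gr.outputs.Textbox(label="Output")

title = "BLIP"
description = "Gradio demo for BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation (Salesforce Research). To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."

gr.Interface(inference, inputs, outputs, title=title, description=description).launch()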