Spaces:

Salesforce
/

BLIP2

Runtime error

App Files Community

Dongxu Li committed on Feb 3, 2023

Commit

1365f85

•

2 Parent(s): 202fdfa 2f872f9

Merge branch 'main' of https://huggingface.co/spaces/Salesforce/BLIP2

Browse files

Files changed (3) hide show

app.py +18 -12
flower.jpg +0 -0
forbidden_city.webp +0 -0

app.py CHANGED Viewed

@@ -126,14 +126,16 @@ def inference_caption(
 title = """<h1 align="center">BLIP-2</h1>"""
-description = """Gradio demo for BLIP-2, a multimodal chatbot from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
 endpoint = Endpoint()
 examples = [
     ["house.png", "How could someone get out of the house?"],
     # [
     #     "sunset.png",
     #     "Write a romantic message that goes along this photo.",
@@ -162,30 +164,31 @@ with gr.Blocks() as iface:
                     minimum=0.5,
                     maximum=1.0,
                     value=0.8,
                     interactive=True,
-                    label="Temperature",
                 )
                 len_penalty = gr.Slider(
-                    minimum=-2.0,
                     maximum=2.0,
                     value=1.0,
-                    step=0.5,
                     interactive=True,
-                    label="Length Penalty",
                 )
                 rep_penalty = gr.Slider(
                     minimum=1.0,
-                    maximum=20.0,
-                    value=10.0,
                     step=0.5,
                     interactive=True,
-                    label="Repeat Penalty",
                 )
             with gr.Row():
-                caption_output = gr.Textbox(lines=2, label="Caption Output")
                 caption_button = gr.Button(
                     value="Caption it!", interactive=True, variant="primary"
                 )
@@ -205,7 +208,7 @@ with gr.Blocks() as iface:
             chat_input = gr.Textbox(lines=2, label="Chat Input")
             with gr.Row():
-                chatbot = gr.Chatbot()
                 image_input.change(lambda: (None, "", "", []), [], [chatbot, chat_input, caption_output, state])
             with gr.Row():
@@ -237,7 +240,10 @@ with gr.Blocks() as iface:
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, chat_input],
     )
-iface.queue(concurrency_count=1, api_open=False, max_size=20)
 iface.launch(enable_queue=True)

 title = """<h1 align="center">BLIP-2</h1>"""
+description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
 endpoint = Endpoint()
 examples = [
     ["house.png", "How could someone get out of the house?"],
+    ["flower.jpg", "Question: What is this flower and where is it's origin? Answer:"],
+    ["forbidden_city.webp", "In what dynasties was this place build?"],
     # [
     #     "sunset.png",
     #     "Write a romantic message that goes along this photo.",
                     minimum=0.5,
                     maximum=1.0,
                     value=0.8,
+                    step=0.1,
                     interactive=True,
+                    label="Temperature (used with nucleus sampling)",
                 )
                 len_penalty = gr.Slider(
+                    minimum=-1.0,
                     maximum=2.0,
                     value=1.0,
+                    step=0.2,
                     interactive=True,
+                    label="Length Penalty (set to larger for longer sequence, used with beam search)",
                 )
                 rep_penalty = gr.Slider(
                     minimum=1.0,
+                    maximum=5.0,
+                    value=1.5,
                     step=0.5,
                     interactive=True,
+                    label="Repeat Penalty (larger value prevents repetition)",
                 )
             with gr.Row():
+                caption_output = gr.Textbox(lines=2, label="Caption Output (from OPT)")
                 caption_button = gr.Button(
                     value="Caption it!", interactive=True, variant="primary"
                 )
             chat_input = gr.Textbox(lines=2, label="Chat Input")
             with gr.Row():
+                chatbot = gr.Chatbot(label="Chat Output (from FlanT5)")
                 image_input.change(lambda: (None, "", "", []), [], [chatbot, chat_input, caption_output, state])
             with gr.Row():
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, chat_input],
+#         outputs=[chatbot, state],
+#         run_on_click=True,
+#         fn = inference_chat,
     )
+iface.queue(concurrency_count=1, api_open=False, max_size=10)
 iface.launch(enable_queue=True)

flower.jpg ADDED Viewed

forbidden_city.webp ADDED Viewed