Spaces:

Keyven
/

Multimodal-Vision-Insight

Runtime error

App Files Community

Keyven committed on Oct 2, 2023

Commit

a076c9d

•

1 Parent(s): 6ec5f0d

Update perf.

Browse files

Files changed (1) hide show

app.py +18 -33

app.py CHANGED Viewed

@@ -119,25 +119,6 @@ def handle_regeneration(chatbot, task_history):
     print("After:", task_history, chatbot)
     return get_chat_response(chatbot, task_history)
-chatbot = []
-task_history = []
-def main_function(text, image):
-    global chatbot, task_history
-    if text:
-        chatbot, task_history = handle_text_input(chatbot, task_history, text)
-    if image:
-        chatbot, task_history = handle_file_upload(chatbot, task_history, image)
-    chatbot, task_history = get_chat_response(chatbot, task_history)
-    formatted_response = chatbot[-1][1]  # Get the latest response from the chatbot
-    return formatted_response
-def clear_history_fn():
-    global chatbot, task_history
-    chatbot.clear()
-    task_history.clear()
-    return "History cleared."
 # Custom CSS
 css = '''
     .gradio-container {
@@ -152,21 +133,25 @@ with gr.Blocks(css=css) as demo:
         "Special thanks to [@Artificialguybr](https://twitter.com/artificialguybr) for the inspiration from his code.\n"
         "### Qwen-VL: A Multimodal Large Vision Language Model by Alibaba Cloud\n"
     )
-    chat_interface = gr.Interface(
-        fn=main_function,
-        inputs=[
-            gr.components.Textbox(lines=2, label='Input'),  # Update here
-            gr.components.Image(type='filepath', label='Upload Image')  # Update here
-        ],
-        outputs='text',
-        live=True,
-        layout='vertical',
-        theme=None,
-        css=css
-    ).launch()
-    gr.Markdown("### Key Features:\n- **Strong Performance**: Surpasses existing LVLMs on multiple English benchmarks including Zero-shot Captioning and VQA.\n- **Multi-lingual Support**: Supports English, Chinese, and multi-lingual conversation.\n- **High Resolution**: Utilizes 448*448 resolution for fine-grained recognition and understanding.")
-    demo.add_button("🧹 Clear History", clear_history_fn)
 demo.launch(share=True)

     print("After:", task_history, chatbot)
     return get_chat_response(chatbot, task_history)
 # Custom CSS
 css = '''
     .gradio-container {
         "Special thanks to [@Artificialguybr](https://twitter.com/artificialguybr) for the inspiration from his code.\n"
         "### Qwen-VL: A Multimodal Large Vision Language Model by Alibaba Cloud\n"
     )
+    chatbot = gr.Chatbot(label='Qwen-VL-Chat', elem_classes="control-height", height=520)
+    query = gr.Textbox(lines=2, label='Input')
+    task_history = gr.State([])
+    with gr.Row():
+        upload_btn = gr.UploadButton("📁 Upload", file_types=["image"])
+        submit_btn = gr.Button("🚀 Submit")
+        regen_btn = gr.Button("🤔️ Regenerate")
+        clear_btn = gr.Button("🧹 Clear History")
+    gr.Markdown("### Key Features:\n- **Strong Performance**: Surpasses existing LVLMs on multiple English benchmarks including Zero-shot Captioning and VQA.\n- **Multi-lingual Support**: Supports English, Chinese, and multi-lingual conversation.\n- **High Resolution**: Utilizes 448*448 resolution for fine-grained recognition and understanding.")
+    submit_btn.click(handle_text_input, [chatbot, task_history, query], [chatbot, task_history]).then(
+        get_chat_response, [chatbot, task_history], [chatbot], show_progress=True
+    )
+    submit_btn.click(clear_input, [], [query])
+    clear_btn.click(clear_history, [task_history], [chatbot], show_progress=True)
+    regen_btn.click(handle_regeneration, [chatbot, task_history], [chatbot], show_progress=True)
+    upload_btn.upload(handle_file_upload, [chatbot, task_history, upload_btn], [chatbot, task_history], show_progress=True)
 demo.launch(share=True)