DongfuJiang
committed on
Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/Mantis
Browse files
- README.md +2 -1
- app.py +30 -4
- examples/image15.jpg +0 -0
README.md
CHANGED
@@ -8,6 +8,7 @@ sdk_version: 4.24.0
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
+
short_description: Multimodal Language Model
|
12 |
---
|
13 |
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -3,6 +3,7 @@ import spaces
|
|
3 |
import os
|
4 |
import time
|
5 |
from PIL import Image
|
|
|
6 |
from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
|
7 |
from typing import List
|
8 |
processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-8B-siglip-llama3")
|
@@ -48,13 +49,15 @@ def get_chat_history(history):
|
|
48 |
chat_history.append({"role": "assistant", "text": ""})
|
49 |
return chat_history
|
50 |
|
|
|
51 |
def get_chat_images(history):
|
52 |
images = []
|
53 |
for message in history:
|
54 |
if isinstance(message[0], tuple):
|
55 |
images.extend(message[0])
|
56 |
return images
|
57 |
-
|
|
|
58 |
def bot(history):
|
59 |
print(history)
|
60 |
cur_messages = {"text": "", "images": []}
|
@@ -93,6 +96,8 @@ def bot(history):
|
|
93 |
history[-1][1] = _output
|
94 |
time.sleep(0.05)
|
95 |
yield history
|
|
|
|
|
96 |
|
97 |
def build_demo():
|
98 |
with gr.Blocks() as demo:
|
@@ -118,14 +123,35 @@ def build_demo():
|
|
118 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
|
119 |
|
120 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
|
122 |
|
123 |
chatbot.like(print_like_dislike, None, None)
|
124 |
-
|
125 |
with gr.Row():
|
126 |
send_button = gr.Button("Send")
|
127 |
clear_button = gr.ClearButton([chatbot, chat_input])
|
128 |
-
|
129 |
send_button.click(
|
130 |
add_message, [chatbot, chat_input], [chatbot, chat_input]
|
131 |
).then(
|
@@ -136,7 +162,7 @@ def build_demo():
|
|
136 |
examples=[
|
137 |
{
|
138 |
"text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
|
139 |
-
"files": ["./examples/image10.jpg", "./examples/
|
140 |
},
|
141 |
{
|
142 |
"text": "<image> <image> <image> Which image shows a different mood of character from the others?",
|
|
|
3 |
import os
|
4 |
import time
|
5 |
from PIL import Image
|
6 |
+
import functools
|
7 |
from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
|
8 |
from typing import List
|
9 |
processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-8B-siglip-llama3")
|
|
|
49 |
chat_history.append({"role": "assistant", "text": ""})
|
50 |
return chat_history
|
51 |
|
52 |
+
|
53 |
def get_chat_images(history):
|
54 |
images = []
|
55 |
for message in history:
|
56 |
if isinstance(message[0], tuple):
|
57 |
images.extend(message[0])
|
58 |
return images
|
59 |
+
|
60 |
+
|
61 |
def bot(history):
|
62 |
print(history)
|
63 |
cur_messages = {"text": "", "images": []}
|
|
|
96 |
history[-1][1] = _output
|
97 |
time.sleep(0.05)
|
98 |
yield history
|
99 |
+
|
100 |
+
|
101 |
|
102 |
def build_demo():
|
103 |
with gr.Blocks() as demo:
|
|
|
123 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
|
124 |
|
125 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
126 |
+
|
127 |
+
"""
|
128 |
+
with gr.Accordion(label='Advanced options', open=False):
|
129 |
+
temperature = gr.Slider(
|
130 |
+
label='Temperature',
|
131 |
+
minimum=0.1,
|
132 |
+
maximum=2.0,
|
133 |
+
step=0.1,
|
134 |
+
value=0.2,
|
135 |
+
interactive=True
|
136 |
+
)
|
137 |
+
top_p = gr.Slider(
|
138 |
+
label='Top-p',
|
139 |
+
minimum=0.05,
|
140 |
+
maximum=1.0,
|
141 |
+
step=0.05,
|
142 |
+
value=1.0,
|
143 |
+
interactive=True
|
144 |
+
)
|
145 |
+
"""
|
146 |
+
|
147 |
bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
|
148 |
|
149 |
chatbot.like(print_like_dislike, None, None)
|
150 |
+
|
151 |
with gr.Row():
|
152 |
send_button = gr.Button("Send")
|
153 |
clear_button = gr.ClearButton([chatbot, chat_input])
|
154 |
+
|
155 |
send_button.click(
|
156 |
add_message, [chatbot, chat_input], [chatbot, chat_input]
|
157 |
).then(
|
|
|
162 |
examples=[
|
163 |
{
|
164 |
"text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
|
165 |
+
"files": ["./examples/image10.jpg", "./examples/image15.jpg"]
|
166 |
},
|
167 |
{
|
168 |
"text": "<image> <image> <image> Which image shows a different mood of character from the others?",
|
examples/image15.jpg
ADDED