Update app.py
Browse files
app.py
CHANGED
@@ -230,9 +230,6 @@ def clear_response(history):
|
|
230 |
return history, question
|
231 |
|
232 |
|
233 |
-
# def print_like_dislike(x: gr.LikeData):
|
234 |
-
# print(x.index, x.value, x.liked)
|
235 |
-
|
236 |
|
237 |
def add_message(history, message):
|
238 |
# history=[]
|
@@ -259,19 +256,14 @@ def add_message(history, message):
|
|
259 |
history.append((message["text"], None))
|
260 |
return history, gr.MultimodalTextbox(value=None, interactive=False)
|
261 |
else:
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
if message["text"] is not None:
|
266 |
history.append((message["text"], None))
|
267 |
-
|
268 |
-
return history, gr.MultimodalTextbox(value=None, interactive=False)
|
269 |
-
|
270 |
-
# if message["text"] is not None:
|
271 |
-
# history.append((message["text"], None))
|
272 |
|
273 |
-
|
274 |
-
|
275 |
|
276 |
@spaces.GPU
|
277 |
def bot(history, temperature, top_p, max_output_tokens):
|
@@ -285,6 +277,7 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
285 |
text_this_term = ""
|
286 |
# import pdb;pdb.set_trace()
|
287 |
num_new_images = 0
|
|
|
288 |
for i, message in enumerate(history[:-1]):
|
289 |
if type(message[0]) is tuple:
|
290 |
# print("### message[0]",message[0])
|
@@ -294,7 +287,6 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
294 |
# else:
|
295 |
images_this_term.append(message[0][0])
|
296 |
if is_valid_video_filename(message[0][0]):
|
297 |
-
# Video input is not accepted
|
298 |
raise ValueError("Video is not supported")
|
299 |
num_new_images += our_chatbot.num_frames
|
300 |
elif is_valid_image_filename(message[0][0]):
|
@@ -374,7 +366,7 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
374 |
input_ids = tokenizer_image_token(
|
375 |
prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
|
376 |
).unsqueeze(0).to(our_chatbot.model.device)
|
377 |
-
|
378 |
stop_str = (
|
379 |
our_chatbot.conversation.sep
|
380 |
if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
|
@@ -384,9 +376,7 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
384 |
stopping_criteria = KeywordsStoppingCriteria(
|
385 |
keywords, our_chatbot.tokenizer, input_ids
|
386 |
)
|
387 |
-
|
388 |
-
# our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
|
389 |
-
# )
|
390 |
streamer = TextIteratorStreamer(
|
391 |
our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
|
392 |
)
|
@@ -394,22 +384,6 @@ def bot(history, temperature, top_p, max_output_tokens):
|
|
394 |
print(input_ids.device)
|
395 |
print(image_tensor.device)
|
396 |
|
397 |
-
# with torch.inference_mode():
|
398 |
-
# output_ids = our_chatbot.model.generate(
|
399 |
-
# input_ids,
|
400 |
-
# images=image_tensor,
|
401 |
-
# do_sample=True,
|
402 |
-
# temperature=0.7,
|
403 |
-
# top_p=1.0,
|
404 |
-
# max_new_tokens=4096,
|
405 |
-
# streamer=streamer,
|
406 |
-
# use_cache=False,
|
407 |
-
# stopping_criteria=[stopping_criteria],
|
408 |
-
# )
|
409 |
-
|
410 |
-
# outputs = our_chatbot.tokenizer.decode(output_ids[0]).strip()
|
411 |
-
# if outputs.endswith(stop_str):
|
412 |
-
# outputs = outputs[: -len(stop_str)]
|
413 |
# our_chatbot.conversation.messages[-1][-1] = outputs
|
414 |
|
415 |
# history[-1] = [text, outputs]
|
|
|
230 |
return history, question
|
231 |
|
232 |
|
|
|
|
|
|
|
233 |
|
234 |
def add_message(history, message):
|
235 |
# history=[]
|
|
|
256 |
history.append((message["text"], None))
|
257 |
return history, gr.MultimodalTextbox(value=None, interactive=False)
|
258 |
else:
|
259 |
+
for x in message["files"]:
|
260 |
+
history.append(((x,), None))
|
261 |
+
chat_image_num += len(message["files"])
|
262 |
if message["text"] is not None:
|
263 |
history.append((message["text"], None))
|
|
|
|
|
|
|
|
|
|
|
264 |
|
265 |
+
return None, gr.MultimodalTextbox(value=None, interactive=False)
|
266 |
+
|
267 |
|
268 |
@spaces.GPU
|
269 |
def bot(history, temperature, top_p, max_output_tokens):
|
|
|
277 |
text_this_term = ""
|
278 |
# import pdb;pdb.set_trace()
|
279 |
num_new_images = 0
|
280 |
+
|
281 |
for i, message in enumerate(history[:-1]):
|
282 |
if type(message[0]) is tuple:
|
283 |
# print("### message[0]",message[0])
|
|
|
287 |
# else:
|
288 |
images_this_term.append(message[0][0])
|
289 |
if is_valid_video_filename(message[0][0]):
|
|
|
290 |
raise ValueError("Video is not supported")
|
291 |
num_new_images += our_chatbot.num_frames
|
292 |
elif is_valid_image_filename(message[0][0]):
|
|
|
366 |
input_ids = tokenizer_image_token(
|
367 |
prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
|
368 |
).unsqueeze(0).to(our_chatbot.model.device)
|
369 |
+
|
370 |
stop_str = (
|
371 |
our_chatbot.conversation.sep
|
372 |
if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
|
|
|
376 |
stopping_criteria = KeywordsStoppingCriteria(
|
377 |
keywords, our_chatbot.tokenizer, input_ids
|
378 |
)
|
379 |
+
|
|
|
|
|
380 |
streamer = TextIteratorStreamer(
|
381 |
our_chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
|
382 |
)
|
|
|
384 |
print(input_ids.device)
|
385 |
print(image_tensor.device)
|
386 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
# our_chatbot.conversation.messages[-1][-1] = outputs
|
388 |
|
389 |
# history[-1] = [text, outputs]
|