Commit 43147aa
NGUYEN, Xuan Phi committed
1 Parent(s): 2997e80
update
multipurpose_chatbot/demos/multimodal_chat_interface.py
CHANGED
@@ -944,8 +944,8 @@ def vision_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(f'{image_paths=}')
-    print(full_prompt)
+    # print(f'{image_paths=}')
+    # print(full_prompt)
     outputs = None
     response = None
     num_tokens = -1
@@ -995,7 +995,7 @@ def doc_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(full_prompt)
+    # print(full_prompt)
     outputs = None
     response = None
     num_tokens = -1
@@ -1050,8 +1050,8 @@ def vision_doc_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(full_prompt)
-    print(f'{image_paths=}')
+    # print(full_prompt)
+    # print(f'{image_paths=}')
     outputs = None
     response = None
     num_tokens = -1
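This change silences the per-request debug prints by commenting them out. An alternative pattern (not what the commit does) is to gate such prints behind an environment variable so they can be re-enabled without editing the file; the MCB_DEBUG name and debug_print helper below are hypothetical, a minimal sketch only:

import os

# Hypothetical switch: run with MCB_DEBUG=1 to re-enable the prints.
DEBUG = os.environ.get("MCB_DEBUG", "0") == "1"

def debug_print(*args, **kwargs):
    # Forward to print only when debugging is enabled.
    if DEBUG:
        print(*args, **kwargs)

# The handlers above would then call, e.g.:
#   debug_print(f'{image_paths=}')
#   debug_print(full_prompt)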
multipurpose_chatbot/engines/transformers_engine.py
CHANGED
@@ -1,8 +1,13 @@
 
 try:
     import spaces
+    def maybe_spaces_gpu(fn):
+        fn = spaces.GPU(fn)
+        return fn
 except ModuleNotFoundError:
     print(f'Cannot import hf `spaces` with `import spaces`.')
+    def maybe_spaces_gpu(fn):
+        return fn
 import os
 import numpy as np
 import argparse
@@ -541,7 +546,7 @@ class TransformersEngine(BaseEngine):
         if message_safety is not None:
             raise gr.Error(message_safety)
 
-    @spaces.GPU
+    @maybe_spaces_gpu
     def generate_yield_string(self, prompt, temperature, max_tokens, stop_strings: Optional[Tuple[str]] = None, **kwargs):
 
         # ! MUST PUT INSIDE torch.no_grad() otherwise it will overflow OOM
@@ -558,6 +563,12 @@ class TransformersEngine(BaseEngine):
 
         with torch.no_grad():
             inputs = self.tokenizer(prompt, return_tensors='pt')
+            # whether to print the full prompts
+            retok_full_prompt = self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=False)
+            print(f"retok_full_prompt:\n{retok_full_prompt}>>>>")
+            begin_bos = inputs.input_ids[0][0] == self.tokenizer.bos_token_id
+            print(f'begin_bos: {begin_bos}')
+
             num_tokens = inputs.input_ids.size(1)
 
             inputs = inputs.to(self._model.device)
@@ -574,7 +585,7 @@ class TransformersEngine(BaseEngine):
             response = None
             for index, token in enumerate(generator):
                 out_tokens.extend(token.tolist())
-                response = self.tokenizer.decode(out_tokens)
+                response = self.tokenizer.decode(out_tokens, skip_special_tokens=True)
                 if "<|im_start|>assistant\n" in response:
                     response = response.split("<|im_start|>assistant\n")[-1]
                 num_tokens += 1
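The try/except block added at the top of transformers_engine.py turns the spaces.GPU decorator into an optional dependency: when the hf spaces package is importable (i.e., the app is running on a ZeroGPU Space), maybe_spaces_gpu wraps the function with spaces.GPU so generation is dispatched to a GPU worker; otherwise it returns the function unchanged and the engine still runs locally. A self-contained sketch of the same pattern (the Engine class and generate method below are illustrative, not from this repo):

try:
    import spaces  # only available inside a Hugging Face Space

    def maybe_spaces_gpu(fn):
        # On a ZeroGPU Space: schedule calls to fn on a GPU worker.
        return spaces.GPU(fn)
except ModuleNotFoundError:
    def maybe_spaces_gpu(fn):
        # Elsewhere: plain no-op decorator.
        return fn

class Engine:
    @maybe_spaces_gpu
    def generate(self, prompt: str) -> str:
        # GPU-bound work would live here; the decoration is identical
        # whether or not `spaces` is installed.
        return prompt.upper()

The other functional change, passing skip_special_tokens=True to tokenizer.decode in the streaming loop, drops special tokens such as <|im_start|> from the decoded text rather than relying only on the string split that follows.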