Update app.py
app.py
CHANGED
@@ -837,7 +837,7 @@ def chat_response_stream_multiturn(
 
     full_prompt = chatml_format(message.strip(), history=history, system_prompt=system_prompt)
 
-    if len(tokenizer.encode(full_prompt
+    if len(tokenizer.encode(full_prompt)) >= 4050:
         raise gr.Error(f"Conversation or prompt is too long, please clear the chatbox or try shorter input.")
 
     sampling_params = SamplingParams(
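The new guard counts tokens before the prompt reaches the model, capping the full ChatML-formatted conversation at 4050 tokens (presumably headroom below a 4096-token context window; the diff itself does not state the model's limit). A minimal sketch of the same pattern, assuming a Hugging Face-style tokenizer whose encode() returns a list of token ids; "gpt2" is a stand-in, not the app's model:

# Illustrative sketch of the token-length guard added above, not the app's exact code.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in tokenizer

MAX_PROMPT_TOKENS = 4050  # mirrors the threshold in the diff

def guard_prompt_length(full_prompt: str) -> None:
    # encode() returns token ids, so its length is the prompt's token count
    if len(tokenizer.encode(full_prompt)) >= MAX_PROMPT_TOKENS:
        raise ValueError("Conversation or prompt is too long, please shorten the input.")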
@@ -942,7 +942,7 @@ def generate_free_form_stream(
     if message_safety is not None:
         raise gr.Error(message_safety)
 
-    if len(tokenizer.encode(message
+    if len(tokenizer.encode(message)) >= 4050:
         raise gr.Error(f"Prompt is too long!")
 
     cur_out = None
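This is the same guard as in chat_response_stream_multiturn, applied to the raw free-form message. Since the commit repeats the check in four places with varying limits and error messages, it could be factored into one helper; a hedged sketch, where check_token_budget is a hypothetical name and not a function in app.py:

import gradio as gr

def check_token_budget(tokenizer, text: str, limit: int, message: str) -> None:
    # Hypothetical helper mirroring the guards in this commit: reject any
    # text whose token count reaches the given limit with a user-facing error.
    if tokenizer is None or len(tokenizer.encode(text)) >= limit:
        raise gr.Error(message)

# e.g. check_token_budget(tokenizer, message, 4050, "Prompt is too long!")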
@@ -1173,7 +1173,7 @@ def validate_file_item(filename, index, item: Dict[str, str]):
         raise gr.Error(f'Prompt {index} invalid: {message_safety}')
 
     tokenizer = llm.get_tokenizer() if llm is not None else None
-    if tokenizer is None or len(tokenizer.encode(message
+    if tokenizer is None or len(tokenizer.encode(message)) >= BATCH_INFER_MAX_PROMPT_TOKENS:
         raise gr.Error(f"Prompt {index} too long, should be less than {BATCH_INFER_MAX_PROMPT_TOKENS} tokens")
 
 
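Here the tokenizer comes from the vLLM engine via LLM.get_tokenizer() (the call the diff itself uses), and a missing tokenizer is treated the same as an overlong prompt. A sketch of that per-item validation; validate_prompt_tokens is an illustrative name, and the value of BATCH_INFER_MAX_PROMPT_TOKENS is defined elsewhere in app.py and not shown in this diff:

from typing import Optional

def validate_prompt_tokens(llm, prompt: str, limit: int) -> Optional[str]:
    # Fetch the tokenizer from the vLLM engine; if it is unavailable or the
    # prompt reaches the token limit, return an error message (else None).
    tokenizer = llm.get_tokenizer() if llm is not None else None
    if tokenizer is None or len(tokenizer.encode(prompt)) >= limit:
        return f"Prompt too long, should be less than {limit} tokens"
    return None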
@@ -1299,7 +1299,7 @@ def batch_inference(
     ]
     print(f'{full_prompts[0]}\n')
 
-    if any(len(tokenizer.encode(x
+    if any(len(tokenizer.encode(x)) >= 4090 for x in full_prompts):
         raise gr.Error(f"Some prompt is too long!")
 
     stop_seq = list(set(['<s>', '</s>', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'] + stop_strings))
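The batch path only reports that some prompt is too long. An illustrative variation (not the app's code) that scans all prompts and returns the offending indices, which would let the any() check raise a more specific error:

def find_overlong_prompts(tokenizer, prompts, limit: int = 4090):
    # Indices of prompts whose token count meets or exceeds the limit;
    # an empty list means every prompt fits.
    return [i for i, p in enumerate(prompts) if len(tokenizer.encode(p)) >= limit]

# bad = find_overlong_prompts(tokenizer, full_prompts)
# if bad:
#     raise gr.Error(f"Prompts {bad} are too long!")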