Update app.py
app.py
CHANGED
@@ -837,7 +837,7 @@ def chat_response_stream_multiturn(
 
     full_prompt = chatml_format(message.strip(), history=history, system_prompt=system_prompt)
 
-    if len(tokenizer.encode(full_prompt
+    if len(tokenizer.encode(full_prompt)) >= 4050:
         raise gr.Error(f"Conversation or prompt is too long, please clear the chatbox or try shorter input.")
 
     sampling_params = SamplingParams(
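The new guard counts tokens before the prompt reaches the model, capping the full ChatML-formatted conversation at 4050 tokens (presumably headroom below a 4096-token context window; the diff itself does not state the model's limit). A minimal sketch of the same pattern, assuming a Hugging Face-style tokenizer whose encode() returns a list of token ids; "gpt2" is a stand-in, not the app's model:

# Illustrative sketch of the token-length guard added above, not the app's exact code.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in tokenizer

MAX_PROMPT_TOKENS = 4050  # mirrors the threshold in the diff

def guard_prompt_length(full_prompt: str) -> None:
    # encode() returns token ids, so its length is the prompt's token count
    if len(tokenizer.encode(full_prompt)) >= MAX_PROMPT_TOKENS:
        raise ValueError("Conversation or prompt is too long, please shorten the input.")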
@@ -942,7 +942,7 @@ def generate_free_form_stream(
     if message_safety is not None:
         raise gr.Error(message_safety)
 
-    if len(tokenizer.encode(message
+    if len(tokenizer.encode(message)) >= 4050:
         raise gr.Error(f"Prompt is too long!")
 
     cur_out = None
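This is the same guard as in chat_response_stream_multiturn, applied to the raw free-form message. Since the commit repeats the check in four places with varying limits and error messages, it could be factored into one helper; a hedged sketch, where check_token_budget is a hypothetical name and not a function in app.py:

import gradio as gr

def check_token_budget(tokenizer, text: str, limit: int, message: str) -> None:
    # Hypothetical helper mirroring the guards in this commit: reject any
    # text whose token count reaches the given limit with a user-facing error.
    if tokenizer is None or len(tokenizer.encode(text)) >= limit:
        raise gr.Error(message)

# e.g. check_token_budget(tokenizer, message, 4050, "Prompt is too long!")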
@@ -1173,7 +1173,7 @@ def validate_file_item(filename, index, item: Dict[str, str]):
         raise gr.Error(f'Prompt {index} invalid: {message_safety}')
 
     tokenizer = llm.get_tokenizer() if llm is not None else None
-    if tokenizer is None or len(tokenizer.encode(message
+    if tokenizer is None or len(tokenizer.encode(message)) >= BATCH_INFER_MAX_PROMPT_TOKENS:
         raise gr.Error(f"Prompt {index} too long, should be less than {BATCH_INFER_MAX_PROMPT_TOKENS} tokens")
 
 
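Here the tokenizer comes from the vLLM engine via LLM.get_tokenizer() (the call the diff itself uses), and a missing tokenizer is treated the same as an overlong prompt. A sketch of that per-item validation; validate_prompt_tokens is an illustrative name, and the value of BATCH_INFER_MAX_PROMPT_TOKENS is defined elsewhere in app.py and not shown in this diff:

from typing import Optional

def validate_prompt_tokens(llm, prompt: str, limit: int) -> Optional[str]:
    # Fetch the tokenizer from the vLLM engine; if it is unavailable or the
    # prompt reaches the token limit, return an error message (else None).
    tokenizer = llm.get_tokenizer() if llm is not None else None
    if tokenizer is None or len(tokenizer.encode(prompt)) >= limit:
        return f"Prompt too long, should be less than {limit} tokens"
    return None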
@@ -1299,7 +1299,7 @@ def batch_inference(
     ]
     print(f'{full_prompts[0]}\n')
 
-    if any(len(tokenizer.encode(x
+    if any(len(tokenizer.encode(x)) >= 4090 for x in full_prompts):
         raise gr.Error(f"Some prompt is too long!")
 
     stop_seq = list(set(['<s>', '</s>', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]'] + stop_strings))
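The batch path only reports that some prompt is too long. An illustrative variation (not the app's code) that scans all prompts and returns the offending indices, which would let the any() check raise a more specific error:

def find_overlong_prompts(tokenizer, prompts, limit: int = 4090):
    # Indices of prompts whose token count meets or exceeds the limit;
    # an empty list means every prompt fits.
    return [i for i, p in enumerate(prompts) if len(tokenizer.encode(p)) >= limit]

# bad = find_overlong_prompts(tokenizer, full_prompts)
# if bad:
#     raise gr.Error(f"Prompts {bad} are too long!")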