Spaces:
Runtime error
Runtime error
File size: 5,226 Bytes
5dedc73 83a0d99 38fe132 20c8db3 d0d202a 167b9ae 38fe132 5dedc73 66fc2d3 5dedc73 38fe132 16a3b53 b031b71 16a3b53 b031b71 16a3b53 38fe132 5dedc73 38fe132 5dedc73 38fe132 5dedc73 167b9ae 5dedc73 167b9ae 5dedc73 167b9ae 5dedc73 38fe132 5dedc73 16a3b53 5dedc73 16a3b53 38fe132 5dedc73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
from lmdeploy.serve.gradio.turbomind_coupled import *
from lmdeploy.messages import TurbomindEngineConfig
from lmdeploy import ChatTemplateConfig
chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)#, model_format='awq')
model_path = 'internlm/internlm2-math-7b'
InterFace.async_engine = AsyncEngine(
model_path=model_path,
backend='turbomind',
backend_config=backend_config,
chat_template_config=chat_template,
tp=1)
async def reset_local_func(instruction_txtbox: gr.Textbox,
state_chatbot: Sequence, session_id: int):
"""reset the session.
Args:
instruction_txtbox (str): user's prompt
state_chatbot (Sequence): the chatting history
session_id (int): the session id
"""
state_chatbot = []
# end the session
with InterFace.lock:
InterFace.global_session_id += 1
session_id = InterFace.global_session_id
return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
reset_btn: gr.Button, session_id: int):
"""stop the session.
Args:
instruction_txtbox (str): user's prompt
state_chatbot (Sequence): the chatting history
cancel_btn (gr.Button): the cancel button
reset_btn (gr.Button): the reset button
session_id (int): the session id
"""
yield (state_chatbot, disable_btn, disable_btn, session_id)
InterFace.async_engine.stop_session(session_id)
# pytorch backend does not support resume chat history now
if InterFace.async_engine.backend == 'pytorch':
yield (state_chatbot, disable_btn, enable_btn, session_id)
else:
with InterFace.lock:
InterFace.global_session_id += 1
session_id = InterFace.global_session_id
messages = []
for qa in state_chatbot:
messages.append(dict(role='user', content=qa[0]))
if qa[1] is not None:
messages.append(dict(role='assistant', content=qa[1]))
gen_config = GenerationConfig(max_new_tokens=0)
async for out in InterFace.async_engine.generate(messages,
session_id,
gen_config=gen_config,
stream_response=True,
sequence_start=True,
sequence_end=False):
pass
yield (state_chatbot, disable_btn, enable_btn, session_id)
with gr.Blocks(css=CSS, theme=THEME) as demo:
state_chatbot = gr.State([])
state_session_id = gr.State(0)
with gr.Column(elem_id='container'):
gr.Markdown('## LMDeploy Playground')
chatbot = gr.Chatbot(
elem_id='chatbot',
label=InterFace.async_engine.engine.model_name)
instruction_txtbox = gr.Textbox(
placeholder='Please input the instruction',
label='Instruction')
with gr.Row():
cancel_btn = gr.Button(value='Cancel', interactive=False)
reset_btn = gr.Button(value='Reset')
with gr.Row():
request_output_len = gr.Slider(1,
2048,
value=1024,
step=1,
label='Maximum new tokens')
top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
temperature = gr.Slider(0.01,
1.5,
value=0.01,
step=0.01,
label='Temperature')
send_event = instruction_txtbox.submit(chat_stream_local, [
instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
state_session_id, top_p, temperature, request_output_len
], [state_chatbot, chatbot, cancel_btn, reset_btn])
instruction_txtbox.submit(
lambda: gr.Textbox.update(value=''),
[],
[instruction_txtbox],
)
cancel_btn.click(
cancel_local_func,
[state_chatbot, cancel_btn, reset_btn, state_session_id],
[state_chatbot, cancel_btn, reset_btn, state_session_id],
cancels=[send_event])
reset_btn.click(reset_local_func,
[instruction_txtbox, state_chatbot, state_session_id],
[state_chatbot, chatbot, instruction_txtbox, state_session_id],
cancels=[send_event])
def init():
with InterFace.lock:
InterFace.global_session_id += 1
new_session_id = InterFace.global_session_id
return new_session_id
demo.load(init, inputs=None, outputs=[state_session_id])
demo.queue(concurrency_count=InterFace.async_engine.instance_num,
max_size=100).launch()
|