File size: 4,602 Bytes
38fe132
 
 
 
 
 
57b2f33
38fe132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fec255
38fe132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fec255
38fe132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fec255
38fe132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fec255
 
38fe132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Bootstrap: install lmdeploy, then download the model weights via shell
# scripts before building the shared inference engine.
import os,random
# NOTE(review): os.system with a fixed script path — best-effort install;
# the exit code is not checked, so a failed install surfaces later at import.
os.system('sh install_lmdeploy.sh')
import gradio as gr
# Star-import supplies InterFace, AsyncEngine, CSS, THEME, disable_btn, etc.
from lmdeploy.serve.gradio.app import *
os.system('sh download.sh')

# Single engine shared by all sessions: 4-bit internlm2-chat-20b,
# 2 concurrent instances, tensor parallelism of 1 (single GPU).
InterFace.async_engine = AsyncEngine(model_path='internlm2-chat-20b-4bits',
                                        instance_num=2,
                                        tp=1)


async def reset_local_demo(instruction_txtbox: gr.Textbox,
                           state_chatbot: gr.State, request: gr.Request):
    """Reset the session: clear the chat history and the input textbox.

    Args:
        instruction_txtbox (str): user's prompt textbox (cleared on return)
        state_chatbot (Sequence): the chatting history
        request (gr.Request): the request from a user
    """
    cleared_history = []
    empty_input = gr.Textbox.update(value='')
    # The same cleared list feeds both the gr.State and the chatbot widget.
    return (cleared_history, cleared_history, empty_input)


async def cancel_local_demo(state_chatbot: gr.State, cancel_btn: gr.Button,
                            reset_btn: gr.Button, request: gr.Request):
    """Stop the in-flight generation for this session.

    The actual abort happens via ``cancels=[send_event]`` on the click
    wiring; this handler only returns the UI state.

    Args:
        state_chatbot (Sequence): the chatting history (returned unchanged)
        cancel_btn (gr.Button): the cancel button
        reset_btn (gr.Button): the reset button
        request (gr.Request): the request from a user
    """
    # History is left untouched; both buttons are switched off.
    buttons_off = disable_btn
    return (state_chatbot, buttons_off, buttons_off)

async def chat_stream_demo(
    instruction: str,
    state_chatbot: Sequence,
    cancel_btn: gr.Button,
    reset_btn: gr.Button,
    request: gr.Request,
):
    """Stream an assistant reply from the async engine into the chatbot.

    Async generator: yields updated ``(state, chatbot, cancel_btn,
    reset_btn, summary)`` tuples as response chunks arrive.

    Args:
        instruction (str): user's prompt
        state_chatbot (Sequence): the chatting history as (user, bot) pairs
        cancel_btn (gr.Button): the cancel button (always yielded disabled)
        reset_btn (gr.Button): the reset button (always yielded disabled)
        request (gr.Request): the request from a user
    """
    # NOTE(review): a random id can collide across concurrent users and mix
    # unrelated sessions inside the engine — consider deriving a unique id
    # from the gr.Request instead; confirm against AsyncEngine semantics.
    session_id = random.randint(0,100000)
    bot_summarized_response = ''
    # Append the new user turn with a None placeholder for the reply.
    state_chatbot = state_chatbot + [(instruction, None)]
    # Rebuild the full conversation in role/content message format.
    messages = []
    for item in state_chatbot:
        messages.append(dict(role='user', content=item[0]))
        if item[1] is not None:
            messages.append(dict(role='assistant', content=item[1]))

    # NOTE(review): this yields 5 values but the submit wiring below lists
    # only 4 outputs — confirm the installed Gradio version tolerates the
    # extra trailing value.
    yield (state_chatbot, state_chatbot, disable_btn, disable_btn,
           f'{bot_summarized_response}'.strip())

    # sequence_start/sequence_end both True: the whole history is replayed
    # as a fresh sequence on every call rather than reusing engine state.
    async for outputs in InterFace.async_engine.generate(
            messages,
            session_id,
            stream_response=True,
            sequence_start=True,
            sequence_end=True):
        response = outputs.response
        if outputs.finish_reason == 'length':
            gr.Warning('WARNING: exceed session max length.'
                       ' Please restart the session by reset button.')
        if outputs.generate_token_len < 0:
            gr.Warning('WARNING: running on the old session.'
                       ' Please restart the session by reset button.')
        # First chunk fills the None placeholder; later chunks append.
        if state_chatbot[-1][-1] is None:
            state_chatbot[-1] = (state_chatbot[-1][0], response)
        else:
            state_chatbot[-1] = (state_chatbot[-1][0],
                                 state_chatbot[-1][1] + response
                                 )  # piece by piece
        yield (state_chatbot, state_chatbot, disable_btn, disable_btn,
               f'{bot_summarized_response}'.strip())

    # Final yield once the stream has finished.
    yield (state_chatbot, state_chatbot, disable_btn, disable_btn,
           f'{bot_summarized_response}'.strip())


# Build the Gradio UI and launch the server (blocking call at the end).
with gr.Blocks(css=CSS, theme=THEME) as demo:
    # Per-session chat history: list of (user, assistant) tuples.
    state_chatbot = gr.State([])

    with gr.Column(elem_id='container'):
        gr.Markdown('## LMDeploy Playground')

        chatbot = gr.Chatbot(
            elem_id='chatbot',
            label=InterFace.async_engine.tm_model.model_name)
        instruction_txtbox = gr.Textbox(
            placeholder='Please input the instruction',
            label='Instruction')
        with gr.Row():
            # Both buttons start disabled AND hidden; nothing in this file
            # ever re-enables or shows them.
            cancel_btn = gr.Button(value='Cancel', interactive=False, visible=False)
            reset_btn = gr.Button(value='Reset', interactive=False, visible=False)

    # NOTE(review): chat_stream_demo yields 5-tuples but only 4 outputs are
    # wired here — confirm the installed Gradio tolerates the extra value.
    send_event = instruction_txtbox.submit(
        chat_stream_demo,
        [instruction_txtbox, state_chatbot, cancel_btn, reset_btn],
        [state_chatbot, chatbot, cancel_btn, reset_btn])
    # Second submit handler clears the textbox immediately on send.
    instruction_txtbox.submit(
        lambda: gr.Textbox.update(value=''),
        [],
        [instruction_txtbox],
    )
    # cancels=[send_event] aborts the in-flight streaming generator.
    cancel_btn.click(cancel_local_demo,
                        [state_chatbot, cancel_btn, reset_btn],
                        [state_chatbot, cancel_btn, reset_btn],
                        cancels=[send_event])

    reset_btn.click(reset_local_demo, [instruction_txtbox, state_chatbot],
                    [state_chatbot, chatbot, instruction_txtbox],
                    cancels=[send_event])

# print(f'server is gonna mount on: http://{server_name}:{server_port}')
    # Queue up to 100 requests, 4 processed concurrently; launch() blocks.
    demo.queue(concurrency_count=4, max_size=100).launch()