Files changed (1) hide show
  1. app.py +206 -25
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import os, gc, torch
3
  from datetime import datetime
4
  from huggingface_hub import hf_hub_download
5
  from pynvml import *
@@ -98,27 +98,208 @@ examples = [
98
  ["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 150, 1.0, 0.5, 0.4, 0.4],
99
  ]
100
 
101
- g = gr.Interface(
102
- fn=evaluate,
103
- inputs=[
104
- gr.components.Textbox(lines=2, label="Instruction", value="Tell me about ravens."),
105
- gr.components.Textbox(lines=2, label="Input", placeholder="none"),
106
- gr.components.Slider(minimum=10, maximum=200, step=10, value=150), # token_count
107
- gr.components.Slider(minimum=0.2, maximum=2.0, step=0.1, value=1.0), # temperature
108
- gr.components.Slider(minimum=0, maximum=1, step=0.05, value=0.5), # top_p
109
- gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # presencePenalty
110
- gr.components.Slider(0.0, 1.0, step=0.1, value=0.4), # countPenalty
111
- ],
112
- outputs=[
113
- gr.inputs.Textbox(
114
- lines=5,
115
- label="Output",
116
- )
117
- ],
118
- title=f"🐦Raven - {title}",
119
- description="Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen 1024. It is finetuned on [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca), codealpaca and more. For best results, *** keep you prompt short and clear ***.",
120
- examples=examples,
121
- cache_examples=False,
122
- )
123
- g.queue(concurrency_count=1, max_size=10)
124
- g.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os, gc, copy, torch
3
  from datetime import datetime
4
  from huggingface_hub import hf_hub_download
5
  from pynvml import *
 
98
  ["You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", "", 150, 1.0, 0.5, 0.4, 0.4],
99
  ]
100
 
101
# Default scenario text shown in the "Scenario" textbox of the Chat tab.
# The literal markers <|user|> and <|bot|> are placeholders: chat() replaces
# them with the configured user and bot names before the text is fed to the
# model. The wording is part of the model prompt, so it is kept verbatim.
chat_intro = '''The following is a coherent verbose detailed conversation between an AI girl named <|bot|> and <|user|>. One day, they meet at a café.
Note the following important facts about <|bot|>:
1. <|bot|> is very intelligent, creative and friendly.
2. <|bot|> likes to tell <|user|> a lot about herself and her opinions.
3. <|bot|> usually gives <|user|> kind, helpful and informative advices.

<|user|>: Hello, how are you doing?

<|bot|>: Hi! Thanks, I'm fine. What about you?

<|user|>: I am fine. It's nice to see you. Look, here is a store selling tea and juice. We can go and take a look. Would you like to chat with me for a while?

<|bot|>: Sure. Let's go inside. What would you like to talk about? I'm listening.
'''
115
+
116
def user(message, chatbot):
    """Append a newly submitted message to the chat as an unanswered turn.

    Args:
        message: Text the user typed.
        chatbot: Existing list of ``[user_text, bot_text]`` turns; a falsy
            value (``None`` or empty) is treated as an empty conversation.

    Returns:
        A ``("", turns)`` pair: the empty string is the new value for the
        message box, and ``turns`` is a new list ending with
        ``[message, None]`` (the reply slot is left empty).
    """
    print(f"User: {message}")
    turns = chatbot if chatbot else []
    pending_turn = [message, None]
    return "", turns + [pending_turn]
120
+
121
def alternative(chatbot, history):
    """Rewind the conversation so the last reply can be generated again.

    Args:
        chatbot: List of ``[user_text, bot_text]`` turns.
        history: Conversation state list; slot 0 is the current model state
            and slot 1 is the snapshot chat() stored before the last message
            was processed.

    Returns:
        The same ``(chatbot, history)`` objects. When both are non-empty the
        last turn's reply is cleared and slot 0 is replaced by a deep copy of
        slot 1; otherwise they are returned untouched.
    """
    if chatbot and history:
        # Blank the reply of the most recent turn; its user text is kept.
        chatbot[-1][1] = None
        # Restore from the snapshot with a deep copy so the snapshot in
        # slot 1 stays a separate object from the restored state.
        history[0] = copy.deepcopy(history[1])
    return chatbot, history
129
+
130
def chat(
    prompt,
    user,
    bot,
    chatbot,
    history,
    temperature=1.0,
    top_p=0.8,
    presence_penalty=0.1,
    count_penalty=0.1,
):
    """Generate the bot's reply to the last chat turn, streaming partial text.

    This is a generator: it yields ``(chatbot, history)`` after each decodable
    chunk and once more at the end with the final, cleaned-up reply.

    Args:
        prompt: Scenario text; ``<|user|>`` / ``<|bot|>`` placeholders are
            replaced by the names below. Only used when ``history`` is empty.
        user: Display name of the human speaker.
        bot: Display name of the bot speaker.
        chatbot: List of ``[user_text, bot_text]`` turns; the last turn's
            reply slot is filled in place.
        history: ``[state, state_pre, tokens]`` from a previous call, or a
            falsy value to start a new conversation from ``prompt``.
        temperature: Sampling temperature (clamped to at least 0.2).
        top_p: Nucleus sampling threshold.
        presence_penalty: Flat penalty applied to every token already sampled.
        count_penalty: Extra penalty per occurrence of an already sampled token.

    Yields:
        ``(chatbot, history)`` where ``history`` always has the three-slot
        layout ``[state, state_pre, tokens]``; ``state_pre`` is the model
        state from before this turn's message was processed.
    """
    args = PIPELINE_ARGS(temperature=max(0.2, float(temperature)), top_p=float(top_p),
                         alpha_frequency=float(count_penalty),
                         alpha_presence=float(presence_penalty),
                         token_ban=[],  # ban the generation of some tokens
                         token_stop=[])  # stop generation whenever you see any token here

    if not chatbot:
        # Inside a generator a `return value` is discarded by consumers, so
        # hand the unchanged values back explicitly before stopping.
        yield chatbot, history
        return

    message = chatbot[-1][0]
    message = message.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    ctx = f"{user}: {message}\n\n{bot}:"

    if not history:
        # New conversation: personalise the scenario and run it through the
        # model once to obtain the starting state.
        prompt = prompt.replace("<|user|>", user.strip())
        prompt = prompt.replace("<|bot|>", bot.strip())
        prompt = prompt.strip()
        prompt = f"\n{prompt}\n\n"

        out, state = model.forward(pipeline.encode(prompt), None)
        history = [state, None, []]  # [state, state_pre, tokens]
        print("History reloaded.")

    [state, _, all_tokens] = history
    # Snapshot from before this turn; stored in history slot 1 so the turn
    # can be regenerated later.
    state_pre_0 = copy.deepcopy(state)

    out, state = model.forward(pipeline.encode(ctx)[-ctx_limit:], state)
    state_pre_1 = copy.deepcopy(state)  # For recovery

    print("Bot: ", end='')

    begin = len(all_tokens)
    out_last = begin
    out_str: str = ''
    occurrence = {}
    for i in range(300):
        # Bias on logit 187: forbidden on the first step, discouraged (fading
        # to neutral) up to step 30, neutral up to 130, then increasingly
        # encouraged. NOTE(review): 187 is presumably the newline token id
        # (the variable is named nl_bias) - confirm against the tokenizer.
        if i <= 0:
            nl_bias = -float('inf')
        elif i <= 30:
            nl_bias = (i - 30) * 0.1
        elif i <= 130:
            nl_bias = 0
        else:
            nl_bias = (i - 130) * 0.25
        out[187] += nl_bias
        for n in occurrence:
            out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
        next_tokens = [token]
        if token == 0:
            # NOTE(review): token 0 is presumably end-of-text; it is replaced
            # by a blank line, which ends the turn below.
            next_tokens = pipeline.encode('\n\n')
        all_tokens += next_tokens

        occurrence[token] = occurrence.get(token, 0) + 1

        out, state = model.forward(next_tokens, state)

        tmp = pipeline.decode(all_tokens[out_last:])
        if '\ufffd' not in tmp:
            # Only emit text once the pending tokens decode without a
            # replacement character (i.e. no partial multi-byte sequence).
            print(tmp, end='', flush=True)
            out_last = begin + i + 1
            out_str += tmp

            chatbot[-1][1] = out_str.strip()
            # Keep the three-slot layout on intermediate updates too, so an
            # interrupted generation never leaves a malformed history behind.
            history = [state, state_pre_0, all_tokens]
            yield chatbot, history

        out_str = pipeline.decode(all_tokens[begin:])
        out_str = out_str.replace("\r\n", '\n').replace('\\n', '\n')

        if '\n\n' in out_str:
            break

        # State recovery: the model started writing the next speaker's line.
        # Cut the reply there and rebuild the state from the post-message
        # snapshot using only the kept text.
        if f'{user}:' in out_str or f'{bot}:' in out_str:
            idx_user = out_str.find(f'{user}:')
            idx_user = len(out_str) if idx_user == -1 else idx_user
            idx_bot = out_str.find(f'{bot}:')
            idx_bot = len(out_str) if idx_bot == -1 else idx_bot
            idx = min(idx_user, idx_bot)

            if idx < len(out_str):
                out_str = f" {out_str[:idx].strip()}\n\n"
                tokens = pipeline.encode(out_str)

                all_tokens = all_tokens[:begin] + tokens
                out, state = model.forward(tokens, state_pre_1)
                break

    gc.collect()
    torch.cuda.empty_cache()

    chatbot[-1][1] = out_str.strip()
    history = [state, state_pre_0, all_tokens]
    yield chatbot, history
243
+
244
# Two-tab Gradio UI: "Instruct" (single-shot instruction following through
# evaluate) and "Chat" (multi-turn conversation through user/alternative/chat).
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# flattened diff view - confirm the layout against the real file.
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
    with gr.Tab("Instruct"):
        gr.Markdown(f"Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen 1024. It is finetuned on [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca), codealpaca and more. For best results, *** keep you prompt short and clear ***.")
        with gr.Row():
            with gr.Column():
                instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
                # NOTE: this name shadows the builtin input() at module level.
                input = gr.Textbox(lines=2, label="Input", placeholder="none")
                token_count = gr.Slider(10, 200, label="Max Tokens", step=10, value=150)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.2)
                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.2)
            with gr.Column():
                with gr.Row():
                    submit = gr.Button("Submit", variant="primary")
                    clear = gr.Button("Clear", variant="secondary")
                output = gr.Textbox(label="Output", lines=5)
        # Example rows; clicking one copies its values into the input widgets.
        data = gr.Dataset(components=[instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty], samples=examples, label="Example Instructions", headers=["Instruction", "Input", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
        submit.click(evaluate, [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty], [output])
        clear.click(lambda: None, [], [output])
        data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])

    with gr.Tab("Chat"):
        gr.Markdown(f'''*** <b>Default Chat Scenario: You (Bob) and Bot (Alice) meet at a café.</b> ***\nIf you want to change the scenario, make sure to use an empty new line to separate different people's words. Also, make sure there is no empty new lines within one person's lines. Changes only take effect after clearing.''', label="Description")
        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot()
                # Holds the per-session history list that chat() yields.
                state = gr.State()
                message = gr.Textbox(label="Message")
                with gr.Row():
                    send = gr.Button("Send", variant="primary")
                    alt = gr.Button("Alternative", variant="secondary")
                    # Rebinds `clear`; the Instruct tab's handler was already
                    # attached to its own button above.
                    clear = gr.Button("Clear", variant="secondary")
            with gr.Column():
                with gr.Row():
                    user_name = gr.Textbox(lines=1, max_lines=1, label="User Name", value="Bob")
                    bot_name = gr.Textbox(lines=1, max_lines=1, label="Bot Name", value="Alice")
                prompt = gr.Textbox(lines=10, max_lines=50, label="Scenario", value=chat_intro)
                # These rebind the slider names used by the Instruct tab; the
                # Instruct handlers were wired before this point.
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.0)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.2)
                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.2)
        # Order must match the positional parameters of chat().
        chat_inputs = [
            prompt,
            user_name,
            bot_name,
            chatbot,
            state,
            temperature,
            top_p,
            presence_penalty,
            count_penalty
        ]
        chat_outputs = [chatbot, state]
        # Each trigger first records the user's turn (or rewinds the last one)
        # without queueing, then runs chat() to produce the reply.
        message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
        send.click(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
        alt.click(alternative, [chatbot, state], [chatbot, state], queue=False).then(chat, chat_inputs, chat_outputs)
        # Reset the transcript, the stored history and the message box.
        clear.click(lambda: ([], None, ""), [], [chatbot, state, message], queue=False)

# At most 10 requests may wait in the queue.
demo.queue(max_size=10)
# NOTE(review): share=True requests a public share link from Gradio; the
# replaced gr.Interface code launched with share=False - confirm this is intended.
demo.launch(share=True)