MagpieLM-8B

Running on Zero

yuchenlin committed on Sep 18

Commit

9b8eb72

•

1 Parent(s): 77cf82d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -40,23 +40,19 @@ def respond(
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(device)
-    generated_ids = model.generate(
         model_inputs.input_ids,
         max_new_tokens = max_tokens,
         temperature = temperature,
         top_p = top_p,
         repetition_penalty=repetition_penalty,
     )
-    generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -74,6 +70,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
-    demo.launch(share=True)

     )
     model_inputs = tokenizer([text], return_tensors="pt").to(device)
+    streamer = gr.utils.StreamingTextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
+    _ = model.generate(
         model_inputs.input_ids,
         max_new_tokens = max_tokens,
         temperature = temperature,
         top_p = top_p,
         repetition_penalty=repetition_penalty,
+        streamer=streamer
     )
+    return streamer
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
     ],
 )
 if __name__ == "__main__":
+    demo.launch(share=True)