aiforhumans committed on
Commit
4fd05d8
1 Parent(s): c0f7cf1
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -7,7 +7,6 @@ MAX_TOKENS_DEFAULT = 512
7
  TEMPERATURE_DEFAULT = 0.7
8
  TOP_P_DEFAULT = 0.95
9
 
10
-
11
  inference_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
12
 
13
 
@@ -18,7 +17,7 @@ def respond(
18
  max_tokens: int,
19
  temperature: float,
20
  top_p: float,
21
- ) -> str:
22
  """
23
  Respond to a user message given the conversation history and other parameters.
24
 
@@ -30,21 +29,24 @@ def respond(
30
  temperature (float): The temperature to use when generating text.
31
  top_p (float): The top-p value to use when generating text.
32
 
33
- Returns:
34
- str: The response to the user's message.
35
  """
36
  messages = [{"role": "system", "content": system_message}]
37
-
38
  for user_input, assistant_response in conversation_history:
39
  if user_input:
40
  messages.append({"role": "user", "content": user_input})
41
  if assistant_response:
42
  messages.append({"role": "assistant", "content": assistant_response})
43
 
 
44
  messages.append({"role": "user", "content": user_message})
45
 
 
46
  response = ""
47
 
 
48
  for message in inference_client.chat_completion(
49
  messages,
50
  max_tokens=max_tokens,
@@ -53,11 +55,13 @@ def respond(
53
  top_p=top_p,
54
  ):
55
  token = message.choices[0].delta.content
56
-
57
  response += token
58
- yield response
 
 
59
 
60
 
 
61
  chatbot_interface = gr.ChatInterface(
62
  fn=respond,
63
  chatbot=gr.Chatbot(height=600),
@@ -90,6 +94,5 @@ chatbot_interface = gr.ChatInterface(
90
  ],
91
  )
92
 
93
-
94
  if __name__ == "__main__":
95
  chatbot_interface.launch()
 
7
  TEMPERATURE_DEFAULT = 0.7
8
  TOP_P_DEFAULT = 0.95
9
 
 
10
  inference_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
11
 
12
 
 
17
  max_tokens: int,
18
  temperature: float,
19
  top_p: float,
20
+ ):
21
  """
22
  Respond to a user message given the conversation history and other parameters.
23
 
 
29
  temperature (float): The temperature to use when generating text.
30
  top_p (float): The top-p value to use when generating text.
31
 
32
+ Yields:
33
+ list[tuple[str, str]]: Updated conversation history with the new assistant response.
34
  """
35
  messages = [{"role": "system", "content": system_message}]
36
+ # Prepare messages for the model based on the history
37
  for user_input, assistant_response in conversation_history:
38
  if user_input:
39
  messages.append({"role": "user", "content": user_input})
40
  if assistant_response:
41
  messages.append({"role": "assistant", "content": assistant_response})
42
 
43
+ # Append the new user message
44
  messages.append({"role": "user", "content": user_message})
45
 
46
+ # Initialize response string
47
  response = ""
48
 
49
+ # Stream the completion from the inference client
50
  for message in inference_client.chat_completion(
51
  messages,
52
  max_tokens=max_tokens,
 
55
  top_p=top_p,
56
  ):
57
  token = message.choices[0].delta.content
 
58
  response += token
59
+ # Continuously yield updated history with the new response
60
+ updated_history = conversation_history + [(user_message, response)]
61
+ yield updated_history
62
 
63
 
64
+ # Chatbot interface definition
65
  chatbot_interface = gr.ChatInterface(
66
  fn=respond,
67
  chatbot=gr.Chatbot(height=600),
 
94
  ],
95
  )
96
 
 
97
  if __name__ == "__main__":
98
  chatbot_interface.launch()