aiforhumans committed on
Commit
4fd05d8
1 Parent(s): c0f7cf1
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -7,7 +7,6 @@ MAX_TOKENS_DEFAULT = 512
7
  TEMPERATURE_DEFAULT = 0.7
8
  TOP_P_DEFAULT = 0.95
9
 
10
-
11
  inference_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
12
 
13
 
@@ -18,7 +17,7 @@ def respond(
18
  max_tokens: int,
19
  temperature: float,
20
  top_p: float,
21
- ) -> str:
22
  """
23
  Respond to a user message given the conversation history and other parameters.
24
 
@@ -30,21 +29,24 @@ def respond(
30
  temperature (float): The temperature to use when generating text.
31
  top_p (float): The top-p value to use when generating text.
32
 
33
- Returns:
34
- str: The response to the user's message.
35
  """
36
  messages = [{"role": "system", "content": system_message}]
37
-
38
  for user_input, assistant_response in conversation_history:
39
  if user_input:
40
  messages.append({"role": "user", "content": user_input})
41
  if assistant_response:
42
  messages.append({"role": "assistant", "content": assistant_response})
43
 
 
44
  messages.append({"role": "user", "content": user_message})
45
 
 
46
  response = ""
47
 
 
48
  for message in inference_client.chat_completion(
49
  messages,
50
  max_tokens=max_tokens,
@@ -53,11 +55,13 @@ def respond(
53
  top_p=top_p,
54
  ):
55
  token = message.choices[0].delta.content
56
-
57
  response += token
58
- yield response
 
 
59
 
60
 
 
61
  chatbot_interface = gr.ChatInterface(
62
  fn=respond,
63
  chatbot=gr.Chatbot(height=600),
@@ -90,6 +94,5 @@ chatbot_interface = gr.ChatInterface(
90
  ],
91
  )
92
 
93
-
94
  if __name__ == "__main__":
95
  chatbot_interface.launch()
 
7
  TEMPERATURE_DEFAULT = 0.7
8
  TOP_P_DEFAULT = 0.95
9
 
 
10
  inference_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
11
 
12
 
 
17
  max_tokens: int,
18
  temperature: float,
19
  top_p: float,
20
+ ):
21
  """
22
  Respond to a user message given the conversation history and other parameters.
23
 
 
29
  temperature (float): The temperature to use when generating text.
30
  top_p (float): The top-p value to use when generating text.
31
 
32
+ Yields:
33
+ list[tuple[str, str]]: Updated conversation history with the new assistant response.
34
  """
35
  messages = [{"role": "system", "content": system_message}]
36
+ # Prepare messages for the model based on the history
37
  for user_input, assistant_response in conversation_history:
38
  if user_input:
39
  messages.append({"role": "user", "content": user_input})
40
  if assistant_response:
41
  messages.append({"role": "assistant", "content": assistant_response})
42
 
43
+ # Append the new user message
44
  messages.append({"role": "user", "content": user_message})
45
 
46
+ # Initialize response string
47
  response = ""
48
 
49
+ # Stream the completion from the inference client
50
  for message in inference_client.chat_completion(
51
  messages,
52
  max_tokens=max_tokens,
 
55
  top_p=top_p,
56
  ):
57
  token = message.choices[0].delta.content
 
58
  response += token
59
+ # Continuously yield updated history with the new response
60
+ updated_history = conversation_history + [(user_message, response)]
61
+ yield updated_history
62
 
63
 
64
+ # Chatbot interface definition
65
  chatbot_interface = gr.ChatInterface(
66
  fn=respond,
67
  chatbot=gr.Chatbot(height=600),
 
94
  ],
95
  )
96
 
 
97
  if __name__ == "__main__":
98
  chatbot_interface.launch()