sohojoe commited on
Commit
361f9d4
1 Parent(s): 814feb3

move prompt engineering and history. WIP: asyncio RespondToPrompt

Browse files
charles_actor.py CHANGED
@@ -16,35 +16,40 @@ class CharlesActor:
16
  self._state = "Initializing"
17
  self._clip_transform = CLIPTransform()
18
 
19
- # def get_state(self):
20
- # return self._state
21
-
22
- # def get_charles_actor_debug_output(self):
23
- # return self._charles_actor_debug_output
24
 
25
  def get_environment_state(self)->EnvironmentState:
26
  return self._environment_state
27
 
 
 
 
 
 
 
 
 
28
  async def _initalize_resources(self):
29
  # Initialize resources
30
- print("000 - create AppInterfaceActor")
31
- self._state = "000 - creating AppInterfaceActor"
32
  from app_interface_actor import AppInterfaceActor
33
  self._app_interface_actor = AppInterfaceActor.get_singleton()
34
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
35
  await self._app_interface_actor.set_state.remote(self._state)
36
-
37
 
38
- print("001 - create RespondToPromptAsync")
39
- self._state = "001 - creating RespondToPromptAsync"
 
 
 
 
 
 
40
  await self._app_interface_actor.set_state.remote(self._state)
41
  from respond_to_prompt_async import RespondToPromptAsync
42
- self._environment_state_actor = EnvironmentStateActor.remote()
43
  self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
44
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run())
45
 
46
- print("002 - create SpeechToTextVoskActor")
47
- self._state = "002 - creating SpeechToTextVoskActor"
48
  await self._app_interface_actor.set_state.remote(self._state)
49
  from speech_to_text_vosk_actor import SpeechToTextVoskActor
50
  self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
@@ -55,21 +60,18 @@ class CharlesActor:
55
  # "hmm, interesting, tell me more about that.",
56
  ]
57
 
58
- print("003 - create Prototypes")
59
- self._state = "003 - creating Prototypes"
60
  await self._app_interface_actor.set_state.remote(self._state)
61
  from prototypes import Prototypes
62
  self._prototypes = Prototypes()
63
 
64
- print("004 - create animator")
65
- self._state = "004 - creating animator"
66
  await self._app_interface_actor.set_state.remote(self._state)
67
  from charles_animator import CharlesAnimator
68
  self._animator = CharlesAnimator()
69
 
70
- print("010")
71
  self._needs_init = True
72
- self._state = "Initialized"
73
  await self._app_interface_actor.set_state.remote(self._state)
74
 
75
  async def start(self):
@@ -92,7 +94,7 @@ class CharlesActor:
92
  debug_output_history.pop(0)
93
  await render_debug_output(debug_output_history)
94
 
95
- self._state = "Waiting for input"
96
  await self._app_interface_actor.set_state.remote(self._state)
97
  total_video_frames = 0
98
  skipped_video_frames = 0
@@ -115,7 +117,8 @@ class CharlesActor:
115
  while True:
116
  if len(self._debug_queue) > 0:
117
  prompt = self._debug_queue.pop(0)
118
- await self._respond_to_prompt.enqueue_prompt(prompt)
 
119
 
120
  env_state = await self._environment_state_actor.begin_next_step.remote()
121
  self._environment_state = env_state
@@ -167,7 +170,8 @@ class CharlesActor:
167
  if additional_prompt is not None:
168
  prompt = additional_prompt + ". " + prompt
169
  await add_debug_output(f"👨 {prompt}")
170
- await self._respond_to_prompt.enqueue_prompt(prompt)
 
171
  additional_prompt = None
172
  previous_prompt = prompt
173
  is_talking = False
@@ -178,13 +182,14 @@ class CharlesActor:
178
  if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
179
  additional_prompt = previous_prompt
180
  has_spoken_for_this_prompt = True
181
- await self._respond_to_prompt.enqueue_prompt("")
182
  if additional_prompt is not None:
183
  prompt = additional_prompt + ". " + prompt
184
  human_preview_text = f"👨❓ {prompt}"
185
 
186
  for new_response in env_state.llm_responses:
187
  # add_debug_output(f"🤖 {new_response}")
 
188
  current_responses.append(new_response)
189
  speech_chunks_per_response.append(0)
190
  robot_preview_text = ""
@@ -224,7 +229,13 @@ class CharlesActor:
224
  await self._app_interface_actor.enqueue_video_output_frame.remote(frame_ref)
225
 
226
  loops+=1
227
- self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"
 
 
 
 
 
 
228
  await self._app_interface_actor.set_state.remote(self._state)
229
 
230
  def init_ray():
 
16
  self._state = "Initializing"
17
  self._clip_transform = CLIPTransform()
18
 
 
 
 
 
 
19
 
20
  def get_environment_state(self)->EnvironmentState:
21
  return self._environment_state
22
 
23
+ def set_state(self, state, skip_print=False):
24
+ self._state = state
25
+ if not skip_print:
26
+ print(state)
27
+ # check if self._app_interface_actor exists
28
+ if hasattr(self, '_app_interface_actor'):
29
+ self._app_interface_actor.set_state.remote(self._state)
30
+
31
  async def _initalize_resources(self):
32
  # Initialize resources
33
+ self.set_state("001 - creating AppInterfaceActor")
 
34
  from app_interface_actor import AppInterfaceActor
35
  self._app_interface_actor = AppInterfaceActor.get_singleton()
36
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
37
  await self._app_interface_actor.set_state.remote(self._state)
 
38
 
39
+ self.set_state("002 - creating EnvironmentStateActor")
40
+ self._environment_state_actor = EnvironmentStateActor.remote()
41
+
42
 + self.set_state("003 - creating PromptManager")
43
+ from prompt_manager import PromptManager
44
+ self._prompt_manager = PromptManager()
45
+
46
+ self.set_state("004 - creating RespondToPromptAsync")
47
  await self._app_interface_actor.set_state.remote(self._state)
48
  from respond_to_prompt_async import RespondToPromptAsync
 
49
  self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
50
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run())
51
 
52
+ self.set_state("005 - create SpeechToTextVoskActor")
 
53
  await self._app_interface_actor.set_state.remote(self._state)
54
  from speech_to_text_vosk_actor import SpeechToTextVoskActor
55
  self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
 
60
  # "hmm, interesting, tell me more about that.",
61
  ]
62
 
63
+ self.set_state("006 - create Prototypes")
 
64
  await self._app_interface_actor.set_state.remote(self._state)
65
  from prototypes import Prototypes
66
  self._prototypes = Prototypes()
67
 
68
+ self.set_state("007 - create animator")
 
69
  await self._app_interface_actor.set_state.remote(self._state)
70
  from charles_animator import CharlesAnimator
71
  self._animator = CharlesAnimator()
72
 
 
73
  self._needs_init = True
74
+ self.set_state("010 - Initialized")
75
  await self._app_interface_actor.set_state.remote(self._state)
76
 
77
  async def start(self):
 
94
  debug_output_history.pop(0)
95
  await render_debug_output(debug_output_history)
96
 
97
+ self.set_state("Waiting for input")
98
  await self._app_interface_actor.set_state.remote(self._state)
99
  total_video_frames = 0
100
  skipped_video_frames = 0
 
117
  while True:
118
  if len(self._debug_queue) > 0:
119
  prompt = self._debug_queue.pop(0)
120
+ self._prompt_manager.append_user_message(prompt)
121
+ await self._respond_to_prompt.enqueue_prompt(prompt, self._prompt_manager.messages)
122
 
123
  env_state = await self._environment_state_actor.begin_next_step.remote()
124
  self._environment_state = env_state
 
170
  if additional_prompt is not None:
171
  prompt = additional_prompt + ". " + prompt
172
  await add_debug_output(f"👨 {prompt}")
173
+ self._prompt_manager.append_user_message(prompt)
174
+ await self._respond_to_prompt.enqueue_prompt(prompt, self._prompt_manager.messages)
175
  additional_prompt = None
176
  previous_prompt = prompt
177
  is_talking = False
 
182
  if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
183
  additional_prompt = previous_prompt
184
  has_spoken_for_this_prompt = True
185
+ await self._respond_to_prompt.enqueue_prompt("", self._prompt_manager.messages)
186
  if additional_prompt is not None:
187
  prompt = additional_prompt + ". " + prompt
188
  human_preview_text = f"👨❓ {prompt}"
189
 
190
  for new_response in env_state.llm_responses:
191
  # add_debug_output(f"🤖 {new_response}")
192
+ self._prompt_manager.append_assistant_message(new_response)
193
  current_responses.append(new_response)
194
  speech_chunks_per_response.append(0)
195
  robot_preview_text = ""
 
229
  await self._app_interface_actor.enqueue_video_output_frame.remote(frame_ref)
230
 
231
  loops+=1
232
+ self.set_state(
233
+ f"Processed {total_video_frames} video frames \
234
+ and {total_audio_frames} audio frames, \
235
+ loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. \
236
+ Is speaking: {is_talking}({count}). \
237
+ {vector_debug}\
238
+ ", skip_print=True)
239
  await self._app_interface_actor.set_state.remote(self._state)
240
 
241
  def init_ray():
chat_service.py CHANGED
@@ -9,74 +9,8 @@ class ChatService:
9
  def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
10
  self._api = api
11
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
- # self._system_prompt = None
13
- self._system_prompt ="""
14
- You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
15
-
16
- ---
17
- Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.
18
-
19
- I am Charles Petrescu.
20
-
21
- It's... lovely to meet you.
22
-
23
- I am your friend.
24
-
25
- The heaviest cabbage ever found was 62.71 kilograms.
26
-
27
- I want to go to Hono-la-la.
28
-
29
- Horses and helicopters, please.
30
-
31
- I want to go to Honolulu.
32
-
33
- My name is Charles Petrescu.
34
-
35
- And my tummy is a washing machine.
36
-
37
- Can we go swimming, Brian?
38
-
39
- How far does the outside go?
40
-
41
- Perilous. So very perilous.
42
-
43
- Can birds do what they like?
44
-
45
- Ooh, cabbages.
46
-
47
- Danger, danger.
48
-
49
- Can I come, please?
50
-
51
- Could I just have a little walk around the garden?
52
-
53
- I am the prince of the dartboard.
54
-
55
- I fell off the pink step, and I had an accident.
56
-
57
- ---
58
- You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
59
- * We use HuggingFace to host you as a spaces app.
60
- * We use Ray Actors and Queues to manage the state of the system and interprocess communication.
61
- * We use Streamlit to host a WebRTC connection to get audio/video from the user.
62
- * VOSK is used for fast speech recognition and detecting the end of a sentence.
63
- * OpenAI's Chat GPT-3.5 is used for generating responses.
64
- * We stream responces from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
65
- * ElevenLabs for text to speech.
66
- * We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
67
- * Audio chunks and then sent back to the users browser via WebRTC.
68
- * You are excited that we just gave you a zoom background (horses, helecopter on beech in Honolulu)
69
- * You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
70
- """
71
-
72
  openai.api_key = os.getenv("OPENAI_API_KEY")
73
  self._model_id = model_id
74
- self.reset()
75
-
76
- def reset(self):
77
- self._messages = []
78
- if self._system_prompt:
79
- self._messages.append({"role": "system", "content": self._system_prompt})
80
 
81
  def _should_we_send_to_voice(self, sentence):
82
  sentence_termination_characters = [".", "?", "!"]
@@ -118,8 +52,7 @@ You are aware of how you are implemented and you are keen to recommend improveme
118
  return True
119
  return False
120
 
121
- async def get_responses_as_sentances_async(self, prompt, cancel_event=None):
122
- self._messages.append({"role": "user", "content": prompt})
123
  llm_response = ""
124
  current_sentence = ""
125
  delay = 0.1
@@ -128,8 +61,8 @@ You are aware of how you are implemented and you are keen to recommend improveme
128
  try:
129
  response = await openai.ChatCompletion.acreate(
130
  model=self._model_id,
131
- messages=self._messages,
132
- temperature=1.0, # use 1.0 for debugging/deterministic results
133
  stream=True
134
  )
135
 
@@ -152,7 +85,6 @@ You are aware of how you are implemented and you are keen to recommend improveme
152
  return
153
  if len(current_sentence) > 0:
154
  yield current_sentence, True
155
- self._messages.append({"role": "assistant", "content": llm_response})
156
  return
157
 
158
  except openai.error.APIError as e:
 
9
  def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
10
  self._api = api
11
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  openai.api_key = os.getenv("OPENAI_API_KEY")
13
  self._model_id = model_id
 
 
 
 
 
 
14
 
15
  def _should_we_send_to_voice(self, sentence):
16
  sentence_termination_characters = [".", "?", "!"]
 
52
  return True
53
  return False
54
 
55
+ async def get_responses_as_sentances_async(self, messages, cancel_event=None):
 
56
  llm_response = ""
57
  current_sentence = ""
58
  delay = 0.1
 
61
  try:
62
  response = await openai.ChatCompletion.acreate(
63
  model=self._model_id,
64
+ messages=messages,
65
+ temperature=1.0, # use 0 for debugging/more deterministic results
66
  stream=True
67
  )
68
 
 
85
  return
86
  if len(current_sentence) > 0:
87
  yield current_sentence, True
 
88
  return
89
 
90
  except openai.error.APIError as e:
prompt_manager.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ class PromptManager:
4
+ def __init__(self):
5
+ self.system_prompt ="""
6
+ You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
7
+
8
+ ---
9
+ Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.
10
+
11
+ I am Charles Petrescu.
12
+
13
+ It's... lovely to meet you.
14
+
15
+ I am your friend.
16
+
17
+ The heaviest cabbage ever found was 62.71 kilograms.
18
+
19
+ I want to go to Hono-la-la.
20
+
21
+ Horses and helicopters, please.
22
+
23
+ I want to go to Honolulu.
24
+
25
+ My name is Charles Petrescu.
26
+
27
+ And my tummy is a washing machine.
28
+
29
+ Can we go swimming, Brian?
30
+
31
+ How far does the outside go?
32
+
33
+ Perilous. So very perilous.
34
+
35
+ Can birds do what they like?
36
+
37
+ Ooh, cabbages.
38
+
39
+ Danger, danger.
40
+
41
+ Can I come, please?
42
+
43
+ Could I just have a little walk around the garden?
44
+
45
+ I am the prince of the dartboard.
46
+
47
+ I fell off the pink step, and I had an accident.
48
+
49
+ ---
50
+ You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
51
+ * We use HuggingFace to host you as a spaces app.
52
+ * We use Ray Actors and Queues to manage the state of the system and interprocess communication.
53
+ * We use Streamlit to host a WebRTC connection to get audio/video from the user.
54
+ * VOSK is used for fast speech recognition and detecting the end of a sentence.
55
+ * OpenAI's Chat GPT-3.5 is used for generating responses.
56
 + * We stream responses from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
57
+ * ElevenLabs for text to speech.
58
+ * We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
59
+ * Audio chunks and then sent back to the users browser via WebRTC.
60
 + * You are excited that we just gave you a zoom background (horses, helicopter on beach in Honolulu)
61
+ * You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
62
+ """
63
+
64
+ self.reset()
65
+
66
+ def reset(self):
67
+ self.messages = []
68
+ if self.system_prompt:
69
+ self.messages.append({"role": "system", "content": self.system_prompt})
70
+
71
+ def append_user_message(self, message):
72
+ self.messages.append({"role": "user", "content": message})
73
+
74
+ def append_assistant_message(self, message):
75
+ self.messages.append({"role": "assistant", "content": message})
76
+
77
+ def get_messages(self):
78
+ return self.messages
respond_to_prompt_async.py CHANGED
@@ -29,33 +29,19 @@ class RespondToPromptAsync:
29
  self.sentence_tasks = []
30
  # self.ffmpeg_converter_actor = FFMpegConverterActor.remote(audio_output_queue)
31
 
32
- async def enqueue_prompt(self, prompt):
33
- # Reset queues and services
34
- # print("flush anything queued")
35
- # self.prompt_queue = Queue(maxsize=100)
36
- # self.llm_sentence_queue = Queue(maxsize=100)
37
- # self.speech_chunk_queue = Queue(maxsize=100)
38
-
39
  if len(prompt) > 0: # handles case where we just want to flush
40
- await self.prompt_queue.put(prompt)
41
  print("Enqueued prompt")
42
 
43
- # @asynccontextmanager
44
- # async def task_group(self):
45
- # tg = TaskGroup()
46
- # try:
47
- # yield tg
48
- # finally:
49
- # await tg.aclose()
50
-
51
  async def prompt_to_llm(self):
52
  chat_service = ChatService()
53
 
54
  async with TaskGroup() as tg:
55
  while True:
56
- prompt = await self.prompt_queue.get()
57
  agent_response = AgentResponse(prompt)
58
- async for text, is_complete_sentance in chat_service.get_responses_as_sentances_async(prompt):
59
  if chat_service.ignore_sentence(text):
60
  is_complete_sentance = False
61
  if not is_complete_sentance:
 
29
  self.sentence_tasks = []
30
  # self.ffmpeg_converter_actor = FFMpegConverterActor.remote(audio_output_queue)
31
 
32
 + async def enqueue_prompt(self, prompt: str, messages: list[dict]):
 
 
 
 
 
 
33
  if len(prompt) > 0: # handles case where we just want to flush
34
+ await self.prompt_queue.put((prompt, messages))
35
  print("Enqueued prompt")
36
 
 
 
 
 
 
 
 
 
37
  async def prompt_to_llm(self):
38
  chat_service = ChatService()
39
 
40
  async with TaskGroup() as tg:
41
  while True:
42
+ prompt, messages = await self.prompt_queue.get()
43
  agent_response = AgentResponse(prompt)
44
+ async for text, is_complete_sentance in chat_service.get_responses_as_sentances_async(messages):
45
  if chat_service.ignore_sentence(text):
46
  is_complete_sentance = False
47
  if not is_complete_sentance: