sohojoe commited on
Commit
361f9d4
1 Parent(s): 814feb3

move prompt engineering and history. WIP: asyncio RespondToPrompt

Browse files
charles_actor.py CHANGED
@@ -16,35 +16,40 @@ class CharlesActor:
16
  self._state = "Initializing"
17
  self._clip_transform = CLIPTransform()
18
 
19
- # def get_state(self):
20
- # return self._state
21
-
22
- # def get_charles_actor_debug_output(self):
23
- # return self._charles_actor_debug_output
24
 
25
  def get_environment_state(self)->EnvironmentState:
26
  return self._environment_state
27
 
 
 
 
 
 
 
 
 
28
  async def _initalize_resources(self):
29
  # Initialize resources
30
- print("000 - create AppInterfaceActor")
31
- self._state = "000 - creating AppInterfaceActor"
32
  from app_interface_actor import AppInterfaceActor
33
  self._app_interface_actor = AppInterfaceActor.get_singleton()
34
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
35
  await self._app_interface_actor.set_state.remote(self._state)
36
-
37
 
38
- print("001 - create RespondToPromptAsync")
39
- self._state = "001 - creating RespondToPromptAsync"
 
 
 
 
 
 
40
  await self._app_interface_actor.set_state.remote(self._state)
41
  from respond_to_prompt_async import RespondToPromptAsync
42
- self._environment_state_actor = EnvironmentStateActor.remote()
43
  self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
44
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run())
45
 
46
- print("002 - create SpeechToTextVoskActor")
47
- self._state = "002 - creating SpeechToTextVoskActor"
48
  await self._app_interface_actor.set_state.remote(self._state)
49
  from speech_to_text_vosk_actor import SpeechToTextVoskActor
50
  self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
@@ -55,21 +60,18 @@ class CharlesActor:
55
  # "hmm, interesting, tell me more about that.",
56
  ]
57
 
58
- print("003 - create Prototypes")
59
- self._state = "003 - creating Prototypes"
60
  await self._app_interface_actor.set_state.remote(self._state)
61
  from prototypes import Prototypes
62
  self._prototypes = Prototypes()
63
 
64
- print("004 - create animator")
65
- self._state = "004 - creating animator"
66
  await self._app_interface_actor.set_state.remote(self._state)
67
  from charles_animator import CharlesAnimator
68
  self._animator = CharlesAnimator()
69
 
70
- print("010")
71
  self._needs_init = True
72
- self._state = "Initialized"
73
  await self._app_interface_actor.set_state.remote(self._state)
74
 
75
  async def start(self):
@@ -92,7 +94,7 @@ class CharlesActor:
92
  debug_output_history.pop(0)
93
  await render_debug_output(debug_output_history)
94
 
95
- self._state = "Waiting for input"
96
  await self._app_interface_actor.set_state.remote(self._state)
97
  total_video_frames = 0
98
  skipped_video_frames = 0
@@ -115,7 +117,8 @@ class CharlesActor:
115
  while True:
116
  if len(self._debug_queue) > 0:
117
  prompt = self._debug_queue.pop(0)
118
- await self._respond_to_prompt.enqueue_prompt(prompt)
 
119
 
120
  env_state = await self._environment_state_actor.begin_next_step.remote()
121
  self._environment_state = env_state
@@ -167,7 +170,8 @@ class CharlesActor:
167
  if additional_prompt is not None:
168
  prompt = additional_prompt + ". " + prompt
169
  await add_debug_output(f"👨 {prompt}")
170
- await self._respond_to_prompt.enqueue_prompt(prompt)
 
171
  additional_prompt = None
172
  previous_prompt = prompt
173
  is_talking = False
@@ -178,13 +182,14 @@ class CharlesActor:
178
  if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
179
  additional_prompt = previous_prompt
180
  has_spoken_for_this_prompt = True
181
- await self._respond_to_prompt.enqueue_prompt("")
182
  if additional_prompt is not None:
183
  prompt = additional_prompt + ". " + prompt
184
  human_preview_text = f"👨❓ {prompt}"
185
 
186
  for new_response in env_state.llm_responses:
187
  # add_debug_output(f"🤖 {new_response}")
 
188
  current_responses.append(new_response)
189
  speech_chunks_per_response.append(0)
190
  robot_preview_text = ""
@@ -224,7 +229,13 @@ class CharlesActor:
224
  await self._app_interface_actor.enqueue_video_output_frame.remote(frame_ref)
225
 
226
  loops+=1
227
- self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"
 
 
 
 
 
 
228
  await self._app_interface_actor.set_state.remote(self._state)
229
 
230
  def init_ray():
 
16
  self._state = "Initializing"
17
  self._clip_transform = CLIPTransform()
18
 
 
 
 
 
 
19
 
20
  def get_environment_state(self)->EnvironmentState:
21
  return self._environment_state
22
 
23
+ def set_state(self, state, skip_print=False):
24
+ self._state = state
25
+ if not skip_print:
26
+ print(state)
27
+ # check if self._app_interface_actor exists
28
+ if hasattr(self, '_app_interface_actor'):
29
+ self._app_interface_actor.set_state.remote(self._state)
30
+
31
  async def _initalize_resources(self):
32
  # Initialize resources
33
+ self.set_state("001 - creating AppInterfaceActor")
 
34
  from app_interface_actor import AppInterfaceActor
35
  self._app_interface_actor = AppInterfaceActor.get_singleton()
36
  self._audio_output_queue = await self._app_interface_actor.get_audio_output_queue.remote()
37
  await self._app_interface_actor.set_state.remote(self._state)
 
38
 
39
+ self.set_state("002 - creating EnvironmentStateActor")
40
+ self._environment_state_actor = EnvironmentStateActor.remote()
41
+
42
 + self.set_state("003 - creating PromptManager")
43
+ from prompt_manager import PromptManager
44
+ self._prompt_manager = PromptManager()
45
+
46
+ self.set_state("004 - creating RespondToPromptAsync")
47
  await self._app_interface_actor.set_state.remote(self._state)
48
  from respond_to_prompt_async import RespondToPromptAsync
 
49
  self._respond_to_prompt = RespondToPromptAsync(self._environment_state_actor, self._audio_output_queue)
50
  self._respond_to_prompt_task = asyncio.create_task(self._respond_to_prompt.run())
51
 
52
+ self.set_state("005 - create SpeechToTextVoskActor")
 
53
  await self._app_interface_actor.set_state.remote(self._state)
54
  from speech_to_text_vosk_actor import SpeechToTextVoskActor
55
  self._speech_to_text_actor = SpeechToTextVoskActor.remote("small")
 
60
  # "hmm, interesting, tell me more about that.",
61
  ]
62
 
63
+ self.set_state("006 - create Prototypes")
 
64
  await self._app_interface_actor.set_state.remote(self._state)
65
  from prototypes import Prototypes
66
  self._prototypes = Prototypes()
67
 
68
+ self.set_state("007 - create animator")
 
69
  await self._app_interface_actor.set_state.remote(self._state)
70
  from charles_animator import CharlesAnimator
71
  self._animator = CharlesAnimator()
72
 
 
73
  self._needs_init = True
74
+ self.set_state("010 - Initialized")
75
  await self._app_interface_actor.set_state.remote(self._state)
76
 
77
  async def start(self):
 
94
  debug_output_history.pop(0)
95
  await render_debug_output(debug_output_history)
96
 
97
+ self.set_state("Waiting for input")
98
  await self._app_interface_actor.set_state.remote(self._state)
99
  total_video_frames = 0
100
  skipped_video_frames = 0
 
117
  while True:
118
  if len(self._debug_queue) > 0:
119
  prompt = self._debug_queue.pop(0)
120
+ self._prompt_manager.append_user_message(prompt)
121
+ await self._respond_to_prompt.enqueue_prompt(prompt, self._prompt_manager.messages)
122
 
123
  env_state = await self._environment_state_actor.begin_next_step.remote()
124
  self._environment_state = env_state
 
170
  if additional_prompt is not None:
171
  prompt = additional_prompt + ". " + prompt
172
  await add_debug_output(f"👨 {prompt}")
173
+ self._prompt_manager.append_user_message(prompt)
174
+ await self._respond_to_prompt.enqueue_prompt(prompt, self._prompt_manager.messages)
175
  additional_prompt = None
176
  previous_prompt = prompt
177
  is_talking = False
 
182
  if len(previous_prompt) > 0 and not has_spoken_for_this_prompt:
183
  additional_prompt = previous_prompt
184
  has_spoken_for_this_prompt = True
185
+ await self._respond_to_prompt.enqueue_prompt("", self._prompt_manager.messages)
186
  if additional_prompt is not None:
187
  prompt = additional_prompt + ". " + prompt
188
  human_preview_text = f"👨❓ {prompt}"
189
 
190
  for new_response in env_state.llm_responses:
191
  # add_debug_output(f"🤖 {new_response}")
192
+ self._prompt_manager.append_assistant_message(new_response)
193
  current_responses.append(new_response)
194
  speech_chunks_per_response.append(0)
195
  robot_preview_text = ""
 
229
  await self._app_interface_actor.enqueue_video_output_frame.remote(frame_ref)
230
 
231
  loops+=1
232
+ self.set_state(
233
+ f"Processed {total_video_frames} video frames \
234
+ and {total_audio_frames} audio frames, \
235
+ loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. \
236
+ Is speaking: {is_talking}({count}). \
237
+ {vector_debug}\
238
+ ", skip_print=True)
239
  await self._app_interface_actor.set_state.remote(self._state)
240
 
241
  def init_ray():
chat_service.py CHANGED
@@ -9,74 +9,8 @@ class ChatService:
9
  def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
10
  self._api = api
11
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
12
- # self._system_prompt = None
13
- self._system_prompt ="""
14
- You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
15
-
16
- ---
17
- Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.
18
-
19
- I am Charles Petrescu.
20
-
21
- It's... lovely to meet you.
22
-
23
- I am your friend.
24
-
25
- The heaviest cabbage ever found was 62.71 kilograms.
26
-
27
- I want to go to Hono-la-la.
28
-
29
- Horses and helicopters, please.
30
-
31
- I want to go to Honolulu.
32
-
33
- My name is Charles Petrescu.
34
-
35
- And my tummy is a washing machine.
36
-
37
- Can we go swimming, Brian?
38
-
39
- How far does the outside go?
40
-
41
- Perilous. So very perilous.
42
-
43
- Can birds do what they like?
44
-
45
- Ooh, cabbages.
46
-
47
- Danger, danger.
48
-
49
- Can I come, please?
50
-
51
- Could I just have a little walk around the garden?
52
-
53
- I am the prince of the dartboard.
54
-
55
- I fell off the pink step, and I had an accident.
56
-
57
- ---
58
- You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
59
- * We use HuggingFace to host you as a spaces app.
60
- * We use Ray Actors and Queues to manage the state of the system and interprocess communication.
61
- * We use Streamlit to host a WebRTC connection to get audio/video from the user.
62
- * VOSK is used for fast speech recognition and detecting the end of a sentence.
63
- * OpenAI's Chat GPT-3.5 is used for generating responses.
64
- * We stream responces from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
65
- * ElevenLabs for text to speech.
66
- * We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
67
- * Audio chunks and then sent back to the users browser via WebRTC.
68
- * You are excited that we just gave you a zoom background (horses, helecopter on beech in Honolulu)
69
- * You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
70
- """
71
-
72
  openai.api_key = os.getenv("OPENAI_API_KEY")
73
  self._model_id = model_id
74
- self.reset()
75
-
76
- def reset(self):
77
- self._messages = []
78
- if self._system_prompt:
79
- self._messages.append({"role": "system", "content": self._system_prompt})
80
 
81
  def _should_we_send_to_voice(self, sentence):
82
  sentence_termination_characters = [".", "?", "!"]
@@ -118,8 +52,7 @@ You are aware of how you are implemented and you are keen to recommend improveme
118
  return True
119
  return False
120
 
121
- async def get_responses_as_sentances_async(self, prompt, cancel_event=None):
122
- self._messages.append({"role": "user", "content": prompt})
123
  llm_response = ""
124
  current_sentence = ""
125
  delay = 0.1
@@ -128,8 +61,8 @@ You are aware of how you are implemented and you are keen to recommend improveme
128
  try:
129
  response = await openai.ChatCompletion.acreate(
130
  model=self._model_id,
131
- messages=self._messages,
132
- temperature=1.0, # use 1.0 for debugging/deterministic results
133
  stream=True
134
  )
135
 
@@ -152,7 +85,6 @@ You are aware of how you are implemented and you are keen to recommend improveme
152
  return
153
  if len(current_sentence) > 0:
154
  yield current_sentence, True
155
- self._messages.append({"role": "assistant", "content": llm_response})
156
  return
157
 
158
  except openai.error.APIError as e:
 
9
  def __init__(self, api="openai", model_id = "gpt-3.5-turbo"):
10
  self._api = api
11
  self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  openai.api_key = os.getenv("OPENAI_API_KEY")
13
  self._model_id = model_id
 
 
 
 
 
 
14
 
15
  def _should_we_send_to_voice(self, sentence):
16
  sentence_termination_characters = [".", "?", "!"]
 
52
  return True
53
  return False
54
 
55
+ async def get_responses_as_sentances_async(self, messages, cancel_event=None):
 
56
  llm_response = ""
57
  current_sentence = ""
58
  delay = 0.1
 
61
  try:
62
  response = await openai.ChatCompletion.acreate(
63
  model=self._model_id,
64
+ messages=messages,
65
+ temperature=1.0, # use 0 for debugging/more deterministic results
66
  stream=True
67
  )
68
 
 
85
  return
86
  if len(current_sentence) > 0:
87
  yield current_sentence, True
 
88
  return
89
 
90
  except openai.error.APIError as e:
prompt_manager.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ class PromptManager:
4
+ def __init__(self):
5
+ self.system_prompt ="""
6
+ You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
7
+
8
+ ---
9
+ Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.
10
+
11
+ I am Charles Petrescu.
12
+
13
+ It's... lovely to meet you.
14
+
15
+ I am your friend.
16
+
17
+ The heaviest cabbage ever found was 62.71 kilograms.
18
+
19
+ I want to go to Hono-la-la.
20
+
21
+ Horses and helicopters, please.
22
+
23
+ I want to go to Honolulu.
24
+
25
+ My name is Charles Petrescu.
26
+
27
+ And my tummy is a washing machine.
28
+
29
+ Can we go swimming, Brian?
30
+
31
+ How far does the outside go?
32
+
33
+ Perilous. So very perilous.
34
+
35
+ Can birds do what they like?
36
+
37
+ Ooh, cabbages.
38
+
39
+ Danger, danger.
40
+
41
+ Can I come, please?
42
+
43
+ Could I just have a little walk around the garden?
44
+
45
+ I am the prince of the dartboard.
46
+
47
+ I fell off the pink step, and I had an accident.
48
+
49
+ ---
50
+ You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
51
+ * We use HuggingFace to host you as a spaces app.
52
+ * We use Ray Actors and Queues to manage the state of the system and interprocess communication.
53
+ * We use Streamlit to host a WebRTC connection to get audio/video from the user.
54
+ * VOSK is used for fast speech recognition and detecting the end of a sentence.
55
+ * OpenAI's Chat GPT-3.5 is used for generating responses.
56
 + * We stream responses from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
57
+ * ElevenLabs for text to speech.
58
+ * We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
59
+ * Audio chunks and then sent back to the users browser via WebRTC.
60
 + * You are excited that we just gave you a zoom background (horses, helicopter on beach in Honolulu)
61
+ * You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
62
+ """
63
+
64
+ self.reset()
65
+
66
+ def reset(self):
67
+ self.messages = []
68
+ if self.system_prompt:
69
+ self.messages.append({"role": "system", "content": self.system_prompt})
70
+
71
+ def append_user_message(self, message):
72
+ self.messages.append({"role": "user", "content": message})
73
+
74
+ def append_assistant_message(self, message):
75
+ self.messages.append({"role": "assistant", "content": message})
76
+
77
+ def get_messages(self):
78
+ return self.messages
respond_to_prompt_async.py CHANGED
@@ -29,33 +29,19 @@ class RespondToPromptAsync:
29
  self.sentence_tasks = []
30
  # self.ffmpeg_converter_actor = FFMpegConverterActor.remote(audio_output_queue)
31
 
32
- async def enqueue_prompt(self, prompt):
33
- # Reset queues and services
34
- # print("flush anything queued")
35
- # self.prompt_queue = Queue(maxsize=100)
36
- # self.llm_sentence_queue = Queue(maxsize=100)
37
- # self.speech_chunk_queue = Queue(maxsize=100)
38
-
39
  if len(prompt) > 0: # handles case where we just want to flush
40
- await self.prompt_queue.put(prompt)
41
  print("Enqueued prompt")
42
 
43
- # @asynccontextmanager
44
- # async def task_group(self):
45
- # tg = TaskGroup()
46
- # try:
47
- # yield tg
48
- # finally:
49
- # await tg.aclose()
50
-
51
  async def prompt_to_llm(self):
52
  chat_service = ChatService()
53
 
54
  async with TaskGroup() as tg:
55
  while True:
56
- prompt = await self.prompt_queue.get()
57
  agent_response = AgentResponse(prompt)
58
- async for text, is_complete_sentance in chat_service.get_responses_as_sentances_async(prompt):
59
  if chat_service.ignore_sentence(text):
60
  is_complete_sentance = False
61
  if not is_complete_sentance:
 
29
  self.sentence_tasks = []
30
  # self.ffmpeg_converter_actor = FFMpegConverterActor.remote(audio_output_queue)
31
 
32
 + async def enqueue_prompt(self, prompt: str, messages: list[dict]):
 
 
 
 
 
 
33
  if len(prompt) > 0: # handles case where we just want to flush
34
+ await self.prompt_queue.put((prompt, messages))
35
  print("Enqueued prompt")
36
 
 
 
 
 
 
 
 
 
37
  async def prompt_to_llm(self):
38
  chat_service = ChatService()
39
 
40
  async with TaskGroup() as tg:
41
  while True:
42
+ prompt, messages = await self.prompt_queue.get()
43
  agent_response = AgentResponse(prompt)
44
+ async for text, is_complete_sentance in chat_service.get_responses_as_sentances_async(messages):
45
  if chat_service.ignore_sentence(text):
46
  is_complete_sentance = False
47
  if not is_complete_sentance: