sohojoe committed
Commit 6130167 • 1 Parent(s): 149eeaf

slight improvement on history

Files changed (2)
  1. app.py +2 -2
  2. charles_actor.py +47 -16
app.py CHANGED
@@ -114,8 +114,8 @@ async def main():
                 pass
         if charles_actor is not None:
             try:
-                new_environment_state = await charles_actor.get_environment_state.remote()
-                environment_state_ouput.markdown(f"{new_environment_state}")
+                # new_environment_state = await charles_actor.get_environment_state.remote()
+                # environment_state_ouput.markdown(f"{new_environment_state}")
                 charles_debug_str = await charles_actor.get_charles_actor_debug_output.remote()
                 charles_actor_debug_output.markdown(charles_debug_str)
             except Exception as e:
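
Note on the app.py hunk above: with the environment-state readout commented out, the loop now only polls the actor's debug table and renders it. Below is a minimal sketch of that polling pattern under assumed names; `poll_debug_output`, `placeholder`, and `interval` are illustrative and not part of app.py, where the equivalent calls sit inline in main()'s loop.

```python
import asyncio

async def poll_debug_output(charles_actor, placeholder, interval=0.5):
    # Illustrative only: charles_actor is the Ray actor handle and
    # placeholder a Streamlit st.empty() slot; both are created elsewhere.
    while True:
        try:
            # Awaiting the ObjectRef returned by .remote() works inside asyncio.
            debug_str = await charles_actor.get_charles_actor_debug_output.remote()
            placeholder.markdown(debug_str)
        except Exception:
            pass  # the actor may not be up yet; keep polling
        await asyncio.sleep(interval)
```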
charles_actor.py CHANGED
@@ -68,12 +68,6 @@ class CharlesActor:
             table_content = "| Charles Actor debug history |\n| --- |\n"
             table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history)])
             self._charles_actor_debug_output = table_content
-        def preview_debug_output(output):
-            table_content = "| Charles Actor debug history |\n| --- |\n"
-            debug_output_history_copy = debug_output_history.copy()
-            debug_output_history_copy.append(output)
-            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
-            self._charles_actor_debug_output = table_content
 
         self._state = "Waiting for input"
         total_video_frames = 0
@@ -84,6 +78,11 @@ class CharlesActor:
         vector_debug = "--n/a--"
 
         process_speech_to_text_future = []
+        current_responses = []
+        speech_chunks_per_response = []
+        human_preview_text = ""
+        robot_preview_text = ""
+
 
         while True:
             if len(self._debug_queue) > 0:
@@ -114,10 +113,6 @@ class CharlesActor:
                 distances, closest_item_key, distance_debug_str = self._prototypes.get_distances(image_vector)
                 vector_debug = f"{closest_item_key} {distance_debug_str}"
 
-
-            human_preview_text = ""
-            robot_preview_text = ""
-
             if len(process_speech_to_text_future) > 0:
                 ready, _ = ray.wait([process_speech_to_text_future[0]], timeout=0)
                 if ready:
@@ -128,22 +123,58 @@ class CharlesActor:
 
                     if speaker_finished and len(prompt) > 0 and prompt not in prompts_to_ignore:
                         print(f"Prompt: {prompt}")
-                        # system_one_audio_history.append("... " + str(raw_json))
+                        lines = []
+                        for i, response in enumerate(current_responses):
+                            line = "🤖 " if len(lines) == 0 else "... "
+                            line += f"{response} [{speech_chunks_per_response[i]}]"
+                            lines.append(line)
+                        for line in reversed(lines):
+                            add_debug_output(line)
                         add_debug_output(f"👨 {prompt}")
+                        current_responses = []
+                        speech_chunks_per_response = []
+                        env_state.llm_preview = ""
+                        env_state.llm_responses = []
+                        env_state.tts_raw_chunk_ids = []
+                        human_preview_text = ""
+                        robot_preview_text = ""
                         await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
                     elif len(prompt) > 0 and prompt not in prompts_to_ignore:
                         human_preview_text = f"👨❓ {prompt}"
 
             for new_response in env_state.llm_responses:
-                add_debug_output(f"🤖 {new_response}")
+                # add_debug_output(f"🤖 {new_response}")
+                current_responses.append(new_response)
+                speech_chunks_per_response.append(0)
+                robot_preview_text = ""
             if len(env_state.llm_preview):
                 robot_preview_text = f"🤖❓ {env_state.llm_preview}"
 
+            for chunk in env_state.tts_raw_chunk_ids:
+                chunk = json.loads(chunk)
+                # prompt = chunk['prompt']
+                response_id = chunk['llm_sentence_id']
+                speech_chunks_per_response[response_id] += 1
+
+            table_content = "| Charles Actor debug history |\n| --- |\n"
+            debug_output_history_copy = debug_output_history.copy()
+            if len(robot_preview_text) > 0:
+                debug_output_history_copy.append(robot_preview_text)
+            lines = []
+            for i, response in enumerate(current_responses):
+                line = "🤖 " if len(lines) == 0 else "... "
+                line += f"{response} [{speech_chunks_per_response[i]}]"
+                lines.append(line)
+            for line in reversed(lines):
+                debug_output_history_copy.append(line)
             if len(human_preview_text) > 0:
-                preview_debug_output(human_preview_text)
-            elif len(robot_preview_text) > 0:
-                preview_debug_output(robot_preview_text)
-
+                debug_output_history_copy.append(human_preview_text)
+            if len(debug_output_history_copy) > 10:
+                debug_output_history_copy.pop(0)
+            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
+            self._charles_actor_debug_output = table_content
+
+
             await asyncio.sleep(0.01)
             loops+=1
            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"