sohojoe committed
Commit 6130167 • 1 Parent(s): 149eeaf

slight improvement on history

Files changed (2)
  1. app.py +2 -2
  2. charles_actor.py +47 -16
app.py CHANGED
@@ -114,8 +114,8 @@ async def main():
                 pass
         if charles_actor is not None:
             try:
-                new_environment_state = await charles_actor.get_environment_state.remote()
-                environment_state_ouput.markdown(f"{new_environment_state}")
+                # new_environment_state = await charles_actor.get_environment_state.remote()
+                # environment_state_ouput.markdown(f"{new_environment_state}")
                 charles_debug_str = await charles_actor.get_charles_actor_debug_output.remote()
                 charles_actor_debug_output.markdown(charles_debug_str)
             except Exception as e:
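
Note on the app.py hunk above: with the environment-state readout commented out, the loop now only polls the actor's debug table and renders it. Below is a minimal sketch of that polling pattern under assumed names; `poll_debug_output`, `placeholder`, and `interval` are illustrative and not part of app.py, where the equivalent calls sit inline in main()'s loop.

```python
import asyncio

async def poll_debug_output(charles_actor, placeholder, interval=0.5):
    # Illustrative only: charles_actor is the Ray actor handle and
    # placeholder a Streamlit st.empty() slot; both are created elsewhere.
    while True:
        try:
            # Awaiting the ObjectRef returned by .remote() works inside asyncio.
            debug_str = await charles_actor.get_charles_actor_debug_output.remote()
            placeholder.markdown(debug_str)
        except Exception:
            pass  # the actor may not be up yet; keep polling
        await asyncio.sleep(interval)
```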
charles_actor.py CHANGED
@@ -68,12 +68,6 @@ class CharlesActor:
             table_content = "| Charles Actor debug history |\n| --- |\n"
             table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history)])
             self._charles_actor_debug_output = table_content
-        def preview_debug_output(output):
-            table_content = "| Charles Actor debug history |\n| --- |\n"
-            debug_output_history_copy = debug_output_history.copy()
-            debug_output_history_copy.append(output)
-            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
-            self._charles_actor_debug_output = table_content
 
         self._state = "Waiting for input"
         total_video_frames = 0
@@ -84,6 +78,11 @@ class CharlesActor:
         vector_debug = "--n/a--"
 
         process_speech_to_text_future = []
+        current_responses = []
+        speech_chunks_per_response = []
+        human_preview_text = ""
+        robot_preview_text = ""
+
 
         while True:
             if len(self._debug_queue) > 0:
@@ -114,10 +113,6 @@ class CharlesActor:
                 distances, closest_item_key, distance_debug_str = self._prototypes.get_distances(image_vector)
                 vector_debug = f"{closest_item_key} {distance_debug_str}"
 
-
-            human_preview_text = ""
-            robot_preview_text = ""
-
             if len(process_speech_to_text_future) > 0:
                 ready, _ = ray.wait([process_speech_to_text_future[0]], timeout=0)
                 if ready:
@@ -128,22 +123,58 @@ class CharlesActor:
 
                     if speaker_finished and len(prompt) > 0 and prompt not in prompts_to_ignore:
                         print(f"Prompt: {prompt}")
-                        # system_one_audio_history.append("... " + str(raw_json))
+                        lines = []
+                        for i, response in enumerate(current_responses):
+                            line = "🤖 " if len(lines) == 0 else "... "
+                            line += f"{response} [{speech_chunks_per_response[i]}]"
+                            lines.append(line)
+                        for line in reversed(lines):
+                            add_debug_output(line)
                         add_debug_output(f"👨 {prompt}")
+                        current_responses = []
+                        speech_chunks_per_response = []
+                        env_state.llm_preview = ""
+                        env_state.llm_responses = []
+                        env_state.tts_raw_chunk_ids = []
+                        human_preview_text = ""
+                        robot_preview_text = ""
                         await self._respond_to_prompt_actor.enqueue_prompt.remote(prompt)
                     elif len(prompt) > 0 and prompt not in prompts_to_ignore:
                         human_preview_text = f"👨❓ {prompt}"
 
             for new_response in env_state.llm_responses:
-                add_debug_output(f"🤖 {new_response}")
+                # add_debug_output(f"🤖 {new_response}")
+                current_responses.append(new_response)
+                speech_chunks_per_response.append(0)
+                robot_preview_text = ""
             if len(env_state.llm_preview):
                 robot_preview_text = f"🤖❓ {env_state.llm_preview}"
 
+            for chunk in env_state.tts_raw_chunk_ids:
+                chunk = json.loads(chunk)
+                # prompt = chunk['prompt']
+                response_id = chunk['llm_sentence_id']
+                speech_chunks_per_response[response_id] += 1
+
+            table_content = "| Charles Actor debug history |\n| --- |\n"
+            debug_output_history_copy = debug_output_history.copy()
+            if len(robot_preview_text) > 0:
+                debug_output_history_copy.append(robot_preview_text)
+            lines = []
+            for i, response in enumerate(current_responses):
+                line = "🤖 " if len(lines) == 0 else "... "
+                line += f"{response} [{speech_chunks_per_response[i]}]"
+                lines.append(line)
+            for line in reversed(lines):
+                debug_output_history_copy.append(line)
             if len(human_preview_text) > 0:
-                preview_debug_output(human_preview_text)
-            elif len(robot_preview_text) > 0:
-                preview_debug_output(robot_preview_text)
-
+                debug_output_history_copy.append(human_preview_text)
+            if len(debug_output_history_copy) > 10:
+                debug_output_history_copy.pop(0)
+            table_content += "\n".join([f"| {item} |" for item in reversed(debug_output_history_copy)])
+            self._charles_actor_debug_output = table_content
+
+
             await asyncio.sleep(0.01)
             loops+=1
            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"