Commit cf5e7f4 by sohojoe
Parent: f559f1e

animate charles

agent_response.py CHANGED
@@ -9,7 +9,7 @@ class AgentResponse(dict):
         self['llm_sentence'] = ''
         self['llm_sentence_id'] = 0
         self['llm_sentences'] = []
-        self['tts_raw_chunk'] = None
+        self['tts_raw_chunk_ref'] = None
         self['tts_raw_chunk_id'] = 0
 
     def make_copy(self):
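The schema change above swaps the raw TTS byte payload for a Ray object reference, so the dict that flows between actors stays small while the audio bytes live once in the shared object store. A minimal sketch of the round trip, using a stand-in dict and payload rather than the app's real queues:

import ray

ray.init(ignore_reinit_error=True)

response = {'tts_raw_chunk_ref': None, 'tts_raw_chunk_id': 0}

chunk = b"\x00" * 32_000                          # pretend PCM audio from the TTS stream
response['tts_raw_chunk_ref'] = ray.put(chunk)    # bytes go to the object store once

# ... the dict travels through queues/actors as a small message ...

audio_chunk = ray.get(response['tts_raw_chunk_ref'])  # dereference at the consumer
assert audio_chunk == chunk

text_to_speech_service.py writes the ref and respond_to_prompt_actor.py dereferences it, as their diffs below show.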
charles_actor.py CHANGED
@@ -33,7 +33,8 @@ class CharlesActor:
         self._state = "000 - creating StreamlitAVQueue"
         from streamlit_av_queue import StreamlitAVQueue
         self._streamlit_av_queue = StreamlitAVQueue()
-        self._out_audio_queue = self._streamlit_av_queue.get_out_audio_queue()
+        self._out_audio_queue = await self._streamlit_av_queue.get_out_audio_queue()
+        self._out_video_queue = await self._streamlit_av_queue.get_out_video_queue()
 
         print("001 - create RespondToPromptActor")
         self._state = "001 - creating RespondToPromptActor"
@@ -57,6 +58,12 @@ class CharlesActor:
         self._state = "003 - creating Prototypes"
         from prototypes import Prototypes
         self._prototypes = Prototypes()
+
+        print("004 - create animator")
+        self._state = "004 - creating animator"
+        from charles_animator import CharlesAnimator
+        self._animator = CharlesAnimator()
+
         print("010")
         self._needs_init = True
         self._state = "Initialized"
@@ -184,8 +191,19 @@ class CharlesActor:
 
 
             await asyncio.sleep(0.01)
+
+            # add observations to the environment state
+            count = len(self._out_audio_queue)
+            is_talking = bool(count > 0)
+            frame = self._animator.update(is_talking)
+            if self._out_video_queue.full():
+                evicted_item = await self._out_video_queue.get_async()
+                del evicted_item
+            frame_ref = ray.put(frame)
+            await self._out_video_queue.put_async(frame_ref)
+
             loops+=1
-            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
+            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"
 
 def init_ray():
     try:
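The new per-loop block publishes the animator's latest frame through a bounded queue, evicting the oldest entry when the queue is full so consumers never block behind stale frames. A sketch of that evict-then-put pattern, assuming a ray.util.queue.Queue and illustrative names:

import ray
from ray.util.queue import Queue

async def publish_latest(queue: Queue, frame) -> None:
    # Drop the oldest entry instead of blocking when the queue is full,
    # so consumers always see a recent frame.
    if queue.full():
        evicted = await queue.get_async()
        del evicted
    await queue.put_async(ray.put(frame))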
charles_animator.py ADDED
@@ -0,0 +1,128 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+import cv2
+import av
+import numpy as np
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    Assumes the overlay has an alpha channel.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert the overlay's colour channels from RGB to BGR (alpha was already extracted above)
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Blend the overlay onto the background, channel by channel
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image and convert to RGB
+    background = cv2.imread(background)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than the background image in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+class CharlesAnimator:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+        self.was_talking = False
+        # use static frames for performance
+        self.static_frames = create_charles_frames("./images/zoom-background.png", [
+            "./images/charles.png",
+            "./images/charles-open.png"
+        ])
+
+    def update(self, is_talking):
+        start_talking = is_talking and not self.was_talking
+        self.was_talking = is_talking
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.static_frames[1]  # return the open-mouth frame, not a bool
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        frame = self.static_frames[1] if self.mouth_open else self.static_frames[0]
+        return frame
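A hypothetical driver loop for CharlesAnimator, assuming the three PNGs referenced in __init__ exist on disk; this is illustrative only, not the actor's actual pump:

import time
from charles_animator import CharlesAnimator

animator = CharlesAnimator()
for step in range(50):
    is_talking = step % 20 < 10           # pretend speech arrives in bursts
    frame = animator.update(is_talking)   # RGB ndarray with mouth open/closed
    time.sleep(0.05)                      # crude ~20 fps frame pump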
ffmpeg_converter_actor.py CHANGED
@@ -19,7 +19,8 @@ class FFMpegConverterActor:
         while True:
             chunk = await self.output_pipe.readexactly(self.buffer_size)
             # print(f"FFMpegConverterActor: read {len(chunk)} bytes")
-            await self.output_queue.put_async(chunk)
+            chunk_ref = ray.put(chunk)
+            await self.output_queue.put_async(chunk_ref)
 
     async def start_process(self):
         cmd = [
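The converter's read loop pulls fixed-size chunks from ffmpeg's stdout and now ships object refs instead of raw bytes. A rough standalone sketch of that loop; the ffmpeg flags, buffer size, and queue are assumptions, not the actor's exact configuration:

import asyncio
import ray

async def pump_pcm(output_queue, buffer_size: int = 4096) -> None:
    # Spawn ffmpeg decoding stdin to raw 16 kHz mono PCM on stdout (assumed flags)
    proc = await asyncio.create_subprocess_exec(
        "ffmpeg", "-i", "pipe:0", "-f", "s16le", "-ar", "16000", "-ac", "1", "pipe:1",
        stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE,
    )
    try:
        while True:
            # readexactly blocks until a full fixed-size chunk is available
            chunk = await proc.stdout.readexactly(buffer_size)
            await output_queue.put_async(ray.put(chunk))  # ship a ref, not bytes
    except asyncio.IncompleteReadError:
        pass  # ffmpeg closed its stdout mid-chunk; the stream is finished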
images/charles-open.png ADDED

Git LFS Details

  • SHA256: f9078c42fb437eb67a24ca6ba61207eed32a94bbb0ec7faf82273142ad8a773e
  • Pointer size: 131 Bytes
  • Size of remote file: 327 kB
images/charles.png ADDED

Git LFS Details

  • SHA256: 6c4bb66c4d5f88dc72a7acbe27bdfa67bd272a16f89f59a2d65eb5586eee9925
  • Pointer size: 131 Bytes
  • Size of remote file: 326 kB
images/zoom-background.png ADDED

Git LFS Details

  • SHA256: 0b80c7c201b44552475515653a6de32619c5ff5169b1d970a7a2e72e52b98c92
  • Pointer size: 132 Bytes
  • Size of remote file: 6.25 MB
respond_to_prompt_actor.py CHANGED
@@ -130,7 +130,8 @@ class SpeechToConverterActor:
         self.ffmpeg_converter_actor.run.remote()
         while True:
             chunk_response = await self.input_queue.get_async()
-            audio_chunk = chunk_response['tts_raw_chunk']
+            audio_chunk_ref = chunk_response['tts_raw_chunk_ref']
+            audio_chunk = ray.get(audio_chunk_ref)
             await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)
 
     async def cancel(self):
streamlit_av_queue.py CHANGED
@@ -3,9 +3,11 @@ import av
 import asyncio
 from collections import deque
 import threading
+import cv2
 
 import numpy as np
 import ray
+from ray.util.queue import Queue
 from webrtc_av_queue_actor import WebRtcAVQueueActor
 import pydub
 import torch
@@ -20,7 +22,8 @@ class StreamlitAVQueue:
         self.queue_actor = WebRtcAVQueueActor.options(
             name="WebRtcAVQueueActor",
             get_if_exists=True,
         ).remote()
+        self._out_video_frame = None
 
     def set_looking_listening(self, looking, listening: bool):
         with self._lock:
@@ -31,18 +34,33 @@ class StreamlitAVQueue:
         self,
         frames: List[av.VideoFrame],
     ) -> av.VideoFrame:
+        updated_frames = []
         try:
             with self._lock:
                 should_look = self._looking
-            if len(frames) > 0 and should_look:
-                for frame in frames:
-                    shared_tensor = frame.to_ndarray(format="rgb24")
-                    shared_tensor_ref = ray.put(shared_tensor)
+            next_out_video_frame = await self.queue_actor.get_out_video_frame.remote()
+            if next_out_video_frame is not None:
+                self._out_video_frame = next_out_video_frame
+            for i, frame in enumerate(frames):
+                user_image = frame.to_ndarray(format="rgb24")
+                if should_look:
+                    shared_tensor_ref = ray.put(user_image)
                     await self.queue_actor.enqueue_in_video_frame.remote(shared_tensor_ref)
+                if self._out_video_frame is not None:
+                    frame = self._out_video_frame
+                    # resize user image to 1/4 size
+                    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+                    x_user = 0
+                    y_user = frame.shape[0] - user_frame.shape[0]
+                    final_frame = frame.copy()
+                    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+                    frame = av.VideoFrame.from_ndarray(final_frame, format="rgb24")
+
+                updated_frames.append(frame)
             # print (f"tensor len: {len(shared_tensor)}, tensor shape: {shared_tensor.shape}, tensor type:{shared_tensor.dtype} tensor ref: {shared_tensor_ref}")
         except Exception as e:
             print (e)
-        return frames
+        return updated_frames
 
     async def queued_audio_frames_callback(
         self,
@@ -103,8 +121,8 @@ class StreamlitAVQueue:
         shared_tensors = await self.queue_actor.get_in_video_frames.remote()
         return shared_tensors
 
-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self) -> Queue:
         return self.queue_actor.get_out_audio_queue.remote()
 
-    # def get_out_audio_frame(self):
-    #     return self.queue_actor.get_out_audio_frame.remote()
+    def get_out_video_queue(self) -> Queue:
+        return self.queue_actor.get_out_video_queue.remote()
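The reworked callback composites a quarter-size webcam image into the bottom-left corner of the animated frame. The core picture-in-picture math, extracted into a small sketch (the names are illustrative; both inputs are assumed to be RGB uint8 arrays, with the inset small enough to fit inside the frame):

import cv2
import numpy as np

def composite_pip(bot_frame: np.ndarray, user_image: np.ndarray) -> np.ndarray:
    # Shrink the webcam image to a quarter of its size
    small = cv2.resize(user_image,
                       (user_image.shape[1] // 4, user_image.shape[0] // 4),
                       interpolation=cv2.INTER_AREA)
    out = bot_frame.copy()
    y = out.shape[0] - small.shape[0]  # anchor at the bottom-left corner
    out[y:y + small.shape[0], 0:small.shape[1]] = small
    return out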
tests/test_image.py ADDED
@@ -0,0 +1,192 @@
+import cv2
+import av
+import numpy as np
+
+def resize_aspect_fit(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+    else:
+        # Original aspect is taller than target
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+    return resized_image
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    Assumes the overlay has an alpha channel.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert the overlay's colour channels from RGB to BGR (alpha was already extracted above)
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Blend the overlay onto the background, channel by channel
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+
+def transform_frame(user_frame: av.VideoFrame) -> av.VideoFrame:
+    # Convert av.VideoFrame to numpy array (OpenCV format)
+    user_frame_np = np.frombuffer(user_frame.planes[0], np.uint8).reshape(user_frame.height, user_frame.width, -1)
+
+    # Load background image
+    background = cv2.imread("zoom-background.png")
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("bot-image.png", cv2.IMREAD_UNCHANGED)
+
+    # Resize background to match the user frame dimensions
+    aspect_ratio = background.shape[1] / background.shape[0]
+    new_h = user_frame.height
+    new_w = int(new_h * aspect_ratio)
+    background_resized = cv2.resize(background, (new_w, new_h))
+
+    # Crop the background if it exceeds the user frame width
+    if new_w > user_frame.width:
+        crop_x1 = (new_w - user_frame.width) // 2
+        crop_x2 = crop_x1 + user_frame.width
+        background_resized = background_resized[:, crop_x1:crop_x2, :3]
+
+    # Overlay bot image on the right-hand side
+    x_bot = background_resized.shape[1] - bot_image.shape[1]
+    y_bot = 0
+    background_resized = overlay_images(background_resized, bot_image, x_bot, y_bot)
+
+    # Overlay user's video frame in the bottom-left corner
+    x_user = 0
+    y_user = background_resized.shape[0] - user_frame.height
+    background_resized[y_user:user_frame.height+y_user, x_user:user_frame.width+x_user, :3] = user_frame_np
+
+    # Convert the final frame back to av.VideoFrame
+    output_frame = av.VideoFrame.from_ndarray(background_resized, format="bgr24")
+
+    return output_frame
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image and convert to RGB
+    background = cv2.imread(background)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than the background image in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+
+def test_create_bot_frames():
+    frames = create_charles_frames("./images/zoom-background.png", ["./images/charles.png", "./images/charles-open.png"])
+    index = 0
+    for frame in frames:
+        final_frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        cv2.imwrite(f"./images/charles_frame_{index}.jpg", final_frame_bgr)
+        index += 1
+
+def test_overlay():
+    # Load mock user image and convert to RGB
+    user_image = cv2.imread("./prototypes/person-016.jpg")
+    user_image = cv2.cvtColor(user_image, cv2.COLOR_BGR2RGB)
+    # resize to 640x480, handle that this is smaller and can be cropped
+    user_image = resize_and_crop(user_image, (640, 480))
+
+    # Load background image and convert to RGB
+    background = cv2.imread("./images/zoom-background.png")
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (user_image.shape[1], user_image.shape[0]))
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("./images/charles-open.png", cv2.IMREAD_UNCHANGED)
+
+    # resize bot image if it is larger than the background image in any direction
+    if bot_image.shape[0] > background.shape[0]:
+        bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+    # Overlay bot image on the right-hand side
+    x_bot = background.shape[1] - bot_image.shape[1]
+    y_bot = background.shape[0] - bot_image.shape[0]
+    background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+    # Overlay the user's frame in the bottom-left corner at 1/4 size
+    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+    x_user = 0
+    y_user = background.shape[0] - user_frame.shape[0]
+    final_frame = background_with_bot.copy()
+    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+
+    # Save the final frame as JPEG
+    final_frame_bgr = cv2.cvtColor(final_frame, cv2.COLOR_RGB2BGR)
+    cv2.imwrite("./images/final_frame.jpg", final_frame_bgr)
+
+test_overlay()
+test_create_bot_frames()
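The per-channel blend loop in overlay_images can also be written as a single broadcast, which is worth knowing if these helpers ever land on a hot path. A vectorized equivalent, assuming an RGBA uint8 overlay that fits inside the background at (x, y):

import numpy as np

def alpha_blend(background: np.ndarray, overlay: np.ndarray, x: int, y: int) -> np.ndarray:
    h, w = overlay.shape[:2]
    alpha = overlay[:, :, 3:4] / 255.0                 # keep shape (h, w, 1) for broadcasting
    roi = background[y:y + h, x:x + w, :3].astype(np.float64)
    blended = alpha * overlay[:, :, :3] + (1.0 - alpha) * roi
    background[y:y + h, x:x + w, :3] = blended.astype(np.uint8)
    return background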
tests/test_talking.py ADDED
@@ -0,0 +1,65 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+
+
+class CharacterFace:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+
+    def update(self, is_talking, start_talking=False):
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.mouth_open
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        return self.mouth_open
+
+
+def _debug_test():
+    # Example usage
+    face = CharacterFace()
+
+    # Initialize variables to control talk and pause durations
+    next_talk_time = 0
+    next_pause_time = 0
+    is_talking = False
+
+    # Simulate the character talking and not talking with variable durations
+    for _ in range(500):  # Increase the number of iterations for a longer simulation
+        current_time = time.time()
+        start_talking = False
+
+        if is_talking and current_time >= next_talk_time:
+            is_talking = False
+            next_pause_time = current_time + random.uniform(0.5, 3.0)
+
+        if not is_talking and current_time >= next_pause_time:
+            is_talking = True
+            start_talking = True  # Set flag to open mouth at the start of talking
+            next_talk_time = current_time + random.uniform(1.0, 5.0)
+
+        mouth_open = face.update(is_talking, start_talking)
+        print(f"Is Talking: {is_talking}, Mouth Open: {mouth_open}")
+        time.sleep(random.uniform(0.1, 0.5))
text_to_speech_service.py CHANGED
@@ -5,7 +5,7 @@ from elevenlabs import generate, play
 from elevenlabs import set_api_key
 from elevenlabs import generate, stream
 from agent_response import AgentResponse
-
+import ray
 
 class TextToSpeechService:
     def __init__(self, voice_id="Bella", model_id="eleven_monolingual_v1"):
@@ -60,7 +60,8 @@ class TextToSpeechService:
 
         # Run next(stream) in a separate thread to avoid blocking the event loop
         chunk = await asyncio.to_thread(next, stream)
-        sentence_response['tts_raw_chunk'] = chunk
+        chunk_ref = ray.put(chunk)
+        sentence_response['tts_raw_chunk_ref'] = chunk_ref
         if cancel_event.is_set():
             return
         yield sentence_response
webrtc_av_queue_actor.py CHANGED
@@ -8,9 +8,10 @@ import numpy as np
 @ray.remote
 class WebRtcAVQueueActor:
     def __init__(self):
-        self.in_audio_queue = Queue(maxsize=100)  # Adjust the size as needed
-        self.in_video_queue = Queue(maxsize=100)  # Adjust the size as needed
-        self.out_audio_queue = Queue(maxsize=100)  # Adjust the size as needed
+        self.in_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.in_video_queue = Queue(maxsize=10)  # Adjust the size as needed
+        self.out_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.out_video_queue = Queue(maxsize=10)  # Adjust the size as needed
 
 
     async def enqueue_in_video_frame(self, shared_tensor_ref):
@@ -25,7 +26,6 @@ class WebRtcAVQueueActor:
             del evicted_item
         await self.in_audio_queue.put_async(shared_buffer_ref)
 
-
     async def get_in_audio_frames(self):
         audio_frames = []
         if self.in_audio_queue.empty():
@@ -44,11 +44,21 @@ class WebRtcAVQueueActor:
             video_frames.append(shared_tensor_ref)
         return video_frames
 
-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self) -> Queue:
         return self.out_audio_queue
 
+    def get_out_video_queue(self) -> Queue:
+        return self.out_video_queue
+
     async def get_out_audio_frame(self):
         if self.out_audio_queue.empty():
             return None
-        audio_frame = await self.out_audio_queue.get_async()
-        return audio_frame
+        frame = await self.out_audio_queue.get_async()
+        return frame
+
+    async def get_out_video_frame(self):
+        if self.out_video_queue.empty():
+            return None
+        while not self.out_video_queue.empty():
+            frame = await self.out_video_queue.get_async()
+        return frame
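get_out_video_frame drains the queue and returns only the newest frame, so a slow consumer renders the latest video rather than working through a backlog. The same "drain to latest" helper in generic form, assuming a ray.util.queue.Queue (inside a single Ray actor, calls are serialized, so the empty/get sequence is safe):

from ray.util.queue import Queue

async def latest_item(queue: Queue):
    if queue.empty():
        return None
    item = None
    while not queue.empty():
        item = await queue.get_async()  # discard everything but the newest entry
    return item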