sohojoe committed on
Commit
4385b66
1 Parent(s): 69f88db

streaming speech in the debug

Browse files
Files changed (4) hide show
  1. audio_stream_processor.py +42 -0
  2. chat_service.py +1 -1
  3. debug.py +35 -22
  4. speech_service.py +9 -0
audio_stream_processor.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ from threading import Thread
3
+ from queue import Queue
4
+ from typing import Iterator
5
+
6
class AudioStreamProcessor:
    """Play queued audio streams one after another on a background thread.

    Each stream is an iterator of ``bytes`` chunks. Streams are played in the
    order they were added: every stream is piped into its own ``mpv`` process
    and the next stream is only started once that process has exited.
    Call :meth:`close` to let the queued streams finish and stop the worker.
    """

    def __init__(self):
        # FIFO of pending streams; ``None`` is the shutdown sentinel.
        self.queue = Queue()
        # Daemon thread: if the caller dies before reaching close(), the
        # worker (blocked on queue.get()) must not keep the interpreter alive.
        self.thread = Thread(target=self._process_audio_streams, daemon=True)
        self.thread.start()

    def add_audio_stream(self, audio_stream: Iterator[bytes]):
        """Queue *audio_stream* for playback after all previously added streams."""
        self.queue.put(audio_stream)

    def _process_audio_streams(self):
        """Worker loop: play queued streams until the ``None`` sentinel arrives."""
        # Local import so this class does not depend on a file-level import.
        import logging

        while True:
            audio_stream = self.queue.get()
            if audio_stream is None:  # We'll use None as a sentinel to mark the end
                break
            try:
                self._stream(audio_stream)
            except Exception:
                # One bad stream (player missing, upstream iterator failing, ...)
                # must not kill the worker, otherwise every stream queued after
                # it would be silently dropped.
                logging.getLogger(__name__).exception(
                    "Failed to play audio stream; skipping it"
                )

    def _stream(self, audio_stream: Iterator[bytes]):
        """Pipe every chunk of *audio_stream* into a fresh ``mpv`` process.

        Blocks until ``mpv`` has exited. Raises whatever ``subprocess.Popen``
        or the stream iterator raises; the player is still shut down first.
        """
        mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
        mpv_process = subprocess.Popen(
            mpv_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        try:
            for chunk in audio_stream:
                if not chunk:
                    # Nothing to write for None / empty chunks.
                    continue
                try:
                    mpv_process.stdin.write(chunk)
                    # Flush per chunk so playback starts before the stream ends.
                    mpv_process.stdin.flush()
                except BrokenPipeError:
                    # mpv exited early; there is nobody left to feed.
                    break
        finally:
            # Always signal EOF and reap the player, even if the iterator
            # raised, so no mpv process is left waiting on an open pipe.
            try:
                mpv_process.stdin.close()
            except BrokenPipeError:
                pass  # pipe already gone; closing was only a courtesy
            mpv_process.wait()

    def close(self):
        """Finish playing everything queued so far, then stop the worker thread."""
        self.queue.put(None)  # Signal the processing thread to terminate
        self.thread.join()
chat_service.py CHANGED
@@ -55,7 +55,7 @@ class ChatService:
55
  max_new_tokens=200,
56
  do_sample=True,
57
  top_k=40,
58
- temperature=1.0,
59
  pad_token_id=self._tokenizer.eos_token_id,
60
  )
61
  agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
 
55
  max_new_tokens=200,
56
  do_sample=True,
57
  top_k=40,
58
+ temperature=1.0, # use 1.0 for debugging/deterministic results
59
  pad_token_id=self._tokenizer.eos_token_id,
60
  )
61
  agent_response = self._tokenizer.decode(outputs[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
debug.py CHANGED
@@ -2,34 +2,47 @@ from clip_transform import CLIPTransform
2
  from chat_service import ChatService
3
  from dotenv import load_dotenv
4
  from speech_service import SpeechService
 
 
5
 
6
- load_dotenv()
7
 
8
- # print ("Initializing CLIP templates")
9
- # clip_transform = CLIPTransform()
10
- # print ("CLIP success")
11
 
12
- print ("Initializing Chat")
13
- chat_service = ChatService()
 
14
 
15
- user_speech_service = SpeechService(voice_id="Adam")
16
- ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
17
 
18
- user_speech_service.print_voices()
 
 
19
 
 
20
 
21
- prompts = [
22
- "hello, how are you today?",
23
- "tell me about your shadow self?",
24
- "hmm, interesting, tell me more about that.",
25
- "wait, that is so interesting, what else?",
26
- ]
27
- for prompt in prompts:
28
- print (f'prompt: "{prompt}"')
29
- user_speech_service.speak(prompt)
30
- response = chat_service.chat(prompt)
31
- print (f'response: "{response}"')
32
- ai_speech_service.speak(response)
33
 
 
 
 
 
 
34
 
35
- print ("Chat success")
 
 
 
 
 
 
2
  from chat_service import ChatService
3
  from dotenv import load_dotenv
4
  from speech_service import SpeechService
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from audio_stream_processor import AudioStreamProcessor
7
 
 
8
 
9
def run_debug_code():
    """Run a scripted chat session and speak both sides of it.

    For each canned prompt, the prompt is turned into a speech stream with the
    user voice, the chat service is asked for a response, and the response is
    turned into a speech stream with the AI voice. Streams are handed to an
    ``AudioStreamProcessor`` so playback happens in the background while the
    next response is being generated.
    """
    load_dotenv()

    # print ("Initializing CLIP templates")
    # clip_transform = CLIPTransform()
    # print ("CLIP success")

    print("Initializing Chat")
    chat_service = ChatService()

    user_speech_service = SpeechService(voice_id="Adam")
    ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj")  # Chales003
    processor = AudioStreamProcessor()

    # user_speech_service.print_voices() # if you want to see your custom voices

    prompts = [
        "hello, how are you today?",
        "tell me about your shadow self?",
        "hmm, interesting, tell me more about that.",
        "wait, that is so interesting, what else?",
    ]
    try:
        for prompt in prompts:
            print("")
            print(f'prompt: "{prompt}"')
            stream = user_speech_service.stream(prompt)
            processor.add_audio_stream(stream)

            response = chat_service.chat(prompt)
            print("")
            print(f'response: "{response}"')
            stream = ai_speech_service.stream(response)
            processor.add_audio_stream(stream)
    finally:
        # Always stop the playback worker, even if chat or speech generation
        # raised; otherwise its thread is never told to terminate.
        processor.close()
    print("Chat success")


if __name__ == '__main__':
    run_debug_code()
speech_service.py CHANGED
@@ -46,3 +46,12 @@ class SpeechService:
46
  play(audio)
47
  return
48
 
 
 
 
 
 
 
 
 
 
 
46
  play(audio)
47
  return
48
 
49
def stream(self, prompt):
    """Request streamed speech for *prompt* and return what ``generate`` yields back.

    The call is made with this service's voice and model ids and
    ``stream=True``; the result of ``generate`` is returned untouched.
    """
    request = {
        "text": prompt,
        "voice": self._voice_id,
        "model": self._model_id,
        "stream": True,
    }
    return generate(**request)
57
+