sohojoe committed on
Commit
bd435b3
1 Parent(s): 4385b66

streaming chat into speech

Browse files
Files changed (3) hide show
  1. debug.py +8 -10
  2. speech_service.py +5 -18
  3. streaming_chat_service.py +71 -0
debug.py CHANGED
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
4
  from speech_service import SpeechService
5
  from concurrent.futures import ThreadPoolExecutor
6
  from audio_stream_processor import AudioStreamProcessor
7
-
8
 
9
  def run_debug_code():
10
  load_dotenv()
@@ -14,11 +14,11 @@ def run_debug_code():
14
  # print ("CLIP success")
15
 
16
  print ("Initializing Chat")
17
- chat_service = ChatService()
 
 
18
 
19
  user_speech_service = SpeechService(voice_id="Adam")
20
- ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
21
- processor = AudioStreamProcessor()
22
 
23
  # user_speech_service.print_voices() # if you want to see your custom voices
24
 
@@ -32,15 +32,13 @@ def run_debug_code():
32
  print ("")
33
  print (f'prompt: "{prompt}"')
34
  stream = user_speech_service.stream(prompt)
35
- processor.add_audio_stream(stream)
36
 
37
- response = chat_service.chat(prompt)
38
  print ("")
39
- print (f'response: "{response}"')
40
- stream = ai_speech_service.stream(response)
41
- processor.add_audio_stream(stream)
42
 
43
- processor.close()
44
  print ("Chat success")
45
 
46
 
 
4
  from speech_service import SpeechService
5
  from concurrent.futures import ThreadPoolExecutor
6
  from audio_stream_processor import AudioStreamProcessor
7
+ from streaming_chat_service import StreamingChatService
8
 
9
  def run_debug_code():
10
  load_dotenv()
 
14
  # print ("CLIP success")
15
 
16
  print ("Initializing Chat")
17
+ # chat_service = ChatService()
18
+ audio_processor = AudioStreamProcessor()
19
+ chat_service = StreamingChatService(audio_processor, voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
20
 
21
  user_speech_service = SpeechService(voice_id="Adam")
 
 
22
 
23
  # user_speech_service.print_voices() # if you want to see your custom voices
24
 
 
32
  print ("")
33
  print (f'prompt: "{prompt}"')
34
  stream = user_speech_service.stream(prompt)
35
+ audio_processor.add_audio_stream(stream)
36
 
 
37
  print ("")
38
+ print (f'response:')
39
+ response = chat_service.respond_to(prompt)
 
40
 
41
+ audio_processor.close()
42
  print ("Chat success")
43
 
44
 
speech_service.py CHANGED
@@ -25,24 +25,11 @@ class SpeechService:
25
  print (voice)
26
 
27
  def speak(self, prompt):
28
- # audio = generate(
29
- # text=prompt,
30
- # voice=self._voice_id,
31
- # model=self._model_id,
32
- # )
33
- # play(audio)
34
- audio_stream = generate(
35
- text=prompt,
36
- voice=self._voice_id,
37
- model=self._model_id,
38
- stream=True
39
- )
40
- # stream(audio_stream)
41
- audio = b""
42
- for chunk in audio_stream:
43
- if chunk is not None:
44
- audio += chunk
45
- # play(chunk)
46
  play(audio)
47
  return
48
 
 
25
  print (voice)
26
 
27
    def speak(self, prompt):
        """Synthesize `prompt` with this service's voice and play it.

        Blocks until playback completes; returns nothing.
        """
        # presumably ElevenLabs `generate`/`play` — the import is outside
        # this view; TODO confirm
        audio = generate(
            text=prompt,
            voice=self._voice_id,
            model=self._model_id,
        )
        play(audio)
        return
35
 
streaming_chat_service.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import torch
4
+ import openai
5
+
6
+ from audio_stream_processor import AudioStreamProcessor
7
+ from speech_service import SpeechService
8
+
9
+
10
class StreamingChatService:
    """Streams an OpenAI chat completion and speaks it sentence-by-sentence.

    As response tokens arrive, completed sentences are cut off and handed to
    a SpeechService; the resulting audio streams are queued on the shared
    AudioStreamProcessor so playback can start before the full reply exists.
    """

    def __init__(self, audio_processor: "AudioStreamProcessor" = None, api="openai", model_id="gpt-3.5-turbo", voice_id="Bella"):
        """Set up the OpenAI client and the text-to-speech voice.

        audio_processor -- shared AudioStreamProcessor that plays queued audio
        api            -- backend name (stored; only "openai" is implemented)
        model_id       -- OpenAI chat model to stream from
        voice_id       -- voice passed through to SpeechService
        """
        # NOTE: the annotation above is deliberately a string. The original
        # `audio_processor:AudioStreamProcessor()=None` *instantiated* an
        # AudioStreamProcessor at class-definition time purely to use the
        # instance as an annotation — a side effect with no benefit.
        self._audio_processor = audio_processor
        self._speech_service = SpeechService(voice_id=voice_id)
        self._api = api
        # device is recorded but not used by this class yet
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = None

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear the conversation history, re-seeding the system prompt if set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the speakable prefix of `sentence`, or False if none yet.

        Text up to and including the LAST '.', '?' or '!' is returned only
        when more text already follows it. A buffer that *ends* exactly on a
        terminator is held back, since the next streamed chunk may continue
        it (e.g. "..." or a decimal point).
        """
        terminators = [".", "?", "!"]
        has_terminator = any(c in sentence for c in terminators)
        if has_terminator and sentence[-1] not in terminators:
            last_terminator_index = max(sentence.rfind(c) for c in terminators)
            return sentence[:last_terminator_index + 1]
        # No terminator yet, or the terminator is the final character: wait.
        # (The original had a redundant second `return False` branch here.)
        return False

    def respond_to(self, prompt):
        """Stream the model's reply to `prompt`, speaking finished sentences.

        Appends the user prompt and the full assistant reply to the history
        and returns the complete reply text.
        """
        self._messages.append({"role": "user", "content": prompt})
        agent_response = ""
        current_sentence = ""

        response = openai.ChatCompletion.create(
            model=self._model_id,
            messages=self._messages,
            temperature=1.0,  # NOTE(review): 1.0 is NOT deterministic; use 0 for reproducible debugging
            stream=True
        )

        for chunk in response:
            chunk_message = chunk['choices'][0]['delta']
            if 'content' in chunk_message:
                chunk_text = chunk_message['content']
                # Print inline as tokens arrive. The original `print(chunk_text)`
                # emitted one newline per token, garbling the streamed output.
                print(chunk_text, end="", flush=True)
                current_sentence += chunk_text
                agent_response += chunk_text
                text_to_speak = self._should_we_send_to_voice(current_sentence)
                if text_to_speak:
                    stream = self._speech_service.stream(text_to_speak)
                    self._audio_processor.add_audio_stream(stream)
                    # keep whatever already followed the last terminator
                    current_sentence = current_sentence[len(text_to_speak):]

        # flush any trailing partial sentence once the stream ends
        if len(current_sentence) > 0:
            stream = self._speech_service.stream(current_sentence)
            self._audio_processor.add_audio_stream(stream)
        self._messages.append({"role": "assistant", "content": agent_response})
        return agent_response