sohojoe committed on
Commit
bd435b3
1 Parent(s): 4385b66

streaming chat into speech

Browse files
Files changed (3) hide show
  1. debug.py +8 -10
  2. speech_service.py +5 -18
  3. streaming_chat_service.py +71 -0
debug.py CHANGED
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
4
  from speech_service import SpeechService
5
  from concurrent.futures import ThreadPoolExecutor
6
  from audio_stream_processor import AudioStreamProcessor
7
-
8
 
9
  def run_debug_code():
10
  load_dotenv()
@@ -14,11 +14,11 @@ def run_debug_code():
14
  # print ("CLIP success")
15
 
16
  print ("Initializing Chat")
17
- chat_service = ChatService()
 
 
18
 
19
  user_speech_service = SpeechService(voice_id="Adam")
20
- ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
21
- processor = AudioStreamProcessor()
22
 
23
  # user_speech_service.print_voices() # if you want to see your custom voices
24
 
@@ -32,15 +32,13 @@ def run_debug_code():
32
  print ("")
33
  print (f'prompt: "{prompt}"')
34
  stream = user_speech_service.stream(prompt)
35
- processor.add_audio_stream(stream)
36
 
37
- response = chat_service.chat(prompt)
38
  print ("")
39
- print (f'response: "{response}"')
40
- stream = ai_speech_service.stream(response)
41
- processor.add_audio_stream(stream)
42
 
43
- processor.close()
44
  print ("Chat success")
45
 
46
 
 
4
  from speech_service import SpeechService
5
  from concurrent.futures import ThreadPoolExecutor
6
  from audio_stream_processor import AudioStreamProcessor
7
+ from streaming_chat_service import StreamingChatService
8
 
9
  def run_debug_code():
10
  load_dotenv()
 
14
  # print ("CLIP success")
15
 
16
  print ("Initializing Chat")
17
+ # chat_service = ChatService()
18
+ audio_processor = AudioStreamProcessor()
19
+ chat_service = StreamingChatService(audio_processor, voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
20
 
21
  user_speech_service = SpeechService(voice_id="Adam")
 
 
22
 
23
  # user_speech_service.print_voices() # if you want to see your custom voices
24
 
 
32
  print ("")
33
  print (f'prompt: "{prompt}"')
34
  stream = user_speech_service.stream(prompt)
35
+ audio_processor.add_audio_stream(stream)
36
 
 
37
  print ("")
38
+ print (f'response:')
39
+ response = chat_service.respond_to(prompt)
 
40
 
41
+ audio_processor.close()
42
  print ("Chat success")
43
 
44
 
speech_service.py CHANGED
@@ -25,24 +25,11 @@ class SpeechService:
25
  print (voice)
26
 
27
  def speak(self, prompt):
28
- # audio = generate(
29
- # text=prompt,
30
- # voice=self._voice_id,
31
- # model=self._model_id,
32
- # )
33
- # play(audio)
34
- audio_stream = generate(
35
- text=prompt,
36
- voice=self._voice_id,
37
- model=self._model_id,
38
- stream=True
39
- )
40
- # stream(audio_stream)
41
- audio = b""
42
- for chunk in audio_stream:
43
- if chunk is not None:
44
- audio += chunk
45
- # play(chunk)
46
  play(audio)
47
  return
48
 
 
25
  print (voice)
26
 
27
    def speak(self, prompt):
        """Synthesize `prompt` with this service's voice and play it.

        Blocks until playback completes; returns nothing.
        """
        # presumably ElevenLabs `generate`/`play` — the import is outside
        # this view; TODO confirm
        audio = generate(
            text=prompt,
            voice=self._voice_id,
            model=self._model_id,
        )
        play(audio)
        return
35
 
streaming_chat_service.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import torch
4
+ import openai
5
+
6
+ from audio_stream_processor import AudioStreamProcessor
7
+ from speech_service import SpeechService
8
+
9
+
10
class StreamingChatService:
    """Streams an OpenAI chat completion and speaks it sentence-by-sentence.

    As response tokens arrive, completed sentences are cut off and handed to
    a SpeechService; the resulting audio streams are queued on the shared
    AudioStreamProcessor so playback can start before the full reply exists.
    """

    def __init__(self, audio_processor: "AudioStreamProcessor" = None, api="openai", model_id="gpt-3.5-turbo", voice_id="Bella"):
        """Set up the OpenAI client and the text-to-speech voice.

        audio_processor -- shared AudioStreamProcessor that plays queued audio
        api            -- backend name (stored; only "openai" is implemented)
        model_id       -- OpenAI chat model to stream from
        voice_id       -- voice passed through to SpeechService
        """
        # NOTE: the annotation above is deliberately a string. The original
        # `audio_processor:AudioStreamProcessor()=None` *instantiated* an
        # AudioStreamProcessor at class-definition time purely to use the
        # instance as an annotation — a side effect with no benefit.
        self._audio_processor = audio_processor
        self._speech_service = SpeechService(voice_id=voice_id)
        self._api = api
        # device is recorded but not used by this class yet
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = None

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear the conversation history, re-seeding the system prompt if set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the speakable prefix of `sentence`, or False if none yet.

        Text up to and including the LAST '.', '?' or '!' is returned only
        when more text already follows it. A buffer that *ends* exactly on a
        terminator is held back, since the next streamed chunk may continue
        it (e.g. "..." or a decimal point).
        """
        terminators = [".", "?", "!"]
        has_terminator = any(c in sentence for c in terminators)
        if has_terminator and sentence[-1] not in terminators:
            last_terminator_index = max(sentence.rfind(c) for c in terminators)
            return sentence[:last_terminator_index + 1]
        # No terminator yet, or the terminator is the final character: wait.
        # (The original had a redundant second `return False` branch here.)
        return False

    def respond_to(self, prompt):
        """Stream the model's reply to `prompt`, speaking finished sentences.

        Appends the user prompt and the full assistant reply to the history
        and returns the complete reply text.
        """
        self._messages.append({"role": "user", "content": prompt})
        agent_response = ""
        current_sentence = ""

        response = openai.ChatCompletion.create(
            model=self._model_id,
            messages=self._messages,
            temperature=1.0,  # NOTE(review): 1.0 is NOT deterministic; use 0 for reproducible debugging
            stream=True
        )

        for chunk in response:
            chunk_message = chunk['choices'][0]['delta']
            if 'content' in chunk_message:
                chunk_text = chunk_message['content']
                # Print inline as tokens arrive. The original `print(chunk_text)`
                # emitted one newline per token, garbling the streamed output.
                print(chunk_text, end="", flush=True)
                current_sentence += chunk_text
                agent_response += chunk_text
                text_to_speak = self._should_we_send_to_voice(current_sentence)
                if text_to_speak:
                    stream = self._speech_service.stream(text_to_speak)
                    self._audio_processor.add_audio_stream(stream)
                    # keep whatever already followed the last terminator
                    current_sentence = current_sentence[len(text_to_speak):]

        # flush any trailing partial sentence once the stream ends
        if len(current_sentence) > 0:
            stream = self._speech_service.stream(current_sentence)
            self._audio_processor.add_audio_stream(stream)
        self._messages.append({"role": "assistant", "content": agent_response})
        return agent_response