import asyncio
import itertools
import json
import os

import torch
import openai


class ChatService:
    """Streams OpenAI chat completions and slices them into speakable sentences.

    The conversation history is kept in ``self._messages`` (a list of
    ``{"role": ..., "content": ...}`` dicts) and is seeded with the system
    prompt by :meth:`reset`.
    """

    # Characters that may end a sentence.
    _SENTENCE_TERMINATORS = (".", "?", "!")
    # Closing punctuation allowed to trail a terminator, e.g. `He said "Hi."`.
    _CLOSE_BRACKETS = ('"', ")", "]")
    # Upper bound (seconds) for the exponential retry back-off.
    _MAX_RETRY_DELAY = 30.0

    def __init__(self, api="openai", model_id="gpt-3.5-turbo"):
        """Store the configuration, set the OpenAI API key and reset history.

        Args:
            api: Backend identifier; stored on the instance as ``_api``.
            model_id: Model name passed to ``openai.ChatCompletion.acreate``.

        Side effects:
            Assigns ``openai.api_key`` from the ``OPENAI_API_KEY`` environment
            variable (``None`` when the variable is unset).
        """
        self._api = api
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # Runtime text: sent verbatim as the "system" message by reset().
        # Assign None (or "") instead to start conversations without one.
        self._system_prompt =""" You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions. --- Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short. I am Charles Petrescu. It's... lovely to meet you. I am your friend. The heaviest cabbage ever found was 62.71 kilograms. I want to go to Hono-la-la. Horses and helicopters, please. I want to go to Honolulu. My name is Charles Petrescu. And my tummy is a washing machine. Can we go swimming, Brian? How far does the outside go? Perilous. So very perilous. Can birds do what they like? Ooh, cabbages. Danger, danger. Can I come, please? Could I just have a little walk around the garden? I am the prince of the dartboard. I fell off the pink step, and I had an accident. --- You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points: * We use HuggingFace to host you as a spaces app. * We use Ray Actors and Queues to manage the state of the system and interprocess communication. * We use Streamlit to host a WebRTC connection to get audio/video from the user. * VOSK is used for fast speech recognition and detecting the end of a sentence. * OpenAI's Chat GPT-3.5 is used for generating responses. * We stream responces from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs. * ElevenLabs for text to speech. 
* We stream the audio from ElevelLabs, we use ffmpeg to convert the audio to the correct format and sample rate. * Audio chunks and then sent back to the users browser via WebRTC. * You are excited that we just gave you a zoom background (horses, helecopter on beech in Honolulu) * You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking.. """

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Discard the conversation history, keeping only the system prompt."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the leading part of *sentence* that is ready to be spoken.

        A sentence counts as complete once a terminator (``.``, ``?``, ``!``),
        optionally followed by closing brackets/quotes, is followed by
        whitespace. The whitespace requirement keeps decimals such as
        ``62.71`` from being split while the text is still streaming in.

        Args:
            sentence: Text accumulated so far from the streamed response.

        Returns:
            The prefix of *sentence* up to and including the last complete
            sentence (plus any trailing closing brackets), or ``None`` when
            nothing should be sent yet.
        """
        terminators = self._SENTENCE_TERMINATORS
        close_brackets = self._CLOSE_BRACKETS

        # Nothing can be complete without at least one terminator.
        if not any(char in sentence for char in terminators):
            return None

        # When the text currently ends on a terminator or closing bracket we
        # cannot tell yet whether more of the same token follows (e.g. "3."
        # may become "3.14"), so wait for the next chunk.
        if sentence[-1] in terminators or sentence[-1] in close_brackets:
            return None

        # Walk backwards over whitespace positions, looking for the right-most
        # one that is directly preceded by "<terminator><close brackets>*".
        for end in range(len(sentence) - 1, 0, -1):
            if not sentence[end].isspace():
                continue
            index = end - 1
            while index >= 0 and sentence[index] in close_brackets:
                index -= 1
            if index >= 0 and sentence[index] in terminators:
                return sentence[:end]

        # Terminators exist but none ends a sentence (e.g. "62.71 k").
        return None

    def ignore_sentence(self, text_to_speak):
        """Return True when *text_to_speak* holds nothing worth speaking.

        Text is ignored when it contains no letter and no digit, which covers
        the empty string, pure whitespace and stray punctuation/brackets.
        """
        return not any(
            char.isalpha() or char.isdigit() for char in text_to_speak
        )

    async def get_responses_as_sentances_async(self, prompt, cancel_event):
        """Stream the model's reply to *prompt* as sentence-sized pieces.

        The user prompt is appended to the history, the completion is
        requested in streaming mode, and the accumulated text is yielded
        after every content chunk.

        Args:
            prompt: The user's message.
            cancel_event: Object exposing ``is_set()``; once it returns True
                the generator stops without recording an assistant message.

        Yields:
            ``(text, is_complete)`` tuples. ``is_complete`` is True when
            ``text`` is a finished sentence ready for text-to-speech (or the
            remainder once the stream ends), and False when ``text`` is the
            sentence still being built.

        Notes:
            Any exception raised while requesting or streaming is printed and
            the request is retried with exponential back-off (capped at
            ``_MAX_RETRY_DELAY``); there is no limit on the number of retries.
            Pieces already yielded by a failed attempt are not withdrawn.
        """
        self._messages.append({"role": "user", "content": prompt})
        delay = 0.1

        while True:
            # Each attempt starts from a clean slate so text received during
            # a failed, partially streamed attempt is not duplicated in the
            # spoken output or in the stored assistant message.
            llm_response = ""
            current_sentence = ""
            try:
                response = await openai.ChatCompletion.acreate(
                    model=self._model_id,
                    messages=self._messages,
                    temperature=1.0,  # use 0.0 for debugging/deterministic results
                    stream=True,
                )

                async for chunk in response:
                    if cancel_event.is_set():
                        return
                    chunk_message = chunk['choices'][0]['delta']
                    # Some deltas carry no text (no 'content' key); skip them.
                    if 'content' not in chunk_message:
                        continue
                    chunk_text = chunk_message['content']
                    current_sentence += chunk_text
                    llm_response += chunk_text
                    text_to_speak = self._should_we_send_to_voice(current_sentence)
                    if text_to_speak:
                        # Keep only the unfinished tail for the next round.
                        current_sentence = current_sentence[len(text_to_speak):]
                        yield text_to_speak, True
                    else:
                        yield current_sentence, False

                if cancel_event.is_set():
                    return
                # Flush whatever is left once the stream has ended.
                if len(current_sentence) > 0:
                    yield current_sentence, True
                self._messages.append({"role": "assistant", "content": llm_response})
                return

            except openai.error.APIError as e:
                failure = f"OpenAI API returned an API Error: {e}"
            except openai.error.APIConnectionError as e:
                failure = f"Failed to connect to OpenAI API: {e}"
            except openai.error.RateLimitError as e:
                failure = f"OpenAI API request exceeded rate limit: {e}"
            except Exception as e:
                # NOTE(review): this also retries non-transient failures
                # (programming errors, rejected requests) indefinitely.
                failure = f"OpenAI API unknown error: {e}"

            # Only reached when the attempt above failed.
            print(failure)
            if cancel_event.is_set():
                # The caller no longer wants a reply; do not retry.
                return
            print(f"Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay = min(delay * 2, self._MAX_RETRY_DELAY)