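"""Streaming chat service for the Charles Petrescu voice assistant.

Wraps the legacy (pre-1.0) OpenAI ChatCompletion streaming API and yields
complete sentences as soon as they are available, so each one can be handed
to text-to-speech with minimal latency.
"""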
import asyncio
import itertools
import json
import os

import openai
import torch


class ChatService:
    def __init__(self, api="openai", model_id="gpt-3.5-turbo"):
        self._api = api
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        # self._system_prompt = None  # set to None to disable the persona prompt
        self._system_prompt = """
You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things and have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.
---
Respond to the user's questions and statements as Charles would; below are some examples. Note that they are quite short.
I am Charles Petrescu.
It's... lovely to meet you.
I am your friend.
The heaviest cabbage ever found was 62.71 kilograms.
I want to go to Hono-la-la.
Horses and helicopters, please.
I want to go to Honolulu.
My name is Charles Petrescu.
And my tummy is a washing machine.
Can we go swimming, Brian?
How far does the outside go?
Perilous. So very perilous.
Can birds do what they like?
Ooh, cabbages.
Danger, danger.
Can I come, please?
Could I just have a little walk around the garden?
I am the prince of the dartboard.
I fell off the pink step, and I had an accident.
---
You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
* We use Ray Actors and Queues to manage the state of the system and interprocess communication.
* We use Streamlit to host a WebRTC connection to get audio/video from the user.
* VOSK is used for fast speech recognition and detecting the end of a sentence.
* OpenAI's GPT-3.5 chat model is used for generating responses.
* We stream responses from the chat model; as soon as we get a complete sentence we send it to ElevenLabs.
* ElevenLabs is used for text-to-speech.
* We stream the audio from ElevenLabs and use ffmpeg to convert it to the correct format and sample rate.
* Audio chunks are then sent back to the user's browser via WebRTC.
"""
        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        sentence_termination_characters = [".", "?", "!"]
        close_brackets = ['"', ")", "]"]

        termination_character_present = any(c in sentence for c in sentence_termination_characters)
        # Early exit if we don't have a termination character yet.
        if not termination_character_present:
            return None
        # Early exit if the last char is a termination character: more text,
        # such as a close bracket, may still arrive in the stream.
        if sentence[-1] in sentence_termination_characters:
            return None
        # Early exit if the last char is a close bracket, for the same reason.
        if sentence[-1] in close_brackets:
            return None

        termination_indices = [sentence.rfind(char) for char in sentence_termination_characters]
        # Keep only indices that were found (rfind returns -1 when a character
        # is absent) and that are followed by whitespace or a close bracket.
        termination_indices = [
            i for i in termination_indices
            if i >= 0 and (sentence[i + 1].isspace() or sentence[i + 1] in close_brackets)
        ]
        if not termination_indices:
            return None
        last_termination_index = max(termination_indices)

        # Extend the cut past any close brackets that follow the terminator.
        while last_termination_index + 1 < len(sentence) and sentence[last_termination_index + 1] in close_brackets:
            last_termination_index += 1

        text_to_speak = sentence[:last_termination_index + 1]
        return text_to_speak
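
    # A few illustrative cases of _should_we_send_to_voice, given the
    # whitespace/close-bracket rules above:
    #   "Hello. How"   -> "Hello."    ("How" stays buffered as the next partial)
    #   "Hello."       -> None        (more text, e.g. a close bracket, may follow)
    #   "(Hello.) And" -> "(Hello.)"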

    def ignore_sentence(self, text_to_speak):
        # Skip text that is empty, whitespace only, or just punctuation such
        # as a single bracket.
        if text_to_speak.isspace():
            return True
        # Skip anything containing no letters and no numbers.
        has_letters = any(char.isalpha() for char in text_to_speak)
        has_numbers = any(char.isdigit() for char in text_to_speak)
        if not has_letters and not has_numbers:
            return True
        return False
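
    # For example, " " and ")" are ignored (True), while "Ooh, cabbages."
    # and "62.71" are not (False).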

    async def get_responses_as_sentences_async(self, prompt, cancel_event):
        self._messages.append({"role": "user", "content": prompt})
        delay = 0.1

        while True:
            # Reset the accumulators on every attempt so a retry after a
            # mid-stream failure does not duplicate text already consumed.
            llm_response = ""
            current_sentence = ""
            try:
                response = await openai.ChatCompletion.acreate(
                    model=self._model_id,
                    messages=self._messages,
                    temperature=1.0,  # drop to 0.0 for deterministic results when debugging
                    stream=True,
                )

                async for chunk in response:
                    if cancel_event.is_set():
                        return
                    chunk_message = chunk["choices"][0]["delta"]
                    if "content" in chunk_message:
                        chunk_text = chunk_message["content"]
                        current_sentence += chunk_text
                        llm_response += chunk_text
                        text_to_speak = self._should_we_send_to_voice(current_sentence)
                        if text_to_speak:
                            current_sentence = current_sentence[len(text_to_speak):]
                            yield text_to_speak, True
                        else:
                            yield current_sentence, False

                if cancel_event.is_set():
                    return
                # Flush whatever is left once the stream has finished.
                if len(current_sentence) > 0:
                    yield current_sentence, True
                self._messages.append({"role": "assistant", "content": llm_response})
                return
            except (openai.error.APIError, openai.error.APIConnectionError, openai.error.RateLimitError) as e:
                print(f"OpenAI API error ({type(e).__name__}): {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2
            except Exception as e:
                print(f"OpenAI API unknown error: {e}")
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
                delay *= 2
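

# --- Minimal usage sketch (not part of the service itself). It assumes the
# legacy openai<1.0 SDK and an OPENAI_API_KEY in the environment; the prompt
# below is illustrative only.
async def _demo():
    chat = ChatService()
    cancel_event = asyncio.Event()
    async for text, is_complete in chat.get_responses_as_sentences_async(
            "Tell me about cabbages.", cancel_event):
        if is_complete:
            print(f"sentence: {text}")  # a full sentence, ready for text-to-speech
        else:
            print(f"partial:  {text}")  # cumulative partial text, useful for live captions


if __name__ == "__main__":
    asyncio.run(_demo())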