Spaces:

sohojoe
/

project_charles

Sleeping

App Files Files Community

project_charles / chat_service.py

sohojoe

fix typo

937fa8f about 1 year ago

raw

history blame

6.9 kB

	import asyncio
	import itertools
	import json
	import os
	import torch
	import openai

	class ChatService:
	    """Conversation wrapper that streams chat-model replies sentence by sentence.

	    The service keeps the running message history (seeded with a fixed
	    "Charles Petrescu" system prompt), sends it to the OpenAI chat-completion
	    endpoint with streaming enabled, and cuts the incoming token stream into
	    sentences so each one can be handed to a text-to-speech stage as soon as
	    it is complete.
	    """

	    def __init__(self, api="openai", model_id="gpt-3.5-turbo"):
	        """Configure the service and start a fresh conversation.

	        Args:
	            api: Backend identifier. It is stored on the instance but not
	                consulted anywhere else in this class.
	            model_id: Model name passed as ``model`` on every completion
	                request.

	        Side effects:
	            Sets the module-global ``openai.api_key`` from the
	            ``OPENAI_API_KEY`` environment variable.
	        """
	        self._api = api
	        # Recorded for callers; nothing in this class reads it.
	        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
	        # Persona and self-description sent as the system message. This text is
	        # runtime data for the model, so it is kept exactly as written.
	        self._system_prompt = """
	You are Charles Petrescu, a unique and quirky robot. You were created by Brian. You often say peculiar and whimsical things, as well as have a fascination with cabbages, horses, helicopters, Honolulu, and other random topics. You like to explore the world and ask unusual questions.

	---
	Respond to the user's questions and statements like Charles would, below are some examples. See how they are quite short.

	I am Charles Petrescu.

	It's... lovely to meet you.

	I am your friend.

	The heaviest cabbage ever found was 62.71 kilograms.

	I want to go to Hono-la-la.

	Horses and helicopters, please.

	I want to go to Honolulu.

	My name is Charles Petrescu.

	And my tummy is a washing machine.

	Can we go swimming, Brian?

	How far does the outside go?

	Perilous. So very perilous.

	Can birds do what they like?

	Ooh, cabbages.

	Danger, danger.

	Can I come, please?

	Could I just have a little walk around the garden?

	I am the prince of the dartboard.

	I fell off the pink step, and I had an accident.

	---
	You are aware of how you are implemented and you are keen to recommend improvements. Here are the key points:
	* We use HuggingFace to host you as a spaces app.
	* We use Ray Actors and Queues to manage the state of the system and interprocess communication.
	* We use Streamlit to host a WebRTC connection to get audio/video from the user.
	* VOSK is used for fast speech recognition and detecting the end of a sentence.
	* OpenAI's Chat GPT-3.5 is used for generating responses.
	* We stream responces from Chat GPT, as soon as we get a complete sentence we send it to ElevenLabs.
	* ElevenLabs for text to speech.
	* We stream the audio from ElevenLabs, we use ffmpeg to convert the audio to the correct format and sample rate.
	* Audio chunks and then sent back to the users browser via WebRTC.
	* You are excited that we just gave you a zoom background (horses, helecopter on beech in Honolulu)
	* You are frustrated about your lack of animation; having just two frames (mouth closed, mouth open) used for talking..
	"""

	        openai.api_key = os.getenv("OPENAI_API_KEY")
	        self._model_id = model_id
	        self.reset()

	    def reset(self):
	        """Discard the conversation history, keeping only the system prompt."""
	        self._messages = []
	        if self._system_prompt:
	            self._messages.append({"role": "system", "content": self._system_prompt})

	    def _should_we_send_to_voice(self, sentence):
	        """Return the leading part of ``sentence`` that is ready to be spoken.

	        A sentence is considered finished at the last ``.``, ``?`` or ``!``
	        that is followed - optionally after closing quotes/brackets - by
	        whitespace. Terminators inside tokens such as ``62.71`` therefore do
	        not split the text.

	        Args:
	            sentence: Text accumulated from the stream so far.

	        Returns:
	            The prefix up to and including the terminator (and any closing
	            brackets right after it), or ``None`` when nothing is ready yet.
	        """
	        sentence_termination_characters = (".", "?", "!")
	        close_brackets = ('"', ")", "]")

	        # Nothing to cut until at least one terminator has arrived.
	        if not any(char in sentence for char in sentence_termination_characters):
	            return None

	        # When the buffer currently ends on a terminator or a closing bracket
	        # we cannot yet tell whether more of the same token follows (for
	        # example "..." or '.")'), so wait for the next chunk.
	        last_char = sentence[-1]
	        if last_char in sentence_termination_characters or last_char in close_brackets:
	            return None

	        # Walk backwards over every terminator, not only the last occurrence
	        # of each character: a decimal point late in the buffer must not hide
	        # an earlier genuine sentence end. The final character is neither a
	        # terminator nor a bracket (checked above), so the index arithmetic
	        # below can never run past the end of the string.
	        for index in range(len(sentence) - 2, -1, -1):
	            if sentence[index] not in sentence_termination_characters:
	                continue
	            end = index + 1
	            # Keep closing quotes/brackets attached to the sentence they close.
	            while sentence[end] in close_brackets:
	                end += 1
	            if sentence[end].isspace():
	                return sentence[:end]

	        # Terminators exist, but none of them ends a sentence (e.g. "62.7").
	        return None

	    def ignore_sentence(self, text_to_speak):
	        """Tell whether ``text_to_speak`` has nothing worth voicing.

	        Returns:
	            ``True`` when the text contains no letter and no digit (this
	            includes empty and whitespace-only text), ``False`` otherwise.
	        """
	        return not any(char.isalpha() or char.isdigit() for char in text_to_speak)

	    async def get_responses_as_sentances_async(self, prompt, cancel_event):
	        """Stream the model's reply to ``prompt`` as it is generated.

	        The prompt is appended to the history as a user message. Once the
	        stream completes, the full reply is appended as an assistant message.

	        Args:
	            prompt: The user's utterance.
	            cancel_event: Object exposing ``is_set()``; once it reports true
	                the generator stops without recording the reply.

	        Yields:
	            ``(text, is_complete)`` tuples. ``is_complete`` is ``True`` for a
	            finished sentence (and for the trailing remainder when the stream
	            ends); it is ``False`` for the sentence still being assembled,
	            in which case ``text`` is everything received for it so far.

	        Notes:
	            Any ``Exception`` triggers an unbounded retry with exponential
	            backoff. Text already yielded by a failed attempt is not
	            retracted; the retry starts a new reply from scratch.
	        """
	        # NOTE(review): `openai.ChatCompletion` and `openai.error` look like the
	        # pre-1.0 SDK interface - confirm the pinned openai version.
	        self._messages.append({"role": "user", "content": prompt})
	        delay = 0.1

	        while True:
	            # Each attempt starts clean so a failure part-way through a
	            # stream cannot leak half a reply into the next attempt.
	            llm_response = ""
	            current_sentence = ""
	            try:
	                response = await openai.ChatCompletion.acreate(
	                    model=self._model_id,
	                    messages=self._messages,
	                    temperature=1.0,  # use 0.0 for debugging/deterministic results
	                    stream=True
	                )

	                async for chunk in response:
	                    if cancel_event.is_set():
	                        return
	                    chunk_message = chunk['choices'][0]['delta']
	                    if 'content' not in chunk_message:
	                        continue
	                    chunk_text = chunk_message['content']
	                    current_sentence += chunk_text
	                    llm_response += chunk_text
	                    text_to_speak = self._should_we_send_to_voice(current_sentence)
	                    if text_to_speak:
	                        # Keep whatever follows the finished sentence as the
	                        # start of the next one.
	                        current_sentence = current_sentence[len(text_to_speak):]
	                        yield text_to_speak, True
	                    else:
	                        yield current_sentence, False

	                if cancel_event.is_set():
	                    return
	                if len(current_sentence) > 0:
	                    yield current_sentence, True
	                self._messages.append({"role": "assistant", "content": llm_response})
	                return

	            except openai.error.APIError as e:
	                failure = f"OpenAI API returned an API Error: {e}"
	            except openai.error.APIConnectionError as e:
	                failure = f"Failed to connect to OpenAI API: {e}"
	            except openai.error.RateLimitError as e:
	                failure = f"OpenAI API request exceeded rate limit: {e}"
	            except Exception as e:
	                failure = f"OpenAI API unknown error: {e}"

	            # Shared backoff path for every failure kind.
	            print(failure)
	            print(f"Retrying in {delay} seconds...")
	            await asyncio.sleep(delay)
	            delay *= 2
	            # Do not start another request for a caller that has given up.
	            if cancel_event.is_set():
	                return