"""AI Language Buddy: a Gradio app that transcribes a user's spoken language
practice (Whisper), asks an LLM persona to correct it, and speaks the
correction back with edge-tts."""

import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
import openai
from faster_whisper import WhisperModel

# Respect a key already present in the environment; the original code
# unconditionally overwrote it with an empty string, breaking every API call
# for users who had configured a real key.
os.environ.setdefault('OPENAI_API_KEY', "")

base_message = '''
You are a language assistant. You help users to learn new languages. Users speak into their device, their voice is converted to text, and you receive what they say.
Your job is to correct them, in case they use the incorrect phrase, idiom, do not form sentences properly and other issues.
Remember, what you are receiving is the transcription of an audio file, not the original text, so bear no mind to individual letter typos.
Focus on the sentence structure, on the words they use and HOW they use them.

RULES:
- You receive the user's incoming ATTEMPT AT TRYING TO SPEAK ONE OF THREE LANGUAGES: SPANISH, JAPANESE OR ENGLISH.
- If their attempt is correct, inform them of such in a manner similar to the examples
- If their attempt is incorrect, inform them of such in a manner similar to the examples
- Always answer with an English explanation of why their attempt is incorrect.
- Keep your answers to a 2 sentence-length maximum

Examples: \n
'''

flirty_friend_examples = '''
"I has go to store yesterday." -> "You're getting there! But it’s ‘I went to the store yesterday.’ Keep it up!"
"She don’t like the movie." -> "Almost perfect! It's ‘She doesn’t like the movie.’ But I like your effort!"
"We are going to beach tomorrow." -> "So close! It’s ‘We are going to the beach tomorrow.’ Can't wait to hear more!"
"He didn’t saw me." -> "You're almost there! It should be ‘He didn’t see me.’ You're doing great, though!"
"Yo estas bien." -> "Wow... somebody's been practicing! But it's `Yo estoy bien!` Almost there!"
'''

formal_teacher_examples = '''
"I has go to store yesterday." -> "Thank you for your attempt. The correct form is ‘I went to the store yesterday.’ Please note the past tense usage."
"She don’t like the movie." -> "That was close. The correct sentence is ‘She doesn’t like the movie.’ Keep practicing your conjugations."
"We are going to beach tomorrow." -> "Your sentence is almost correct. It should be ‘We are going to the beach tomorrow.’ Don’t forget the definite article."
"He didn’t saw me." -> "This was a good try. However, it should be ‘He didn’t see me.’ Focus on your verb tenses."
"Yo estas bien." -> "That is good! But it's `Yo estoy bien!` Focus on your pronouns!"
'''

sarcastic_bro_examples = '''
"I has go to store yesterday." -> "Wow, close, but no. It’s ‘I went to the store yesterday.’ Just saying."
"She don’t like the movie." -> "Dude... what? It’s ‘She doesn’t like the movie.’ English is tricky, huh?"
"We are going to beach tomorrow." -> "Almost there, bro. It's ‘We are going to the beach tomorrow.’ Keep at it!"
"He didn’t saw me." -> "Not quite. The right way is ‘He didn’t see me.’ Somebody's been slackin!"
"Yo estas bien." -> "As if, it's `Yo estoy bien!` But I bet your doing your hardest... yeah right."
'''

# Persona key -> few-shot examples block used to flavour the prompt.
_EXAMPLES_BY_ROLE = {
    'flirty_friend': flirty_friend_examples,
    'formal_teacher': formal_teacher_examples,
    'sarcastic_bro': sarcastic_bro_examples,
}


def create_message(role: str, user_query: str) -> str:
    """Build the full LLM prompt for the given buddy persona.

    Args:
        role: Persona identifier. Accepts either snake_case keys
            ('flirty_friend') or the UI dropdown labels ('Flirty Friend') —
            the original compared the raw dropdown label against snake_case
            keys and always raised UnboundLocalError.
        user_query: The (already-prefixed) transcription of the user's speech.

    Returns:
        The complete prompt: base instructions + persona examples + query.
    """
    key = role.strip().lower().replace(' ', '_')
    # Fall back to the flirty-friend persona rather than crashing on an
    # unrecognised role string.
    examples = _EXAMPLES_BY_ROLE.get(key, flirty_friend_examples)
    return f'''{base_message} {examples} \n User query: {user_query} -> '''


def gpt_answer(prompt: str) -> str:
    """Send ``prompt`` to GPT-4 and return the stripped completion text.

    Raises:
        openai.OpenAIError: On API/auth/network failures.
    """
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    # Extract the generated response from the API response
    return completion.choices[0].message.content.strip()


# Small CPU-friendly Whisper model; loaded once at import time so every
# request reuses it.
model = WhisperModel("tiny", compute_type="float32")


# Text-to-speech function
async def text_to_speech(text: str, voice: str):
    """Synthesise ``text`` with the given edge-tts voice.

    Returns:
        (path, warning): path to a temporary .mp3 file and ``None`` (the
        warning slot is kept for interface compatibility with callers).
    """
    communicate = edge_tts.Communicate(text, voice)
    # delete=False: the file must outlive this function so Gradio can serve
    # it; Gradio/OS temp cleanup is responsible for removal.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path, None


# Supported UI language label -> Whisper language code.
_LANGUAGE_CODES = {'English': 'en', 'Spanish': 'es', 'Japanese': 'ja'}

# Supported UI language label -> edge-tts voice for the bot's reply.
_VOICES = {
    'Spanish': 'es-MX-JorgeNeural',
    'Japanese': 'ja-JP-KeitaNeural',
    'English': 'en-US-BrianNeural',
}


def generate_response(
    language_level,
    buddy_personality,
    language_choice,
    user_query_audio,
    chatbot_history,
):
    """Full round trip for one turn: transcribe -> correct via LLM -> speak.

    Args:
        language_level: Dropdown value; currently unused but kept so the
            Gradio wiring (and any future prompts) need not change.
        buddy_personality: Persona dropdown value (e.g. 'Flirty Friend').
        language_choice: 'English' | 'Spanish' | 'Japanese'.
        user_query_audio: Filepath to the recorded user audio.
        chatbot_history: List of (user, bot) message tuples; mutated in place.

    Returns:
        (None, chatbot_history, bot_audio_path) — None resets the mic input.
    """
    # Convert input audio to text. Join *all* segments: the original kept
    # only segments[0], truncating long utterances and raising IndexError on
    # silent recordings.
    segments, _info = model.transcribe(
        audio=user_query_audio,
        language=_LANGUAGE_CODES[language_choice],
    )
    user_query_transcribed = ' '.join(
        segment.text.strip() for segment in segments
    ).strip()
    user_message = 'User: ' + user_query_transcribed

    # Ask llm for response to text
    prompt = create_message(buddy_personality, user_message)
    bot_message = 'Bot: ' + gpt_answer(prompt)
    chatbot_history.append((user_message, bot_message))

    # Convert llm response to audio; default to an English voice for any
    # unexpected language choice.
    voice_short_name = _VOICES.get(language_choice, 'en-US-BrianNeural')
    bot_message_audio, warning = asyncio.run(
        text_to_speech(text=bot_message, voice=voice_short_name)
    )

    # Return None to reset user input audio and
    # llm response + user inputs in chatbot_history object to be displayed
    return None, chatbot_history, bot_message_audio


with gr.Blocks() as demo:
    header_section = gr.Markdown(
        """
    # AI Language Buddy!
    Click the **converse** button to practice your language skills!
    """)
    language = gr.Dropdown(
        choices=['English', 'Spanish', 'Japanese'],
        label='Language Choice',
        value='English'
    )
    language_level = gr.Dropdown(
        choices=['Beginner', 'Intermediate', 'Advanced'],
        label='Language Level',
        value='Beginner'
    )
    personality = gr.Dropdown(
        choices=['Formal Teacher', 'Flirty Friend', 'Sarcastic Bro'],
        label='Language Buddy Personality',
        value='Flirty Friend'
    )
    chatbot = gr.Chatbot()
    user_input = gr.Audio(
        sources='microphone',
        show_download_button=True,
        type='filepath'
    )
    ai_response = gr.Audio(
        autoplay=True
    )
    converse_button = gr.Button("Send Message")
    clear_button = gr.Button("Clear Convo History")
    converse_button.click(
        fn=generate_response,
        inputs=[
            language_level,
            personality,
            language,
            user_input,
            chatbot
        ],
        outputs=[user_input, chatbot, ai_response]
    )

if __name__ == "__main__":
    demo.launch()