"""Lim Kopi Call Center Service — Gradio MVP.

Transcribes customer-service call audio with OpenAI Whisper, runs
sentiment analysis (GoEmotions) on the transcript, and persists each
user's transcripts plus a "happiness score" in Google Cloud Firestore.

NOTE(review): this copy of the file was mangled by an extraction step —
line breaks were collapsed and the HTML markup inside several string
literals was stripped (only the text between tags survives). Spans
marked NOTE(review) below should be checked against the original file.
"""

import base64
import datetime
import json
import os

import gradio as gr
import whisper
from dotenv import load_dotenv
from google.cloud import firestore
from transformers import pipeline

# GoEmotions labels counted as "positive" when awarding score points.
positive_sentiments = [
    "approval", "realization", "joy", "caring", "relief", "desire",
    "admiration", "optimism", "love", "excitement", "curiosity",
    "amusement", "gratitude", "pride",
]

# ===== Google Cloud credentials =====
# GOOGLE_APPLICATION_CREDENTIALS holds a base64-encoded service-account
# JSON blob here (not a file path, as the canonical env var usually does).
load_dotenv()
base64_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
decoded_credentials = base64.b64decode(base64_credentials).decode()
credentials_json = json.loads(decoded_credentials)
db = firestore.Client.from_service_account_info(credentials_json)


# ===== Authentication =====
def authenticate(new_username, new_pw):
    """Log in an existing user or create a new account on the fly.

    Returns a 5-element list matching the click outputs:
    [username state, password state, score state, welcome-HTML update,
    sign-in-group visibility update].

    Raises:
        gr.Error: when the username exists but the password is wrong.
    """
    if new_username == '' or new_pw == '':
        # Nothing entered: leave all states and the UI untouched.
        return [None, None, 0, gr.update(), gr.update()]

    users_ref = db.collection('Users')
    doc_ref = users_ref.document(new_username)
    doc = doc_ref.get()
    new_score = 0
    if doc.exists:
        # Existing user: verify password and pick up the stored score.
        user_data = doc.to_dict()
        new_score = user_data['score']
        if user_data['password'] != new_pw:
            raise gr.Error("Incorrect password")
    else:
        # New user: create the account.
        # SECURITY(review): the password is stored in plaintext — it
        # should be hashed (e.g. hashlib.scrypt) before being written.
        doc_ref.set({"username": new_username, "password": new_pw, "score": new_score})

    gr.Info(f"Welcome, {new_username}!")
    # NOTE(review): the HTML markup of this welcome banner was stripped
    # from this copy of the file — restore the original tags.
    show_welcome = gr.update(visible=True, value=f'''

Hello {new_username}! 👋

''')
    hide_signin = gr.update(visible=False)
    return [new_username, new_pw, new_score, show_welcome, hide_signin]


def get_user_transcripts(username):
    """Fetch the user's saved transcripts as rows for the Dataframe.

    Returns [gr.update(value=[])] when logged out; otherwise a list of
    [date, transcription, sentiment_output] rows (or a single list of
    empty strings when the user has no transcripts yet).
    """
    arr = []
    if username is None:
        return [gr.update(value=arr)]
    # Fetch the user's records from the per-user subcollection.
    user_transcripts = db.collection(f'Users/{username}/Transcripts').stream()
    for trans in user_transcripts:
        trans_dict = trans.to_dict()
        arr.append([trans_dict['date'],
                    trans_dict['transcription'],
                    trans_dict['sentiment_output']])
    if len(arr) == 0:
        # NOTE(review): this is a flat list, not a list of rows —
        # gr.Dataframe may render it oddly; verify intended shape.
        arr = ['', '', '']
    return arr


def get_user_score(username):
    """Return [HTML] showing the user's score, or an empty shell when unknown."""
    doc = db.document(f'Users/{username}').get()
    if doc.exists:
        user_data = doc.to_dict()
        # NOTE(review): HTML markup stripped from this copy — restore tags.
        return [f"""

Earn points by making customers happy!


{user_data["score"]}

"""]
    return ['\n\n']


# ===== Models =====
# Whisper "base" for speech-to-text; RoBERTa GoEmotions for sentiment.
model = whisper.load_model("base")
sentiment_analysis = pipeline(
    "sentiment-analysis",
    framework="pt",
    model="SamLowe/roberta-base-go_emotions",
)


def analyze_sentiment(text):
    """Run the sentiment pipeline and return a {label: score} dict."""
    results = sentiment_analysis(text)
    sentiment_results = {result['label']: result['score'] for result in results}
    return sentiment_results


def is_positive(result):
    """True when the first whitespace-separated token of *result* is a
    positive sentiment label (the formatted output starts with the label)."""
    result = result.split(' ')[0]
    return result in positive_sentiments


def get_sentiment_emoji(sentiment):
    """Map a GoEmotions label to its display emoji ('' when unknown)."""
    emoji_mapping = {
        "disappointment": "😞",
        "sadness": "đŸ˜ĸ",
        "annoyance": "😠",
        "neutral": "😐",
        "disapproval": "👎",
        "realization": "😮",
        "nervousness": "đŸ˜Ŧ",
        "approval": "👍",
        "joy": "😄",
        "anger": "😡",
        "embarrassment": "đŸ˜ŗ",
        "caring": "🤗",
        "remorse": "😔",
        "disgust": "đŸ¤ĸ",
        "grief": "đŸ˜Ĩ",
        "confusion": "😕",
        "relief": "😌",
        "desire": "😍",
        "admiration": "😌",
        "optimism": "😊",
        "fear": "😨",
        "love": "❤ī¸",
        "excitement": "🎉",
        "curiosity": "🤔",
        "amusement": "😄",
        "surprise": "😲",
        "gratitude": "🙏",
        "pride": "đŸĻ",
    }
    return emoji_mapping.get(sentiment, "")


def display_sentiment_results(sentiment_results, option):
    """Format {label: score} into display text, per the chosen option."""
    sentiment_text = ""
    for sentiment, score in sentiment_results.items():
        emoji = get_sentiment_emoji(sentiment)
        if option == "Sentiment Only":
            sentiment_text += f"{sentiment} {emoji}\n"
        elif option == "Sentiment + Score":
            sentiment_text += f"{sentiment} {emoji}: {score}\n"
    return sentiment_text


def inference(username, audio, sentiment_option):
    """Transcribe *audio*, analyze sentiment, and persist when logged in.

    Returns (language code upper-cased, transcription, sentiment text,
    updated score).
    """
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    # BUGFIX: new_score was previously only assigned inside the
    # `if username:` branch, so anonymous use raised NameError on return.
    new_score = 0
    if username:
        # Save results in Firestore, keyed by a human-readable timestamp.
        # NOTE(review): naive local time at minute resolution — two
        # transcriptions in the same minute overwrite each other.
        ts = datetime.datetime.now()
        ts_formatted = ts.strftime("%d %b %Y, %H:%M")
        ref = db.document(f'Users/{username}')
        transcript_ref = db.document(f'Users/{username}/Transcripts/{ts_formatted}')
        transcript_ref.set({
            "date": ts_formatted,
            "transcription": result.text,
            "sentiment_output": sentiment_output,
        })
        person_doc = ref.get()
        user_data = person_doc.to_dict()
        new_score = user_data['score']
        if is_positive(sentiment_output):
            # Award a point for a positive-leading sentiment result.
            new_score = new_score + 1
            db.document(f'Users/{username}').update({"score": new_score})
        gr.Info("Transcription saved!")

    return lang.upper(), result.text, sentiment_output, new_score


# ===== UI =====
# NOTE(review): the HTML markup inside the literals below was stripped
# from this copy of the file — restore the original tags.
title = """

☕ Lim Kopi Call Center Service đŸ’Ŧ

"""

image_path = "coffee_logo.jpg"

description = """ đŸ’ģ This MVP shows how we can use Whisper to conduct audio sentiment analysis on voice recordings of customer service agents. Whisper is a general speech recognition model built by OpenAI. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.

⚙ī¸ MVP Components:

     - Real-time multilingual speech recognition
     - Language identification
     - Sentiment analysis of the transcriptions

đŸŽ¯ The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores, so customer service agents can receive feedback on the overall call quality and customer receptiveness.

😃 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.

✅ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.

❓ Use the microphone for real-time speech recognition.

⚡ī¸ The model will transcribe the audio for record-keeping, and perform sentiment analysis on the transcribed text.
"""

# NOTE(review): `display: app` is not a valid CSS value — likely mangled
# (probably `display: block`); confirm against the original file.
custom_css = """
#banner-image {
    display: app;
    margin-left: auto;
    margin-right: auto;
}
#chat-message {
    font-size: 14px;
    min-height: 300px;
}
#flex-row {
    display: flex;
    flex-direction: row;
    justify-content: center;
    align-items: flex-end;
}
"""

app = gr.Blocks(title="Lim Kopi Call Center Service", css=custom_css)

with app:
    gr.HTML(title)

    # Session state: who is logged in and their running score.
    authed_username = gr.State(value=None)
    authed_password = gr.State(value=None)
    user_score = gr.State(value=0)

    # Banner + description row.
    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    # Login panel + recording controls row.
    with gr.Row(elem_id="flex-row"):
        with gr.Column(scale=1):
            user_welcome = gr.HTML(visible=False)
            with gr.Group(visible=True) as auth_block:
                username_input = gr.Textbox(
                    label="Username",
                    placeholder="Your cool alter ego",
                )
                password_input = gr.Textbox(
                    label="Password",
                    type='password',
                    placeholder="A super secret code only you know",
                )
                login_btn = gr.Button("Login / Sign Up")
                login_btn.click(
                    authenticate,
                    inputs=[username_input, password_input],
                    outputs=[authed_username, authed_password, user_score,
                             user_welcome, auth_block],
                )
        with gr.Column(scale=1):
            with gr.Group():
                sentiment_option = gr.Radio(
                    choices=["Sentiment Only", "Sentiment + Score"],
                    label="Select an option",
                )
                audio = gr.Audio(
                    source="microphone",
                    type="filepath",
                )
                btn = gr.Button("Transcribe")

    # Results row.
    with gr.Row():
        with gr.Column():
            # NOTE(review): markup stripped — original content unknown.
            gr.HTML("\n")
            gr.HTML("""

🎉 Results

""")
            with gr.Group():
                lang_str = gr.Textbox(label="Language")
                text = gr.Textbox(label="Transcription")
                sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
            btn.click(
                inference,
                inputs=[authed_username, audio, sentiment_option],
                outputs=[lang_str, text, sentiment_output, user_score],
            )

    # Scoreboard row — refreshed on login and after each transcription.
    with gr.Row(visible=True) as scoreboard:
        with gr.Column():
            # NOTE(review): markup stripped — original content unknown.
            gr.HTML("\n")
            gr.HTML("""

đŸ’¯ Your Score

""")
            score_sheet = gr.HTML(visible=True, value='''

Log in to see your score and transcripts

''')
            user_welcome.change(get_user_score, inputs=[authed_username],
                                outputs=[score_sheet])
            sentiment_output.change(get_user_score, inputs=[authed_username],
                                    outputs=[score_sheet])

    # Transcription records row — refreshed on the same triggers.
    with gr.Row(visible=True) as transcription_records:
        with gr.Column():
            # NOTE(review): markup stripped — original content unknown.
            gr.HTML("\n")
            gr.HTML("""

đŸĒŠ Your Transcription Records

""")
            # BUGFIX: the original had a stray trailing comma after this
            # call, making transcription_df a 1-tuple that had to be
            # unwrapped as transcription_df[0] at every use site.
            transcription_df = gr.Dataframe(
                headers=["Date", "Transcription", "Sentiment"],
                datatype=["str", "str", "str"],
                value=get_user_transcripts(authed_username.value),
                type='array',
            )
            user_welcome.change(get_user_transcripts, inputs=[authed_username],
                                outputs=[transcription_df])
            sentiment_output.change(get_user_transcripts, inputs=[authed_username],
                                    outputs=[transcription_df])

app.queue()
app.launch()