import os
import json
from google.cloud import firestore
import gradio as gr
import whisper
from transformers import pipeline
from dotenv import load_dotenv
import base64
import datetime
# Positive sentiments
# Labels from the SamLowe/roberta-base-go_emotions classifier that count
# as "positive" for scoring purposes (consumed by is_positive(), which in
# turn drives the score increment inside inference()).
positive_sentiments = [
"approval",
"realization",
"joy",
"caring",
"relief",
"desire",
"admiration",
"optimism",
"love",
"excitement",
"curiosity",
"amusement",
"gratitude",
"pride"
]
# Load google cloud credentials
# NOTE: unlike the standard Google convention (a file path), this app expects
# GOOGLE_APPLICATION_CREDENTIALS to hold the service-account JSON itself,
# base64-encoded, so it can be stored as a plain environment secret.
load_dotenv()
base64_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
# NOTE(review): if the env var is unset, b64decode(None) raises a confusing
# TypeError here — consider failing fast with a clearer message.
decoded_credentials = base64.b64decode(base64_credentials).decode()
credentials_json = json.loads(decoded_credentials)
# Module-level Firestore client shared by every handler below.
db = firestore.Client.from_service_account_info(credentials_json)
# ===== Authentication =====
def authenticate(new_username, new_pw):
    """Log an existing user in, or create a new account.

    Looks up ``new_username`` in the Firestore ``Users`` collection. If the
    user exists, the stored password must equal ``new_pw`` (otherwise a
    ``gr.Error`` is raised); if not, a new user document is created with a
    score of 0.

    Returns a 5-element list matching the login button's outputs:
    [username state, password state, score state,
     welcome-banner update, sign-in-form update].
    """
    # Empty field(s): leave all auth state untouched.
    if new_username == '' or new_pw == '':
        return [None, None, 0, gr.update(), gr.update()]
    users_ref = db.collection('Users')
    doc_ref = users_ref.document(new_username)
    doc = doc_ref.get()
    new_score = 0
    if doc.exists:
        # User exists in Firestore
        user_data = doc.to_dict()
        new_score = user_data['score']
        # Handle incorrect password
        if user_data['password'] != new_pw:
            raise gr.Error("Incorrect password")
    else:
        # First login: create the account.
        # NOTE(review): the password is persisted in plaintext — it should be
        # hashed (e.g. hashlib + a salt) before being stored.
        doc_ref.set({"username": new_username, "password": new_pw, "score": new_score})
    gr.Info(f"Welcome, {new_username}!")
    # BUGFIX: the welcome value was a single-quoted literal split across
    # physical lines (a SyntaxError); use a triple-quoted f-string instead.
    show_welcome = gr.update(visible=True, value=f'''
Hello {new_username}! đ
''')
    hide_signin = gr.update(visible=False)
    return [new_username, new_pw, new_score, show_welcome, hide_signin]
def get_user_transcripts(username):
    """Fetch every saved transcript row for *username* as dataframe rows.

    Each row is [date, transcription, sentiment_output]. Returns a single
    empty placeholder row when the user has no transcripts, and an update
    with an empty value when no user is logged in.
    """
    if username is None:
        return [gr.update(value=[])]
    # Fetch user's records
    records = (snap.to_dict() for snap in
               db.collection(f'Users/{username}/Transcripts').stream())
    rows = [[rec['date'], rec['transcription'], rec['sentiment_output']]
            for rec in records]
    # Placeholder so the dataframe is not rendered completely empty.
    return rows if rows else ['', '', '']
def get_user_score(username):
    """Return the score banner markup for *username* ('' if user unknown)."""
    snapshot = db.document(f'Users/{username}').get()
    if not snapshot.exists:
        # No such user (or not logged in yet): render nothing.
        return [f'']
    # User exists in Firestore
    data = snapshot.to_dict()
    return [f"""
Earn points by making customers happy!
{data["score"]}
"""]
# ===== Loading Whisper =====
# Load model
# Whisper "base" checkpoint for speech-to-text, plus a go_emotions
# text-classification pipeline for the sentiment step. Both are loaded once
# at module import so every request reuses the same models.
model = whisper.load_model("base")
sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
def analyze_sentiment(text):
    """Run the emotion classifier over *text*; return {label: confidence}."""
    return {entry['label']: entry['score'] for entry in sentiment_analysis(text)}
def is_positive(result):
    """True when the first whitespace-separated token of *result* is a
    positive go_emotions label (see positive_sentiments)."""
    label = result.split(' ')[0]
    return label in positive_sentiments
def get_sentiment_emoji(sentiment):
    """Return the emoji for a go_emotions *sentiment* label, or '' if unknown.

    NOTE(review): the emoji literals below appear mojibake-encoded (e.g.
    "â¤ī¸" for the heart); they are preserved byte-for-byte here and should
    be re-saved as proper UTF-8 in a follow-up.
    """
    # Define the emojis corresponding to each sentiment
    emoji_mapping = {
    "disappointment": "đ",
    "sadness": "đĸ",
    "annoyance": "đ ",
    "neutral": "đ",
    "disapproval": "đ",
    "realization": "đŽ",
    "nervousness": "đŦ",
    "approval": "đ",
    "joy": "đ",
    "anger": "đĄ",
    "embarrassment": "đŗ",
    "caring": "đ¤",
    "remorse": "đ",
    "disgust": "đ¤ĸ",
    "grief": "đĨ",
    "confusion": "đ",
    "relief": "đ",
    "desire": "đ",
    "admiration": "đ",
    "optimism": "đ",
    "fear": "đ¨",
    "love": "â¤ī¸",
    "excitement": "đ",
    "curiosity": "đ¤",
    "amusement": "đ",
    "surprise": "đ˛",
    "gratitude": "đ",
    "pride": "đĻ"
    }
    return emoji_mapping.get(sentiment, "")
def display_sentiment_results(sentiment_results, option):
    """Format a {label: score} mapping into display text.

    *option* selects the layout: "Sentiment Only" emits one "label emoji"
    line per sentiment; "Sentiment + Score" appends the confidence score.
    Any other option yields an empty string.
    """
    lines = []
    for label, score in sentiment_results.items():
        icon = get_sentiment_emoji(label)
        if option == "Sentiment Only":
            lines.append(f"{label} {icon}\n")
        elif option == "Sentiment + Score":
            lines.append(f"{label} {icon}: {score}\n")
    return "".join(lines)
def inference(username, audio, sentiment_option):
    """Transcribe *audio*, detect its language, run sentiment analysis, and
    (when a user is logged in) persist the transcript and update the score.

    Parameters: username (str | None), audio (filepath from gr.Audio),
    sentiment_option ("Sentiment Only" | "Sentiment + Score").
    Returns (language code upper-cased, transcription, sentiment text, score).
    """
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Language identification from the mel spectrogram.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # fp16=False so decoding also works on CPU-only hosts.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    # BUGFIX: new_score was only assigned inside the `if username:` branch,
    # so anonymous transcriptions crashed with UnboundLocalError at return.
    new_score = 0
    if username:
        # save results in firestore
        ts = datetime.datetime.now()
        ts_formatted = ts.strftime("%d %b %Y, %H:%M")
        ref = db.document(f'Users/{username}')
        transcript_ref = db.document(f'Users/{username}/Transcripts/{ts_formatted}')
        transcript_ref.set({"date": ts_formatted, "transcription": result.text, "sentiment_output": sentiment_output})
        person_doc = ref.get()
        user_data = person_doc.to_dict()
        new_score = user_data['score']
        # Award a point when the leading sentiment is positive.
        if is_positive(sentiment_output):
            new_score = new_score + 1
            db.document(f'Users/{username}').update({"score": new_score})
        gr.Info("Transcription saved!")
    return lang.upper(), result.text, sentiment_output, new_score
# ===== Static page content =====
# Page heading (leading characters are mojibake from the original asset and
# are preserved byte-for-byte).
title = """â Lim Kopi Call Center Service đŦ
"""

# Banner image shown next to the description.
image_path = "coffee_logo.jpg"

description = """
đģ This MVP shows how we can use Whisper to conduct audio sentiment analysis on voice recordings of customer service agents. Whisper is a general speech recognition model built by OpenAI. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.
âī¸ MVP Components:
- Real-time multilingual speech recognition
- Language identification
- Sentiment analysis of the transcriptions
đ¯ The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores, so customer service agents can receive feedback on the overall call quality and customer receptiveness.
đ The sentiment analysis results are displayed with emojis representing the corresponding sentiment.
â
The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.
â Use the microphone for real-time speech recognition.
âĄī¸ The model will transcribe the audio for record-keeping, and perform sentiment analysis on the transcribed text.
"""

# BUGFIX: "display: app" is not a valid CSS display value; "display: block"
# is required for the auto left/right margins to center the banner image.
custom_css = """
#banner-image {
display: block;
margin-left: auto;
margin-right: auto;
}
#chat-message {
font-size: 14px;
min-height: 300px;
}
#flex-row {
display: flex;
flex-direction: row;
justify-content: center;
align-items: flex-end;
}
"""
# ===== UI layout =====
app = gr.Blocks(title="Lim Kopi Call Center Service", css=custom_css)
with app:
    gr.HTML(title)

    # Per-session state, populated by authenticate().
    authed_username = gr.State(value=None)
    authed_password = gr.State(value=None)
    user_score = gr.State(value=0)

    # ===== UI =====
    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    with gr.Row(elem_id="flex-row"):
        with gr.Column(scale=1):
            user_welcome = gr.HTML(visible=False)
            with gr.Group(visible=True) as auth_block:
                username_input = gr.Textbox(label="Username", placeholder="Your cool alter ego")
                password_input = gr.Textbox(label="Password", type='password', placeholder="A super secret code only you know")
                login_btn = gr.Button("Login / Sign Up")
                login_btn.click(
                    authenticate,
                    inputs=[username_input, password_input],
                    outputs=[authed_username, authed_password, user_score, user_welcome, auth_block]
                )
        with gr.Column(scale=1):
            with gr.Group():
                sentiment_option = gr.Radio(
                    choices=["Sentiment Only", "Sentiment + Score"],
                    label="Select an option",
                )
                audio = gr.Audio(
                    source="microphone",
                    type="filepath"
                )
                btn = gr.Button("Transcribe")

    with gr.Row():
        with gr.Column():
            # BUGFIX: several HTML snippets below were single-quoted string
            # literals split across physical lines (SyntaxError); the embedded
            # newline is now written as an escape / kept in triple quotes.
            gr.HTML("\n")
            gr.HTML("""đ Results
""")
            with gr.Group():
                lang_str = gr.Textbox(label="Language")
                text = gr.Textbox(label="Transcription")
                sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
            btn.click(inference, inputs=[authed_username, audio, sentiment_option], outputs=[lang_str, text, sentiment_output, user_score])

    with gr.Row(visible=True) as scoreboard:
        with gr.Column():
            gr.HTML("\n")
            gr.HTML("""đ¯ Your Score
""")
            score_sheet = gr.HTML(visible=True, value=f'Log in to see your score and transcripts\n')
            # Refresh the score banner on login and after each transcription.
            user_welcome.change(get_user_score, inputs=[authed_username], outputs=[score_sheet])
            sentiment_output.change(get_user_score, inputs=[authed_username], outputs=[score_sheet])

    with gr.Row(visible=True) as transcription_records:
        with gr.Column():
            gr.HTML("\n")
            gr.HTML(""" đĒŠ Your Transcription Records
""")
            # BUGFIX: the original assignment ended with a stray trailing
            # comma, turning transcription_df into a 1-tuple that had to be
            # unwrapped with [0] at every use site.
            transcription_df = gr.Dataframe(
                headers=["Date", "Transcription", "Sentiment"],
                datatype=["str", "str", "str"],
                value=get_user_transcripts(authed_username.value),
                type='array',
            )
            # Refresh the records table on login and after each transcription.
            user_welcome.change(get_user_transcripts, inputs=[authed_username], outputs=[transcription_df])
            sentiment_output.change(get_user_transcripts, inputs=[authed_username], outputs=[transcription_df])

app.queue()
app.launch()