# Hugging Face Space file-viewer residue (non-code), converted to a comment so
# the module parses: uploaded by yappeizhen, commit 667b50a —
# "scoreboard feature and bugfix on empty transcript table" (10.8 kB).
import os
import json
from google.cloud import firestore
import gradio as gr
import whisper
from transformers import pipeline
from dotenv import load_dotenv
import base64
import datetime
# Sentiment labels treated as "positive": a transcription whose leading label
# appears here earns the agent a score point (see inference()).
positive_sentiments = [
    "approval", "realization", "joy", "caring", "relief",
    "desire", "admiration", "optimism", "love", "excitement",
    "curiosity", "amusement", "gratitude", "pride",
]
# ===== Firestore setup =====
# GOOGLE_APPLICATION_CREDENTIALS holds a base64-encoded service-account JSON
# blob (loaded from .env); decode it and build the Firestore client from it.
load_dotenv()
encoded_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
service_account_info = json.loads(base64.b64decode(encoded_credentials).decode())
db = firestore.Client.from_service_account_info(service_account_info)
# ===== Authentication =====
def authenticate(new_username, new_pw):
    """Sign an existing user in, or register a new account on first login.

    Returns [username, password, score, welcome-banner update, auth-form update]
    for the bound Gradio outputs. Blank inputs reset the session state and
    leave both UI components untouched. Raises gr.Error on a wrong password.

    NOTE(review): passwords are stored and compared in plaintext in Firestore —
    consider hashing before this ships.
    """
    if new_username == '' or new_pw == '':
        return [None, None, 0, gr.update(), gr.update()]

    account_ref = db.collection('Users').document(new_username)
    snapshot = account_ref.get()

    score = 0
    if snapshot.exists:
        # Existing account: load the score and verify the password.
        account = snapshot.to_dict()
        score = account['score']
        if account['password'] != new_pw:
            raise gr.Error("Incorrect password")
    else:
        # First login: create the account record with a zero score.
        account_ref.set({"username": new_username, "password": new_pw, "score": score})

    gr.Info(f"Welcome, {new_username}!")
    show_welcome = gr.update(visible=True, value=f'<div style=\'height:190px; display:flex; justify-content:center; align-items:center;\'><h1 style=\'text-align:center\'>Hello {new_username}! ๐Ÿ‘‹</h1></div>')
    hide_signin = gr.update(visible=False)
    return [new_username, new_pw, score, show_welcome, hide_signin]
def get_user_transcripts(username):
    """Fetch a user's saved transcripts as rows of [date, transcription, sentiment].

    Always returns a list of 3-column rows suitable for the bound gr.Dataframe
    (type='array'): a single blank row stands in when the user is unknown,
    logged out, or has no records.

    BUGFIX: the original returned the flat list ['', '', ''] for an empty
    table (not a row list) and, for a None username, [gr.update(value=[])] —
    a 1-element list handed to a single Dataframe output, which is also an
    invalid build-time `value=`. Both paths now return plain row arrays.
    """
    rows = []
    if username:  # also skips '' — querying Firestore with an empty path segment would fail
        for snapshot in db.collection(f'Users/{username}/Transcripts').stream():
            record = snapshot.to_dict()
            rows.append([record['date'], record['transcription'], record['sentiment_output']])
    return rows or [['', '', '']]
def get_user_score(username):
    """Build the HTML score panel for *username*.

    Returns a 1-element list (one bound output): the score card when the user
    document exists, otherwise an empty heading.
    """
    snapshot = db.document(f'Users/{username}').get()
    if not snapshot.exists:
        return ['<h1 align="center"></h1>']
    score = snapshot.to_dict()["score"]
    return [f"""
<p align="center">Earn points by making customers happy!</p>
<br/>
<h1 align="center" style='font-size:56px;'>{score}</h1>
"""]
# ===== Loading Whisper =====
# Load the "base" Whisper speech-to-text model (downloads weights on first run).
model = whisper.load_model("base")
# Emotion classifier pipeline (PyTorch backend) using the go_emotions model;
# its labels feed get_sentiment_emoji() and the positive_sentiments check.
sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
def analyze_sentiment(text):
    """Run the emotion classifier on *text* and return {label: score}."""
    predictions = sentiment_analysis(text)
    return {prediction['label']: prediction['score'] for prediction in predictions}
def is_positive(result):
    """Return True when the first space-separated token of *result* (the leading
    sentiment label in the formatted output) is a positive sentiment."""
    leading_label = result.split(' ')[0]
    return leading_label in positive_sentiments
def get_sentiment_emoji(sentiment):
    """Return the display emoji for a sentiment label, or '' when unmapped."""
    label_emoji_pairs = (
        ("disappointment", "๐Ÿ˜ž"),
        ("sadness", "๐Ÿ˜ข"),
        ("annoyance", "๐Ÿ˜ "),
        ("neutral", "๐Ÿ˜"),
        ("disapproval", "๐Ÿ‘Ž"),
        ("realization", "๐Ÿ˜ฎ"),
        ("nervousness", "๐Ÿ˜ฌ"),
        ("approval", "๐Ÿ‘"),
        ("joy", "๐Ÿ˜„"),
        ("anger", "๐Ÿ˜ก"),
        ("embarrassment", "๐Ÿ˜ณ"),
        ("caring", "๐Ÿค—"),
        ("remorse", "๐Ÿ˜”"),
        ("disgust", "๐Ÿคข"),
        ("grief", "๐Ÿ˜ฅ"),
        ("confusion", "๐Ÿ˜•"),
        ("relief", "๐Ÿ˜Œ"),
        ("desire", "๐Ÿ˜"),
        ("admiration", "๐Ÿ˜Œ"),
        ("optimism", "๐Ÿ˜Š"),
        ("fear", "๐Ÿ˜จ"),
        ("love", "โค๏ธ"),
        ("excitement", "๐ŸŽ‰"),
        ("curiosity", "๐Ÿค”"),
        ("amusement", "๐Ÿ˜„"),
        ("surprise", "๐Ÿ˜ฒ"),
        ("gratitude", "๐Ÿ™"),
        ("pride", "๐Ÿฆ"),
    )
    for label, emoji in label_emoji_pairs:
        if label == sentiment:
            return emoji
    return ""
def display_sentiment_results(sentiment_results, option):
    """Format {label: score} into display text, one newline-terminated line per
    sentiment; *option* selects label-only or label+score lines (any other
    option yields an empty string)."""
    lines = []
    for label, score in sentiment_results.items():
        emoji = get_sentiment_emoji(label)
        if option == "Sentiment Only":
            lines.append(f"{label} {emoji}\n")
        elif option == "Sentiment + Score":
            lines.append(f"{label} {emoji}: {score}\n")
    return "".join(lines)
def inference(username, audio, sentiment_option):
    """Transcribe *audio* with Whisper, run sentiment analysis, and — when a
    user is logged in — persist the transcript and update their score.

    Returns (detected_language_code_uppercased, transcription, sentiment_text,
    score) for the bound Gradio outputs.
    """
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Detect the spoken language from the log-Mel spectrogram.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Decode in fp32 (fp16=False) so CPU inference works.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    # BUGFIX: initialize before the branch — the original only assigned
    # new_score inside `if username:`, so anonymous (logged-out) calls hit
    # an UnboundLocalError at the return below.
    new_score = 0
    if username:
        # Persist the transcript under Users/<name>/Transcripts/<timestamp>.
        ts_formatted = datetime.datetime.now().strftime("%d %b %Y, %H:%M")
        user_ref = db.document(f'Users/{username}')
        db.document(f'Users/{username}/Transcripts/{ts_formatted}').set({
            "date": ts_formatted,
            "transcription": result.text,
            "sentiment_output": sentiment_output,
        })
        # Award one point when the dominant sentiment is positive.
        new_score = user_ref.get().to_dict()['score']
        if is_positive(sentiment_output):
            new_score += 1
        user_ref.update({"score": new_score})
        gr.Info("Transcription saved!")

    return lang.upper(), result.text, sentiment_output, new_score
# ===== Static UI content =====
# Page heading rendered as raw HTML at the top of the Blocks app.
title = """<h1 align="center">โ˜• Lim Kopi Call Center Service ๐Ÿ’ฌ</h1>"""
# Banner image shown in the left column; path is relative to the working dir.
image_path = "coffee_logo.jpg"
# Long-form feature description (HTML) shown beside the logo.
description = """
๐Ÿ’ป This MVP shows how we can use Whisper to conduct audio sentiment analysis on voice recordings of customer service agents. Whisper is a general speech recognition model built by OpenAI. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
โš™๏ธ MVP Components:<br>
<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Real-time multilingual speech recognition<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Language identification<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis of the transcriptions<br>
<br>
๐ŸŽฏ The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores, so customer service agents can receive feedback on the overall call quality and customer receptiveness.<br>
<br>
๐Ÿ˜ƒ The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
<br>
โœ… The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
<br>
โ“ Use the microphone for real-time speech recognition.<br>
<br>
โšก๏ธ The model will transcribe the audio for record-keeping, and perform sentiment analysis on the transcribed text.<br>
"""
# Custom CSS injected into the Blocks app.
# BUGFIX: '#banner-image' used `display: app;`, which is not a valid CSS
# display value (browsers drop the declaration); `block` is what the
# margin-left/right auto centering requires.
custom_css = """
#banner-image {
display: block;
margin-left: auto;
margin-right: auto;
}
#chat-message {
font-size: 14px;
min-height: 300px;
}
#flex-row {
display: flex;
flex-direction: row;
justify-content: center;
align-items: flex-end;
}
"""
# ===== Gradio app: layout and event wiring =====
app = gr.Blocks(title="Lim Kopi Call Center Service", css=custom_css)
with app:
    gr.HTML(title)

    # Per-session state for the signed-in user.
    authed_username = gr.State(value=None)
    authed_password = gr.State(value=None)
    user_score = gr.State(value=0)

    # ===== UI =====
    # Header row: banner image beside the feature description.
    with gr.Row():
        with gr.Column():
            with gr.Group():
                gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    # Login form (left) and transcription controls (right).
    with gr.Row(elem_id="flex-row"):
        with gr.Column(scale=1):
            user_welcome = gr.HTML(visible=False)
            with gr.Group(visible=True) as auth_block:
                username_input = gr.Textbox(label="Username", placeholder="Your cool alter ego")
                password_input = gr.Textbox(label="Password", type='password', placeholder="A super secret code only you know")
                login_btn = gr.Button("Login / Sign Up")
                # On success, authenticate() reveals the welcome banner and
                # hides the sign-in form.
                login_btn.click(
                    authenticate,
                    inputs=[username_input, password_input],
                    outputs=[authed_username, authed_password, user_score, user_welcome, auth_block]
                )
        with gr.Column(scale=1):
            with gr.Group():
                sentiment_option = gr.Radio(
                    choices=["Sentiment Only", "Sentiment + Score"],
                    label="Select an option",
                )
                audio = gr.Audio(
                    source="microphone",
                    type="filepath"
                )
                btn = gr.Button("Transcribe")

    # Transcription results.
    with gr.Row():
        with gr.Column():
            gr.HTML("<br/>")
            gr.HTML("""<h1 align="center">๐ŸŽ‰ Results</h1>""")
            with gr.Group():
                lang_str = gr.Textbox(label="Language")
                text = gr.Textbox(label="Transcription")
                sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
            btn.click(inference, inputs=[authed_username, audio, sentiment_option], outputs=[lang_str, text, sentiment_output, user_score])

    # Scoreboard, refreshed after login and after each transcription.
    with gr.Row(visible=True) as scoreboard:
        with gr.Column():
            gr.HTML("<br/>")
            gr.HTML("""<h1 align="center">๐Ÿ’ฏ Your Score</h1>""")
            score_sheet = gr.HTML(visible=True, value='<p align="center">Log in to see your score and transcripts</p>')
            user_welcome.change(get_user_score, inputs=[authed_username], outputs=[score_sheet])
            sentiment_output.change(get_user_score, inputs=[authed_username], outputs=[score_sheet])

    # Transcript history table, refreshed on the same triggers.
    with gr.Row(visible=True) as transcription_records:
        with gr.Column():
            gr.HTML("<br/>")
            gr.HTML("""<h1 align="center"> ๐Ÿชฉ Your Transcription Records</h1>""")
            # BUGFIX: the original had a stray trailing comma after
            # gr.Dataframe(...), which made transcription_df a 1-tuple and
            # forced transcription_df[0] in every handler below.
            transcription_df = gr.Dataframe(
                headers=["Date", "Transcription", "Sentiment"],
                datatype=["str", "str", "str"],
                value=get_user_transcripts(authed_username.value),
                type='array',
            )
            user_welcome.change(get_user_transcripts, inputs=[authed_username], outputs=[transcription_df])
            sentiment_output.change(get_user_transcripts, inputs=[authed_username], outputs=[transcription_df])

app.queue()
app.launch()