Spaces:

LimKopi
/

Whisper-Audio-Analysis

Runtime error

App Files Files Community

Whisper-Audio-Analysis / app.py

yappeizhen

feat: customisations

803a411 about 1 year ago

raw

history blame

No virus

4.86 kB

	import gradio as gr
	import whisper
	from transformers import pipeline

	# Speech-to-text model: the "base" Whisper checkpoint, loaded once at import time.
	model = whisper.load_model("base")

	# Text classifier (PyTorch backend) used to score the emotion of a transcription.
	sentiment_analysis = pipeline(
	    "sentiment-analysis",
	    framework="pt",
	    model="SamLowe/roberta-base-go_emotions",
	)

	def analyze_sentiment(text):
	    """Classify the emotion of ``text`` and return a ``{label: score}`` dict.

	    The module-level ``sentiment_analysis`` pipeline yields a sequence of
	    records with ``'label'`` and ``'score'`` keys; they are flattened here
	    into one mapping (a later duplicate label overwrites an earlier one).
	    """
	    scores_by_label = {}
	    for prediction in sentiment_analysis(text):
	        scores_by_label[prediction['label']] = prediction['score']
	    return scores_by_label

	# Emoji shown next to each emotion label. Built once at import time instead
	# of on every lookup, since the lookup runs once per label being displayed.
	_SENTIMENT_EMOJI = {
	    "disappointment": "😞",
	    "sadness": "😢",
	    "annoyance": "😠",
	    "neutral": "😐",
	    "disapproval": "👎",
	    "realization": "😮",
	    "nervousness": "😬",
	    "approval": "👍",
	    "joy": "😄",
	    "anger": "😡",
	    "embarrassment": "😳",
	    "caring": "🤗",
	    "remorse": "😔",
	    "disgust": "🤢",
	    "grief": "😥",
	    "confusion": "😕",
	    "relief": "😌",
	    "desire": "😍",
	    "admiration": "😌",
	    "optimism": "😊",
	    "fear": "😨",
	    "love": "❤️",
	    "excitement": "🎉",
	    "curiosity": "🤔",
	    "amusement": "😄",
	    "surprise": "😲",
	    "gratitude": "🙏",
	    "pride": "🦁",
	}


	def get_sentiment_emoji(sentiment):
	    """Return the emoji for an emotion label, or ``""`` if the label is unknown.

	    Args:
	        sentiment: Emotion label such as ``"joy"`` or ``"anger"``. Matching is
	            exact (case-sensitive).

	    Returns:
	        The emoji string for ``sentiment``, or an empty string when the label
	        has no entry in the table.
	    """
	    return _SENTIMENT_EMOJI.get(sentiment, "")

	def display_sentiment_results(sentiment_results, option):
	    """Render a ``{label: score}`` mapping as newline-terminated text lines.

	    With ``option == "Sentiment Only"`` each line is ``"<label> <emoji>"``;
	    with ``option == "Sentiment + Score"`` the score is appended after a
	    colon. Any other ``option`` value produces an empty string.
	    """
	    lines = []
	    for label, confidence in sentiment_results.items():
	        icon = get_sentiment_emoji(label)
	        if option == "Sentiment Only":
	            lines.append(f"{label} {icon}\n")
	        elif option == "Sentiment + Score":
	            lines.append(f"{label} {icon}: {confidence}\n")
	    return "".join(lines)

	def inference(audio, sentiment_option):
	    """Transcribe a recording and summarise the emotion of what was said.

	    Args:
	        audio: Path to the recorded audio file. ``None`` or an empty string
	            means nothing was recorded.
	        sentiment_option: ``"Sentiment Only"`` or ``"Sentiment + Score"``;
	            forwarded to ``display_sentiment_results``.

	    Returns:
	        A ``(language, transcription, sentiment_text)`` tuple of strings.
	        When no audio was supplied, the language and sentiment entries are
	        empty and the transcription entry carries a short notice.
	    """
	    # Clicking the button without recording sends no file path; bail out
	    # early instead of letting the audio loader fail on it.
	    if not audio:
	        return "", "No audio was provided. Please record a clip and try again.", ""

	    waveform = whisper.load_audio(audio)
	    # Fit the clip to the fixed-length window the model decodes
	    # (30 seconds per the Whisper README); longer audio is cut off.
	    waveform = whisper.pad_or_trim(waveform)

	    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

	    # Pick the most probable language from the detector's probabilities.
	    _, probs = model.detect_language(mel)
	    lang = max(probs, key=probs.get)

	    # Hand the detected language to the decoder so it does not have to run
	    # language detection a second time, and so the language reported to the
	    # user is the one actually used for decoding.
	    options = whisper.DecodingOptions(language=lang, fp16=False)
	    result = whisper.decode(model, mel, options)

	    sentiment_results = analyze_sentiment(result.text)
	    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

	    return lang.upper(), result.text, sentiment_output

	# HTML heading; rendered at the top of the page through gr.HTML(title).
	title = """<h1 align="center">☕ Lim Kopi 💬</h1>"""
	# Banner image passed to gr.Image. The path is relative — presumably resolved
	# against the app's working directory (confirm the file ships with the app).
	image_path = "coffee_logo.jpg"
	# HTML blurb rendered through gr.HTML(description); line breaks are explicit
	# <br> tags because the text is emitted as HTML, not plain text.
	description = """
	💻 This MVP shows how we can use Whisper to conduct audio sentiment analysis on voice recordings of customer service agents. Whisper is a general speech recognition model built by OpenAI. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
	⚙️ MVP Components:<br>
	<br>
	- Real-time multilingual speech recognition<br>
	- Language identification<br>
	- Sentiment analysis of the transcriptions<br>
	<br>
	🎯 The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores, so customer service agents can receive feedback on the overall call quality and customer receptiveness.<br>
	<br>

	😃 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
	<br>

	✅ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
	<br>

	❓ Use the microphone for real-time speech recognition.<br>
	<br>

	⚡️ The model will transcribe the audio for record-keeping, and perform sentiment analysis on the transcribed text.<br>

	"""

	# Stylesheet handed to gr.Blocks(css=...).
	# - #banner-image centres the element created with elem_id="banner-image".
	# - NOTE(review): no component in the visible UI code uses elem_id="chat-message";
	#   this rule may be dead — confirm.
	# - NOTE(review): .svelte-1mwvhlq looks like a Gradio-generated class name and
	#   may change between Gradio versions — confirm what it is meant to hide.
	custom_css = """
	#banner-image {
	display: block;
	margin-left: auto;
	margin-right: auto;
	}
	#chat-message {
	font-size: 14px;
	min-height: 300px;
	}
	.svelte-1mwvhlq {
	display: none !important;
	}
	"""

	# Page layout and event wiring for the demo.
	# NOTE(review): the nesting of the `with` blocks below was reconstructed from
	# statement order — confirm it matches the intended layout.
	block = gr.Blocks(title="Lim Kopi Call Center Service", css=custom_css)

	with block:
	    gr.HTML(title)

	    # Header row: banner image on the left, description on the right.
	    with gr.Row():
	        with gr.Column():
	            gr.Image(image_path, elem_id="banner-image", show_label=False)
	        with gr.Column():
	            gr.HTML(description)

	    with gr.Group():
	        with gr.Box():
	            sentiment_option = gr.Radio(
	                choices=["Sentiment Only", "Sentiment + Score"],
	                label="Select an option",
	                # Pre-select a choice: with nothing selected the handler
	                # receives None and the sentiment box comes back empty.
	                value="Sentiment Only",
	            )
	            audio = gr.Audio(
	                source="microphone",
	                type="filepath"
	            )
	            with gr.Box():
	                btn = gr.Button("Transcribe")

	        lang_str = gr.Textbox(label="Language")

	        text = gr.Textbox(label="Transcription")

	        sentiment_output = gr.Textbox(label="Sentiment Analysis Results")

	        # inference returns (language, transcription, sentiment text), in
	        # the same order as the output components listed here.
	        btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])

	block.launch()