Spaces:
Running
Running
import gradio as gr | |
from transformers import pipeline | |
import numpy as np | |
import os | |
# All three scorers are audio-classification pipelines; only the checkpoint differs.
_AUDIO_TASK = "audio-classification"

# One classifier per scoring dimension, loaded once at import time so that
# every request reuses the same pipeline objects.
accuracy_classifier = pipeline(
    task=_AUDIO_TASK,
    model="JohnJumon/pronunciation_accuracy",
)
fluency_classifier = pipeline(
    task=_AUDIO_TASK,
    model="JohnJumon/fluency_accuracy",
)
prosodic_classifier = pipeline(
    task=_AUDIO_TASK,
    model="JohnJumon/prosodic_accuracy",
)
def pronunciation_scoring(audio):
    """Score a recording for pronunciation accuracy, fluency and prosody.

    The three module-level classifiers are each run on ``audio``. For every
    dimension the label with the highest score is kept and paired with its
    human-readable description.

    Args:
        audio: The recording to score; it is passed unchanged to each
            classifier. The interface defined in this file supplies a file
            path (``type="filepath"``), and Gradio passes ``None`` when the
            user submits without recording anything.

    Returns:
        A 6-tuple ``(accuracy_label, accuracy_description, fluency_label,
        fluency_description, prosodic_label, prosodic_description)``, in the
        same order as the interface's output components.

    Raises:
        gr.Error: If ``audio`` is ``None``.
        KeyError: If a classifier returns a label that has no description.
    """
    # Fail early with a message the UI can display, instead of letting the
    # pipelines raise an opaque error on a missing input.
    if audio is None:
        raise gr.Error("No audio received. Please record something and try again.")

    accuracy_description = {
        'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
        'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
        'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
        'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
        'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes',
    }
    fluency_description = {
        'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
        'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
        'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
        'Fluent': 'Fluent without noticeable pauses or stammering',
    }
    prosodic_description = {
        'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
        'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
        'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
        'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
        'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native',
    }

    # Run every classifier first; each returns a list of
    # {'label': ..., 'score': ...} entries.
    predictions = {
        'accuracy': accuracy_classifier(audio),
        'fluency': fluency_classifier(audio),
        'prosodic': prosodic_classifier(audio),
    }

    # Keep only the highest-scoring label per dimension. A separate dict is
    # built rather than overwriting `predictions` while iterating over it.
    labels = {
        category: max(scores, key=lambda entry: entry['score'])['label']
        for category, scores in predictions.items()
    }

    return (
        labels['accuracy'],
        accuracy_description[labels['accuracy']],
        labels['fluency'],
        fluency_description[labels['fluency']],
        labels['prosodic'],
        prosodic_description[labels['prosodic']],
    )
# Input: a microphone recording, handed to the callback as a file path.
audio_input = gr.Audio(sources="microphone", type="filepath")

# Outputs: for each scoring dimension, the winning label followed by an
# unlabeled, read-only textbox holding its description. The order must match
# the tuple returned by pronunciation_scoring.
result_outputs = [
    gr.Label(label="Accuracy Result"),
    gr.Textbox(interactive=False, show_label=False),
    gr.Label(label="Fluency Result"),
    gr.Textbox(interactive=False, show_label=False),
    gr.Label(label="Prosodic Result"),
    gr.Textbox(interactive=False, show_label=False),
]

gradio_app = gr.Interface(
    fn=pronunciation_scoring,
    inputs=audio_input,
    outputs=result_outputs,
    title="Pronunciation Scoring",
    description="This app will score your pronunciation accuracy, fluency, and prosodic (intonation)",
    # Single example row: the audio.wav file located next to this script.
    examples=[
        [os.path.join(os.path.dirname(__file__), "audio.wav")],
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()