"""Gradio demo that scores a spoken clip with three audio classifiers.

The clip recorded in the browser is fed to three Hugging Face
``audio-classification`` pipelines (accuracy, fluency, prosodic) and the
top-scoring label of each one is reported.
"""

from typing import Dict

import gradio as gr
import numpy as np
from transformers import pipeline

# The pipelines are built once at import time so every request reuses them.
accuracy_classifier = pipeline(
    task="audio-classification", model="JohnJumon/pronunciation_accuracy"
)
fluency_classifier = pipeline(
    task="audio-classification", model="JohnJumon/fluency_accuracy"
)
prosodic_classifier = pipeline(
    task="audio-classification", model="JohnJumon/prosodic_accuracy"
)


def _prepare_waveform(samples) -> np.ndarray:
    """Return *samples* as a mono float32 waveform scaled by its peak.

    Args:
        samples: Array-like audio samples, either 1-D or 2-D.

    Returns:
        A new 1-D ``float32`` array. Non-silent input is divided by its
        maximum absolute value; silent or empty input is returned unscaled.
    """
    waveform = np.asarray(samples)
    if waveform.ndim > 1:
        # Downmix by averaging axis 1, as the Gradio audio guide does for
        # stereo input -- assumes a (samples, channels) layout.
        waveform = waveform.mean(axis=1)

    # ``astype`` copies, so the in-place division below never touches the
    # caller's buffer.
    waveform = waveform.astype(np.float32)

    peak = float(np.max(np.abs(waveform))) if waveform.size else 0.0
    if peak > 0.0:
        # Skipped for silence: dividing by a zero peak would fill the
        # waveform with NaNs.
        waveform /= peak
    return waveform


def _top_label(scores) -> str:
    """Return the ``'label'`` of the entry with the highest ``'score'``."""
    return max(scores, key=lambda entry: entry['score'])['label']


def pronunciation_scoring(audio) -> Dict[str, str]:
    """Classify a recording for accuracy, fluency and prosody.

    Args:
        audio: Either the ``(sample_rate, samples)`` tuple that ``gr.Audio``
            passes for numpy input, or a bare array of samples. A bare array
            carries no sample rate, so it is handed to the classifiers
            unchanged and must already be at the rate the models expect.

    Returns:
        A dict with the keys ``'accuracy'``, ``'fluency'`` and
        ``'prosodic'``, each mapped to the top-scoring label reported by the
        corresponding classifier.

    Raises:
        gr.Error: If no audio was supplied or the recording is empty.
    """
    if audio is None:
        raise gr.Error("No audio received. Please record a clip and try again.")

    if isinstance(audio, tuple):
        sampling_rate, samples = audio
    else:
        sampling_rate, samples = None, audio

    waveform = _prepare_waveform(samples)
    if waveform.size == 0:
        raise gr.Error("The recording is empty. Please record a clip and try again.")

    classifiers = {
        'accuracy': accuracy_classifier,
        'fluency': fluency_classifier,
        'prosodic': prosodic_classifier,
    }

    result = {}
    for category, classifier in classifiers.items():
        if sampling_rate is None:
            model_input = waveform
        else:
            # Passing the rate lets the pipeline resample to what the model
            # expects. A fresh dict is built per call because the pipeline
            # may consume (pop) the keys of the dict it is given.
            # NOTE(review): resampling inside the pipeline appears to need
            # torchaudio when the rates differ -- confirm it is installed.
            model_input = {"sampling_rate": sampling_rate, "raw": waveform}
        result[category] = _top_label(classifier(model_input))
    return result


# NOTE(review): gr.Label is documented for a single label or a dict of
# label -> confidence floats, while pronunciation_scoring returns
# category -> label strings. Whether this renders depends on the models'
# label names -- confirm, or consider gr.JSON for the output.
gradio_app = gr.Interface(
    pronunciation_scoring,
    inputs=gr.Audio(sources=["microphone"]),
    outputs=gr.Label(label="Result"),
    title="Pronunciation Scoring",
)

if __name__ == "__main__":
    gradio_app.launch()