"""Gradio demo: classify the spoken language of an uploaded audio clip.

Loads a fine-tuned Hubert audio-classification model from the Hugging Face
Hub and serves it behind a simple ``gr.Interface`` with bundled examples.
"""

import gradio as gr
import librosa
import numpy as np
import torch
from transformers import pipeline

# Index -> language name for the 45 classes of the common_language dataset.
# NOTE(review): this mapping is not consulted at inference time — the
# pipeline already returns string labels — it is kept for reference only.
# "Ukranian" appears to mirror the dataset's own label spelling; confirm
# against the dataset before "correcting" it.
language_classes = {
    0: "Arabic",
    1: "Basque",
    2: "Breton",
    3: "Catalan",
    4: "Chinese_China",
    5: "Chinese_Hongkong",
    6: "Chinese_Taiwan",
    7: "Chuvash",
    8: "Czech",
    9: "Dhivehi",
    10: "Dutch",
    11: "English",
    12: "Esperanto",
    13: "Estonian",
    14: "French",
    15: "Frisian",
    16: "Georgian",
    17: "German",
    18: "Greek",
    19: "Hakha_Chin",
    20: "Indonesian",
    21: "Interlingua",
    22: "Italian",
    23: "Japanese",
    24: "Kabyle",
    25: "Kinyarwanda",
    26: "Kyrgyz",
    27: "Latvian",
    28: "Maltese",
    29: "Mongolian",
    30: "Persian",
    31: "Polish",
    32: "Portuguese",
    33: "Romanian",
    34: "Romansh_Sursilvan",
    35: "Russian",
    36: "Sakha",
    37: "Slovenian",
    38: "Spanish",
    39: "Swedish",
    40: "Tamil",
    41: "Tatar",
    42: "Turkish",
    43: "Ukranian",
    44: "Welsh",
}

username = "jpbello"  ## Complete your username
model_id = "jpbello/Hubert_emotion-finetuned-common_language"

# Prefer GPU when available; transformers pipelines accept a device string.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline("audio-classification", model=model_id, device=device)


def classify_audio(filepath):
    """Predict the language spoken in an audio file.

    Parameters
    ----------
    filepath : str
        Path to the audio file to classify (Gradio supplies this via
        ``gr.Audio(type="filepath")``).

    Returns
    -------
    dict
        Mapping of predicted language label -> confidence score, in the
        shape expected by the ``gr.Label`` output component.
    """
    preds = pipe(filepath)
    return {p["label"]: p["score"] for p in preds}


title = "Language Classification Model"
description = (
    "This model has been fine-tuned on a dataset containing various languages\n"
    "including Arabic, Basque, Catalan, Chinese, English, French, German, Japanese, Russian, and more.\n"
    "It is designed for audio classification, allowing it to predict the language spoken in a given audio clip.\n"
    "Try it out by uploading an audio sample and see how accurately it can identify the language being spoken!\n"
    "For more info, check out [GITHUB](https://github.com/AEscF)"
)

# Bundled example clips shown below the input widget; each example is a
# one-element list (one value per input component).
filenames = ["EN_0049.wav", "FR_0098.wav", "JP_0222.wav"]
filenames = [[f"./{f}"] for f in filenames]

demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Label(label="Predictions")],
    title=title,
    description=description,
    examples=filenames,
)

demo.launch()