# Scraped page header ("Spaces: Running"); not part of the program.
import os | |
import torch | |
import gradio as gr | |
from huggingface_hub import InferenceClient | |
from model import predict_params, AudioDataset | |
# TODO: Stop the "no 1s_normal" message from being shown when predicting.
# Hugging Face access token read from the environment (None when unset).
token = os.getenv("HF_TOKEN")

# Remote chat model used by the assistant tab.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)

# Run inference on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Model used by the "Analizador" tab, plus its id -> label mapping.
model_class, id2label_class = predict_params(
    model_path="distilhubert-finetuned-mixed-data",
    dataset_path="data/mixed_data",
    filter_white_noise=True,
    undersample_normal=True,
)

# Model used by the "Monitor" tab, plus its id -> label mapping.
model_mon, id2label_mon = predict_params(
    model_path="distilhubert-finetuned-cry-detector",
    dataset_path="data/baby_cry_detection",
    filter_white_noise=False,
    undersample_normal=False,
)
def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal=False):
    """Run ``model`` on one audio file and return the raw logits.

    Args:
        audiopath: Path of the audio file to evaluate.
        model: Model to run; it is moved to the module-level ``device`` and
            switched to evaluation mode on every call.
        dataset_path: Dataset directory forwarded to ``AudioDataset``.
        filter_white_noise: Flag forwarded to ``AudioDataset``.
        undersample_normal: Flag forwarded to ``AudioDataset``.

    Returns:
        The ``logits`` attribute of the model output for a batch of one item.
    """
    model.to(device)
    model.eval()

    # The dataset object is built only to reuse its audio preprocessing; the
    # second positional argument is an empty dict.
    preprocessor = AudioDataset(dataset_path, {}, filter_white_noise, undersample_normal)
    waveform = preprocessor.preprocess_audio(audiopath)

    # Add a leading batch dimension after moving the tensor to the device.
    batch = waveform.to(device).unsqueeze(0)

    with torch.no_grad():
        return model(input_values=batch).logits
def predict(audio_path_pred):
    """Return the predicted label for the audio file at ``audio_path_pred``.

    The file is scored with ``model_class``; the highest-scoring class id is
    translated through a fixed Spanish mapping, falling back to the model's
    own ``id2label_class`` entry for ids outside that mapping.
    """
    with torch.no_grad():
        scores = call(
            audio_path_pred,
            model=model_class,
            dataset_path="data/mixed_data",
            filter_white_noise=True,
            undersample_normal=False,
        )
        class_id = torch.argmax(scores, dim=-1).item()

    # Looked up unconditionally, exactly as before, so an id missing from
    # id2label_class still raises KeyError.
    model_label = id2label_class[class_id]

    spanish_labels = {
        0: 'Cansancio/Incomodidad',
        1: 'Dolor',
        2: 'Hambre',
        3: 'Problemas para respirar',
    }
    return spanish_labels.get(class_id, model_label)
def predict_stream(audio_path_stream):
    """Decide whether the audio contains crying and, if so, predict the cause.

    The file is scored with ``model_mon``; the softmax probability of class
    index 1 is averaged and expressed as a percentage.

    Returns:
        A ``(headline, detail)`` pair of strings for display.
    """
    with torch.no_grad():
        scores = call(
            audio_path_stream,
            model=model_mon,
            dataset_path="data/baby_cry_detection",
            filter_white_noise=False,
            undersample_normal=False,
        )
        class_probs = torch.softmax(scores, dim=-1)
        pct = class_probs[:, 1].mean() * 100

        # NOTE(review): the "crying" branch is taken when this percentage is
        # BELOW 15. Whether index 1 is the crying or the non-crying class is
        # not visible here - confirm against id2label_mon.
        if pct < 15:
            cause = predict(audio_path_stream)
            return "Está llorando por:", f"{cause}. Probabilidad: {pct:.1f}%"
        return "No está llorando.", f"Probabilidad: {pct:.1f}%"
def decibelios(audio_path_stream):
    """Return a dB-style level, ``20 * log10(rms)``, for the given audio file.

    NOTE(review): the RMS is computed over the logits produced by
    ``model_mon``, not over the audio samples themselves - confirm that this
    is the intended "decibel" measure.
    """
    with torch.no_grad():
        scores = call(
            audio_path_stream,
            model=model_mon,
            dataset_path="data/baby_cry_detection",
            filter_white_noise=False,
            undersample_normal=False,
        )
        root_mean_square = scores.square().mean().sqrt()
        # The small epsilon keeps log10 finite when the RMS is zero.
        log_rms = torch.log10(root_mean_square + 1e-6).item()
    return 20 * log_rms
def mostrar_decibelios(audio_path_stream, visual_threshold):
    """Return the status text shown in the monitor for the current audio.

    Args:
        audio_path_stream: Path of the audio chunk being monitored.
        visual_threshold: Level compared against ``decibelios``.

    Returns:
        A "predicting" message with the measured level when it is below the
        threshold, otherwise the "no noise detected" message.
    """
    db_level = decibelios(audio_path_stream)
    if db_level < visual_threshold:
        return f"Prediciendo. Decibelios: {db_level:.2f}"
    # Plain ``else`` instead of ``elif db_level > visual_threshold``: the old
    # form returned None when the level was exactly equal to the threshold.
    # This now matches the branch structure of predict_stream_decib.
    return "No detectamos ruido..."
def predict_stream_decib(audio_path_stream, visual_threshold):
    """Run the cry prediction only when the measured level allows it.

    Returns:
        The two strings from ``predict_stream`` joined by a space when the
        level from ``decibelios`` is below ``visual_threshold``; otherwise an
        empty string.
    """
    measured = decibelios(audio_path_stream)
    if measured < visual_threshold:
        headline, detail = predict_stream(audio_path_stream)
        return f"{headline} {detail}"
    return ""
def chatbot_config(message, history: list[tuple[str, str]]):
    """Stream the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: Latest user message.
        history: Previous turns as ``(user, assistant)`` pairs; empty entries
            are skipped.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    system_message = "You are a Chatbot specialized in baby health and care."
    max_tokens = 512
    temperature = 0.5
    top_p = 0.95

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        # Some chunks may carry no choices at all; skip them.
        if not chunk.choices:
            continue
        # ``delta.content`` can be None on a chunk (for example the final
        # one); concatenating None would raise TypeError. The local is named
        # ``piece`` so it does not shadow the module-level ``token``.
        piece = chunk.choices[0].delta.content
        if piece:
            response += piece
        yield response
def cambiar_pestaña():
    """Return two updates: hide the first output, show the second.

    Meant for click handlers whose ``outputs`` are ``[current_page, next_page]``.
    """
    hide_current = gr.update(visible=False)
    show_next = gr.update(visible=True)
    return hide_current, show_next
# Application theme: start from Gradio's "Soft" theme with custom hues,
# compact sizing and the Nunito font for both regular and monospace text.
my_theme = gr.themes.Soft(
    primary_hue="emerald",
    secondary_hue="green",
    neutral_hue="slate",
    text_size="sm",
    spacing_size="sm",
    font=[gr.themes.GoogleFont('Nunito'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Nunito'), 'ui-monospace', 'Consolas', 'monospace'],
)

# Override individual theme variables; values starting with "*" refer to
# other theme variables.
my_theme = my_theme.set(
    body_background_fill='*neutral_50',
    body_text_color='*neutral_600',
    body_text_size='*text_sm',
    embed_radius='*radius_md',
    shadow_drop='*shadow_spread',
    shadow_spread='*button_shadow_active',
)
# User interface: four pages implemented as columns whose visibility is
# toggled by the navigation buttons (landing -> assistant -> analyzer/monitor).
with gr.Blocks(theme=my_theme) as demo:

    # ---- Landing page -------------------------------------------------------
    with gr.Column(visible=True) as inicial:
        # Global CSS plus the title and slogan.
        gr.HTML(
            """
            <style>
            @import url('https://fonts.googleapis.com/css2?family=Lobster&display=swap');
            @import url('https://fonts.googleapis.com/css2?family=Roboto&display=swap');
            h1 {
            font-family: 'Lobster', cursive;
            font-size: 5em !important;
            text-align: center;
            margin: 0;
            }
            .gr-button {
            background-color: #4CAF50 !important;
            color: white !important;
            border: none;
            padding: 25px 50px; /* Increase the padding for bigger buttons */
            text-align: center;
            text-decoration: none;
            display: inline-block;
            font-family: 'Lobster', cursive; /* Apply the Lobster font */
            font-size: 2em !important; /* Increase the button text size */
            margin: 4px 2px;
            cursor: pointer;
            border-radius: 12px;
            }
            .gr-button:hover {
            background-color: #45a049;
            }
            h2 {
            font-family: 'Lobster', cursive;
            font-size: 3em !important;
            text-align: center;
            margin: 0;
            }
            p.slogan, h4, p, h3 {
            font-family: 'Roboto', sans-serif;
            text-align: center;
            }
            </style>
            <h1>Iremia</h1>
            <h4 style='text-align: center; font-size: 1.5em'>Tu aliado para el bienestar de tu bebé</h4>
            """
        )
        # Project description (adjacent literals form a single string).
        gr.Markdown(
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué es Iremia?</h4>"
            "<p style='text-align: left'>Iremia es un proyecto llevado a cabo por un grupo de estudiantes interesados en el desarrollo de modelos de inteligencia artificial, enfocados específicamente en casos de uso relevantes para ayudar a cuidar a los más pequeños de la casa.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>Nuestra misión</h4>"
            "<p style='text-align: left'>Sabemos que la paternidad puede suponer un gran desafío. Nuestra misión es brindarles a todos los padres unas herramientas de última tecnología que los ayuden a navegar esos primeros meses de vida tan cruciales en el desarrollo de sus pequeños.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué ofrece Iremia?</h4>"
            "<p style='text-align: left'>Chatbot: Pregunta a nuestro asistente que te ayudará con cualquier duda que tengas sobre el cuidado de tu bebé.</p>"
            "<p style='text-align: left'>Analizador: Con nuestro modelo de inteligencia artificial somos capaces de predecir por qué tu hijo de menos de 2 años está llorando.</p>"
            "<p style='text-align: left'>Monitor: Nuestro monitor no es como otros que hay en el mercado, ya que es capaz de reconocer si un sonido es un llanto del bebé o no; y si está llorando, predice automáticamente la causa. Dándote la tranquilidad de saber siempre qué pasa con tu pequeño, ahorrándote tiempo y horas de sueño.</p>"
        )
        boton_inicial = gr.Button("Comenzar")

    # ---- Assistant page (chat + navigation to the other tools) -------------
    with gr.Column(visible=False) as chatbot:
        gr.Markdown("<h2>Asistente</h2>")
        gr.ChatInterface(
            chatbot_config,
            theme=my_theme,
            retry_btn=None,
            undo_btn=None,
            clear_btn="Limpiar 🗑️",
            autofocus=True,
            fill_height=True,
        )
        with gr.Row():
            with gr.Column():
                boton_predictor = gr.Button("Analizador")
            with gr.Column():
                boton_monitor = gr.Button("Monitor")

    # ---- Analyzer page: classify one recorded/uploaded clip ----------------
    with gr.Column(visible=False) as pag_predictor:
        gr.Markdown("<h2>Analizador</h2>")
        audio_input = gr.Audio(
            min_length=1.0,
            format="wav",
            label="Baby recorder",
            type="filepath",
        )
        # Components are created in the same order as before (button, then
        # result box) so the rendered layout is unchanged.
        boton_analizar = gr.Button("¿Por qué llora?")
        resultado_analisis = gr.Textbox(label="Tu bebé llora por:")
        boton_analizar.click(
            predict,
            inputs=audio_input,
            outputs=resultado_analisis,
        )
        boton_volver_predictor = gr.Button("Volver")
        boton_volver_predictor.click(cambiar_pestaña, outputs=[pag_predictor, chatbot])

    # ---- Monitor page: streamed audio, level gate and live prediction ------
    with gr.Column(visible=False) as pag_monitor:
        gr.Markdown("<h2>Monitor</h2>")
        audio_stream = gr.Audio(
            format="wav",
            label="Baby recorder",
            type="filepath",
            streaming=True,
        )
        threshold_db = gr.Slider(
            minimum=0,
            maximum=100,
            step=1,
            value=30,
            label="Decibelios para activar la predicción:",
        )
        entradas_monitor = [audio_stream, threshold_db]

        # Two independent stream listeners: one reports the status line, the
        # other reports the prediction.
        estado_monitor = gr.Textbox(value="Esperando...", label="Estado")
        audio_stream.stream(
            mostrar_decibelios,
            inputs=entradas_monitor,
            outputs=estado_monitor,
        )
        prediccion_monitor = gr.Textbox(value="", label="Tu bebé:")
        audio_stream.stream(
            predict_stream_decib,
            inputs=entradas_monitor,
            outputs=prediccion_monitor,
        )
        boton_volver_monitor = gr.Button("Volver")
        boton_volver_monitor.click(cambiar_pestaña, outputs=[pag_monitor, chatbot])

    # ---- Page navigation: each click hides outputs[0] and shows outputs[1] --
    boton_inicial.click(cambiar_pestaña, outputs=[inicial, chatbot])
    boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])
    boton_monitor.click(cambiar_pestaña, outputs=[chatbot, pag_monitor])

demo.launch(share=True)