Spaces:

A-POR-LOS-8000
/

CHATBOT

Sleeping

File size: 10,798 Bytes

1e6dc54
 
5cf41d0
ace06e3
abdf62b
6ff9ded
763091b
6d1143c
2ca1b49
166aa6c
53f6532
95fada5
53f6532
 
 
 
 
95fada5
53f6532
 
 
 
ace06e3
763091b
abdf62b
 
53f6532
166aa6c
 
1e6dc54
 
abdf62b
166aa6c
1e6dc54
166aa6c
abdf62b
763091b
166aa6c
 
763091b
166aa6c
 
 
 
 
53f6532
abdf62b
 
166aa6c
 
 
6ff9ded
166aa6c
6ff9ded
166aa6c
 
6ff9ded
 
 
 
 
 
 
 
166aa6c
 
 
deb9c39
 
 
6ff9ded
166aa6c
 
 
deb9c39
8830613
deb9c39
abdf62b
166aa6c
abdf62b
 
 
 
763091b
abdf62b
ace06e3
 
 
ebf42ac
 
ace06e3
 
 
abdf62b
 
ace06e3
 
 
1e6dc54
 
 
763091b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ace06e3
763091b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deb9c39
763091b
 
 
deb9c39
 
763091b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deb9c39
763091b
 
 
 
 
 
 
 
 
 
 
 
deb9c39
763091b
 
deb9c39
763091b
 
 
 
 
 
 
 
 
 
 
6ff9ded
763091b
 
abdf62b
deb9c39
 
abdf62b
 
 
 
 
 
166aa6c
 
abdf62b
 
 
763091b
abdf62b
 
deb9c39
abdf62b
 
 
 
 
 
166aa6c
 
 
 
 
6ff9ded
166aa6c
 
 
 
 
 
abdf62b
166aa6c
 
 
abdf62b
763091b
 
166aa6c
 
abdf62b

import os
import torch
import gradio as gr
from huggingface_hub import InferenceClient
from model import predict_params, AudioDataset
import torchaudio
# TODO: Suppress the "no 1s_normal" message that is reported when predicting
# Hugging Face access token, read from the HF_TOKEN environment variable
# (os.getenv returns None when the variable is unset).
token = os.getenv("HF_TOKEN")
# Inference client for the Llama 3 8B Instruct model; chatbot_config streams
# its chat completions.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Model used by predict() plus its id -> label mapping.
# NOTE(review): predict_params comes from the project's `model` module;
# presumably it loads the fine-tuned checkpoint named by model_path — confirm there.
model_class, id2label_class = predict_params(
    model_path="A-POR-LOS-8000/distilhubert-finetuned-mixed-data",
    dataset_path="data/mixed_data",
    filter_white_noise=True,
    undersample_normal=True
    )
# Model used by predict_stream() (the monitor) plus its id -> label mapping.
model_mon, id2label_mon = predict_params(
    model_path="A-POR-LOS-8000/distilhubert-finetuned-cry-detector",
    dataset_path="data/baby_cry_detection",
    filter_white_noise=False,
    undersample_normal=False
    )

def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal=False):
    """Run one audio file through ``model`` and return its logits.

    Args:
        audiopath: Path of the audio file to evaluate.
        model: Model to run; it is moved to the module-level ``device`` and
            switched to eval mode on every call.
        dataset_path: Forwarded to ``AudioDataset``.
        filter_white_noise: Forwarded to ``AudioDataset``.
        undersample_normal: Forwarded to ``AudioDataset``.

    Returns:
        The ``logits`` attribute of the model output for a batch of one item.
    """
    model.to(device)
    model.eval()

    # The dataset object is created only to reuse its preprocess_audio();
    # an empty mapping is passed as its second argument.
    preprocessor = AudioDataset(dataset_path, {}, filter_white_noise, undersample_normal)
    features = preprocessor.preprocess_audio(audiopath)

    # Add a leading batch dimension of size one before calling the model.
    batch = features.to(device).unsqueeze(0)
    with torch.no_grad():
        return model(input_values=batch).logits

def predict(audio_path_pred):
    """Classify the audio file and return a display label for the result.

    The class with the highest logit from ``model_class`` is selected. Ids
    0-3 are shown with the fixed Spanish labels below; any other id falls
    back to the name stored in ``id2label_class``.
    """
    spanish_labels = dict(enumerate((
        'Cansancio/Incomodidad',
        'Dolor',
        'Hambre',
        'Problemas para respirar',
    )))
    with torch.no_grad():
        scores = call(
            audio_path_pred,
            model=model_class,
            dataset_path="data/mixed_data",
            filter_white_noise=True,
            undersample_normal=False,
        )
        class_id = torch.argmax(scores, dim=-1).item()
        # Looked up unconditionally, so an id missing from id2label_class
        # raises KeyError even when a Spanish label exists for it.
        fallback_label = id2label_class[class_id]
    return spanish_labels.get(class_id, fallback_label)

def predict_stream(audio_path_stream):
    """Decide whether the audio contains crying and, if so, name the cause.

    The monitor model's logits are turned into probabilities with softmax and
    the mean of column 1 is expressed as a percentage.

    Returns:
        "Está llorando por: <label>" (label from ``predict``) when that
        percentage is below 15, otherwise "No está llorando".
    """
    with torch.no_grad():
        scores = call(
            audio_path_stream,
            model=model_mon,
            dataset_path="data/baby_cry_detection",
            filter_white_noise=False,
            undersample_normal=False,
        )
        class_probs = torch.nn.functional.softmax(scores, dim=-1)
        column_one_pct = class_probs[:, 1].mean()*100

        # NOTE(review): the "crying" branch is taken when the column-1
        # probability is BELOW 15%. Whether index 1 means "crying" depends on
        # id2label_mon — confirm the direction of this threshold.
        below_threshold = bool(column_one_pct < 15)
        if not below_threshold:
            return "No está llorando"

        cause = predict(audio_path_stream)
        return f"Está llorando por: {cause}"

def decibelios(audio_path_stream):
    """Return the loudness of an audio file mapped onto a 0-100 style scale.

    The file is loaded with torchaudio, the RMS over every sample (all
    channels together) is converted to decibels, and the -80 dB .. 0 dB range
    is mapped linearly onto 0 .. 100. No clamping is applied, so levels
    outside that range give values below 0 or above 100.
    """
    floor_db, ceiling_db = -80, 0

    samples, _sample_rate = torchaudio.load(audio_path_stream)
    root_mean_square = samples.square().mean().sqrt()

    # The small epsilon keeps log10 finite for an all-zero signal.
    level_db = 20 * torch.log10(root_mean_square + 1e-6).item()

    fraction = (level_db - floor_db) / (ceiling_db - floor_db)
    return fraction * 100

def mostrar_decibelios(audio_path_stream, visual_threshold):
    """Build the status line shown while the monitor is streaming.

    Args:
        audio_path_stream: Path of the audio chunk to measure.
        visual_threshold: Level (same scale as ``decibelios``) above which a
            prediction is being run.

    Returns:
        "Prediciendo..." with the measured level when it is strictly above
        the threshold, otherwise "Esperando..." with the measured level.
    """
    db_level = decibelios(audio_path_stream)
    if db_level > visual_threshold:
        return f"Prediciendo... Decibelios: {db_level:.2f}"
    # Covers the level == threshold case as well; previously that case matched
    # neither branch and the function returned None. "Esperando" is used
    # because predict_stream_decib only predicts when level > threshold.
    return f"Esperando... Decibelios: {db_level:.2f}"

def predict_stream_decib(audio_path_stream, visual_threshold):
    """Run the cry prediction only when the audio is loud enough.

    Args:
        audio_path_stream: Path of the audio chunk to analyse.
        visual_threshold: Level (same scale as ``decibelios``) that must be
            strictly exceeded for a prediction to run.

    Returns:
        The message produced by ``predict_stream`` when the level is above
        the threshold, otherwise an empty string.
    """
    db_level = decibelios(audio_path_stream)
    if db_level <= visual_threshold:
        return ""
    # predict_stream returns a single string. The previous two-name unpacking
    # (`llorando, probabilidad = ...`) raised ValueError on that string.
    return f"{predict_stream(audio_path_stream)}"

def chatbot_config(message, history: list[tuple[str, str]]):
    """Stream the assistant's reply to ``message`` for the chat interface.

    Args:
        message: The new user message.
        history: Earlier (user, assistant) turns; empty entries are skipped.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    system_message = "You are a Chatbot specialized in baby health and care."
    max_tokens = 512
    temperature = 0.5
    top_p = 0.95

    # Rebuild the conversation in the role/content format sent to the client.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed delta may carry no text (content is None); treat it as
        # an empty piece instead of failing on `str += None`. The local name
        # also no longer shadows the module-level `token`.
        piece = chunk.choices[0].delta.content
        response += piece or ""
        yield response

def cambiar_pestaña():
    """Return two Gradio updates: hide the first output, show the second.

    Callers wire this with ``outputs=[current_page, next_page]`` to switch
    between pages.
    """
    hide_current = gr.update(visible=False)
    show_next = gr.update(visible=True)
    return hide_current, show_next

# Application theme: Gradio's Soft theme with green hues, small text and
# spacing, and the Nunito Google font for both regular and monospace text.
my_theme = gr.themes.Soft(
    primary_hue="emerald",
    secondary_hue="green",
    neutral_hue="slate",
    text_size="sm",
    spacing_size="sm",
    font=[gr.themes.GoogleFont('Nunito'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Nunito'), 'ui-monospace', 'Consolas', 'monospace'],
    ).set(
    # Values starting with '*' refer to other theme variables by name.
    body_background_fill='*neutral_50',
    body_text_color='*neutral_600',
    body_text_size='*text_sm',
    embed_radius='*radius_md',
    shadow_drop='*shadow_spread',
    shadow_spread='*button_shadow_active'
    )

# UI layout: four columns act as pages (landing, chatbot, predictor, monitor).
# Only the landing page starts visible; buttons swap visibility through
# cambiar_pestaña, which hides its first output and shows its second.
with gr.Blocks(theme=my_theme) as demo:
    # Landing page: custom CSS, title and project description.
    with gr.Column(visible=True) as inicial:    
        gr.HTML(
            """
            <style>
            @import url('https://fonts.googleapis.com/css2?family=Lobster&display=swap');
            @import url('https://fonts.googleapis.com/css2?family=Roboto&display=swap');
            
            h1 {
                font-family: 'Lobster', cursive;
                font-size: 5em !important;
                text-align: center;
                margin: 0;
            }
            
            .gr-button {
                background-color: #4CAF50 !important; 
                color: white !important; 
                border: none;
                padding: 25px 50px; 
                text-align: center;
                text-decoration: none;
                display: inline-block;
                font-family: 'Lobster', cursive; 
                font-size: 2em !important; 
                margin: 4px 2px;
                cursor: pointer;
                border-radius: 12px;
            }
            
            .gr-button:hover {
                background-color: #45a049; 
            }
            h2 {
                font-family: 'Lobster', cursive;
                font-size: 3em !important;
                text-align: center;
                margin: 0;
            }
            p.slogan, h4, p, h3 {
                font-family: 'Roboto', sans-serif;
                text-align: center;
            }
            </style>
            <h1>Iremia</h1>
            <h4 style='text-align: center; font-size: 1.5em'>El mejor aliado para el bienestar de tu bebé</h4>
            """
        )
        gr.Markdown(
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué es Iremia?</h4>"
            "<p style='text-align: left'>Iremia es un proyecto llevado a cabo por un grupo de estudiantes interesados en el desarrollo de modelos de inteligencia artificial, enfocados específicamente en casos de uso relevantes para ayudar a cuidar a los más pequeños de la casa.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>Nuestra misión</h4>"
            "<p style='text-align: left'>Sabemos que la paternidad puede suponer un gran desafío. Nuestra misión es brindarles a todos los padres unas herramientas de última tecnología que los ayuden a navegar esos primeros meses de vida tan cruciales en el desarrollo de sus pequeños.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué ofrece Iremia?</h4>"
            "<p style='text-align: left'>Chatbot: Pregunta a nuestro asistente que te ayudará con cualquier duda que tengas sobre el cuidado de tu bebé.</p>"
            "<p style='text-align: left'>Analizador: Con nuestro modelo de inteligencia artificial somos capaces de predecir por qué tu hijo de menos de 2 años está llorando.</p>"
            "<p style='text-align: left'>Monitor: Nuestro monitor no es como otros que hay en el mercado, ya que es capaz de reconocer si un sonido es un llanto del bebé o no; y si está llorando, predice automáticamente la causa. Dándote la tranquilidad de saber siempre qué pasa con tu pequeño, ahorrándote tiempo y horas de sueño.</p>"
        )
        boton_inicial = gr.Button("¡Prueba nuestros modelos!")
    # Chatbot page: chat interface backed by chatbot_config, plus buttons
    # leading to the predictor and monitor pages.
    with gr.Column(visible=False) as chatbot:    
            gr.Markdown("<h2>Asistente</h2>")
            gr.Markdown("<h4 style='text-align: center; font-size: 1.5em'>Pregunta a nuestro asistente cualquier duda que tengas sobre el cuidado de tu bebé</h4>")
            gr.ChatInterface(
                chatbot_config,
                theme=my_theme,
                retry_btn=None,
                undo_btn=None,
                clear_btn="Limpiar 🗑️",
                autofocus=True,
                fill_height=True,
                )
            with gr.Row():
                with gr.Column():
                    boton_predictor = gr.Button("Predictor")
                with gr.Column():
                    boton_monitor = gr.Button("Monitor")
    # Predictor page: classify a single recording with predict().
    with gr.Column(visible=False) as pag_predictor:
        gr.Markdown("<h2>Predictor</h2>")
        gr.Markdown("<h4 style='text-align: center; font-size: 1.5em'>Descubre por qué tu bebé está llorando</h4>")
        audio_input = gr.Audio(
            min_length=1.0,
            format="wav",
            label="Baby recorder",
            type="filepath",
            )
        gr.Button("¿Por qué llora?").click(
            predict,
            inputs=audio_input,
            outputs=gr.Textbox(label="Tu bebé llora por:")
            )
        gr.Button("Volver").click(cambiar_pestaña, outputs=[pag_predictor, chatbot])
    # Monitor page: streaming audio with a loudness threshold slider.
    with gr.Column(visible=False) as pag_monitor:
        gr.Markdown("<h2>Monitor</h2>")
        gr.Markdown("<h4 style='text-align: center; font-size: 1.5em'>Detecta en tiempo real si tu bebé está llorando y por qué</h4>")
        audio_stream = gr.Audio(
                format="wav",
                label="Baby recorder",
                type="filepath",
                streaming=True
            )
        threshold_db = gr.Slider(
            minimum=0,
            maximum=100,
            step=1,
            value=30,
            label="Umbral de ruido para activar la predicción:"
            )
        # Two stream listeners share the same audio component: this one
        # reports the measured level and whether a prediction is running...
        audio_stream.stream(
            mostrar_decibelios,
            inputs=[audio_stream, threshold_db],
            outputs=gr.Textbox(value="Esperando...", label="Estado")
            )
        # ...and this one shows the prediction text (empty while the level is
        # not above the threshold). Each listener calls decibelios() itself.
        audio_stream.stream(
            predict_stream_decib,
            inputs=[audio_stream, threshold_db],
            outputs=gr.Textbox(value="", label="Tu bebé:")
        )
        gr.Button("Volver").click(cambiar_pestaña, outputs=[pag_monitor, chatbot])
    # Page navigation: each click hides the first listed column and shows the second.
    boton_inicial.click(cambiar_pestaña, outputs=[inicial, chatbot])
    boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])
    boton_monitor.click(cambiar_pestaña, outputs=[chatbot, pag_monitor])
# Start the app; share=True asks Gradio to create a public share link.
demo.launch(share=True)