Marcos12886 committed on
Commit
763091b
1 Parent(s): 1565b0a

EVERYTHING WORKING. Same as GitHub

Files changed (3)
  1. README.md +31 -13
  2. app.py +107 -32
  3. model.py +19 -28
README.md CHANGED
@@ -1,13 +1,31 @@
----
-title: CHATBOT
-emoji: 🔥
-colorFrom: pink
-colorTo: pink
-sdk: gradio
-sdk_version: 4.42.0
-app_file: app.py
-pinned: false
-license: apache-2.0
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+## Installation
+Installation and usage assume a reasonably capable NVIDIA GPU. If you do not have an NVIDIA GPU, run on the GPUs Colab provides.
+
+Required installs for local use:
+- pip install transformers[torch] gradio tensorboardX scikit-learn
+- pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+
+#### GitHub
+The .gitignore file lists the folders that should not be pushed to GitHub.
+
+## Structure
+Three features:
+- Baby monitor: detect whether your baby is crying and why
+- Cry classifier: find out why your baby is crying
+- Chatbot: talk to Llama 3 8B about your concerns regarding your baby
+
+File flow:
+1. Build the model architectures and train them: [model.py](model.py)
+2. Chatbot that records audio and connects to the LLM: [app.py](app.py)
+
+One model ([model.py](model.py)) trained on different data:
+- Monitoring model: --n monitor
+- Cry-classifier model: --n class
+
+Chatbot: [app.py](app.py)
+
+### Datasets used
+- https://data.mendeley.com/datasets/hbppd883sd/1
+- https://zenodo.org/records/2535878
+- https://paperswithcode.com/dataset/esc50
+- https://osf.io/usr8d
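
The `--n` switch in the README above selects which data the single model architecture in model.py is trained on. model.py's argument handling is not part of this diff, so the following is only a sketch of how such a flag could dispatch the two dataset paths that app.py uses; everything except the paths is illustrative:

```python
# Hypothetical sketch: dispatch the training data from the --n flag.
# The dataset paths match the ones app.py passes to predict_params;
# the parser itself is assumed, not taken from model.py.
import argparse

DATASETS = {
    "monitor": "data/baby_cry_detection",  # cry-vs-no-cry monitor data
    "class": "data/mixed_data",            # cry-cause classifier data
}

parser = argparse.ArgumentParser(description="Train one model on different data")
parser.add_argument("--n", choices=DATASETS, required=True,
                    help="variant to train: monitor or class")
args = parser.parse_args()
print(f"Training the '{args.n}' variant on {DATASETS[args.n]}")
```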
app.py CHANGED
@@ -3,25 +3,24 @@ import torch
 import gradio as gr
 from huggingface_hub import InferenceClient
 from model import predict_params, AudioDataset
-from interfaz import estilo, my_theme
-
+# TODO: stop it from reporting that 1s_normal is missing when predicting
 token = os.getenv("HF_TOKEN")
 client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_class, id2label_class = predict_params(
-    model_path="A-POR-LOS-8000/distilhubert-finetuned-mixed-data",
+    model_path="distilhubert-finetuned-mixed-data",
     dataset_path="data/mixed_data",
     filter_white_noise=True,
     undersample_normal=True
 )
 model_mon, id2label_mon = predict_params(
-    model_path="A-POR-LOS-8000/distilhubert-finetuned-cry-detector",
+    model_path="distilhubert-finetuned-cry-detector",
     dataset_path="data/baby_cry_detection",
     filter_white_noise=False,
     undersample_normal=False
 )
 
-def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal):
+def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal=False):
     model.to(device)
     model.eval()
     audio_dataset = AudioDataset(dataset_path, {}, filter_white_noise, undersample_normal)
@@ -34,10 +33,10 @@ def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal)
 
 def predict(audio_path_pred):
     with torch.no_grad():
-        logits = call(audio_path_pred, model=model_class, dataset_path="data/mixed_data", filter_white_noise=True, undersample_normal=True)
+        logits = call(audio_path_pred, model=model_class, dataset_path="data/mixed_data", filter_white_noise=True, undersample_normal=False)
         predicted_class_ids_class = torch.argmax(logits, dim=-1).item()
         label_class = id2label_class[predicted_class_ids_class]
-        label_mapping = {0: 'Dolor', 1: 'Cansancio/Incomodidad', 2: 'Hambre', 3: 'Problemas para respirar'}
+        label_mapping = {0: 'Cansancio/Incomodidad', 1: 'Dolor', 2: 'Hambre', 3: 'Problemas para respirar'}
         label_class = label_mapping.get(predicted_class_ids_class, label_class)
     return label_class
 
@@ -49,9 +48,9 @@ def predict_stream(audio_path_stream):
     avg_crying_probability = crying_probabilities.mean()*100
     if avg_crying_probability < 15:
         label_class = predict(audio_path_stream)
-        return "Está llorando por:", f"{label_class}. Probabilidad: {avg_crying_probability:.1f}%"
+        return f"Está llorando por: {label_class}"
     else:
-        return "No está llorando.", f"Probabilidad: {avg_crying_probability:.1f}%"
+        return "No está llorando."
 
 def decibelios(audio_path_stream):
     with torch.no_grad():
@@ -70,15 +69,15 @@ def mostrar_decibelios(audio_path_stream, visual_threshold):
 def predict_stream_decib(audio_path_stream, visual_threshold):
     db_level = decibelios(audio_path_stream)
     if db_level < visual_threshold:
-        llorando, probabilidad = predict_stream(audio_path_stream)
-        return f"{llorando} {probabilidad}"
+        llorando = predict_stream(audio_path_stream)
+        return f"{llorando}"
     else:
         return ""
 
 def chatbot_config(message, history: list[tuple[str, str]]):
     system_message = "You are a Chatbot specialized in baby health and care."
     max_tokens = 512
-    temperature = 0.7
+    temperature = 0.5
     top_p = 0.95
     messages = [{"role": "system", "content": system_message}]
     for val in history:
@@ -96,25 +95,100 @@ def chatbot_config(message, history: list[tuple[str, str]]):
 def cambiar_pestaña():
     return gr.update(visible=False), gr.update(visible=True)
 
+my_theme = gr.themes.Soft(
+    primary_hue="emerald",
+    secondary_hue="green",
+    neutral_hue="slate",
+    text_size="sm",
+    spacing_size="sm",
+    font=[gr.themes.GoogleFont('Nunito'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
+    font_mono=[gr.themes.GoogleFont('Nunito'), 'ui-monospace', 'Consolas', 'monospace'],
+).set(
+    body_background_fill='*neutral_50',
+    body_text_color='*neutral_600',
+    body_text_size='*text_sm',
+    embed_radius='*radius_md',
+    shadow_drop='*shadow_spread',
+    shadow_spread='*button_shadow_active'
+)
+
 with gr.Blocks(theme=my_theme) as demo:
-    estilo()
-    with gr.Column(visible=True) as chatbot:
-        gr.Markdown("<h2>Asistente</h2>")
-        gr.ChatInterface(
-            chatbot_config # TODO: review arguments
-        )
-        gr.Markdown("Este chatbot no sustituye a un profesional de la salud. Ante cualquier preocupación o duda, consulta con tu pediatra.")
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("<h2>Predictor</h2>")
-                boton_predictor = gr.Button("Prueba el predictor")
-                gr.Markdown("<p>Descubre por qué llora tu bebé</p>")
-            with gr.Column():
-                gr.Markdown("<h2>Monitor</h2>")
-                boton_monitor = gr.Button("Prueba el monitor")
-                gr.Markdown("<p>Monitoriza si tu hijo está llorando y por qué, sin levantarte del sofá</p>")
+    with gr.Column(visible=True) as inicial:
+        gr.HTML(
+            """
+            <style>
+            @import url('https://fonts.googleapis.com/css2?family=Lobster&display=swap');
+            @import url('https://fonts.googleapis.com/css2?family=Roboto&display=swap');
+
+            h1 {
+                font-family: 'Lobster', cursive;
+                font-size: 5em !important;
+                text-align: center;
+                margin: 0;
+            }
+
+            .gr-button {
+                background-color: #4CAF50 !important;
+                color: white !important;
+                border: none;
+                padding: 25px 50px; /* increase the padding for bigger buttons */
+                text-align: center;
+                text-decoration: none;
+                display: inline-block;
+                font-family: 'Lobster', cursive; /* apply the Lobster font */
+                font-size: 2em !important; /* increase the button text size */
+                margin: 4px 2px;
+                cursor: pointer;
+                border-radius: 12px;
+            }
+
+            .gr-button:hover {
+                background-color: #45a049;
+            }
+            h2 {
+                font-family: 'Lobster', cursive;
+                font-size: 3em !important;
+                text-align: center;
+                margin: 0;
+            }
+            p.slogan, h4, p, h3 {
+                font-family: 'Roboto', sans-serif;
+                text-align: center;
+            }
+            </style>
+            <h1>Iremia</h1>
+            <h4 style='text-align: center; font-size: 1.5em'>Tu aliado para el bienestar de tu bebé</h4>
+            """
+        )
+        gr.Markdown(
+            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué es Iremia?</h4>"
+            "<p style='text-align: left'>Iremia es un proyecto llevado a cabo por un grupo de estudiantes interesados en el desarrollo de modelos de inteligencia artificial, enfocados específicamente en casos de uso relevantes para ayudar a cuidar a los más pequeños de la casa.</p>"
+            "<h4 style='text-align: left; font-size: 1.5em;'>Nuestra misión</h4>"
+            "<p style='text-align: left'>Sabemos que la paternidad puede suponer un gran desafío. Nuestra misión es brindarles a todos los padres unas herramientas de última tecnología que los ayuden a navegar esos primeros meses de vida tan cruciales en el desarrollo de sus pequeños.</p>"
+            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué ofrece Iremia?</h4>"
+            "<p style='text-align: left'>Chatbot: Pregunta a nuestro asistente que te ayudará con cualquier duda que tengas sobre el cuidado de tu bebé.</p>"
+            "<p style='text-align: left'>Analizador: Con nuestro modelo de inteligencia artificial somos capaces de predecir por qué tu hijo de menos de 2 años está llorando.</p>"
+            "<p style='text-align: left'>Monitor: Nuestro monitor no es como otros que hay en el mercado, ya que es capaz de reconocer si un sonido es un llanto del bebé o no; y si está llorando, predice automáticamente la causa. Dándote la tranquilidad de saber siempre qué pasa con tu pequeño, ahorrándote tiempo y horas de sueño.</p>"
+        )
+        boton_inicial = gr.Button("Comenzar")
+    with gr.Column(visible=False) as chatbot:
+        gr.Markdown("<h2>Asistente</h2>")
+        gr.ChatInterface(
+            chatbot_config,
+            theme=my_theme,
+            retry_btn=None,
+            undo_btn=None,
+            clear_btn="Limpiar 🗑️",
+            autofocus=True,
+            fill_height=True,
+        )
+        with gr.Row():
+            with gr.Column():
+                boton_predictor = gr.Button("Analizador")
+            with gr.Column():
+                boton_monitor = gr.Button("Monitor")
     with gr.Column(visible=False) as pag_predictor:
-        gr.Markdown("<h2>Predictor</h2>")
+        gr.Markdown("<h2>Analizador</h2>")
         audio_input = gr.Audio(
             min_length=1.0,
             format="wav",
@@ -126,7 +200,7 @@ with gr.Blocks(theme=my_theme) as demo:
             inputs=audio_input,
             outputs=gr.Textbox(label="Tu bebé llora por:")
         )
-        gr.Button("Volver a la pantalla inicial").click(cambiar_pestaña, outputs=[pag_predictor, chatbot])
+        gr.Button("Volver").click(cambiar_pestaña, outputs=[pag_predictor, chatbot])
     with gr.Column(visible=False) as pag_monitor:
         gr.Markdown("<h2>Monitor</h2>")
         audio_stream = gr.Audio(
@@ -140,7 +214,7 @@ with gr.Blocks(theme=my_theme) as demo:
             maximum=100,
             step=1,
             value=30,
-            label="Umbral de dB para activar la predicción"
+            label="Decibelios para activar la predicción:"
         )
         audio_stream.stream(
             mostrar_decibelios,
@@ -152,7 +226,8 @@ with gr.Blocks(theme=my_theme) as demo:
             inputs=[audio_stream, threshold_db],
             outputs=gr.Textbox(value="", label="Tu bebé:")
         )
-        gr.Button("Volver a la pantalla inicial").click(cambiar_pestaña, outputs=[pag_monitor, chatbot])
+        gr.Button("Volver").click(cambiar_pestaña, outputs=[pag_monitor, chatbot])
+    boton_inicial.click(cambiar_pestaña, outputs=[inicial, chatbot])
     boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])
     boton_monitor.click(cambiar_pestaña, outputs=[chatbot, pag_monitor])
 demo.launch(share=True)
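
A note on the monitor flow in this app.py diff: predict_stream_decib only runs the cry models while decibelios reports a level below the slider threshold, and predict_stream then treats an average crying probability under 15% as crying. The body of decibelios is outside the hunks shown, so the sketch below assumes a simple RMS-based estimate; gate_and_predict and run_cry_pipeline are stand-ins for predict_stream_decib and the model chain, not the app's real functions:

```python
# Minimal sketch of the monitor's gating logic; compute_db is an assumed
# RMS-based decibel estimate (decibelios' real body is not in this diff).
import torch

def compute_db(waveform: torch.Tensor) -> float:
    rms = torch.sqrt(torch.mean(waveform ** 2))
    return (20 * torch.log10(rms + 1e-9)).item()  # epsilon avoids log(0)

def run_cry_pipeline(waveform: torch.Tensor) -> str:
    return "No está llorando."  # stand-in for the model_mon -> model_class chain

def gate_and_predict(waveform: torch.Tensor, threshold_db: float = 30.0) -> str:
    db_level = compute_db(waveform)
    if db_level < threshold_db:            # below threshold: run the predictor
        return run_cry_pipeline(waveform)
    return ""                              # above threshold: show nothing

print(gate_and_predict(0.001 * torch.randn(16000)))
```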
model.py CHANGED
@@ -30,7 +30,7 @@ class AudioDataset(Dataset):
         self.dataset_path = dataset_path
         self.label2id = label2id
         self.file_paths = []
-        self.filter_white_noise = filter_white_noise # Changed this line
+        self.filter_white_noise = filter_white_noise
         self.labels = []
         for label_dir, label_id in self.label2id.items():
             label_path = os.path.join(self.dataset_path, label_dir)
@@ -39,33 +39,25 @@ class AudioDataset(Dataset):
                 audio_path = os.path.join(label_path, file_name)
                 self.file_paths.append(audio_path)
                 self.labels.append(label_id)
-        if undersample_normal:
+        if undersample_normal and self.label2id:
             self.undersample_normal_class()
 
     def undersample_normal_class(self):
         normal_label = self.label2id.get('1s_normal')
-        if normal_label is None:
-            print("Warning: No '1s_normal' class found. Skipping undersampling.")
-            return
         label_counts = Counter(self.labels)
         other_counts = [count for label, count in label_counts.items() if label != normal_label]
-        if not other_counts:
-            print("Warning: No non-normal classes found. Skipping undersampling.")
-            return
-        target_count = max(other_counts)
-        normal_indices = [i for i, label in enumerate(self.labels) if label == normal_label]
-        if len(normal_indices) <= target_count:
-            print("Warning: Normal class count is already <= other class counts. Skipping undersampling.")
-            return
-        keep_indices = random.sample(normal_indices, target_count)
-        new_file_paths = []
-        new_labels = []
-        for i, (path, label) in enumerate(zip(self.file_paths, self.labels)):
-            if label != normal_label or i in keep_indices:
-                new_file_paths.append(path)
-                new_labels.append(label)
-        self.file_paths = new_file_paths
-        self.labels = new_labels
+        if other_counts: # ensure there are other counts before taking max
+            target_count = max(other_counts)
+            normal_indices = [i for i, label in enumerate(self.labels) if label == normal_label]
+            keep_indices = random.sample(normal_indices, target_count)
+            new_file_paths = []
+            new_labels = []
+            for i, (path, label) in enumerate(zip(self.file_paths, self.labels)):
+                if label != normal_label or i in keep_indices:
+                    new_file_paths.append(path)
+                    new_labels.append(label)
+            self.file_paths = new_file_paths
+            self.labels = new_labels
 
     def __len__(self):
         return len(self.file_paths)
@@ -107,12 +99,11 @@ def is_white_noise(audio):
     std = torch.std(audio)
     return torch.abs(mean) < 0.001 and std < 0.01
 
-def seed_everything():
+def seed_everything(): # TODO: check whether anything else is needed
     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':16384:8'
+    # torch.backends.cudnn.deterministic = True # for reproducibility
+    # torch.backends.cudnn.benchmark = False # for reproducibility
 
 def build_label_mappings(dataset_path):
     label2id = {}
@@ -165,10 +156,10 @@ def load_model(model_path, id2label, num_labels):
         finetuning_task="audio-classification"
     )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = HubertForSequenceClassification.from_pretrained( # TODO: review parameters; possible optimizations
+    model = HubertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=model_path,
        config=config,
-        torch_dtype=torch.float32,
+        torch_dtype=torch.float32, # TODO: check whether float32 is needed or it can be swapped for float16
    )
    model.to(device)
    return model
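
The refactored undersample_normal_class above trims the '1s_normal' class down to the size of the largest remaining class, now guarded by a single `if other_counts:` instead of early returns. A standalone toy run of the same balancing logic (the label names other than '1s_normal' are invented for the example):

```python
# Toy run of the undersampling in this commit: keep only as many
# '1s_normal' samples as the largest non-normal class has.
import random
from collections import Counter

random.seed(0)
label2id = {"1s_normal": 0, "1s_hambre": 1, "1s_dolor": 2}  # example mapping
labels = [0] * 10 + [1] * 3 + [2] * 4
file_paths = [f"clip_{i}.wav" for i in range(len(labels))]

normal_label = label2id.get("1s_normal")
other_counts = [c for lbl, c in Counter(labels).items() if lbl != normal_label]
if other_counts:  # same guard the diff adds before taking max()
    target_count = max(other_counts)  # 4, the largest non-normal class
    normal_indices = [i for i, lbl in enumerate(labels) if lbl == normal_label]
    keep = set(random.sample(normal_indices, target_count))
    kept = [(p, lbl) for i, (p, lbl) in enumerate(zip(file_paths, labels))
            if lbl != normal_label or i in keep]
    print(Counter(lbl for _, lbl in kept))  # -> Counter({0: 4, 2: 4, 1: 3})
```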