Spaces:
Sleeping
Sleeping
File size: 10,467 Bytes
1e6dc54 5cf41d0 ace06e3 abdf62b 763091b 6d1143c 2ca1b49 166aa6c 53f6532 763091b 53f6532 763091b 53f6532 ace06e3 763091b abdf62b 53f6532 166aa6c 1e6dc54 abdf62b 166aa6c 1e6dc54 166aa6c abdf62b 763091b 166aa6c 763091b 166aa6c 53f6532 abdf62b 166aa6c 8830613 166aa6c 8830613 166aa6c 53f6532 166aa6c 8830613 abdf62b 166aa6c abdf62b 763091b abdf62b ace06e3 ebf42ac ace06e3 abdf62b ace06e3 1e6dc54 763091b ace06e3 763091b abdf62b 763091b abdf62b 166aa6c abdf62b 763091b abdf62b 166aa6c 763091b 166aa6c abdf62b 166aa6c abdf62b 763091b 166aa6c abdf62b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
import os
import torch
import gradio as gr
from huggingface_hub import InferenceClient
from model import predict_params, AudioDataset
# TODO: Stop it from reporting that there is no 1s_normal when predicting
# Access token handed to the inference client; None when HF_TOKEN is not set.
token = os.environ.get("HF_TOKEN")

# Remote chat model queried by chatbot_config.
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)

# Run local inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model used by predict() to choose the reason for crying, with its id -> label table.
model_class, id2label_class = predict_params(
    model_path="distilhubert-finetuned-mixed-data",
    dataset_path="data/mixed_data",
    filter_white_noise=True,
    undersample_normal=True,
)

# Model used by predict_stream() and decibelios() on the monitor stream.
model_mon, id2label_mon = predict_params(
    model_path="distilhubert-finetuned-cry-detector",
    dataset_path="data/baby_cry_detection",
    filter_white_noise=False,
    undersample_normal=False,
)
def call(audiopath, model, dataset_path, filter_white_noise, undersample_normal=False):
    """Run ``model`` on a single audio file and return its logits.

    Args:
        audiopath: Path of the audio file; passed to
            ``AudioDataset.preprocess_audio``.
        model: Model to run. It is moved to the module-level ``device`` and
            put in evaluation mode before the forward pass.
        dataset_path: Forwarded to the ``AudioDataset`` constructor.
        filter_white_noise: Forwarded to the ``AudioDataset`` constructor.
        undersample_normal: Forwarded to the ``AudioDataset`` constructor.

    Returns:
        The ``logits`` attribute of the model output for a batch of one item.
    """
    model.to(device)
    model.eval()

    # A fresh dataset object is built on every call only to reach its
    # preprocessing routine; the label mapping argument is left empty.
    dataset = AudioDataset(dataset_path, {}, filter_white_noise, undersample_normal)
    waveform = dataset.preprocess_audio(audiopath)

    # Add the leading batch dimension expected by the forward pass.
    batch = waveform.to(device).unsqueeze(0)
    with torch.no_grad():
        return model(input_values=batch).logits
def predict(audio_path_pred):
    """Classify one recording and return the label shown to the user.

    Args:
        audio_path_pred: Path of the audio file to classify.

    Returns:
        The Spanish display label for the winning class id, or the entry of
        ``id2label_class`` for that id when no display label is defined.
    """
    display_labels = {
        0: 'Cansancio/Incomodidad',
        1: 'Dolor',
        2: 'Hambre',
        3: 'Problemas para respirar',
    }
    with torch.no_grad():
        logits = call(
            audio_path_pred,
            model=model_class,
            dataset_path="data/mixed_data",
            filter_white_noise=True,
            undersample_normal=False,
        )
        class_id = logits.argmax(dim=-1).item()

    # Looked up unconditionally so an id unknown to the model table still fails loudly.
    model_label = id2label_class[class_id]
    return display_labels.get(class_id, model_label)
def predict_stream(audio_path_stream):
    """Decide whether the streamed audio is a cry and, if so, why.

    Args:
        audio_path_stream: Path of the streamed audio chunk.

    Returns:
        A pair of strings: a headline and a detail text that includes the
        percentage computed from column 1 of the softmax output.
    """
    with torch.no_grad():
        logits = call(
            audio_path_stream,
            model=model_mon,
            dataset_path="data/baby_cry_detection",
            filter_white_noise=False,
            undersample_normal=False,
        )
        class_probs = logits.softmax(dim=-1)
        avg_crying_probability = class_probs[:, 1].mean() * 100

    # NOTE(review): the value is named "crying probability", yet the crying
    # branch is taken when it is BELOW 15 — confirm which class index 1
    # stands for in id2label_mon before touching this comparison.
    if not avg_crying_probability < 15:
        return "No está llorando.", f"Probabilidad: {avg_crying_probability:.1f}%"

    label_class = predict(audio_path_stream)
    return "Está llorando por:", f"{label_class}. Probabilidad: {avg_crying_probability:.1f}%"
def decibelios(audio_path_stream):
    """Return a decibel-style level for the streamed audio chunk.

    The level is ``20 * log10(rms + 1e-6)``, where ``rms`` is the root mean
    square of the logits produced by the monitor model.

    NOTE(review): the RMS is taken over model logits, not over the audio
    samples themselves — confirm that this is the intended measure.

    Args:
        audio_path_stream: Path of the streamed audio chunk.

    Returns:
        The computed level as a Python float.
    """
    with torch.no_grad():
        logits = call(
            audio_path_stream,
            model=model_mon,
            dataset_path="data/baby_cry_detection",
            filter_white_noise=False,
            undersample_normal=False,
        )
        rms = logits.square().mean().sqrt()
        # The small offset keeps log10 finite when the RMS is zero.
        return 20 * torch.log10(rms + 1e-6).item()
def mostrar_decibelios(audio_path_stream, visual_threshold):
    """Return the status text for the monitor stream.

    Args:
        audio_path_stream: Path of the streamed audio chunk; forwarded to
            ``decibelios``.
        visual_threshold: Value the measured level is compared against.

    Returns:
        A "Prediciendo" message with the measured level when the level is
        below the threshold, otherwise the "no noise detected" message.
    """
    db_level = decibelios(audio_path_stream)
    if db_level < visual_threshold:
        return f"Prediciendo. Decibelios: {db_level:.2f}"
    # A level exactly equal to the threshold used to match neither branch and
    # return None; it now follows the same split as predict_stream_decib,
    # which makes no prediction unless the level is strictly below.
    return "No detectamos ruido..."
def predict_stream_decib(audio_path_stream, visual_threshold):
    """Run the cry prediction only when the level is below the threshold.

    Args:
        audio_path_stream: Path of the streamed audio chunk.
        visual_threshold: Value the level from ``decibelios`` is compared
            against.

    Returns:
        The two strings from ``predict_stream`` joined by a space, or an
        empty string when the level is not below the threshold.
    """
    level = decibelios(audio_path_stream)
    if not level < visual_threshold:
        return ""

    headline, detail = predict_stream(audio_path_stream)
    return headline + " " + detail
def chatbot_config(message, history: list[tuple[str, str]]):
    """Stream the assistant's answer to ``message``.

    The conversation sent to the model is a fixed system prompt, the
    non-empty user and assistant turns from ``history``, and the new user
    message.

    Args:
        message: Latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries are skipped.

    Yields:
        The answer accumulated so far, once per streamed chunk.
    """
    system_message = "You are a Chatbot specialized in baby health and care."
    max_tokens = 512
    temperature = 0.5
    top_p = 0.95

    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    for chunk in stream:
        # Named "piece" so it does not shadow the module-level ``token``.
        piece = chunk.choices[0].delta.content
        # A streamed delta may carry no text; appending None would raise
        # TypeError and abort the answer.
        if piece is not None:
            response += piece
        yield response
def cambiar_pestaña():
    """Return two visibility updates: hide the first output, show the second.

    The updates are applied, in order, to the components listed in the
    ``outputs`` of the event this function is attached to.
    """
    hide_current = gr.update(visible=False)
    show_next = gr.update(visible=True)
    return hide_current, show_next
# Base theme: the Soft preset with the app's colours, sizes and fonts.
my_theme = gr.themes.Soft(
    primary_hue="emerald",
    secondary_hue="green",
    neutral_hue="slate",
    text_size="sm",
    spacing_size="sm",
    font=[gr.themes.GoogleFont('Nunito'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
    font_mono=[gr.themes.GoogleFont('Nunito'), 'ui-monospace', 'Consolas', 'monospace'],
)

# Per-variable overrides applied on top of the preset; the name is rebound to
# whatever ``set`` returns, exactly as the chained call did.
my_theme = my_theme.set(
    body_background_fill='*neutral_50',
    body_text_color='*neutral_600',
    body_text_size='*text_sm',
    embed_radius='*radius_md',
    shadow_drop='*shadow_spread',
    shadow_spread='*button_shadow_active',
)
# User interface: four columns act as pages (landing, assistant, analyser,
# monitor); only one is visible at a time and cambiar_pestaña switches them.
with gr.Blocks(theme=my_theme) as demo:

    # --- Landing page -------------------------------------------------------
    with gr.Column(visible=True) as inicial:
        # Page-wide CSS plus the title; the string content is kept flush-left
        # so that its bytes stay exactly as they were.
        gr.HTML(
            """
<style>
@import url('https://fonts.googleapis.com/css2?family=Lobster&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Roboto&display=swap');
h1 {
font-family: 'Lobster', cursive;
font-size: 5em !important;
text-align: center;
margin: 0;
}
.gr-button {
background-color: #4CAF50 !important;
color: white !important;
border: none;
padding: 25px 50px; /* Increase the padding for bigger buttons */
text-align: center;
text-decoration: none;
display: inline-block;
font-family: 'Lobster', cursive; /* Apply the Lobster font */
font-size: 2em !important; /* Increase the button text size */
margin: 4px 2px;
cursor: pointer;
border-radius: 12px;
}
.gr-button:hover {
background-color: #45a049;
}
h2 {
font-family: 'Lobster', cursive;
font-size: 3em !important;
text-align: center;
margin: 0;
}
p.slogan, h4, p, h3 {
font-family: 'Roboto', sans-serif;
text-align: center;
}
</style>
<h1>Iremia</h1>
<h4 style='text-align: center; font-size: 1.5em'>Tu aliado para el bienestar de tu bebé</h4>
"""
        )
        # Project description, written as adjacent literals that form one string.
        gr.Markdown(
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué es Iremia?</h4>"
            "<p style='text-align: left'>Iremia es un proyecto llevado a cabo por un grupo de estudiantes interesados en el desarrollo de modelos de inteligencia artificial, enfocados específicamente en casos de uso relevantes para ayudar a cuidar a los más pequeños de la casa.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>Nuestra misión</h4>"
            "<p style='text-align: left'>Sabemos que la paternidad puede suponer un gran desafío. Nuestra misión es brindarles a todos los padres unas herramientas de última tecnología que los ayuden a navegar esos primeros meses de vida tan cruciales en el desarrollo de sus pequeños.</p>"
            "<h4 style='text-align: left; font-size: 1.5em;'>¿Qué ofrece Iremia?</h4>"
            "<p style='text-align: left'>Chatbot: Pregunta a nuestro asistente que te ayudará con cualquier duda que tengas sobre el cuidado de tu bebé.</p>"
            "<p style='text-align: left'>Analizador: Con nuestro modelo de inteligencia artificial somos capaces de predecir por qué tu hijo de menos de 2 años está llorando.</p>"
            "<p style='text-align: left'>Monitor: Nuestro monitor no es como otros que hay en el mercado, ya que es capaz de reconocer si un sonido es un llanto del bebé o no; y si está llorando, predice automáticamente la causa. Dándote la tranquilidad de saber siempre qué pasa con tu pequeño, ahorrándote tiempo y horas de sueño.</p>"
        )
        boton_inicial = gr.Button("Comenzar")

    # --- Assistant page -----------------------------------------------------
    with gr.Column(visible=False) as chatbot:
        gr.Markdown("<h2>Asistente</h2>")
        gr.ChatInterface(
            chatbot_config,
            theme=my_theme,
            retry_btn=None,
            undo_btn=None,
            clear_btn="Limpiar 🗑️",
            autofocus=True,
            fill_height=True,
        )
        # Navigation buttons to the other two pages, side by side.
        with gr.Row():
            with gr.Column():
                boton_predictor = gr.Button("Analizador")
            with gr.Column():
                boton_monitor = gr.Button("Monitor")

    # --- Analyser page ------------------------------------------------------
    with gr.Column(visible=False) as pag_predictor:
        gr.Markdown("<h2>Analizador</h2>")
        audio_input = gr.Audio(
            min_length=1.0,
            format="wav",
            label="Baby recorder",
            type="filepath",
        )
        # Components are created in the same order as before (button, then
        # result box) so the page layout is unchanged.
        boton_analizar = gr.Button("¿Por qué llora?")
        caja_resultado = gr.Textbox(label="Tu bebé llora por:")
        boton_analizar.click(predict, inputs=audio_input, outputs=caja_resultado)

        boton_volver_predictor = gr.Button("Volver")
        boton_volver_predictor.click(cambiar_pestaña, outputs=[pag_predictor, chatbot])

    # --- Monitor page -------------------------------------------------------
    with gr.Column(visible=False) as pag_monitor:
        gr.Markdown("<h2>Monitor</h2>")
        audio_stream = gr.Audio(
            format="wav",
            label="Baby recorder",
            type="filepath",
            streaming=True,
        )
        threshold_db = gr.Slider(
            minimum=0,
            maximum=100,
            step=1,
            value=30,
            label="Decibelios para activar la predicción:",
        )
        entradas_monitor = [audio_stream, threshold_db]

        # First stream handler: status line with the measured level.
        caja_estado = gr.Textbox(value="Esperando...", label="Estado")
        audio_stream.stream(
            mostrar_decibelios,
            inputs=entradas_monitor,
            outputs=caja_estado,
        )

        # Second stream handler: the cry prediction itself.
        caja_bebe = gr.Textbox(value="", label="Tu bebé:")
        audio_stream.stream(
            predict_stream_decib,
            inputs=entradas_monitor,
            outputs=caja_bebe,
        )

        boton_volver_monitor = gr.Button("Volver")
        boton_volver_monitor.click(cambiar_pestaña, outputs=[pag_monitor, chatbot])

    # --- Page navigation: hide the first listed column, show the second -----
    boton_inicial.click(cambiar_pestaña, outputs=[inicial, chatbot])
    boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])
    boton_monitor.click(cambiar_pestaña, outputs=[chatbot, pag_monitor])

# Start the app and request a public share link.
demo.launch(share=True)
|