diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..371668e1ae533bf2675a1e94e6abb171baee9918 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Elena Ryumina + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 77d7ef7e9c6a09f3a306e574c08e61e6714dc479..d8abc9e248c5c05ceb73da55a35f4aebeab07c1f 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,40 @@ --- -title: MMESA ZeroGPU -emoji: 📈 -colorFrom: red -colorTo: yellow +title: Multi-Modal for Emotion and Sentiment Analysis (MMESA) +emoji: 😀😲😐😥🥴😱😡 +colorFrom: blue +colorTo: pink sdk: gradio -sdk_version: 4.39.0 +sdk_version: 4.24.0 app_file: app.py pinned: false -license: apache-2.0 +license: mit +short_description: A tool to detect Stress, Anxiety and Depression --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +## Technologies + +This project utilizes various Python scripts for different aspects of analysis and recognition: + +- `blink_detection.py`: Detects and analyzes blinking patterns. +- `body_movement_analysis.py`: Analyzes body movements. +- `emotion_analysis.py`: Analyzes emotional states. +- `face_expressions.py`: Recognizes facial expressions. +- `FACS_analysis_sad.py`: Performs Facial Action Coding System analysis for sadness. +- `gaze_estimation.py`: Estimates gaze direction. +- `head_posture_detection.py`: Detects head posture. +- `heart_rate_variability.py`: Analyzes heart rate variability. +- `posture_analysis.py`: Analyzes posture. +- `roberta_chatbot.py`: Chatbot using the RoBERTa model. +- `sentiment_analysis.py`: Performs sentiment analysis. +- `skin_analysis.py`: Analyzes skin conditions. +- `sleep_quality.py`: Evaluates sleep quality. +- `speech_emotion_recognition.py`: Recognizes emotions from speech. +- `speech_stress_analysis.py`: Analyzes stress levels from speech. + +These scripts combine to provide comprehensive analysis capabilities for various aspects of human behavior and physiology. + +## Upload Trick to HG + +git lfs track "_.dat" && git lfs track "_.pt" && git add .gitattributes && git add assets/models/shape_predictor_68_face_landmarks.dat && git add assets/models/FER_dinamic_LSTM_IEMOCAP.pt && git add assets/models/FER_static_ResNet50_AffectNet.pt && git commit -m 'bigfiles' && git push origin main --force + +git add . 
&& git commit -m 'pre-launch' && git push diff --git a/__pycache__/audio_OK.cpython-310.pyc b/__pycache__/audio_OK.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..480c409ae2380cca80adce217b5347f533ddddea Binary files /dev/null and b/__pycache__/audio_OK.cpython-310.pyc differ diff --git a/__pycache__/fixapp.cpython-310.pyc b/__pycache__/fixapp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b53a1ad8f4fb0450356417dd7cb071aa461527e Binary files /dev/null and b/__pycache__/fixapp.cpython-310.pyc differ diff --git a/__pycache__/mpstest.cpython-310.pyc b/__pycache__/mpstest.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d4ea42e7807bee94b56e3ac17c91dbdbb30e4d95 Binary files /dev/null and b/__pycache__/mpstest.cpython-310.pyc differ diff --git a/__pycache__/tinnitus.cpython-310.pyc b/__pycache__/tinnitus.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7605147e394e12aa83ac04539d336a0c6ad6d6d2 Binary files /dev/null and b/__pycache__/tinnitus.cpython-310.pyc differ diff --git a/__pycache__/ui_components.cpython-310.pyc b/__pycache__/ui_components.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..20ab2fa74bd552c4921139157670d6d6fcb3086f Binary files /dev/null and b/__pycache__/ui_components.cpython-310.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..57dd7b3fe9de69121f40fe64dce15ff89181a4f3 --- /dev/null +++ b/app.py @@ -0,0 +1,66 @@ +import gradio as gr +from tabs.heart_rate_variability import create_hrv_tab +from tabs.blink_detection import create_blink_tab +from tabs.gaze_estimation import create_gaze_estimation_tab +from tabs.speech_stress_analysis import create_voice_stress_tab +from tabs.head_posture_detection import create_head_posture_tab +from tabs.face_expressions import create_face_expressions_tab +from tabs.speech_emotion_recognition import create_emotion_recognition_tab +from tabs.sleep_quality import create_sleep_quality_tab +from tabs.sentiment_analysis import create_sentiment_tab +from tabs.emotion_analysis import create_emotion_tab +from tabs.body_movement_analysis import create_body_movement_tab +from tabs.posture_analysis import create_posture_analysis_tab +from tabs.skin_analysis import create_skin_conductance_tab +from tabs.FACS_analysis_sad import create_facs_analysis_sad_tab +from tabs.roberta_chatbot import create_roberta_chatbot_tab +import spaces + +# Import the UI components +from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML + +TAB_STRUCTURE = [ + ("Visual Analysis", [ + ("Emotional Face Expressions", create_face_expressions_tab), + ("FACS for Stress, Anxiety, Depression", create_facs_analysis_sad_tab), + ("Gaze Estimation", create_gaze_estimation_tab), + ("Head Posture", create_head_posture_tab), + ("Blink Rate", create_blink_tab), + ("Sleep Quality", create_sleep_quality_tab), + ("Heart Rate Variability", create_hrv_tab), + ("Body Movement", create_body_movement_tab), + ("Posture", create_posture_analysis_tab), + ("Skin", create_skin_conductance_tab) + ]), + ("Speech Analysis", [ + ("Speech Stress", create_voice_stress_tab), + ("Speech Emotion", create_emotion_recognition_tab) + ]), + ("Text Analysis", [ + ("Sentiment", create_sentiment_tab), + ("Emotion", create_emotion_tab), + ("Roberta Mental Health Chatbot", create_roberta_chatbot_tab) + ]), + ("Brain Analysis (coming soon)", [ + ]) +] + +@spaces.GPU +def 
create_demo(): + with gr.Blocks(css=CUSTOM_CSS) as demo: + gr.Markdown(HEADER_HTML) + with gr.Tabs(elem_classes=["main-tab"]): + for main_tab, sub_tabs in TAB_STRUCTURE: + with gr.Tab(main_tab): + with gr.Tabs(): + for sub_tab, create_fn in sub_tabs: + with gr.Tab(sub_tab): + create_fn() + gr.HTML(DISCLAIMER_HTML) + return demo + +# Create the demo instance +demo = create_demo() + +if __name__ == "__main__": + demo.queue(api_open=True).launch(share=False) \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/__pycache__/__init__.cpython-310.pyc b/app/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d0511c6503b3d5b709b058e5a45b1a4fe3e767b Binary files /dev/null and b/app/__pycache__/__init__.cpython-310.pyc differ diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f52faa26ade7317c137860171296602da7951f3e Binary files /dev/null and b/app/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/__pycache__/__init__.cpython-38.pyc b/app/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55d756f30d8e47e499e4a900f5c37713c3e67481 Binary files /dev/null and b/app/__pycache__/__init__.cpython-38.pyc differ diff --git a/app/__pycache__/app_utils.cpython-310.pyc b/app/__pycache__/app_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ddbddf7c7cf002399decc8ec09af3858ab93b55 Binary files /dev/null and b/app/__pycache__/app_utils.cpython-310.pyc differ diff --git a/app/__pycache__/app_utils.cpython-312.pyc b/app/__pycache__/app_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be10a56223cf874d2212e6f6961f1b7b6c06607f Binary files /dev/null and b/app/__pycache__/app_utils.cpython-312.pyc differ diff --git a/app/__pycache__/app_utils.cpython-38.pyc b/app/__pycache__/app_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f575ffaf5fb4802ec4af2a14d11c850f9cd47d9c Binary files /dev/null and b/app/__pycache__/app_utils.cpython-38.pyc differ diff --git a/app/__pycache__/authors.cpython-310.pyc b/app/__pycache__/authors.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a35c86dc0507b7b5d120380557fb0c0077db44c Binary files /dev/null and b/app/__pycache__/authors.cpython-310.pyc differ diff --git a/app/__pycache__/authors.cpython-312.pyc b/app/__pycache__/authors.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbc33aeb5a1a65a582b44ccd83356c4ca5c92204 Binary files /dev/null and b/app/__pycache__/authors.cpython-312.pyc differ diff --git a/app/__pycache__/authors.cpython-38.pyc b/app/__pycache__/authors.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dae92066583a36e33bbe043cd290a172cdbcdd71 Binary files /dev/null and b/app/__pycache__/authors.cpython-38.pyc differ diff --git a/app/__pycache__/config.cpython-310.pyc b/app/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5de50aa69490701bbb0407e2b14481cca9975dc Binary files /dev/null and b/app/__pycache__/config.cpython-310.pyc differ diff --git a/app/__pycache__/config.cpython-312.pyc 
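
A note on the `app.py` added above: `@spaces.GPU` there wraps `create_demo`, which only builds the Gradio UI. On ZeroGPU Spaces the decorator is normally placed on the functions that actually run model inference, so a GPU is attached only for the duration of each call. A minimal, hypothetical sketch of that pattern (the `predict` function and its model are illustrative stand-ins, not part of this repository):

```python
import gradio as gr
import spaces  # Hugging Face ZeroGPU helper
import torch

model = torch.nn.Linear(4, 2)  # stand-in for a real model loaded at startup

@spaces.GPU  # a GPU is allocated only while this function executes
def predict(a, b, c, d):
    model.to("cuda")
    x = torch.tensor([[a, b, c, d]], dtype=torch.float32, device="cuda")
    with torch.no_grad():
        return model(x).softmax(dim=-1).cpu().tolist()

demo = gr.Interface(
    fn=predict,
    inputs=[gr.Number(label=n) for n in ("a", "b", "c", "d")],
    outputs="json",
)

if __name__ == "__main__":
    demo.launch()
```
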
b/app/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e919d893b0f581c7ae4595f1d03d5efc8e9b6e9 Binary files /dev/null and b/app/__pycache__/config.cpython-312.pyc differ diff --git a/app/__pycache__/config.cpython-38.pyc b/app/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff01378c8bbf844391970c090fbd42a334b79148 Binary files /dev/null and b/app/__pycache__/config.cpython-38.pyc differ diff --git a/app/__pycache__/description.cpython-310.pyc b/app/__pycache__/description.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9eeab94eeda9c46e814e5f4ec9a83a6ef859dfc Binary files /dev/null and b/app/__pycache__/description.cpython-310.pyc differ diff --git a/app/__pycache__/description.cpython-312.pyc b/app/__pycache__/description.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05e374d2edad30de42f8d89a72bb4af2e67dbe84 Binary files /dev/null and b/app/__pycache__/description.cpython-312.pyc differ diff --git a/app/__pycache__/description.cpython-38.pyc b/app/__pycache__/description.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4e8a705a29d3db014a2dd8fbf0622241d8d3f07 Binary files /dev/null and b/app/__pycache__/description.cpython-38.pyc differ diff --git a/app/__pycache__/face_utils.cpython-310.pyc b/app/__pycache__/face_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8716849c63f9c7b4caa48fe67af6d0df48278ee Binary files /dev/null and b/app/__pycache__/face_utils.cpython-310.pyc differ diff --git a/app/__pycache__/face_utils.cpython-312.pyc b/app/__pycache__/face_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b9f4aadd2b55a1a985fe5138bae5f89782a5d05 Binary files /dev/null and b/app/__pycache__/face_utils.cpython-312.pyc differ diff --git a/app/__pycache__/face_utils.cpython-38.pyc b/app/__pycache__/face_utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d9562b5b623b81e26f3d18273e86c585e565e57 Binary files /dev/null and b/app/__pycache__/face_utils.cpython-38.pyc differ diff --git a/app/__pycache__/model.cpython-310.pyc b/app/__pycache__/model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d3ed542f81a42e1a14de6952e386c367a494c72 Binary files /dev/null and b/app/__pycache__/model.cpython-310.pyc differ diff --git a/app/__pycache__/model.cpython-312.pyc b/app/__pycache__/model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e68bec2da5870c4e66df3b9a7d65d7c5969faac0 Binary files /dev/null and b/app/__pycache__/model.cpython-312.pyc differ diff --git a/app/__pycache__/model.cpython-38.pyc b/app/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5323a0982079af4b3f451986464af152dfce0960 Binary files /dev/null and b/app/__pycache__/model.cpython-38.pyc differ diff --git a/app/__pycache__/model_architectures.cpython-310.pyc b/app/__pycache__/model_architectures.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05260b1322dc760a6d724a0ebd94d5c04f95d165 Binary files /dev/null and b/app/__pycache__/model_architectures.cpython-310.pyc differ diff --git a/app/__pycache__/model_architectures.cpython-312.pyc b/app/__pycache__/model_architectures.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..a3bb410b777d4fad0f22e23f528d38e58ca86943 Binary files /dev/null and b/app/__pycache__/model_architectures.cpython-312.pyc differ diff --git a/app/__pycache__/model_architectures.cpython-38.pyc b/app/__pycache__/model_architectures.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17ba7433c2d14127bcc4a5418a72aeedf5d2eb07 Binary files /dev/null and b/app/__pycache__/model_architectures.cpython-38.pyc differ diff --git a/app/__pycache__/plot.cpython-310.pyc b/app/__pycache__/plot.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f20cfb4283e8cf02a75c3f50a35b756007813792 Binary files /dev/null and b/app/__pycache__/plot.cpython-310.pyc differ diff --git a/app/__pycache__/plot.cpython-312.pyc b/app/__pycache__/plot.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13f8909c80a0246aaba2004c365319bf6b3114ae Binary files /dev/null and b/app/__pycache__/plot.cpython-312.pyc differ diff --git a/app/__pycache__/plot.cpython-38.pyc b/app/__pycache__/plot.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88f34c91d010ea26dd7f7db3011132455278d3e5 Binary files /dev/null and b/app/__pycache__/plot.cpython-38.pyc differ diff --git a/app/app_utils.py b/app/app_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a923c72468a7790f0e2721541db56e61126350 --- /dev/null +++ b/app/app_utils.py @@ -0,0 +1,321 @@ +""" +File: app_utils.py +Author: Elena Ryumina and Dmitry Ryumin (modified by Assistant) +Description: This module contains utility functions for facial expression recognition application, including FACS Analysis for SAD. +License: MIT License +""" + +import torch +import numpy as np +import mediapipe as mp +from PIL import Image +import cv2 +from pytorch_grad_cam.utils.image import show_cam_on_image +import matplotlib.pyplot as plt + +# Importing necessary components for the Gradio app +from app.model import pth_model_static, pth_model_dynamic, cam, pth_processing +from app.face_utils import get_box, display_info +from app.config import DICT_EMO, config_data +from app.plot import statistics_plot + +mp_face_mesh = mp.solutions.face_mesh + +def preprocess_image_and_predict(inp): + inp = np.array(inp) + + if inp is None: + return None, None, None + + try: + h, w = inp.shape[:2] + except Exception: + return None, None, None + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5, + ) as face_mesh: + results = face_mesh.process(inp) + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = inp[startY:endY, startX:endX] + cur_face_n = pth_processing(Image.fromarray(cur_face)) + with torch.no_grad(): + prediction = ( + torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1) + .detach() + .numpy()[0] + ) + confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)} + grayscale_cam = cam(input_tensor=cur_face_n) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face,(224,224)) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True) + + return cur_face, heatmap, confidences + +def preprocess_frame_and_predict_aus(frame): + if len(frame.shape) == 2: + frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) + elif frame.shape[2] == 4: + frame = cv2.cvtColor(frame, 
cv2.COLOR_RGBA2RGB) + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5 + ) as face_mesh: + results = face_mesh.process(frame) + + if results.multi_face_landmarks: + h, w = frame.shape[:2] + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame[startY:endY, startX:endX] + cur_face_n = pth_processing(Image.fromarray(cur_face)) + + with torch.no_grad(): + features = pth_model_static(cur_face_n) + au_intensities = features_to_au_intensities(features) + + grayscale_cam = cam(input_tensor=cur_face_n) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face, (224, 224)) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True) + + return cur_face, au_intensities, heatmap + + return None, None, None + +def features_to_au_intensities(features): + features_np = features.detach().cpu().numpy()[0] + au_intensities = (features_np - features_np.min()) / (features_np.max() - features_np.min()) + return au_intensities[:24] # Assuming we want 24 AUs + +def preprocess_video_and_predict(video): + cap = cv2.VideoCapture(video) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = np.round(cap.get(cv2.CAP_PROP_FPS)) + + path_save_video_face = 'result_face.mp4' + vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + path_save_video_hm = 'result_hm.mp4' + vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + lstm_features = [] + count_frame = 1 + count_face = 0 + probs = [] + frames = [] + au_intensities_list = [] + last_output = None + last_heatmap = None + last_au_intensities = None + cur_face = None + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5) as face_mesh: + + while cap.isOpened(): + _, frame = cap.read() + if frame is None: break + + frame_copy = frame.copy() + frame_copy.flags.writeable = False + frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB) + results = face_mesh.process(frame_copy) + frame_copy.flags.writeable = True + + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame_copy[startY:endY, startX: endX] + + if count_face%config_data.FRAME_DOWNSAMPLING == 0: + cur_face_copy = pth_processing(Image.fromarray(cur_face)) + with torch.no_grad(): + features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy() + au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy)) + + grayscale_cam = cam(input_tensor=cur_face_copy) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face,(224,224), interpolation = cv2.INTER_AREA) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False) + last_heatmap = heatmap + last_au_intensities = au_intensities + + if len(lstm_features) == 0: + lstm_features = [features]*10 + else: + lstm_features = lstm_features[1:] + [features] + + lstm_f = torch.from_numpy(np.vstack(lstm_features)) + lstm_f = torch.unsqueeze(lstm_f, 0) + with torch.no_grad(): + output = pth_model_dynamic(lstm_f).detach().numpy() + last_output = output + + if count_face == 0: + count_face += 1 + + else: + if last_output is 
not None: + output = last_output + heatmap = last_heatmap + au_intensities = last_au_intensities + + elif last_output is None: + output = np.empty((1, 7)) + output[:] = np.nan + au_intensities = np.empty(24) + au_intensities[:] = np.nan + + probs.append(output[0]) + frames.append(count_frame) + au_intensities_list.append(au_intensities) + else: + if last_output is not None: + lstm_features = [] + empty = np.empty((7)) + empty[:] = np.nan + probs.append(empty) + frames.append(count_frame) + au_intensities_list.append(np.full(24, np.nan)) + + if cur_face is not None: + heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3) + + cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR) + cur_face = cv2.resize(cur_face, (224,224), interpolation = cv2.INTER_AREA) + cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3) + vid_writer_face.write(cur_face) + vid_writer_hm.write(heatmap_f) + + count_frame += 1 + if count_face != 0: + count_face += 1 + + vid_writer_face.release() + vid_writer_hm.release() + + stat = statistics_plot(frames, probs) + au_stat = au_statistics_plot(frames, au_intensities_list) + + if not stat or not au_stat: + return None, None, None, None, None + + return video, path_save_video_face, path_save_video_hm, stat, au_stat + +def au_statistics_plot(frames, au_intensities_list): + fig, ax = plt.subplots(figsize=(12, 6)) + au_intensities_array = np.array(au_intensities_list) + + for i in range(au_intensities_array.shape[1]): + ax.plot(frames, au_intensities_array[:, i], label=f'AU{i+1}') + + ax.set_xlabel('Frame') + ax.set_ylabel('AU Intensity') + ax.set_title('Action Unit Intensities Over Time') + ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plt.tight_layout() + return fig + +def preprocess_video_and_predict_sleep_quality(video): + cap = cv2.VideoCapture(video) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = np.round(cap.get(cv2.CAP_PROP_FPS)) + + path_save_video_original = 'result_original.mp4' + path_save_video_face = 'result_face.mp4' + path_save_video_sleep = 'result_sleep.mp4' + + vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + frames = [] + sleep_quality_scores = [] + eye_bags_images = [] + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5) as face_mesh: + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + results = face_mesh.process(frame_rgb) + + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame_rgb[startY:endY, startX:endX] + + sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face) + sleep_quality_scores.append(sleep_quality_score) + eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224))) + + sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score) + + cur_face = cv2.resize(cur_face, (224, 224)) + + vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)) + vid_writer_sleep.write(sleep_quality_viz) + + vid_writer_original.write(frame) + frames.append(len(frames) + 1) + + 
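
The `analyze_sleep_quality` call in the loop above is implemented just below as an acknowledged placeholder that returns a random score. Purely as an illustration of the kind of heuristic that could slot into that signature, and not a validated method, here is a sketch that rates under-eye darkness relative to overall face brightness (the region coordinates are rough assumptions):

```python
import cv2
import numpy as np

def analyze_sleep_quality_heuristic(face_image):
    """Illustrative only: treats a darker under-eye band (relative to the whole
    face) as a rough proxy for eye bags, mapped to a 0-1 'quality' score."""
    face = cv2.resize(face_image, (224, 224))
    gray = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY).astype(np.float32)
    under_eyes = gray[110:150, 40:184]                       # approximate under-eye region
    darkness = max(0.0, 1.0 - under_eyes.mean() / max(gray.mean(), 1e-6))
    sleep_quality_score = float(np.clip(1.0 - 2.0 * darkness, 0.0, 1.0))
    eye_bags_image = cv2.cvtColor(under_eyes.astype(np.uint8), cv2.COLOR_GRAY2RGB)
    return sleep_quality_score, cv2.resize(eye_bags_image, (224, 224))
```
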
cap.release() + vid_writer_original.release() + vid_writer_face.release() + vid_writer_sleep.release() + + sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores) + + if eye_bags_images: + average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8) + else: + average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8) + + return (path_save_video_original, path_save_video_face, path_save_video_sleep, + average_eye_bags_image, sleep_stat) + +def analyze_sleep_quality(face_image): + # Placeholder function - implement your sleep quality analysis here + sleep_quality_score = np.random.random() + eye_bags_image = cv2.resize(face_image, (224, 224)) + return sleep_quality_score, eye_bags_image + +def create_sleep_quality_visualization(face_image, sleep_quality_score): + viz = face_image.copy() + cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR) + +def sleep_quality_statistics_plot(frames, sleep_quality_scores): + # Placeholder function - implement your statistics plotting here + fig, ax = plt.subplots() + ax.plot(frames, sleep_quality_scores) + ax.set_xlabel('Frame') + ax.set_ylabel('Sleep Quality Score') + ax.set_title('Sleep Quality Over Time') + return fig \ No newline at end of file diff --git a/app/au_processing.py b/app/au_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..8b2bd14117d2633c553e7553a05e01002ae39da3 --- /dev/null +++ b/app/au_processing.py @@ -0,0 +1,64 @@ +import numpy as np +import matplotlib.pyplot as plt +import cv2 +import torch +from PIL import Image +from app.model import pth_model_static, cam, pth_processing +from app.face_utils import get_box +import mediapipe as mp + +mp_face_mesh = mp.solutions.face_mesh + +def preprocess_frame_and_predict_aus(frame): + if len(frame.shape) == 2: + frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) + elif frame.shape[2] == 4: + frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB) + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5 + ) as face_mesh: + results = face_mesh.process(frame) + + if results.multi_face_landmarks: + h, w = frame.shape[:2] + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame[startY:endY, startX:endX] + cur_face_n = pth_processing(Image.fromarray(cur_face)) + + with torch.no_grad(): + features = pth_model_static(cur_face_n) + au_intensities = features_to_au_intensities(features) + + grayscale_cam = cam(input_tensor=cur_face_n) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face, (224, 224)) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True) + + return cur_face, au_intensities, heatmap + + return None, None, None + +def features_to_au_intensities(features): + features_np = features.detach().cpu().numpy()[0] + au_intensities = (features_np - features_np.min()) / (features_np.max() - features_np.min()) + return au_intensities[:24] # Assuming we want 24 AUs + +def au_statistics_plot(frames, au_intensities_list): + fig, ax = plt.subplots(figsize=(12, 6)) + au_intensities_array = np.array(au_intensities_list) + + for i in range(au_intensities_array.shape[1]): + ax.plot(frames, au_intensities_array[:, i], label=f'AU{i+1}') + + ax.set_xlabel('Frame') + ax.set_ylabel('AU Intensity') + ax.set_title('Action Unit 
Intensities Over Time') + ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') + plt.tight_layout() + return fig \ No newline at end of file diff --git a/app/authors.py b/app/authors.py new file mode 100644 index 0000000000000000000000000000000000000000..d1f522027b85ec0e60bc42aa6896ffbe8d472270 --- /dev/null +++ b/app/authors.py @@ -0,0 +1,34 @@ +""" +File: authors.py +Author: Elena Ryumina and Dmitry Ryumin +Description: About the authors. +License: MIT License +""" + + +AUTHORS = """ + Authors: [Elena Ryumina](https://github.com/ElenaRyumina), [Dmitry Ryumin](https://github.com/DmitryRyumin), [Denis Dresvyanskiy](https://www.uni-ulm.de/en/nt/staff/research-assistants/dresvyanskiy/), [Maxim Markitantov](https://hci.nw.ru/en/employees/10) and [Alexey Karpov](https://hci.nw.ru/en/employees/1) + + Authorship contribution: + + App developers: ``Elena Ryumina`` and ``Dmitry Ryumin`` + + Methodology developers: ``Elena Ryumina``, ``Denis Dresvyanskiy`` and ``Alexey Karpov`` + + Model developer: ``Elena Ryumina`` + + TensorFlow to PyTorch model converters: ``Maxim Markitantov`` and ``Elena Ryumina`` + + Citation + + If you are using EMO-AffectNetModel in your research, please consider to cite research [paper](https://www.sciencedirect.com/science/article/pii/S0925231222012656). Here is an example of BibTeX entry: + +
@article{RYUMINA2022,
+        title        = {In Search of a Robust Facial Expressions Recognition Model: A Large-Scale Visual Cross-Corpus Study},
+        author       = {Elena Ryumina and Denis Dresvyanskiy and Alexey Karpov},
+        journal      = {Neurocomputing},
+        year         = {2022},
+        doi          = {10.1016/j.neucom.2022.10.013},
+        url          = {https://www.sciencedirect.com/science/article/pii/S0925231222012656},
+    }
+""" diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000000000000000000000000000000000000..b00a579f60a55c2ecf4dd7b27842610beb60d99a --- /dev/null +++ b/app/config.py @@ -0,0 +1,49 @@ +""" +File: config.py +Author: Elena Ryumina and Dmitry Ryumin +Description: Configuration file. +License: MIT License +""" + +import toml +from typing import Dict +from types import SimpleNamespace + + +def flatten_dict(prefix: str, d: Dict) -> Dict: + result = {} + + for k, v in d.items(): + if isinstance(v, dict): + result.update(flatten_dict(f"{prefix}{k}_", v)) + else: + result[f"{prefix}{k}"] = v + + return result + + +config = toml.load("config.toml") + +config_data = flatten_dict("", config) + +config_data = SimpleNamespace(**config_data) + +DICT_EMO = { + 0: "Neutral", + 1: "Happiness", + 2: "Sadness", + 3: "Surprise", + 4: "Fear", + 5: "Disgust", + 6: "Anger", +} + +COLORS = { + 0: 'blue', + 1: 'orange', + 2: 'green', + 3: 'red', + 4: 'purple', + 5: 'brown', + 6: 'pink' +} diff --git a/app/description.py b/app/description.py new file mode 100644 index 0000000000000000000000000000000000000000..c66e15863d4181879e9bef2b0cae1479c89d5b66 --- /dev/null +++ b/app/description.py @@ -0,0 +1,46 @@ +""" +File: description.py +Author: Elena Ryumina and Dmitry Ryumin +Description: Project description for the Gradio app. +License: MIT License +""" + +# Importing necessary components for the Gradio app +from app.config import config_data + +DESCRIPTION_STATIC = f"""\ +# Static Facial Expression Recognition +
+    Version: v{config_data.APP_VERSION}
+"""
+
+DESCRIPTION_DYNAMIC = f"""\
+# Dynamic Facial Expression Recognition
+    Version: v{config_data.APP_VERSION}
+""" + +DESCRIPTION_SLEEP_QUALITY = """ +# Sleep Quality Analysis + +This tab analyzes sleep quality based on facial features, focusing on skin tone and eye bags. + +## How to use: +1. Upload a video of a person's face. +2. Click 'Submit' to process the video. +3. View the results, including: + - Original video + - Processed face video + - Sleep quality analysis video + - Eye bags detection image + - Sleep quality statistics over time + +The analysis provides insights into potential sleep issues based on visual cues. + +Note: This analysis is for informational purposes only and should not be considered a medical diagnosis. Always consult with a healthcare professional for sleep-related concerns. +""" \ No newline at end of file diff --git a/app/face_utils.py b/app/face_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d58d9b8efe044b667a822240d3ca342f652f08f4 --- /dev/null +++ b/app/face_utils.py @@ -0,0 +1,68 @@ +""" +File: face_utils.py +Author: Elena Ryumina and Dmitry Ryumin +Description: This module contains utility functions related to facial landmarks and image processing. +License: MIT License +""" + +import numpy as np +import math +import cv2 + + +def norm_coordinates(normalized_x, normalized_y, image_width, image_height): + x_px = min(math.floor(normalized_x * image_width), image_width - 1) + y_px = min(math.floor(normalized_y * image_height), image_height - 1) + return x_px, y_px + + +def get_box(fl, w, h): + idx_to_coors = {} + for idx, landmark in enumerate(fl.landmark): + landmark_px = norm_coordinates(landmark.x, landmark.y, w, h) + if landmark_px: + idx_to_coors[idx] = landmark_px + + x_min = np.min(np.asarray(list(idx_to_coors.values()))[:, 0]) + y_min = np.min(np.asarray(list(idx_to_coors.values()))[:, 1]) + endX = np.max(np.asarray(list(idx_to_coors.values()))[:, 0]) + endY = np.max(np.asarray(list(idx_to_coors.values()))[:, 1]) + + (startX, startY) = (max(0, x_min), max(0, y_min)) + (endX, endY) = (min(w - 1, endX), min(h - 1, endY)) + + return startX, startY, endX, endY + +def display_info(img, text, margin=1.0, box_scale=1.0): + img_copy = img.copy() + img_h, img_w, _ = img_copy.shape + line_width = int(min(img_h, img_w) * 0.001) + thickness = max(int(line_width / 3), 1) + + font_face = cv2.FONT_HERSHEY_SIMPLEX + font_color = (0, 0, 0) + font_scale = thickness / 1.5 + + t_w, t_h = cv2.getTextSize(text, font_face, font_scale, None)[0] + + margin_n = int(t_h * margin) + sub_img = img_copy[0 + margin_n: 0 + margin_n + t_h + int(2 * t_h * box_scale), + img_w - t_w - margin_n - int(2 * t_h * box_scale): img_w - margin_n] + + white_rect = np.ones(sub_img.shape, dtype=np.uint8) * 255 + + img_copy[0 + margin_n: 0 + margin_n + t_h + int(2 * t_h * box_scale), + img_w - t_w - margin_n - int(2 * t_h * box_scale):img_w - margin_n] = cv2.addWeighted(sub_img, 0.5, white_rect, .5, 1.0) + + cv2.putText(img=img_copy, + text=text, + org=(img_w - t_w - margin_n - int(2 * t_h * box_scale) // 2, + 0 + margin_n + t_h + int(2 * t_h * box_scale) // 2), + fontFace=font_face, + fontScale=font_scale, + color=font_color, + thickness=thickness, + lineType=cv2.LINE_AA, + bottomLeftOrigin=False) + + return img_copy diff --git a/app/image_processing.py b/app/image_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..2b5cabae1fba1eaf8a348fbc151c9d2bdc4ecc92 --- /dev/null +++ b/app/image_processing.py @@ -0,0 +1,49 @@ +import numpy as np +import cv2 +from PIL import Image +import torch +from app.model import pth_model_static, cam, pth_processing 
+from app.face_utils import get_box +from app.config import DICT_EMO +from pytorch_grad_cam.utils.image import show_cam_on_image +import mediapipe as mp + +mp_face_mesh = mp.solutions.face_mesh + +def preprocess_image_and_predict(inp): + inp = np.array(inp) + + if inp is None: + return None, None, None + + try: + h, w = inp.shape[:2] + except Exception: + return None, None, None + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5, + ) as face_mesh: + results = face_mesh.process(inp) + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = inp[startY:endY, startX:endX] + cur_face_n = pth_processing(Image.fromarray(cur_face)) + with torch.no_grad(): + prediction = ( + torch.nn.functional.softmax(pth_model_static(cur_face_n), dim=1) + .detach() + .numpy()[0] + ) + confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)} + grayscale_cam = cam(input_tensor=cur_face_n) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face,(224,224)) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True) + + return cur_face, heatmap, confidences \ No newline at end of file diff --git a/app/model.py b/app/model.py new file mode 100644 index 0000000000000000000000000000000000000000..1aa1d6f3485f79fb9abe40ae058e60ac576aef9b --- /dev/null +++ b/app/model.py @@ -0,0 +1,64 @@ +""" +File: model.py +Author: Elena Ryumina and Dmitry Ryumin +Description: This module provides functions for loading and processing a pre-trained deep learning model + for facial expression recognition. +License: MIT License +""" + +import torch +import requests +from PIL import Image +from torchvision import transforms +from pytorch_grad_cam import GradCAM + +# Importing necessary components for the Gradio app +from app.config import config_data +from app.model_architectures import ResNet50, LSTMPyTorch + + +def load_model(model_url, model_path): + try: + with requests.get(model_url, stream=True) as response: + with open(model_path, "wb") as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + return model_path + except Exception as e: + print(f"Error loading model: {e}") + return None + +path_static = load_model(config_data.model_static_url, config_data.model_static_path) +pth_model_static = ResNet50(7, channels=3) +pth_model_static.load_state_dict(torch.load(path_static)) +pth_model_static.eval() + +path_dynamic = load_model(config_data.model_dynamic_url, config_data.model_dynamic_path) +pth_model_dynamic = LSTMPyTorch() +pth_model_dynamic.load_state_dict(torch.load(path_dynamic)) +pth_model_dynamic.eval() + +target_layers = [pth_model_static.layer4] +cam = GradCAM(model=pth_model_static, target_layers=target_layers) + +def pth_processing(fp): + class PreprocessInput(torch.nn.Module): + def init(self): + super(PreprocessInput, self).init() + + def forward(self, x): + x = x.to(torch.float32) + x = torch.flip(x, dims=(0,)) + x[0, :, :] -= 91.4953 + x[1, :, :] -= 103.8827 + x[2, :, :] -= 131.0912 + return x + + def get_img_torch(img, target_size=(224, 224)): + transform = transforms.Compose([transforms.PILToTensor(), PreprocessInput()]) + img = img.resize(target_size, Image.Resampling.NEAREST) + img = transform(img) + img = torch.unsqueeze(img, 0) + return img + + return get_img_torch(fp) diff --git a/app/model_architectures.py b/app/model_architectures.py 
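
In `app/model.py` above, `load_model` downloads the static and dynamic checkpoints on every start and writes whatever the request returns (the URLs come from `config.toml` via the `flatten_dict`/`SimpleNamespace` pattern in `app/config.py`, which turns `[model_static] url` into `config_data.model_static_url`). A hedged sketch of a slightly more defensive variant, not part of the original code, that caches the file locally and fails loudly on HTTP errors:

```python
import os
import requests

def load_model(model_url, model_path):
    """Download model weights once and reuse the local copy on later runs."""
    if os.path.exists(model_path):
        return model_path
    os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
    try:
        with requests.get(model_url, stream=True, timeout=60) as response:
            response.raise_for_status()  # avoid silently saving an HTML error page
            with open(model_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        return model_path
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
```
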
new file mode 100644 index 0000000000000000000000000000000000000000..c10b760381dea55da4e8943f8a463142a8216a84 --- /dev/null +++ b/app/model_architectures.py @@ -0,0 +1,150 @@ +""" +File: model.py +Author: Elena Ryumina and Dmitry Ryumin +Description: This module provides model architectures. +License: MIT License +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class Bottleneck(nn.Module): + expansion = 4 + def __init__(self, in_channels, out_channels, i_downsample=None, stride=1): + super(Bottleneck, self).__init__() + + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False) + self.batch_norm1 = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.99) + + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding='same', bias=False) + self.batch_norm2 = nn.BatchNorm2d(out_channels, eps=0.001, momentum=0.99) + + self.conv3 = nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0, bias=False) + self.batch_norm3 = nn.BatchNorm2d(out_channels*self.expansion, eps=0.001, momentum=0.99) + + self.i_downsample = i_downsample + self.stride = stride + self.relu = nn.ReLU() + + def forward(self, x): + identity = x.clone() + x = self.relu(self.batch_norm1(self.conv1(x))) + + x = self.relu(self.batch_norm2(self.conv2(x))) + + x = self.conv3(x) + x = self.batch_norm3(x) + + #downsample if needed + if self.i_downsample is not None: + identity = self.i_downsample(identity) + #add identity + x+=identity + x=self.relu(x) + + return x + +class Conv2dSame(torch.nn.Conv2d): + + def calc_same_pad(self, i: int, k: int, s: int, d: int) -> int: + return max((math.ceil(i / s) - 1) * s + (k - 1) * d + 1 - i, 0) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + ih, iw = x.size()[-2:] + + pad_h = self.calc_same_pad(i=ih, k=self.kernel_size[0], s=self.stride[0], d=self.dilation[0]) + pad_w = self.calc_same_pad(i=iw, k=self.kernel_size[1], s=self.stride[1], d=self.dilation[1]) + + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] + ) + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self.padding, + self.dilation, + self.groups, + ) + +class ResNet(nn.Module): + def __init__(self, ResBlock, layer_list, num_classes, num_channels=3): + super(ResNet, self).__init__() + self.in_channels = 64 + + self.conv_layer_s2_same = Conv2dSame(num_channels, 64, 7, stride=2, groups=1, bias=False) + self.batch_norm1 = nn.BatchNorm2d(64, eps=0.001, momentum=0.99) + self.relu = nn.ReLU() + self.max_pool = nn.MaxPool2d(kernel_size = 3, stride=2) + + self.layer1 = self._make_layer(ResBlock, layer_list[0], planes=64, stride=1) + self.layer2 = self._make_layer(ResBlock, layer_list[1], planes=128, stride=2) + self.layer3 = self._make_layer(ResBlock, layer_list[2], planes=256, stride=2) + self.layer4 = self._make_layer(ResBlock, layer_list[3], planes=512, stride=2) + + self.avgpool = nn.AdaptiveAvgPool2d((1,1)) + self.fc1 = nn.Linear(512*ResBlock.expansion, 512) + self.relu1 = nn.ReLU() + self.fc2 = nn.Linear(512, num_classes) + + def extract_features(self, x): + x = self.relu(self.batch_norm1(self.conv_layer_s2_same(x))) + x = self.max_pool(x) + # print(x.shape) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.reshape(x.shape[0], -1) + x = self.fc1(x) + return x + + def forward(self, x): + x = self.extract_features(x) + x = self.relu1(x) + x = self.fc2(x) + 
return x + + def _make_layer(self, ResBlock, blocks, planes, stride=1): + ii_downsample = None + layers = [] + + if stride != 1 or self.in_channels != planes*ResBlock.expansion: + ii_downsample = nn.Sequential( + nn.Conv2d(self.in_channels, planes*ResBlock.expansion, kernel_size=1, stride=stride, bias=False, padding=0), + nn.BatchNorm2d(planes*ResBlock.expansion, eps=0.001, momentum=0.99) + ) + + layers.append(ResBlock(self.in_channels, planes, i_downsample=ii_downsample, stride=stride)) + self.in_channels = planes*ResBlock.expansion + + for i in range(blocks-1): + layers.append(ResBlock(self.in_channels, planes)) + + return nn.Sequential(*layers) + +def ResNet50(num_classes, channels=3): + return ResNet(Bottleneck, [3,4,6,3], num_classes, channels) + + +class LSTMPyTorch(nn.Module): + def __init__(self): + super(LSTMPyTorch, self).__init__() + + self.lstm1 = nn.LSTM(input_size=512, hidden_size=512, batch_first=True, bidirectional=False) + self.lstm2 = nn.LSTM(input_size=512, hidden_size=256, batch_first=True, bidirectional=False) + self.fc = nn.Linear(256, 7) + self.softmax = nn.Softmax(dim=1) + + def forward(self, x): + x, _ = self.lstm1(x) + x, _ = self.lstm2(x) + x = self.fc(x[:, -1, :]) + x = self.softmax(x) + return x \ No newline at end of file diff --git a/app/plot.py b/app/plot.py new file mode 100644 index 0000000000000000000000000000000000000000..267513c598f157194fb56ddb128bc2743e12d280 --- /dev/null +++ b/app/plot.py @@ -0,0 +1,29 @@ +""" +File: config.py +Author: Elena Ryumina and Dmitry Ryumin +Description: Plotting statistical information. +License: MIT License +""" +import matplotlib.pyplot as plt +import numpy as np + +# Importing necessary components for the Gradio app +from app.config import DICT_EMO, COLORS + + +def statistics_plot(frames, probs): + fig, ax = plt.subplots(figsize=(10, 4)) + fig.subplots_adjust(left=0.07, bottom=0.14, right=0.98, top=0.8, wspace=0, hspace=0) + # Установка параметров left, bottom, right, top, чтобы выделить место для легенды и названий осей + probs = np.array(probs) + for i in range(7): + try: + ax.plot(frames, probs[:, i], label=DICT_EMO[i], c=COLORS[i]) + except Exception: + return None + + ax.legend(loc='upper center', bbox_to_anchor=(0.47, 1.2), ncol=7, fontsize=12) + ax.set_xlabel('Frames', fontsize=12) # Добавляем подпись к оси X + ax.set_ylabel('Probability', fontsize=12) # Добавляем подпись к оси Y + ax.grid(True) + return plt diff --git a/app/sleep_quality_processing.py b/app/sleep_quality_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed416abce64d792f8ee887915cf58523e853160 --- /dev/null +++ b/app/sleep_quality_processing.py @@ -0,0 +1,94 @@ +import cv2 +import numpy as np +import matplotlib.pyplot as plt +import mediapipe as mp +from app.face_utils import get_box + +mp_face_mesh = mp.solutions.face_mesh + +def preprocess_video_and_predict_sleep_quality(video): + cap = cv2.VideoCapture(video) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = np.round(cap.get(cv2.CAP_PROP_FPS)) + + path_save_video_original = 'result_original.mp4' + path_save_video_face = 'result_face.mp4' + path_save_video_sleep = 'result_sleep.mp4' + + vid_writer_original = cv2.VideoWriter(path_save_video_original, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + vid_writer_sleep = cv2.VideoWriter(path_save_video_sleep, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + 
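
A quick shape check for the two architectures defined in `app/model_architectures.py` above, assuming the 224×224 RGB crops produced by `pth_processing` and the 10-step feature window used by the video pipeline:

```python
import torch
from app.model_architectures import ResNet50, LSTMPyTorch

static_model = ResNet50(num_classes=7, channels=3)
dynamic_model = LSTMPyTorch()

with torch.no_grad():
    frame = torch.rand(1, 3, 224, 224)                 # one preprocessed face crop
    logits = static_model(frame)                       # (1, 7) emotion scores
    feats = static_model.extract_features(frame)       # (1, 512) embedding fed to the LSTM
    window = feats.relu().repeat(10, 1).unsqueeze(0)   # (1, 10, 512) sliding window
    probs = dynamic_model(window)                      # (1, 7) softmax probabilities

print(logits.shape, feats.shape, probs.shape)
```
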
frames = [] + sleep_quality_scores = [] + eye_bags_images = [] + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5) as face_mesh: + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + results = face_mesh.process(frame_rgb) + + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame_rgb[startY:endY, startX:endX] + + sleep_quality_score, eye_bags_image = analyze_sleep_quality(cur_face) + sleep_quality_scores.append(sleep_quality_score) + eye_bags_images.append(cv2.resize(eye_bags_image, (224, 224))) + + sleep_quality_viz = create_sleep_quality_visualization(cur_face, sleep_quality_score) + + cur_face = cv2.resize(cur_face, (224, 224)) + + vid_writer_face.write(cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR)) + vid_writer_sleep.write(sleep_quality_viz) + + vid_writer_original.write(frame) + frames.append(len(frames) + 1) + + cap.release() + vid_writer_original.release() + vid_writer_face.release() + vid_writer_sleep.release() + + sleep_stat = sleep_quality_statistics_plot(frames, sleep_quality_scores) + + if eye_bags_images: + average_eye_bags_image = np.mean(np.array(eye_bags_images), axis=0).astype(np.uint8) + else: + average_eye_bags_image = np.zeros((224, 224, 3), dtype=np.uint8) + + return (path_save_video_original, path_save_video_face, path_save_video_sleep, + average_eye_bags_image, sleep_stat) + +def analyze_sleep_quality(face_image): + # Placeholder function - implement your sleep quality analysis here + sleep_quality_score = np.random.random() + eye_bags_image = cv2.resize(face_image, (224, 224)) + return sleep_quality_score, eye_bags_image + +def create_sleep_quality_visualization(face_image, sleep_quality_score): + viz = face_image.copy() + cv2.putText(viz, f"Sleep Quality: {sleep_quality_score:.2f}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + return cv2.cvtColor(viz, cv2.COLOR_RGB2BGR) + +def sleep_quality_statistics_plot(frames, sleep_quality_scores): + fig, ax = plt.subplots() + ax.plot(frames, sleep_quality_scores) + ax.set_xlabel('Frame') + ax.set_ylabel('Sleep Quality Score') + ax.set_title('Sleep Quality Over Time') + plt.tight_layout() + return fig \ No newline at end of file diff --git a/app/video_processing.py b/app/video_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..880fffd4017b0b432bc23928750abf809224310d --- /dev/null +++ b/app/video_processing.py @@ -0,0 +1,132 @@ +import cv2 +import numpy as np +import torch +from PIL import Image +import mediapipe as mp +from app.model import pth_model_static, pth_model_dynamic, cam, pth_processing +from app.face_utils import get_box, display_info +from app.config import config_data +from app.plot import statistics_plot +from .au_processing import features_to_au_intensities, au_statistics_plot + +mp_face_mesh = mp.solutions.face_mesh + +def preprocess_video_and_predict(video): + cap = cv2.VideoCapture(video) + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = np.round(cap.get(cv2.CAP_PROP_FPS)) + + path_save_video_face = 'result_face.mp4' + vid_writer_face = cv2.VideoWriter(path_save_video_face, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + path_save_video_hm = 'result_hm.mp4' + vid_writer_hm = cv2.VideoWriter(path_save_video_hm, cv2.VideoWriter_fourcc(*'mp4v'), fps, (224, 224)) + + 
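
Both `preprocess_video_and_predict` implementations in this diff maintain `lstm_features` as a 10-element list that is seeded with the first frame's features and then shifted by hand. An equivalent sketch using `collections.deque`, assuming the `(1, 512)` feature vectors produced by `ResNet50.extract_features`:

```python
from collections import deque
import numpy as np
import torch

WINDOW = 10  # number of feature vectors the LSTM expects

def update_window(buffer, features):
    """Seed the buffer with the first features, then slide one step per new frame."""
    if not buffer:
        buffer.extend([features] * WINDOW)
    else:
        buffer.append(features)  # deque(maxlen=WINDOW) drops the oldest entry
    return torch.from_numpy(np.vstack(buffer)).unsqueeze(0)  # shape (1, 10, 512)

buffer = deque(maxlen=WINDOW)
features = np.random.rand(1, 512).astype(np.float32)  # stand-in for extract_features output
lstm_input = update_window(buffer, features)
print(lstm_input.shape)  # torch.Size([1, 10, 512])
```
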
lstm_features = [] + count_frame = 1 + count_face = 0 + probs = [] + frames = [] + au_intensities_list = [] + last_output = None + last_heatmap = None + last_au_intensities = None + cur_face = None + + with mp_face_mesh.FaceMesh( + max_num_faces=1, + refine_landmarks=False, + min_detection_confidence=0.5, + min_tracking_confidence=0.5) as face_mesh: + + while cap.isOpened(): + _, frame = cap.read() + if frame is None: break + + frame_copy = frame.copy() + frame_copy.flags.writeable = False + frame_copy = cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB) + results = face_mesh.process(frame_copy) + frame_copy.flags.writeable = True + + if results.multi_face_landmarks: + for fl in results.multi_face_landmarks: + startX, startY, endX, endY = get_box(fl, w, h) + cur_face = frame_copy[startY:endY, startX: endX] + + if count_face%config_data.FRAME_DOWNSAMPLING == 0: + cur_face_copy = pth_processing(Image.fromarray(cur_face)) + with torch.no_grad(): + features = torch.nn.functional.relu(pth_model_static.extract_features(cur_face_copy)).detach().numpy() + au_intensities = features_to_au_intensities(pth_model_static(cur_face_copy)) + + grayscale_cam = cam(input_tensor=cur_face_copy) + grayscale_cam = grayscale_cam[0, :] + cur_face_hm = cv2.resize(cur_face,(224,224), interpolation = cv2.INTER_AREA) + cur_face_hm = np.float32(cur_face_hm) / 255 + heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=False) + last_heatmap = heatmap + last_au_intensities = au_intensities + + if len(lstm_features) == 0: + lstm_features = [features]*10 + else: + lstm_features = lstm_features[1:] + [features] + + lstm_f = torch.from_numpy(np.vstack(lstm_features)) + lstm_f = torch.unsqueeze(lstm_f, 0) + with torch.no_grad(): + output = pth_model_dynamic(lstm_f).detach().numpy() + last_output = output + + if count_face == 0: + count_face += 1 + + else: + if last_output is not None: + output = last_output + heatmap = last_heatmap + au_intensities = last_au_intensities + + elif last_output is None: + output = np.empty((1, 7)) + output[:] = np.nan + au_intensities = np.empty(24) + au_intensities[:] = np.nan + + probs.append(output[0]) + frames.append(count_frame) + au_intensities_list.append(au_intensities) + else: + if last_output is not None: + lstm_features = [] + empty = np.empty((7)) + empty[:] = np.nan + probs.append(empty) + frames.append(count_frame) + au_intensities_list.append(np.full(24, np.nan)) + + if cur_face is not None: + heatmap_f = display_info(heatmap, 'Frame: {}'.format(count_frame), box_scale=.3) + + cur_face = cv2.cvtColor(cur_face, cv2.COLOR_RGB2BGR) + cur_face = cv2.resize(cur_face, (224,224), interpolation = cv2.INTER_AREA) + cur_face = display_info(cur_face, 'Frame: {}'.format(count_frame), box_scale=.3) + vid_writer_face.write(cur_face) + vid_writer_hm.write(heatmap_f) + + count_frame += 1 + if count_face != 0: + count_face += 1 + + vid_writer_face.release() + vid_writer_hm.release() + + stat = statistics_plot(frames, probs) + au_stat = au_statistics_plot(frames, au_intensities_list) + + if not stat or not au_stat: + return None, None, None, None, None + + return video, path_save_video_face, path_save_video_hm, stat, au_stat \ No newline at end of file diff --git a/app_gpuzero.py b/app_gpuzero.py new file mode 100644 index 0000000000000000000000000000000000000000..2af8abfcb68800af90e0e76313dc8eb2a3faecbc --- /dev/null +++ b/app_gpuzero.py @@ -0,0 +1,64 @@ +import gradio as gr +from tabs.heart_rate_variability import create_hrv_tab +from tabs.blink_detection import create_blink_tab +from 
tabs.gaze_estimation import create_gaze_estimation_tab +from tabs.speech_stress_analysis import create_voice_stress_tab +from tabs.head_posture_detection import create_head_posture_tab +from tabs.face_expressions import create_face_expressions_tab +from tabs.speech_emotion_recognition import create_emotion_recognition_tab +from tabs.sleep_quality import create_sleep_quality_tab +from tabs.sentiment_analysis import create_sentiment_tab +from tabs.emotion_analysis import create_emotion_tab +from tabs.body_movement_analysis import create_body_movement_tab +from tabs.posture_analysis import create_posture_analysis_tab +from tabs.skin_analysis import create_skin_conductance_tab +from tabs.FACS_analysis_sad import create_facs_analysis_sad_tab +from tabs.roberta_chatbot import create_roberta_chatbot_tab + +# Import the UI components +from ui_components import CUSTOM_CSS, HEADER_HTML, DISCLAIMER_HTML + +TAB_STRUCTURE = [ + ("Visual Analysis", [ + ("Emotional Face Expressions", create_face_expressions_tab), + ("FACS for Stress, Anxiety, Depression", create_facs_analysis_sad_tab), + ("Gaze Estimation", create_gaze_estimation_tab), + ("Head Posture", create_head_posture_tab), + ("Blink Rate", create_blink_tab), + ("Sleep Quality", create_sleep_quality_tab), + ("Heart Rate Variability", create_hrv_tab), + ("Body Movement", create_body_movement_tab), + ("Posture", create_posture_analysis_tab), + ("Skin", create_skin_conductance_tab) + ]), + ("Speech Analysis", [ + ("Speech Stress", create_voice_stress_tab), + ("Speech Emotion", create_emotion_recognition_tab) + ]), + ("Text Analysis", [ + ("Sentiment", create_sentiment_tab), + ("Emotion", create_emotion_tab), + ("Roberta Mental Health Chatbot", create_roberta_chatbot_tab) + ]), + ("Brain Analysis (coming soon)", [ + ]) +] + +def create_demo(): + with gr.Blocks(css=CUSTOM_CSS) as demo: + gr.Markdown(HEADER_HTML) + with gr.Tabs(elem_classes=["main-tab"]): + for main_tab, sub_tabs in TAB_STRUCTURE: + with gr.Tab(main_tab): + with gr.Tabs(): + for sub_tab, create_fn in sub_tabs: + with gr.Tab(sub_tab): + create_fn() + gr.HTML(DISCLAIMER_HTML) + return demo + +# Create the demo instance +demo = create_demo() + +if __name__ == "__main__": + demo.queue(api_open=True).launch(share=False) \ No newline at end of file diff --git a/assets/.DS_Store b/assets/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5d027663072f40481ff932c316219f52d467cfcf Binary files /dev/null and b/assets/.DS_Store differ diff --git a/assets/audio/fitness.wav b/assets/audio/fitness.wav new file mode 100644 index 0000000000000000000000000000000000000000..949b02efd9aa877e19b79a1d29c7057c45b1b5af Binary files /dev/null and b/assets/audio/fitness.wav differ diff --git a/assets/images/dyaglogo.webp b/assets/images/dyaglogo.webp new file mode 100644 index 0000000000000000000000000000000000000000..9daa94dd2c5253ef60dbbb8578111e71d8dc66c3 Binary files /dev/null and b/assets/images/dyaglogo.webp differ diff --git a/assets/images/fitness.jpg b/assets/images/fitness.jpg new file mode 100644 index 0000000000000000000000000000000000000000..27d90926b96d64d57d99fab4cc5b8700d0591fec Binary files /dev/null and b/assets/images/fitness.jpg differ diff --git a/assets/resources/README.md b/assets/resources/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ba181d6e11261ff91fea76ae62797bb9793d5f7a --- /dev/null +++ b/assets/resources/README.md @@ -0,0 +1,7 @@ +https://huggingface.co/ElenaRyumina/face_emotion_recognition/tree/main + 
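
`TAB_STRUCTURE` in `app.py` and `app_gpuzero.py` above maps tab labels to `create_*_tab` factories that are simply called inside a `gr.Tab` context. A hypothetical sketch of what a new factory would look like (the tab and function names below are made up; the real ones live in `tabs/*.py`):

```python
import gradio as gr

def create_example_tab():
    """Builds its own components; app.py wraps the call in `with gr.Tab(...)`."""
    with gr.Row():
        text_in = gr.Textbox(label="Input text")
        text_out = gr.Textbox(label="Result")
    analyze_btn = gr.Button("Analyze")
    analyze_btn.click(lambda t: t.upper(), inputs=text_in, outputs=text_out)

# Registering it would be a one-line addition to TAB_STRUCTURE, e.g.
# ("Text Analysis", [..., ("Example", create_example_tab)]);
# run stand-alone, the factory still needs a Blocks context:
with gr.Blocks() as demo:
    with gr.Tab("Example"):
        create_example_tab()
```
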
+https://huggingface.co/ElenaRyumina/face_emotion_recognition/resolve/main/FER_static_ResNet50_AffectNet.pt + +https://huggingface.co/public-data/dlib_face_landmark_model/tree/main + +wget https://huggingface.co/public-data/dlib_face_landmark_model/resolve/main/shape_predictor_68_face_landmarks.dat diff --git a/assets/videos/fitness.mp4 b/assets/videos/fitness.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e29086ac2ed29ba8142200e0a7fe483df2b65a4b Binary files /dev/null and b/assets/videos/fitness.mp4 differ diff --git a/config.toml b/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..1c849e674c507de82cf616188f06404daa777be2 --- /dev/null +++ b/config.toml @@ -0,0 +1,10 @@ +APP_VERSION = "0.2.0" +FRAME_DOWNSAMPLING = 5 + +[model_static] +url = "https://huggingface.co/ElenaRyumina/face_emotion_recognition/resolve/main/FER_static_ResNet50_AffectNet.pt" +path = "assets/models/FER_static_ResNet50_AffectNet.pt" + +[model_dynamic] +url = "https://huggingface.co/ElenaRyumina/face_emotion_recognition/resolve/main/FER_dinamic_LSTM_IEMOCAP.pt" +path = "assets/models/FER_dinamic_LSTM_IEMOCAP.pt" diff --git a/css/app.css b/css/app.css new file mode 100644 index 0000000000000000000000000000000000000000..22ad2533d59094eb0b1079409811100c93bd8d40 --- /dev/null +++ b/css/app.css @@ -0,0 +1,101 @@ +div.app-flex-container { + display: flex; + align-items: left; +} + +div.app-flex-container > a { + margin-left: 6px; +} + +div.dl1 div.upload-container { + height: 350px; + max-height: 350px; +} + +div.dl2 { + max-height: 200px; +} + +div.dl2 img { + max-height: 200px; +} + +div.dl5 { + max-height: 200px; +} + +div.dl5 img { + max-height: 200px; +} + +div.video1 div.video-container { + height: 500px; +} + +div.video2 { + height: 200px; +} + +div.video3 { + height: 200px; +} + +div.video4 { + height: 200px; +} + +div.stat { + height: 286px; +} + +div.settings-wrapper { + display: none; +} + +.submit { + display: inline-block; + padding: 10px 20px; + font-size: 16px; + font-weight: bold; + text-align: center; + text-decoration: none; + cursor: pointer; + border: var(--button-border-width) solid var(--button-primary-border-color); + background: var(--button-primary-background-fill); + color: var(--button-primary-text-color); + border-radius: 8px; + transition: all 0.3s ease; +} + +.submit[disabled] { + cursor: not-allowed; + opacity: 0.6; +} + +.submit:hover:not([disabled]) { + border-color: var(--button-primary-border-color-hover); + background: var(--button-primary-background-fill-hover); + color: var(--button-primary-text-color-hover); +} + +.clear { + display: inline-block; + padding: 10px 20px; + font-size: 16px; + font-weight: bold; + text-align: center; + text-decoration: none; + cursor: pointer; + border-radius: 8px; + transition: all 0.3s ease; +} + +.clear[disabled] { + cursor: not-allowed; + opacity: 0.6; +} + +.submit:active:not([disabled]), +.clear:active:not([disabled]) { + transform: scale(0.98); +} diff --git a/llm/mentalBERT.py b/llm/mentalBERT.py new file mode 100644 index 0000000000000000000000000000000000000000..15bb13bfa87ffde9d8f67737d1f281db6a09cd93 --- /dev/null +++ b/llm/mentalBERT.py @@ -0,0 +1,73 @@ +import torch +from transformers import RobertaTokenizer, RobertaForSequenceClassification +import gradio as gr + +# Load the tokenizer and models +tokenizer = RobertaTokenizer.from_pretrained("mental/mental-roberta-base") +sentiment_model = RobertaForSequenceClassification.from_pretrained("mental/mental-roberta-base") +emotion_model = 
RobertaForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base") + +# Define the labels +sentiment_labels = ["negative", "positive"] +emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"] + +def analyze_text(text): + try: + # Tokenize the input text + inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512) + + # Get sentiment model outputs + sentiment_outputs = sentiment_model(**inputs) + sentiment_logits = sentiment_outputs.logits + sentiment_probs = torch.nn.functional.softmax(sentiment_logits, dim=-1) + + # Debugging: Print logits and probs shapes + print("Sentiment logits shape:", sentiment_logits.shape) + print("Sentiment logits:", sentiment_logits) + print("Sentiment probs shape:", sentiment_probs.shape) + print("Sentiment probs:", sentiment_probs) + + # Get the highest probability and corresponding label for sentiment + max_sentiment_prob, max_sentiment_index = torch.max(sentiment_probs, dim=1) + sentiment = sentiment_labels[max_sentiment_index.item()] + + # Get emotion model outputs + emotion_outputs = emotion_model(**inputs) + emotion_logits = emotion_outputs.logits + emotion_probs = torch.nn.functional.softmax(emotion_logits, dim=-1) + + # Debugging: Print logits and probs shapes + print("Emotion logits shape:", emotion_logits.shape) + print("Emotion logits:", emotion_logits) + print("Emotion probs shape:", emotion_probs.shape) + print("Emotion probs:", emotion_probs) + + # Get the highest probability and corresponding label for emotion + max_emotion_prob, max_emotion_index = torch.max(emotion_probs, dim=1) + emotion = emotion_labels[max_emotion_index.item()] + + return sentiment, f"{max_sentiment_prob.item():.4f}", emotion, f"{max_emotion_prob.item():.4f}" + except Exception as e: + print("Error:", str(e)) + return "Error", "N/A", "Error", "N/A" + +# Define the Gradio interface +interface = gr.Interface( + fn=analyze_text, + inputs=gr.Textbox( + lines=5, + placeholder="Enter text here...", + value="I don’t know a lot but what I do know is, we don’t start off very big and we all try to make each other smaller." + ), + outputs=[ + gr.Textbox(label="Detected Sentiment"), + gr.Textbox(label="Sentiment Confidence Score"), + gr.Textbox(label="Detected Emotion"), + gr.Textbox(label="Emotion Confidence Score") + ], + title="Sentiment and Emotion Analysis: Detecting Positive/Negative Sentiment and Specific Emotions", + description="Enter a piece of text to detect overall sentiment (positive or negative) and specific emotions (anger, disgust, fear, joy, neutral, sadness, surprise)." +) + +# Launch the interface +interface.launch() diff --git a/notebooks/pytorch-roberta-onnx.ipynb b/notebooks/pytorch-roberta-onnx.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8901e25bdd8c8844407ee3ac3b0dea92e60b133d --- /dev/null +++ b/notebooks/pytorch-roberta-onnx.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pytorch RoBERTa to ONNX" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook documents how to export the PyTorch NLP model into ONNX format and then use it to make predictions using the ONNX runtime.\n", + "\n", + "The model uses the `simpletransformers` library which is a Python wrappers around the `transformers` library which contains PyTorch NLP transformer architectures and weights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import numpy as np\n", + "from simpletransformers.model import TransformerModel\n", + "from transformers import RobertaForSequenceClassification, RobertaTokenizer\n", + "import onnx\n", + "import onnxruntime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Load pretrained PyTorch model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download the model weights from https://storage.googleapis.com/seldon-models/pytorch/moviesentiment_roberta/pytorch_model.bin" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "model = TransformerModel('roberta', 'roberta-base', args=({'fp16': False}))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.model.load_state_dict(torch.load('pytorch_model.bin'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Export as ONNX" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PyTorch supports exporting to ONNX, you just need to specify a valid input tensor for the model." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = RobertaTokenizer.from_pretrained('roberta-base')\n", + "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0) # Batch size 1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0, 713, 822, 16, 98, 1099, 2]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input_ids" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Export as ONNX, we specify dynamic axes for batch dimension and sequence length as sentences come in various lengths." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/janis/.conda/envs/py37/lib/python3.7/site-packages/transformers/modeling_roberta.py:172: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs!\n", + " if input_ids[:, 0].sum().item() != 0:\n" + ] + } + ], + "source": [ + "torch.onnx.export(model.model,\n", + " (input_ids),\n", + " \"roberta.onnx\",\n", + " input_names=['input'],\n", + " output_names=['output'],\n", + " dynamic_axes={'input' :{0 : 'batch_size',\n", + " 1: 'sentence_length'},\n", + " 'output': {0: 'batch_size'}})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Test predictions are the same using ONNX runtime" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model = onnx.load(\"roberta.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# checks the exported model, may crash ipython kernel if run together with the PyTorch model in memory\n", + "# onnx.checker.check_model(onnx_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime\n", + "\n", + "ort_session = onnxruntime.InferenceSession(\"roberta.onnx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def to_numpy(tensor):\n", + " return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "input_ids = torch.tensor(tokenizer.encode(\"This film is so bad\", add_special_tokens=True)).unsqueeze(0) # Batch size 1" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# compute ONNX Runtime output prediction\n", + "ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_ids)}\n", + "ort_out = ort_session.run(None, ort_inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "out = model.model(input_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((tensor([[ 2.3067, -2.6440]], grad_fn=),),\n", + " [array([[ 2.3066945, -2.6439788]], dtype=float32)])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out, ort_out" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "np.testing.assert_allclose(to_numpy(out[0]), ort_out[0], rtol=1e-03, atol=1e-05)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/onxxchatbot.py b/onxxchatbot.py new file mode 100644 index 0000000000000000000000000000000000000000..26da2069319a40fc432521507ce1506e5d5f6865 --- /dev/null +++ b/onxxchatbot.py @@ -0,0 +1,40 @@ +import gradio as gr +from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification + +# Load pre-trained model and tokenizer +model_name = "roberta-base" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForSequenceClassification.from_pretrained(model_name) + 
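+# Note: "roberta-base" is a general pretrained checkpoint, so the sequence-classification
+# head created here is newly initialised rather than fine-tuned and by default only yields
+# LABEL_0/LABEL_1. The LABEL_0..LABEL_4 mapping below therefore assumes a fine-tuned
+# multi-class checkpoint; "your-org/roberta-mental-health" is a hypothetical example of how
+# such a model would be loaded instead:
+# model = AutoModelForSequenceClassification.from_pretrained("your-org/roberta-mental-health")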
+# Create a text classification pipeline +classifier = pipeline("text-classification", model=model, tokenizer=tokenizer) + +# Define response generation function +def generate_response(input_text): + # Classify the input text + result = classifier(input_text)[0] + label = result['label'] + score = result['score'] + + # Map the classification result to a response + responses = { + "LABEL_0": "I understand you might be going through a difficult time. Remember, it's okay to seek help when you need it.", + "LABEL_1": "Your feelings are valid. Have you considered talking to a mental health professional about this?", + "LABEL_2": "Taking care of your mental health is crucial. Small steps like regular exercise and good sleep can make a big difference.", + "LABEL_3": "It sounds like you're dealing with a lot. Remember, you're not alone in this journey.", + "LABEL_4": "I hear you. Coping with mental health challenges can be tough. Have you tried any relaxation techniques like deep breathing or meditation?" + } + + return responses.get(label, "I'm here to listen and support you. Could you tell me more about how you're feeling?") + +# Define chatbot function for Gradio +def chatbot(message, history): + response = generate_response(message) + return response + +# Create Gradio interface +iface = gr.ChatInterface( + fn=chatbot, + title="Mental Health Support Chatbot (RoBERTa)", + description="This chatbot uses a pre-trained RoBERTa model for mental health conversations. Remember, this is not a substitute for professional help. If you're in crisis, please seek immediate professional assistance." +) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd403d57972551b864003a74dfe8354f1e1f0140 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,139 @@ +absl-py==2.1.0 +aiofiles==23.2.1 +altair==5.3.0 +annotated-types==0.7.0 +anyio==4.4.0 +astunparse==1.6.3 +attrs==23.2.0 +audioread==3.0.1 +certifi==2024.7.4 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +contourpy==1.2.1 +cycler==0.12.1 +decorator==4.4.2 +dlib==19.24.4 +dnspython==2.6.1 +email_validator==2.2.0 +exceptiongroup==1.2.2 +fastapi==0.111.1 +fastapi-cli==0.0.4 +ffmpy==0.3.2 +filelock==3.15.4 +flatbuffers==24.3.25 +fonttools==4.53.1 +fsspec==2024.6.1 +gast==0.6.0 +google-pasta==0.2.0 +grad-cam +gradio==4.38.1 +gradio_client==1.1.0 +grpcio==1.64.1 +h11==0.14.0 +h5py==3.11.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.23.5 +idna==3.7 +imageio==2.34.2 +imageio-ffmpeg==0.5.1 +importlib_resources==6.4.0 +imutils==0.5.4 +jax==0.4.30 +jaxlib==0.4.30 +Jinja2==3.1.4 +joblib==1.4.2 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 +keras==3.4.1 +kiwisolver==1.4.5 +lazy_loader==0.4 +libclang==18.1.1 +librosa==0.10.2.post1 +llvmlite==0.43.0 +Markdown==3.6 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.9.1 +mdurl==0.1.2 +mediapipe==0.10.14 +ml-dtypes==0.4.0 +moviepy==1.0.3 +mpmath==1.3.0 +msgpack==1.0.8 +namex==0.0.8 +networkx==3.3 +numba==0.60.0 +numpy==1.26.4 +opencv-contrib-python==4.10.0.84 +opencv-python==4.10.0.84 +opt-einsum==3.3.0 +optree==0.12.1 +orjson==3.10.6 +packaging==24.1 +pandas==2.2.2 +pillow==10.4.0 +platformdirs==4.2.2 +pooch==1.8.2 +proglog==0.1.10 +protobuf==4.25.3 +pycparser==2.22 +pydantic==2.8.2 +pydantic_core==2.20.1 +pydub==0.25.1 +Pygments==2.18.0 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-multipart==0.0.9 +pytz==2024.1 +PyYAML==6.0.1 +referencing==0.35.1 +regex==2024.5.15 
+requests==2.32.3 +rich==13.7.1 +rpds-py==0.19.0 +ruff==0.5.2 +safetensors==0.4.3 +scikit-learn==1.5.1 +scipy==1.14.0 +semantic-version==2.10.0 +shellingham==1.5.4 +six==1.16.0 +sniffio==1.3.1 +sounddevice==0.4.7 +soundfile==0.12.1 +soxr==0.3.7 +starlette==0.37.2 +sympy==1.13.0 +tensorboard==2.17.0 +tensorboard-data-server==0.7.2 +tensorflow==2.17.0 +tensorflow-io-gcs-filesystem==0.37.1 +termcolor==2.4.0 +tf_keras==2.17.0 +threadpoolctl==3.5.0 +tokenizers==0.19.1 +toml==0.10.2 +tomlkit==0.12.0 +toolz==0.12.1 +torch==2.3.1 +torchaudio==2.3.1 +torchvision==0.18.1 +tqdm==4.66.4 +transformers==4.42.4 +ttach==0.0.3 +typer==0.12.3 +typing_extensions==4.12.2 +tzdata==2024.1 +ujson==5.10.0 +urllib3==2.2.2 +uvicorn==0.30.1 +uvloop==0.19.0 +watchfiles==0.22.0 +wavio==0.0.9 +websockets==11.0.3 +Werkzeug==3.0.3 +wrapt==1.16.0 diff --git a/tabs/.DS_Store b/tabs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..07f6a4627a6945aafb14d77279f6600856f016ec Binary files /dev/null and b/tabs/.DS_Store differ diff --git a/tabs/FACS_analysis_sad.py b/tabs/FACS_analysis_sad.py new file mode 100644 index 0000000000000000000000000000000000000000..30f64371de40eac653b19198061e516852ec80ac --- /dev/null +++ b/tabs/FACS_analysis_sad.py @@ -0,0 +1,101 @@ +import gradio as gr +import cv2 +import numpy as np +import matplotlib.pyplot as plt +from app.app_utils import preprocess_frame_and_predict_aus + +# Define the AUs associated with stress, anxiety, and depression +STRESS_AUS = [4, 7, 17, 23, 24] +ANXIETY_AUS = [1, 2, 4, 5, 20] +DEPRESSION_AUS = [1, 4, 15, 17] + +AU_DESCRIPTIONS = { + 1: "Inner Brow Raiser", + 2: "Outer Brow Raiser", + 4: "Brow Lowerer", + 5: "Upper Lid Raiser", + 7: "Lid Tightener", + 15: "Lip Corner Depressor", + 17: "Chin Raiser", + 20: "Lip Stretcher", + 23: "Lip Tightener", + 24: "Lip Pressor" +} + +def normalize_score(score): + return max(0, min(1, (score + 1.5) / 3)) # Adjust the range as needed + +def process_video_for_facs(video_path): + cap = cv2.VideoCapture(video_path) + frames = [] + au_intensities_list = [] + + while True: + ret, frame = cap.read() + if not ret: + break + + processed_frame, au_intensities, _ = preprocess_frame_and_predict_aus(frame) + + if processed_frame is not None and au_intensities is not None: + frames.append(processed_frame) + au_intensities_list.append(au_intensities) + + cap.release() + + if not frames: + return None, None + + # Calculate average AU intensities + avg_au_intensities = np.mean(au_intensities_list, axis=0) + + # Calculate and normalize emotional state scores + stress_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in STRESS_AUS if au <= len(avg_au_intensities)])) + anxiety_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in ANXIETY_AUS if au <= len(avg_au_intensities)])) + depression_score = normalize_score(np.mean([avg_au_intensities[au-1] for au in DEPRESSION_AUS if au <= len(avg_au_intensities)])) + + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) + + # Emotional state scores + states = ['Stress', 'Anxiety', 'Depression'] + scores = [stress_score, anxiety_score, depression_score] + bars = ax1.bar(states, scores) + ax1.set_ylim(0, 1) + ax1.set_title('Emotional State Scores') + for bar in bars: + height = bar.get_height() + ax1.text(bar.get_x() + bar.get_width()/2., height, + f'{height:.2f}', ha='center', va='bottom') + + # AU intensities + all_aus = sorted(set(STRESS_AUS + ANXIETY_AUS + DEPRESSION_AUS)) + all_aus = [au for au in all_aus if au <= len(avg_au_intensities)] + 
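+    # Assumes the intensities returned by preprocess_frame_and_predict_aus are ordered so
+    # that index au-1 corresponds to action unit number au; AUs outside the returned range
+    # were filtered out above.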
au_labels = [f"AU{au}\n{AU_DESCRIPTIONS.get(au, '')}" for au in all_aus] + au_values = [avg_au_intensities[au-1] for au in all_aus] + ax2.bar(range(len(au_labels)), au_values) + ax2.set_xticks(range(len(au_labels))) + ax2.set_xticklabels(au_labels, rotation=45, ha='right') + ax2.set_ylim(0, 1) + ax2.set_title('Average Action Unit Intensities') + + plt.tight_layout() + + return frames[-1], fig # Return the last processed frame and the plot + +def create_facs_analysis_sad_tab(): + with gr.Row(): + with gr.Column(scale=1): + input_video = gr.Video() + analyze_btn = gr.Button("Analyze") + gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video]) + with gr.Column(scale=2): + output_image = gr.Image(label="Processed Frame") + facs_chart = gr.Plot(label="FACS Analysis for SAD") + + analyze_btn.click( + fn=process_video_for_facs, + inputs=input_video, + outputs=[output_image, facs_chart], + ) + + return input_video, output_image, facs_chart, analyze_btn \ No newline at end of file diff --git a/tabs/__pycache__/audio_emotion_recognition.cpython-310.pyc b/tabs/__pycache__/audio_emotion_recognition.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c31ef926ad47e44ffe476c2b1dc98aabb3fa8d2 Binary files /dev/null and b/tabs/__pycache__/audio_emotion_recognition.cpython-310.pyc differ diff --git a/tabs/__pycache__/blink_detection.cpython-310.pyc b/tabs/__pycache__/blink_detection.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b37cc1ac56b6f434d2b4eed0e07fd0b5351f5b00 Binary files /dev/null and b/tabs/__pycache__/blink_detection.cpython-310.pyc differ diff --git a/tabs/__pycache__/body_movement_analysis.cpython-310.pyc b/tabs/__pycache__/body_movement_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ad3168e5f71dd33a31993fc16bfedacdad8bdbb Binary files /dev/null and b/tabs/__pycache__/body_movement_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/emotion_analysis.cpython-310.pyc b/tabs/__pycache__/emotion_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5199982b96d590304ce1e0daf68685de350da1d Binary files /dev/null and b/tabs/__pycache__/emotion_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/emotion_recognition.cpython-310.pyc b/tabs/__pycache__/emotion_recognition.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16c7feca8bc5df7a1d94c04b653b74d21c14fa1b Binary files /dev/null and b/tabs/__pycache__/emotion_recognition.cpython-310.pyc differ diff --git a/tabs/__pycache__/face_expressions.cpython-310.pyc b/tabs/__pycache__/face_expressions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..481448f2ef37db65982e2ebcdca0554397992337 Binary files /dev/null and b/tabs/__pycache__/face_expressions.cpython-310.pyc differ diff --git a/tabs/__pycache__/facs_analysis_sad.cpython-310.pyc b/tabs/__pycache__/facs_analysis_sad.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2882a7600e77646bcc5010f2297679a50b12336f Binary files /dev/null and b/tabs/__pycache__/facs_analysis_sad.cpython-310.pyc differ diff --git a/tabs/__pycache__/gaze_estimation.cpython-310.pyc b/tabs/__pycache__/gaze_estimation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d148192ae22a1456e64193d11870b575ba4ce0c Binary files /dev/null and b/tabs/__pycache__/gaze_estimation.cpython-310.pyc differ diff --git 
a/tabs/__pycache__/head_posture_detection.cpython-310.pyc b/tabs/__pycache__/head_posture_detection.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..000dd2f4b83bd949b5d2e52176a2d761273348a0 Binary files /dev/null and b/tabs/__pycache__/head_posture_detection.cpython-310.pyc differ diff --git a/tabs/__pycache__/heart_rate_variability.cpython-310.pyc b/tabs/__pycache__/heart_rate_variability.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..139882141a2d0fa0defe899b8bafeb5ab18e7fb9 Binary files /dev/null and b/tabs/__pycache__/heart_rate_variability.cpython-310.pyc differ diff --git a/tabs/__pycache__/onxxchatbot.cpython-310.pyc b/tabs/__pycache__/onxxchatbot.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b8f2df6ba912ab2a9ac733183b5551e65d2e746 Binary files /dev/null and b/tabs/__pycache__/onxxchatbot.cpython-310.pyc differ diff --git a/tabs/__pycache__/posture_analysis.cpython-310.pyc b/tabs/__pycache__/posture_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4530571f543b2d2b5f7140952ac27bf05e41632c Binary files /dev/null and b/tabs/__pycache__/posture_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/roberta_chatbot.cpython-310.pyc b/tabs/__pycache__/roberta_chatbot.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9844e8430805cbb55309833f7897385d3d196e68 Binary files /dev/null and b/tabs/__pycache__/roberta_chatbot.cpython-310.pyc differ diff --git a/tabs/__pycache__/sentiment_analysis.cpython-310.pyc b/tabs/__pycache__/sentiment_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e2a73060a8c2d01ccdd0d67969b9c39ac86ee7f Binary files /dev/null and b/tabs/__pycache__/sentiment_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/sentiment_emotion_analysis.cpython-310.pyc b/tabs/__pycache__/sentiment_emotion_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e7a0871049d1e129647374047a4a3bcc4d192b76 Binary files /dev/null and b/tabs/__pycache__/sentiment_emotion_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/skin_analysis.cpython-310.pyc b/tabs/__pycache__/skin_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3776e8f75c256cc1b9ac51ca708fe135c6610e4 Binary files /dev/null and b/tabs/__pycache__/skin_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/skin_conductance.cpython-310.pyc b/tabs/__pycache__/skin_conductance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ebef426bb0a67fb62e46758a4d57c820ca1ffb8 Binary files /dev/null and b/tabs/__pycache__/skin_conductance.cpython-310.pyc differ diff --git a/tabs/__pycache__/sleep_quality.cpython-310.pyc b/tabs/__pycache__/sleep_quality.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39d7195a88ff4fd101dfedb45ff4213b9c298cb4 Binary files /dev/null and b/tabs/__pycache__/sleep_quality.cpython-310.pyc differ diff --git a/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc b/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2c7309e8ab1601b2e27496243abf188b73bc34c Binary files /dev/null and b/tabs/__pycache__/speech_emotion_recognition.cpython-310.pyc differ diff --git a/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc 
b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e00c7c2e701036db4aab4056e05b6aa2311896d Binary files /dev/null and b/tabs/__pycache__/speech_stress_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/text_sentiment_emotion_analysis.cpython-310.pyc b/tabs/__pycache__/text_sentiment_emotion_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5c3901fa27e664cde61955cbc7623568aa14e12 Binary files /dev/null and b/tabs/__pycache__/text_sentiment_emotion_analysis.cpython-310.pyc differ diff --git a/tabs/__pycache__/voice_stress_analysis.cpython-310.pyc b/tabs/__pycache__/voice_stress_analysis.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..198196f2a719b7c72e00c6f095c408208a9ca1c4 Binary files /dev/null and b/tabs/__pycache__/voice_stress_analysis.cpython-310.pyc differ diff --git a/tabs/blink_detection.py b/tabs/blink_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..e8fde46eed4ad8b6fb0c30af3e210c8b63098a6f --- /dev/null +++ b/tabs/blink_detection.py @@ -0,0 +1,66 @@ +import gradio as gr +import matplotlib.pyplot as plt +import tempfile, cv2, dlib, imutils +import torch +from scipy.spatial import distance +from imutils import face_utils + +def eye_aspect_ratio(eye): + A, B, C = [distance.euclidean(eye[i], eye[j]) for i, j in [(1, 5), (2, 4), (0, 3)]] + return (A + B) / (2.0 * C) + +def detect_blinks(video_file): + detector = dlib.get_frontal_face_detector() + predictor = dlib.shape_predictor("assets/models/shape_predictor_68_face_landmarks.dat") + + cap = cv2.VideoCapture(video_file) + ear_list, counter, total = [], 0, 0 + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + gray = cv2.cvtColor(imutils.resize(frame, width=500), cv2.COLOR_BGR2GRAY) + for rect in detector(gray, 0): + shape = face_utils.shape_to_np(predictor(gray, rect)) + left_eye, right_eye = [shape[face_utils.FACIAL_LANDMARKS_IDXS[eye][0]:face_utils.FACIAL_LANDMARKS_IDXS[eye][1]] for eye in ["left_eye", "right_eye"]] + ear = sum(eye_aspect_ratio(eye) for eye in [left_eye, right_eye]) / 2.0 + ear_list.append(ear) + + if ear < 0.3: + counter += 1 + elif counter >= 5: + total += 1 + counter = 0 + else: + counter = 0 + + cap.release() + + plt.figure(figsize=(10, 4)) + plt.plot(ear_list) + plt.title('Eye Aspect Ratio over Time') + plt.xlabel('Frame') + plt.ylabel('EAR') + plt.tight_layout() + temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png') + plt.savefig(temp_file.name) + plt.close() + + return total, temp_file.name + +def create_blink_tab(): + with gr.Row(): + with gr.Column(scale=2): + input_video = gr.Video(label="Input Video") + with gr.Row(): + clear_btn = gr.Button("Clear", scale=1) + submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit") + with gr.Column(scale=1): + output_count = gr.Label(label="Blink Count") + output_plot = gr.Image(label="EAR Plot") + + submit_btn.click(fn=detect_blinks, inputs=[input_video], outputs=[output_count, output_plot]) + clear_btn.click(lambda: (None, None, None), outputs=[input_video, output_count, output_plot]) + gr.Examples(["./assets/videos/fitness.mp4"], [input_video]) diff --git a/tabs/body_movement_analysis.py b/tabs/body_movement_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..1804460736c18b1b5c538952882a118a17b3152d --- /dev/null +++ b/tabs/body_movement_analysis.py @@ -0,0 +1,51 @@ +import gradio as gr 
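+# Simple heuristic used below: movement is scored by summing absolute pixel differences
+# between consecutive grayscale frames; the Low/Medium/High thresholds are arbitrary and
+# may need tuning for other videos.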
+import cv2 +import numpy as np +import tempfile +import os + +def analyze_body_movement(video): + with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file: + video_path = video if isinstance(video, str) else temp_file.name + if not isinstance(video, str): + temp_file.write(video) + + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + return "Error: Unable to open video file." + + frame_count = movement_score = 0 + prev_gray = None + + while True: + ret, frame = cap.read() + if not ret: + break + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + if prev_gray is not None: + movement_score += np.sum(cv2.absdiff(prev_gray, gray)) + prev_gray = gray + frame_count += 1 + + cap.release() + if not isinstance(video, str): + os.unlink(video_path) + + avg_movement = movement_score / (frame_count - 1) if frame_count > 1 else 0 + movement_level = "Low" if avg_movement < 1000 else "Medium" if avg_movement < 5000 else "High" + + return f"Movement level: {movement_level}\nAverage movement score: {avg_movement:.2f}" + +def create_body_movement_tab(): + with gr.Column(): + with gr.Row(): + with gr.Column(): + video_input = gr.Video() + analyze_button = gr.Button("Analyze") + output = gr.Textbox(label="Analysis Results") + + # Add the example here + gr.Examples(["./assets/videos/fitness.mp4"], [video_input]) + + analyze_button.click(analyze_body_movement, inputs=video_input, outputs=output) \ No newline at end of file diff --git a/tabs/emotion_analysis.py b/tabs/emotion_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..98247097f3304e01a87912c5ddf1d1a89ffb69b0 --- /dev/null +++ b/tabs/emotion_analysis.py @@ -0,0 +1,36 @@ +import os +import torch +from transformers import AutoTokenizer, AutoModelForSequenceClassification +import gradio as gr + +os.environ["TOKENIZERS_PARALLELISM"] = "true" + +emotion_tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base") +emotion_model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base") +emotion_labels = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"] + +def analyze_emotion(text): + try: + inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512) + outputs = emotion_model(**inputs) + probs = torch.nn.functional.softmax(outputs.logits, dim=-1) + max_prob, max_index = torch.max(probs, dim=1) + return emotion_labels[max_index.item()], f"{max_prob.item():.4f}" + except Exception as e: + print(f"Error in emotion analysis: {e}") + return "Error", "N/A" + +def create_emotion_tab(): + with gr.Row(): + with gr.Column(scale=2): + input_text = gr.Textbox(value='I actually speak to the expets myself to give you the best value you can get', lines=5, placeholder="Enter text here...", label="Input Text") + with gr.Row(): + clear_btn = gr.Button("Clear", scale=1) + submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit") + with gr.Column(scale=1): + output_emotion = gr.Textbox(label="Detected Emotion") + output_confidence = gr.Textbox(label="Emotion Confidence Score") + + submit_btn.click(analyze_emotion, inputs=[input_text], outputs=[output_emotion, output_confidence]) + clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_emotion, output_confidence]) + gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text]) \ No newline at end of file diff --git a/tabs/face_expressions.py b/tabs/face_expressions.py 
new file mode 100644 index 0000000000000000000000000000000000000000..bdd878411f09e952c2cfbd669a8df0c5e34f8a47 --- /dev/null +++ b/tabs/face_expressions.py @@ -0,0 +1,34 @@ +import gradio as gr +from app.app_utils import preprocess_video_and_predict + +def clear_dynamic_info(): + return [gr.Video(value=None)] * 4 + [gr.Plot(value=None)] + +def create_face_expressions_tab(): + with gr.Row(): + with gr.Column(scale=1): + input_video = gr.Video(elem_classes="video1") + with gr.Row(): + clear_btn = gr.Button("Clear") + submit_btn = gr.Button("Analyze", elem_classes="submit") + with gr.Column(scale=1, elem_classes="dl4"): + output_videos = [ + gr.Video(label=label, elem_classes=f"video{i+2}") + for i, label in enumerate(["Original video", "Pre-processed video", "Heatmaps"]) + ] + output_statistics = gr.Plot(label="Statistics of emotions", elem_classes="stat") + + submit_btn.click( + fn=preprocess_video_and_predict, + inputs=input_video, + outputs=output_videos + [output_statistics], + queue=True, + ) + + clear_btn.click( + fn=clear_dynamic_info, + outputs=[input_video] + output_videos + [output_statistics], + queue=True, + ) + + gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video]) \ No newline at end of file diff --git a/tabs/gaze_estimation.py b/tabs/gaze_estimation.py new file mode 100644 index 0000000000000000000000000000000000000000..b33366016326a656ca13014cb6b5f996c35ff11c --- /dev/null +++ b/tabs/gaze_estimation.py @@ -0,0 +1,58 @@ +import tempfile +import cv2 +import dlib +import numpy as np +from scipy.spatial import distance as dist +from imutils import face_utils +import gradio as gr + +def detect_eye_movements(video_path): + detector = dlib.get_frontal_face_detector() + predictor = dlib.shape_predictor("assets/models/shape_predictor_68_face_landmarks.dat") + + cap = cv2.VideoCapture(video_path) + frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + with tempfile.NamedTemporaryFile(delete=False, suffix='.avi') as temp_file: + out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'XVID'), 20.0, (frame_width, frame_height)) + gaze_points = [] + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + for rect in detector(gray, 0): + shape = face_utils.shape_to_np(predictor(gray, rect)) + for eye in [shape[36:42], shape[42:48]]: + eye_center = eye.mean(axis=0).astype("int") + gaze_points.append(eye_center) + cv2.circle(frame, tuple(eye_center), 3, (0, 255, 0), -1) + + out.write(frame) + + cap.release() + out.release() + + fixed_threshold = 10 + fixed_gaze_count = sum(dist.euclidean(gaze_points[i-1], gaze_points[i]) < fixed_threshold + for i in range(1, len(gaze_points))) + gaze_type = "Fixed Gaze" if fixed_gaze_count > len(gaze_points) // 2 else "Scattered Gaze" + + return temp_file.name, gaze_type + +def create_gaze_estimation_tab(): + with gr.Row(): + with gr.Column(scale=1): + input_video = gr.Video(label="Input Video") + with gr.Row(): + clear_btn = gr.Button("Clear") + submit_btn = gr.Button("Analyze", elem_classes="submit") + with gr.Column(scale=1, elem_classes="dl4"): + output_video = gr.Video(label="Processed Video", elem_classes="video2") + output_gaze_type = gr.Label(label="Gaze Type") + + submit_btn.click(detect_eye_movements, inputs=input_video, outputs=[output_video, output_gaze_type], queue=True) + clear_btn.click(lambda: (None, None, None), outputs=[input_video, output_video, output_gaze_type], queue=True) + 
gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video]) \ No newline at end of file diff --git a/tabs/head_posture_detection.py b/tabs/head_posture_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..c369465b76463227fbf489295a0e7144ab7a7216 --- /dev/null +++ b/tabs/head_posture_detection.py @@ -0,0 +1,58 @@ +import tempfile +import cv2 +import dlib +import numpy as np +from scipy.spatial import distance as dist +from imutils import face_utils +import gradio as gr + +def detect_head_posture(video_path): + detector = dlib.get_frontal_face_detector() + predictor = dlib.shape_predictor("assets/models/shape_predictor_68_face_landmarks.dat") + + cap = cv2.VideoCapture(video_path) + frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + with tempfile.NamedTemporaryFile(delete=False, suffix='.avi') as temp_file: + out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*'XVID'), 20.0, (frame_width, frame_height)) + posture_data = [] + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + for rect in detector(gray, 0): + shape = face_utils.shape_to_np(predictor(gray, rect)) + jaw_width = dist.euclidean(shape[1], shape[15]) + jaw_height = dist.euclidean(shape[8], (shape[1] + shape[15]) / 2) + + posture = "Upright" if jaw_height / jaw_width > 0.5 else "Slumped" + posture_data.append(posture) + + for (x, y) in shape: + cv2.circle(frame, (x, y), 1, (0, 255, 0), -1) + + out.write(frame) + + cap.release() + out.release() + + posture_type = max(set(posture_data), key=posture_data.count) + return temp_file.name, posture_type + +def create_head_posture_tab(): + with gr.Row(): + with gr.Column(scale=1): + input_video = gr.Video(label="Input Video") + with gr.Row(): + clear_btn = gr.Button("Clear") + submit_btn = gr.Button("Analyze", elem_classes="submit") + with gr.Column(scale=1, elem_classes="dl4"): + output_video = gr.Video(label="Processed Video", elem_classes="video2") + output_posture = gr.Label(label="Posture Type") + + submit_btn.click(detect_head_posture, inputs=input_video, outputs=[output_video, output_posture], queue=True) + clear_btn.click(lambda: (None, None, None), outputs=[input_video, output_video, output_posture], queue=True) + gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video]) \ No newline at end of file diff --git a/tabs/heart_rate_variability.py b/tabs/heart_rate_variability.py new file mode 100644 index 0000000000000000000000000000000000000000..58ed8d744048f3527f18e5ec675df9bf8d198ffe --- /dev/null +++ b/tabs/heart_rate_variability.py @@ -0,0 +1,60 @@ +import gradio as gr +import numpy as np +import matplotlib.pyplot as plt +import tempfile +import cv2 +from scipy.signal import find_peaks, butter, filtfilt + +def extract_ppg_signal(video_file): + cap = cv2.VideoCapture(video_file) + ppg_signal = [] + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + ppg_signal.append(np.mean(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))) + cap.release() + return np.array(ppg_signal) + +def process_ppg_signal(ppg_signal, fs=30): + nyquist = 0.5 * fs + b, a = butter(1, [0.5 / nyquist, 3.0 / nyquist], btype='band') + return filtfilt(b, a, ppg_signal) + +def detect_hrv(video_file): + filtered_signal = process_ppg_signal(extract_ppg_signal(video_file)) + peaks, _ = find_peaks(filtered_signal, distance=15) # 30 fps / 2.5 + rr_intervals = np.diff(peaks) / 30 + heart_rate = 60 / rr_intervals.mean() + hrv = 
np.std(rr_intervals) + + plt.figure(figsize=(10, 4)) + time = np.arange(len(filtered_signal)) / 30 + plt.plot(time, filtered_signal, label='Filtered PPG Signal') + plt.plot(time[peaks], filtered_signal[peaks], 'ro', label='Detected Peaks') + plt.title('Heart Rate Variability over Time') + plt.xlabel('Time (s)') + plt.ylabel('PPG Signal Intensity') + plt.legend() + plt.tight_layout() + with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file: + plt.savefig(temp_file.name) + plt.close() + + return f"{hrv:.2f} ms", f"{heart_rate:.2f} BPM", temp_file.name + +def create_hrv_tab(): + with gr.Row(): + with gr.Column(scale=2): + input_video = gr.Video(label="Input Video") + with gr.Row(): + clear_btn = gr.Button("Clear", scale=1) + submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit") + with gr.Column(scale=1): + output_hrv = gr.Label(label="HRV Value") + output_hr = gr.Label(label="Average Heart Rate") + output_plot = gr.Image(label="HRV Plot") + + submit_btn.click(detect_hrv, inputs=[input_video], outputs=[output_hrv, output_hr, output_plot], queue=True) + clear_btn.click(lambda: (None, None, None, None), outputs=[input_video, output_hrv, output_hr, output_plot], queue=True) + gr.Examples(["./assets/videos/fitness.mp4"], [input_video]) \ No newline at end of file diff --git a/tabs/posture_analysis.py b/tabs/posture_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..15545e04aefa2658c276d561c5aec0c1f571defa --- /dev/null +++ b/tabs/posture_analysis.py @@ -0,0 +1,49 @@ +import gradio as gr +import cv2 +import numpy as np +import tempfile +import os + +def analyze_posture(video): + with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file: + video_path = video if isinstance(video, str) else temp_file.name + if not isinstance(video, str): + temp_file.write(video) + + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + return "Error: Unable to open video file." 
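+    # Heuristic: left/right symmetry is used as a rough stand-in for posture quality by
+    # mirroring the right half of each frame and summing absolute differences against the
+    # left half; the score thresholds below are arbitrary.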
+ + posture_score = frame_count = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + left_half = frame[:, :frame.shape[1]//2] + right_half = cv2.flip(frame[:, frame.shape[1]//2:], 1) + posture_score += np.sum(cv2.absdiff(left_half, right_half)) + frame_count += 1 + + cap.release() + if not isinstance(video, str): + os.unlink(video_path) + + avg_posture_score = posture_score / frame_count if frame_count > 0 else 0 + posture_quality = "Good" if avg_posture_score < 1000000 else "Fair" if avg_posture_score < 2000000 else "Poor" + + return f"Posture quality: {posture_quality}\nAverage posture score: {avg_posture_score:.2f}" + +def create_posture_analysis_tab(): + with gr.Column(): + video_input = gr.Video() + analyze_button = gr.Button("Analyze") + output = gr.Textbox(label="Analysis Results") + + analyze_button.click(analyze_posture, inputs=video_input, outputs=output) + + # Add examples + gr.Examples( + examples=["./assets/videos/fitness.mp4"], + inputs=video_input + ) \ No newline at end of file diff --git a/tabs/roberta_chatbot.py b/tabs/roberta_chatbot.py new file mode 100644 index 0000000000000000000000000000000000000000..b5f8bc146b2348e98bf4eae0308a9a66cf2f1a8e --- /dev/null +++ b/tabs/roberta_chatbot.py @@ -0,0 +1,42 @@ +import gradio as gr +from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification + +def create_roberta_chatbot_tab(): + # Load pre-trained model and tokenizer + model_name = "roberta-base" + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = AutoModelForSequenceClassification.from_pretrained(model_name) + + # Create a text classification pipeline + classifier = pipeline("text-classification", model=model, tokenizer=tokenizer) + + # Define response generation function + def generate_response(input_text): + # Classify the input text + result = classifier(input_text)[0] + label = result['label'] + score = result['score'] + + # Map the classification result to a response + responses = { + "LABEL_0": "I understand you might be going through a difficult time. Remember, it's okay to seek help when you need it.", + "LABEL_1": "Your feelings are valid. Have you considered talking to a mental health professional about this?", + "LABEL_2": "Taking care of your mental health is crucial. Small steps like regular exercise and good sleep can make a big difference.", + "LABEL_3": "It sounds like you're dealing with a lot. Remember, you're not alone in this journey.", + "LABEL_4": "I hear you. Coping with mental health challenges can be tough. Have you tried any relaxation techniques like deep breathing or meditation?" + } + + return responses.get(label, "I'm here to listen and support you. 
Could you tell me more about how you're feeling?") + + # Define chatbot function for Gradio + def chatbot(message, history): + response = generate_response(message) + return response + + # Create Gradio interface + iface = gr.ChatInterface( + fn=chatbot, + # title="Mental Health Support Chatbot (RoBERTa)", + ) + + return iface \ No newline at end of file diff --git a/tabs/sentiment_analysis.py b/tabs/sentiment_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..38ed7a7992ce40f401956b79f310eab192ae1dbe --- /dev/null +++ b/tabs/sentiment_analysis.py @@ -0,0 +1,36 @@ +import os +import torch +from transformers import AutoTokenizer, AutoModelForSequenceClassification +import gradio as gr + +os.environ["TOKENIZERS_PARALLELISM"] = "true" + +sentiment_tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment") +sentiment_model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment") +sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"] + +def analyze_sentiment(text): + try: + inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512) + outputs = sentiment_model(**inputs) + probs = torch.nn.functional.softmax(outputs.logits, dim=-1) + max_prob, max_index = torch.max(probs, dim=1) + return sentiment_labels[max_index.item()], f"{max_prob.item():.4f}" + except Exception as e: + print(f"Error in sentiment analysis: {e}") + return "Error", "N/A" + +def create_sentiment_tab(): + with gr.Row(): + with gr.Column(scale=2): + input_text = gr.Textbox(value="I actually speak to the expets myself to give you the best value you can get", lines=5, placeholder="Enter text here...", label="Input Text") + with gr.Row(): + clear_btn = gr.Button("Clear", scale=1) + submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit") + with gr.Column(scale=1): + output_sentiment = gr.Textbox(label="Detected Sentiment") + output_confidence = gr.Textbox(label="Sentiment Confidence Score") + + submit_btn.click(analyze_sentiment, inputs=[input_text], outputs=[output_sentiment, output_confidence], queue=True) + clear_btn.click(lambda: ("", "", ""), outputs=[input_text, output_sentiment, output_confidence], queue=True) + gr.Examples(["I am so happy today!", "I feel terrible and sad.", "This is a neutral statement."], inputs=[input_text]) \ No newline at end of file diff --git a/tabs/skin_analysis.py b/tabs/skin_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..6a33981bc8060fde298d3a5c4cd7c7dd1e5027bf --- /dev/null +++ b/tabs/skin_analysis.py @@ -0,0 +1,27 @@ +import gradio as gr +import cv2 +import numpy as np + +def analyze_skin_conductance(image): + img = image if isinstance(image, np.ndarray) else cv2.imread(image) + if img is None: + return "Error: Unable to process the image." 
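+    # Heuristic: the HSV range below roughly selects skin-like pixels, and their share of
+    # the image is used as a crude proxy for sweat/arousal; this is not a true skin
+    # conductance (GSR) measurement.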
+ + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + mask = cv2.inRange(hsv, np.array([0, 20, 70], dtype=np.uint8), np.array([20, 150, 255], dtype=np.uint8)) + + sweat_percentage = (np.count_nonzero(mask) / mask.size) * 100 + stress_level = "Low" if sweat_percentage < 10 else "Medium" if sweat_percentage < 30 else "High" + + return f"Estimated stress/arousal level: {stress_level}\nSweat indicator percentage: {sweat_percentage:.2f}%" + +def create_skin_conductance_tab(): + with gr.Column(): + image_input = gr.Image() + analyze_button = gr.Button("Analyze") + output = gr.Textbox(label="Analysis Results") + + analyze_button.click(analyze_skin_conductance, inputs=image_input, outputs=output) + + # Add the Examples component + gr.Examples(["./assets/images/fitness.jpg"], inputs=[image_input]) \ No newline at end of file diff --git a/tabs/sleep_quality.py b/tabs/sleep_quality.py new file mode 100644 index 0000000000000000000000000000000000000000..72e7d8022717cc9187bc3521fd7b6ada1fe202d6 --- /dev/null +++ b/tabs/sleep_quality.py @@ -0,0 +1,37 @@ +import gradio as gr +from app.app_utils import preprocess_video_and_predict_sleep_quality + +def clear_sleep_quality_info(): + return [gr.Video(value=None)] * 3 + [gr.Image(value=None), gr.Plot(value=None)] + +def create_sleep_quality_tab(): + with gr.Row(): + with gr.Column(scale=1): + input_video = gr.Video(elem_classes="video1") + with gr.Row(): + clear_btn = gr.Button("Clear") + submit_btn = gr.Button("Analyze", elem_classes="submit") + with gr.Column(scale=1, elem_classes="dl4"): + outputs = [ + gr.Video(label=label, elem_classes=f"video{i+2}") + for i, label in enumerate(["Original video", "Pre-processed video", "Sleep quality analysis"]) + ] + outputs.extend([ + gr.Image(label="Eye bags detection", elem_classes="eyebags"), + gr.Plot(label="Sleep quality indicators", elem_classes="stat") + ]) + + submit_btn.click( + fn=preprocess_video_and_predict_sleep_quality, + inputs=input_video, + outputs=outputs, + queue=True, + ) + + clear_btn.click( + fn=clear_sleep_quality_info, + outputs=[input_video] + outputs, + queue=True, + ) + + gr.Examples(["./assets/videos/fitness.mp4"], inputs=[input_video]) \ No newline at end of file diff --git a/tabs/speech_emotion_recognition.py b/tabs/speech_emotion_recognition.py new file mode 100644 index 0000000000000000000000000000000000000000..056e1f484fbfd280c42e726f317ed1ab3e916b01 --- /dev/null +++ b/tabs/speech_emotion_recognition.py @@ -0,0 +1,54 @@ +import gradio as gr +import numpy as np +import librosa +import librosa.display +import matplotlib.pyplot as plt +from transformers import pipeline + +emotion_model = pipeline("audio-classification", model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition") +transcription_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h") + +emotion_mapping = { + "angry": (0.8, 0.8, -0.5), "happy": (0.6, 0.6, 0.8), "sad": (-0.6, -0.4, -0.6), + "neutral": (0, 0, 0), "fear": (0.3, -0.3, -0.7), "surprise": (0.4, 0.2, 0.2), + "disgust": (0.2, 0.5, -0.6), "calm": (-0.2, 0.1, 0.3), "excited": (0.7, 0.5, 0.7), + "frustrated": (0.6, 0.5, -0.4) +} + +def process_audio(audio_file): + y, sr = librosa.load(audio_file, sr=None) + transcription = transcription_model(audio_file)["text"] + emotion_result = emotion_model(audio_file)[0] + emotion, confidence = emotion_result["label"], emotion_result["score"] + arousal, dominance, valence = emotion_mapping.get(emotion.lower(), (0, 0, 0)) + + plt.figure(figsize=(10, 4)) + librosa.display.waveshow(y, sr=sr) + 
plt.title("Waveform") + waveform_plot = plt.gcf() + plt.close() + + mel_spec = librosa.feature.melspectrogram(y=y, sr=sr) + plt.figure(figsize=(10, 4)) + librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max), sr=sr, x_axis='time', y_axis='mel') + plt.colorbar(format='%+2.0f dB') + plt.title("Mel Spectrogram") + mel_spec_plot = plt.gcf() + plt.close() + + return transcription, emotion, confidence, arousal, dominance, valence, waveform_plot, mel_spec_plot + +def create_emotion_recognition_tab(): + with gr.Row(): + with gr.Column(scale=2): + audio_input = gr.Audio(type="filepath") + gr.Examples(["./assets/audio/fitness.wav"], inputs=[audio_input]) + transcription_output = gr.Textbox(label="Transcription") + emotion_output = gr.Textbox(label="Emotion") + with gr.Column(scale=1): + outputs = [gr.Number(label=label) for label in ["Confidence", "Arousal", "Dominance", "Valence"]] + with gr.Column(scale=1): + plots = [gr.Plot(label=label) for label in ["Waveform", "Mel Spectrogram"]] + + audio_input.change(process_audio, inputs=[audio_input], + outputs=[transcription_output, emotion_output] + outputs + plots) \ No newline at end of file diff --git a/tabs/speech_stress_analysis.py b/tabs/speech_stress_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..570480dc57431915cd589614458d56287827ad11 --- /dev/null +++ b/tabs/speech_stress_analysis.py @@ -0,0 +1,66 @@ +import gradio as gr +import librosa +import numpy as np +import matplotlib.pyplot as plt +import tempfile + +def extract_audio_features(audio_file): + y, sr = librosa.load(audio_file, sr=None) + mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) + pitches, magnitudes = librosa.piptrack(y=y, sr=sr) + pitches = pitches[(magnitudes > np.median(magnitudes)) & (pitches > 0)] + energy = librosa.feature.rms(y=y)[0] + return mfccs, pitches, energy + +def analyze_voice_stress(audio_file): + if not audio_file: + return "No audio file provided.", None + + try: + mfccs, pitches, energy = extract_audio_features(audio_file) + + stress_level = (np.var(mfccs) + (np.var(pitches) if len(pitches) > 0 else 0) + np.var(energy)) / 3 + normalized_stress = min(100, (stress_level / 1000) * 100) + + fig, axs = plt.subplots(3, 1, figsize=(10, 12)) + plots = [ + (mfccs, 'MFCCs', 'MFCC Coefficient', 'imshow', {'aspect': 'auto', 'origin': 'lower'}), + (pitches, 'Pitch', 'Frequency (Hz)', 'plot', {}), + (energy, 'Energy', 'RMS Energy', 'plot', {}) + ] + + for i, (data, title, ylabel, plot_type, plot_args) in enumerate(plots): + getattr(axs[i], plot_type)(data, **plot_args) + axs[i].set_title(title) + axs[i].set_ylabel(ylabel) + axs[2].set_xlabel('Time') + + plt.tight_layout() + with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file: + plt.savefig(temp_file.name) + plt.close() + + stress_interpretation = "Low" if normalized_stress < 33 else "Medium" if normalized_stress < 66 else "High" + return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", temp_file.name + except Exception as e: + return str(e), None + +def create_voice_stress_tab(): + with gr.Row(): + with gr.Column(scale=2): + input_audio = gr.Audio(label="Input Audio", type="filepath") + with gr.Row(): + clear_btn = gr.Button("Clear", scale=1) + submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit") + with gr.Column(scale=1): + output_stress = gr.Label(label="Stress Level") + output_plot = gr.Image(label="Stress Analysis Plot") + + submit_btn.click(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot]) + 
clear_btn.click(lambda: (None, None, None), outputs=[input_audio, output_stress, output_plot]) + + gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio]) + +with gr.Blocks() as demo: + gr.Markdown("# Voice Stress Analysis") + create_voice_stress_tab() \ No newline at end of file diff --git a/tabs/whisperVoiceMetrics_OK.py b/tabs/whisperVoiceMetrics_OK.py new file mode 100644 index 0000000000000000000000000000000000000000..6e093fefb24faf7fd18ecac97470e9eaea6609af --- /dev/null +++ b/tabs/whisperVoiceMetrics_OK.py @@ -0,0 +1,63 @@ +import gradio as gr +from transformers import pipeline +import librosa +import numpy as np +import matplotlib.pyplot as plt + +# Load Whisper model using transformers pipeline +transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en") + +def analyze_audio(audio): + # Convert audio to text using Whisper + transcription_result = transcriber(audio) + transcription = transcription_result["text"] + + # Load audio file + y, sr = librosa.load(audio, sr=None) + + # Extract prosodic features + pitch = librosa.yin(y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7')) + tempo, _ = librosa.beat.beat_track(y=y, sr=sr) + + # Calculate pitch variance + pitch_variance = np.var(pitch) + + # Estimate speaking pace (syllables per second) + # This is a simplified estimation based on the number of words + num_syllables = len(transcription.split()) + duration = librosa.get_duration(y=y, sr=sr) + pace = num_syllables / duration + + # Plot pitch + plt.figure(figsize=(10, 4)) + plt.plot(pitch, label='Pitch') + plt.xlabel('Time') + plt.ylabel('Frequency (Hz)') + plt.title('Pitch Over Time') + plt.legend() + pitch_plot_path = '/tmp/pitch_contour.png' + plt.savefig(pitch_plot_path) + plt.close() + + # Voice Stress Analysis (simplified example) + stress_level = np.std(pitch) # Standard deviation as a simple stress indicator + + return transcription, tempo, pace, pitch_variance, pitch_plot_path + +# Create Gradio interface +input_audio = gr.Audio(label="Input Audio", type="filepath") + +iface = gr.Interface( + fn=analyze_audio, + inputs=input_audio, + outputs=[ + gr.Textbox(label="Transcription"), + gr.Number(label="Tempo (BPM)"), + gr.Number(label="Speaking Pace (syllables/sec)"), + gr.Number(label="Pitch Variance"), + gr.Image(label="Pitch Contour Plot") + ], + live=True +) + +iface.launch(share=False) diff --git a/tinnitus.py b/tinnitus.py new file mode 100644 index 0000000000000000000000000000000000000000..2e1cb253150f99a767eb7af7d6f997caa19cc882 --- /dev/null +++ b/tinnitus.py @@ -0,0 +1,79 @@ +import gradio as gr +import numpy as np +import sounddevice as sd + +def generate_tone(freq, duration=1.0, sample_rate=44100, volume=0.1): + t = np.linspace(0, duration, int(sample_rate * duration), False) + return (volume * np.sin(2 * np.pi * freq * t)).astype(np.float32) + +def play_tone(freq, volume): + tone = generate_tone(freq, volume=volume) + sd.play(tone, samplerate=44100, blocking=True) + return f"Played tone at {freq} Hz" + +def calculate_sequence_frequencies(tinnitus_freq): + return [ + int(tinnitus_freq * 0.77), + int(tinnitus_freq * 0.9), + int(tinnitus_freq * 1.1), + int(tinnitus_freq * 1.4) + ] + +def play_sequence(tinnitus_freq, volume): + frequencies = calculate_sequence_frequencies(tinnitus_freq) + sequence = [] + for freq in frequencies: + tone = generate_tone(freq, duration=0.5, volume=volume) + sequence.extend(tone) + sequence.extend(np.zeros(int(44100 * 0.1))) # 0.1s pause between tones + + sd.play(np.array(sequence), 
samplerate=44100, blocking=True) + return f"Played sequence: {frequencies} Hz" + +def update_frequencies(tinnitus_freq): + frequencies = calculate_sequence_frequencies(tinnitus_freq) + return f"frequencies used in sequence: {', '.join(map(str, frequencies))}" + +with gr.Blocks(title="ACRN Tinnitus Protocol") as demo: + gr.Markdown( + """ + # ACRN Tinnitus Protocol + + This is my attempt at implementing the Acoustic Coordinated Reset Neuromodulation tinnitus treatment protocol using [this paper](https://link-to-paper) as a guide. + + - First lower the volume on your device, so it is not too loud to start. + - Start the tone by pressing the "Play Tone" button. + - Adjust the frequency slider until it matches your tinnitus tone. You can also type in the frequency if you know it already. + - Adjust the volume until it is a little bit louder than your tinnitus tone. + - Switch from "Tone" to "Sequence" mode + + Inspired by [this thread on tinnitustalk.com](https://link-to-thread) and [this reddit thread](https://link-to-reddit-thread). + """ + ) + + with gr.Row(): + tone_btn = gr.Button("Tone") + sequence_btn = gr.Button("Sequence") + + with gr.Row(): + freq_slider = gr.Slider(minimum=100, maximum=20000, value=12694, step=1, label="Frequency") + freq_number = gr.Number(value=12694, label="Frequency") + + volume_slider = gr.Slider(minimum=0.01, maximum=1.0, value=0.1, step=0.01, label="Volume") + + output = gr.Textbox(label="Output") + freq_output = gr.Markdown() + + def update_freq(value, slider): + return gr.Number.update(value=value) if slider else gr.Slider.update(value=value) + + freq_slider.change(update_freq, inputs=[freq_slider, gr.State(True)], outputs=freq_number) + freq_number.change(update_freq, inputs=[freq_number, gr.State(False)], outputs=freq_slider) + + freq_slider.change(update_frequencies, inputs=[freq_slider], outputs=[freq_output]) + freq_number.change(update_frequencies, inputs=[freq_number], outputs=[freq_output]) + + tone_btn.click(play_tone, inputs=[freq_slider, volume_slider], outputs=[output]) + sequence_btn.click(play_sequence, inputs=[freq_slider, volume_slider], outputs=[output]) + +demo.launch() \ No newline at end of file diff --git a/ui_components.py b/ui_components.py new file mode 100644 index 0000000000000000000000000000000000000000..d78b5a7ef23c62d56240f6df82231b0cab10add8 --- /dev/null +++ b/ui_components.py @@ -0,0 +1,58 @@ +# ui_components.py + +CUSTOM_CSS = """ +.main-tab > .tab-nav > button { + font-size: 20px; + font-weight: bold; +} +""" + +HEADER_HTML = """ +
+<div>
+    <img src="assets/images/dyaglogo.webp" alt="Dyagnosys Logo">
+    <h1>Multi-Modal for Emotion and Sentiment Analysis (MMESA)</h1>
+    <p>Important Disclaimer</p>
+</div>
+""" + +DISCLAIMER_HTML = ''' +
+<div>
+    <h2>IMPORTANT DISCLAIMER</h2>
+
+    <div>
+        <h3>Not a Medical Device</h3>
+        <p>This software is not intended to be a medical device as defined by the FDA, EMA, or other regulatory bodies. It is not designed, intended, or authorized for use in the diagnosis of disease or other conditions, or in the cure, mitigation, treatment, or prevention of disease.</p>
+    </div>
+
+    <div>
+        <h3>Research and Educational Use Only</h3>
+        <p>This software is provided solely for research, educational, and informational purposes. It should not be relied upon for medical advice, diagnosis, or treatment.</p>
+    </div>
+
+    <div>
+        <h3>No Substitute for Professional Medical Advice</h3>
+        <p>The information provided by this software is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of your physician or other qualified health provider with any questions you may have regarding a medical condition.</p>
+    </div>
+
+    <div>
+        <h3>Data Privacy and Security</h3>
+        <p>While we implement reasonable data protection measures, users should be aware of the inherent risks of transmitting information over the internet. By using this software, you acknowledge and accept these risks.</p>
+    </div>
+
+    <div>
+        <h3>No Warranty</h3>
+        <p>This software is provided "as is" without warranty of any kind, either expressed or implied, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose.</p>
+    </div>
+
+    <div>
+        <h3>Limitation of Liability</h3>
+        <p>In no event shall the creators, copyright holders, or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.</p>
+    </div>
+
+    <p>By using this software, you acknowledge that you have read, understood, and agree to be bound by this disclaimer.</p>
+</div>
+''' \ No newline at end of file