diff --git "a/try.ipynb" "b/try.ipynb" deleted file mode 100644--- "a/try.ipynb" +++ /dev/null @@ -1,242 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'model'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdisplay\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mipd\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m nn\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CNNEmotinoalClassifier\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'model'" - ] - } - ], - "source": [ - "import gradio as gr\n", - "import torch\n", - "# from lr_ed.model import CNNEmotinoalClassifier\n", - "import torchaudio\n", - "import IPython.display as ipd\n", - "from torch import nn\n", - "from model import CNNEmotinoalClassifier" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CNNEmotinoalClassifier(\n", - " (conv1): Sequential(\n", - " (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (1): ReLU()\n", - " (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", - " )\n", - " (conv2): Sequential(\n", - " (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", - " (1): ReLU()\n", - " (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", - " )\n", - " (conv3): Sequential(\n", - " (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))\n", - " (1): ReLU()\n", - " (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", - " )\n", - " (conv4): Sequential(\n", - " (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))\n", - " (1): ReLU()\n", - " (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", - " )\n", - " (flatten): Flatten(start_dim=1, end_dim=-1)\n", - " (fully_connected): Sequential(\n", - " (0): Linear(in_features=32000, out_features=128, bias=True)\n", - " (1): ReLU()\n", - " (2): Linear(in_features=128, out_features=64, bias=True)\n", - " (3): ReLU()\n", - " (4): Linear(in_features=64, out_features=32, bias=True)\n", - " (5): ReLU()\n", - " (6): Linear(in_features=32, out_features=16, bias=True)\n", - " (7): ReLU()\n", - " (8): Linear(in_features=16, out_features=6, bias=True)\n", - " )\n", - " (softmax): Softmax(dim=1)\n", - ")" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = CNNEmotinoalClassifier()\n", - "model.load_state_dict(torch.load('/raid/adal_abilbekov/lr_ed/CNN_emotional_classifier/cnn_class_17.pt'))\n", - "model.eval()" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 47, - "metadata": {}, 
- "output_type": "execute_result" - } - ], - "source": [ - "# path = '/raid/adal_abilbekov/emodiff_try_2/Emo_diff/demo_190224/Akzhol_happy.wav'\n", - "# path = '/raid/adal_abilbekov/emodiff_try_2/Emo_diff/demo_190224/Akzhol_neutral.wav'\n", - "path = '/raid/adal_abilbekov/emodiff_try_2/Emo_diff/demo_190224/Marzhan_happy.wav'\n", - "waveform, sr = torchaudio.load(path)\n", - "ipd.Audio(data=waveform, rate=sr)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "to_melspec = torchaudio.transforms.MelSpectrogram(\n", - " sample_rate= 22050,\n", - " n_fft = 1024,\n", - " hop_length = 512,\n", - " n_mels=64\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "def _get_right_pad(target_waveform, waveform):\n", - " target_waveform = target_waveform\n", - " waveform_samples_number = waveform.shape[1]\n", - " if waveform_samples_number < target_waveform:\n", - " right_pad = target_waveform - waveform_samples_number\n", - " padding_touple = (0, right_pad)\n", - " waveform_padded = nn.functional.pad(waveform, padding_touple)\n", - " else:\n", - " waveform_padded = waveform\n", - " return waveform_padded" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "waveform = _get_right_pad(400384, waveform)\n", - "input_x = to_melspec(waveform)\n", - "input_x = torch.unsqueeze(input_x, dim=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "probs = model(input_x)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [], - "source": [ - "emotions = ['happy', 'angry', 'sad', 'neutral', 'surprised', 'fear']\n", - "emotions = sorted(emotions)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [], - "source": [ - "# def get_probs(input_x, emotions):\n", - "# probs = model(input_x)\n", - "# prediction = emotions[probs.argmax(dim=1).item()]\n", - "# return prediction, dict(zip(emotions, list(map(float, probs))))" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[2.9495e-18, 6.7292e-20, 9.9882e-01, 2.4566e-18, 1.0296e-12, 1.1847e-03]],\n", - " grad_fn=)" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "probs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "asr_hug", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}