Spaces:

Tej3
/

ECG_Classification

Runtime error

App Files Files Community

Tej3 committed on May 30, 2023

Commit

71bd54f

•

1 Parent(s): 2dc883d

Committing App

Browse files

Files changed (23) hide show

app.py +114 -0
demo_data/.gitkeep +0 -0
demo_data/model_MMCNN_CAT_epoch_30_acc_84.pt +3 -0
demo_data/model_MMRNN_undersampled_augmented_rn_epoch_20_acc_84.pt +3 -0
demo_data/test/00001_lr.dat +0 -0
demo_data/test/00001_lr.hea +13 -0
demo_data/test/00008_lr.dat +0 -0
demo_data/test/00008_lr.hea +13 -0
demo_data/test/00045_lr.dat +0 -0
demo_data/test/00045_lr.hea +13 -0
demo_data/test/00257_lr.dat +0 -0
demo_data/test/00257_lr.hea +13 -0
models/CNN.py +213 -0
models/RNN.py +71 -0
models/__pycache__/CNN.cpython-39.pyc +0 -0
models/__pycache__/RNN.cpython-39.pyc +0 -0
requirements.txt +13 -0
utils/RNN_utils.py +198 -0
utils/__pycache__/RNN_utils.cpython-39.pyc +0 -0
utils/__pycache__/helper_functions.cpython-39.pyc +0 -0
utils/__pycache__/trainer.cpython-39.pyc +0 -0
utils/helper_functions.py +86 -0
utils/trainer.py +141 -0

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import os
+import shutil
+import gradio as gr
+import numpy as np
+import wfdb
+import torch
+from wfdb.plot.plot import plot_wfdb
+from wfdb.io.record import Record, rdrecord
+from models.CNN import CNN, MMCNN_CAT
+from models.RNN import MMRNN
+from utils.helper_functions import predict
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from transformers import AutoTokenizer, AutoModel
+from langdetect import detect
+# Base directory used to resolve the bundled demo assets. It is the process
+# working directory, so edit this before running from anywhere other than
+# the repository root.
+CWD = os.getcwd()
+# Checkpoint paths for the two models selectable in the UI.
+MMCNN_CAT_ckpt_path = f"{CWD}/demo_data/model_MMCNN_CAT_epoch_30_acc_84.pt"
+MMRNN_ckpt_path = f"{CWD}/demo_data/model_MMRNN_undersampled_augmented_rn_epoch_20_acc_84.pt"
+# Define clinical models and tokenizers (one English, one German). They are
+# loaded once at import time and reused by embed() for every request.
+en_clin_bert = 'emilyalsentzer/Bio_ClinicalBERT'
+ger_clin_bert = 'smanjil/German-MedBERT'
+en_tokenizer = AutoTokenizer.from_pretrained(en_clin_bert)
+en_model = AutoModel.from_pretrained(en_clin_bert)
+g_tokenizer = AutoTokenizer.from_pretrained(ger_clin_bert)
+g_model = AutoModel.from_pretrained(ger_clin_bert)
+def preprocess(data_file_path):
+    data = [wfdb.rdsamp(data_file_path)]
+    data = np.array([signal for signal, meta in data])
+    return data
+def embed(notes):
+    if detect(notes) == 'en':
+        tokens = en_tokenizer(notes, return_tensors='pt')
+        outputs = en_model(**tokens)
+    else:
+        tokens = g_tokenizer(notes, return_tensors='pt')
+        outputs = g_model(**tokens)
+    embeddings = outputs.last_hidden_state
+    embedding = torch.mean(embeddings, dim=1).squeeze(0)
+    return embedding
+    # return torch.load(f'{"./data/embeddings/"}1.pt')
+def plot_ecg(path):
+    record100 = rdrecord(path)
+    return plot_wfdb(record=record100, title='ECG Signal Graph', figsize=(12,10), return_fig=True)
+def infer(model,data, notes):
+    embed_notes = embed(notes).unsqueeze(0)
+    data= torch.tensor(data)
+    if model == "CNN":
+        model = MMCNN_CAT()
+        checkpoint = torch.load(MMCNN_CAT_ckpt_path)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        data = data.transpose(1,2).float()
+    elif model == "RNN":
+        model = MMRNN(device='cpu')
+        model.load_state_dict(torch.load(MMRNN_ckpt_path)['model_state_dict'])
+        data = data.float()
+    model.eval()
+    outputs, predicted = predict(model, data, embed_notes, device='cpu')
+    outputs = torch.sigmoid(outputs)[0]
+    return {'Conduction Disturbance':round(outputs[0].item(),2), 'Hypertrophy':round(outputs[1].item(),2), 'Myocardial Infarction':round(outputs[2].item(),2), 'Normal ECG':round(outputs[3].item(),2), 'ST/T Change':round(outputs[4].item(),2)}
+def run(model_name, header_file, data_file, notes):
+    demo_dir = f"{CWD}/demo_data"
+    hdr_dirname, hdr_basename = os.path.split(header_file.name)
+    data_dirname, data_basename = os.path.split(data_file.name)
+    shutil.copyfile(data_file.name, f"{demo_dir}/{data_basename}")
+    shutil.copyfile(header_file.name, f"{demo_dir}/{hdr_basename}")
+    data = preprocess(f"{demo_dir}/{hdr_basename.split('.')[0]}")
+    ECG_graph = plot_ecg(f"{demo_dir}/{hdr_basename.split('.')[0]}")
+    os.remove(f"{demo_dir}/{data_basename}")
+    os.remove(f"{demo_dir}/{hdr_basename}")
+    output = infer(model_name, data, notes)
+    return output, ECG_graph
+# Build the Gradio UI: model selector, inputs and probability label side by
+# side, the ECG plot, the predict button, and clickable examples.
+with gr.Blocks() as demo:
+    with  gr.Row():
+        model = gr.Radio(['CNN', 'RNN'], label= "Select Model")
+    with gr.Row():
+        with gr.Column(scale=1):
+            # The record is uploaded as two files: the header and the signal data.
+            header_file = gr.File(label = "header_file", file_types=[".hea"])
+            data_file = gr.File(label = "data_file", file_types=[".dat"])
+            notes = gr.Textbox(label = "Clinical Notes")
+        with gr.Column(scale=1):
+            # Initial label state: every class at probability zero.
+            output_prob = gr.Label({'Normal ECG':0, 'Myocardial Infarction':0, 'ST/T Change':0, 'Conduction Disturbance':0, 'Hypertrophy':0}, show_label=False)
+    with gr.Row():
+        ecg_graph = gr.Plot(label = "ECG Signal Visualisation")
+    with gr.Row():
+        predict_btn = gr.Button("Predict Class")
+        # run() returns (probabilities, figure), matching the two outputs below.
+        predict_btn.click(fn= run, inputs = [model, header_file, data_file, notes], outputs=[output_prob, ecg_graph])
+    with gr.Row():
+        # Each example fills the header file, data file and notes inputs;
+        # the model selector is not part of the examples.
+        gr.Examples(examples=[[f"{CWD}/demo_data/test/00001_lr.hea", f"{CWD}/demo_data/test/00001_lr.dat", "sinusrhythmus periphere niederspannung"],\
+                              [f"{CWD}/demo_data/test/00008_lr.hea", f"{CWD}/demo_data/test/00008_lr.dat", "sinusrhythmus linkstyp qrs(t) abnormal    inferiorer infarkt     alter unbest."], \
+                              [f"{CWD}/demo_data/test/00045_lr.hea", f"{CWD}/demo_data/test/00045_lr.dat", "sinusrhythmus unvollstÃ„ndiger rechtsschenkelblock sonst normales ekg"],\
+                              [f"{CWD}/demo_data/test/00257_lr.hea", f"{CWD}/demo_data/test/00257_lr.dat", "premature atrial contraction(s). sinus rhythm. left atrial enlargement. qs complexes in v2. st segments are slightly elevated in v2,3. st segments are depressed in i, avl. t waves are low or flat in i, v5,6 and inverted in avl. consistent with ischaemic h"],\
+                                ],
+                    inputs = [header_file, data_file, notes])
+# Start the app only when executed as a script.
+if __name__ == "__main__":
+    demo.launch()

demo_data/.gitkeep ADDED Viewed

File without changes

demo_data/model_MMCNN_CAT_epoch_30_acc_84.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3735115ddc15ecab4844a13124616f339364795349aeef0476491accfa8b4eda
+size 25392011

demo_data/model_MMRNN_undersampled_augmented_rn_epoch_20_acc_84.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7cfaa76908e6246b051fd5725152bca28b5111a83ada18fec5848816d8bd6e7a
+size 1340343

demo_data/test/00001_lr.dat ADDED Viewed

Binary file (24 kB). View file

demo_data/test/00001_lr.hea ADDED Viewed

	@@ -0,0 +1,13 @@

+00001_lr 12 100 1000
+00001_lr.dat 16 1000.0(0)/mV 16 0 -119 1508 0 I
+00001_lr.dat 16 1000.0(0)/mV 16 0 -55 723 0 II
+00001_lr.dat 16 1000.0(0)/mV 16 0 64 64758 0 III
+00001_lr.dat 16 1000.0(0)/mV 16 0 86 64423 0 AVR
+00001_lr.dat 16 1000.0(0)/mV 16 0 -91 1211 0 AVL
+00001_lr.dat 16 1000.0(0)/mV 16 0 4 7 0 AVF
+00001_lr.dat 16 1000.0(0)/mV 16 0 -69 63827 0 V1
+00001_lr.dat 16 1000.0(0)/mV 16 0 -31 6999 0 V2
+00001_lr.dat 16 1000.0(0)/mV 16 0 0 63759 0 V3
+00001_lr.dat 16 1000.0(0)/mV 16 0 -26 61447 0 V4
+00001_lr.dat 16 1000.0(0)/mV 16 0 -39 64979 0 V5
+00001_lr.dat 16 1000.0(0)/mV 16 0 -79 832 0 V6

demo_data/test/00008_lr.dat ADDED Viewed

Binary file (24 kB). View file

demo_data/test/00008_lr.hea ADDED Viewed

	@@ -0,0 +1,13 @@

+00008_lr 12 100 1000
+00008_lr.dat 16 1000.0(0)/mV 16 0 -41 2321 0 I
+00008_lr.dat 16 1000.0(0)/mV 16 0 -80 4548 0 II
+00008_lr.dat 16 1000.0(0)/mV 16 0 -39 2234 0 III
+00008_lr.dat 16 1000.0(0)/mV 16 0 60 62047 0 AVR
+00008_lr.dat 16 1000.0(0)/mV 16 0 -1 0 0 AVL
+00008_lr.dat 16 1000.0(0)/mV 16 0 -60 3352 0 AVF
+00008_lr.dat 16 1000.0(0)/mV 16 0 45 232 0 V1
+00008_lr.dat 16 1000.0(0)/mV 16 0 -5 65262 0 V2
+00008_lr.dat 16 1000.0(0)/mV 16 0 5 63785 0 V3
+00008_lr.dat 16 1000.0(0)/mV 16 0 -55 58960 0 V4
+00008_lr.dat 16 1000.0(0)/mV 16 0 -70 3471 0 V5
+00008_lr.dat 16 1000.0(0)/mV 16 0 -40 2065 0 V6

demo_data/test/00045_lr.dat ADDED Viewed

Binary file (24 kB). View file

demo_data/test/00045_lr.hea ADDED Viewed

	@@ -0,0 +1,13 @@

+00045_lr 12 100 1000
+00045_lr.dat 16 1000.0(0)/mV 16 0 -181 1318 0 I
+00045_lr.dat 16 1000.0(0)/mV 16 0 -438 5652 0 II
+00045_lr.dat 16 1000.0(0)/mV 16 0 -257 4356 0 III
+00045_lr.dat 16 1000.0(0)/mV 16 0 310 62008 0 AVR
+00045_lr.dat 16 1000.0(0)/mV 16 0 38 64012 0 AVL
+00045_lr.dat 16 1000.0(0)/mV 16 0 -347 4979 0 AVF
+00045_lr.dat 16 1000.0(0)/mV 16 0 121 3953 0 V1
+00045_lr.dat 16 1000.0(0)/mV 16 0 51 64138 0 V2
+00045_lr.dat 16 1000.0(0)/mV 16 0 82 61158 0 V3
+00045_lr.dat 16 1000.0(0)/mV 16 0 -58 63682 0 V4
+00045_lr.dat 16 1000.0(0)/mV 16 0 -52 65025 0 V5
+00045_lr.dat 16 1000.0(0)/mV 16 0 -134 193 0 V6

demo_data/test/00257_lr.dat ADDED Viewed

Binary file (24 kB). View file

demo_data/test/00257_lr.hea ADDED Viewed

	@@ -0,0 +1,13 @@

+00257_lr 12 100 1000
+00257_lr.dat 16 1000.0(0)/mV 16 0 -8 8043 0 I
+00257_lr.dat 16 1000.0(0)/mV 16 0 24 3049 0 II
+00257_lr.dat 16 1000.0(0)/mV 16 0 32 60557 0 III
+00257_lr.dat 16 1000.0(0)/mV 16 0 -9 59959 0 AVR
+00257_lr.dat 16 1000.0(0)/mV 16 0 -20 6506 0 AVL
+00257_lr.dat 16 1000.0(0)/mV 16 0 28 64558 0 AVF
+00257_lr.dat 16 1000.0(0)/mV 16 0 -29 60014 0 V1
+00257_lr.dat 16 1000.0(0)/mV 16 0 -24 64087 0 V2
+00257_lr.dat 16 1000.0(0)/mV 16 0 138 1192 0 V3
+00257_lr.dat 16 1000.0(0)/mV 16 0 34 65087 0 V4
+00257_lr.dat 16 1000.0(0)/mV 16 0 26 65386 0 V5
+00257_lr.dat 16 1000.0(0)/mV 16 0 32 59612 0 V6

models/CNN.py ADDED Viewed

	@@ -0,0 +1,213 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchinfo import summary
+# Not in use yet
+class Conv1d_layer(nn.Module):
+    def __init__(self, in_channel, out_channel, kernel_size) -> None:
+        super().__init__()
+        self.conv = nn.Conv1d(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size)
+        self.batch_norm = torch.nn.BatchNorm1d(out_channel)
+        self.dropout = nn.Dropout1d(p=0.5)
+    def forward(self, x):
+        x= self.conv(x)
+        x = self.batch_norm(x)
+        x = self.dropout(x)
+        return x
+class CNN(nn.Module):
+    def __init__(self, ecg_channels=12):
+        super(CNN, self).__init__()
+        self.name = "CNN"
+        self.conv1 = nn.Conv1d(ecg_channels, 16, 7)
+        self.pool1 = nn.MaxPool1d(2, 2)
+        self.conv2 = nn.Conv1d(16, 32, 5)
+        self.pool2 = nn.MaxPool1d(2, 2)
+        self.conv3 = nn.Conv1d(32, 48, 3)
+        self.pool3 = nn.MaxPool1d(2, 2)
+        self.fc0 = nn.Linear(5856, 512)
+        self.fc1 = nn.Linear(512, 128)
+        self.fc2 = nn.Linear(128, 5)
+        self.activation = nn.ReLU()
+    def forward(self, x, notes=None):
+        x = self.pool1(self.activation(self.conv1(x)))
+        x = self.pool2(self.activation(self.conv2(x)))
+        x = self.pool3(self.activation(self.conv3(x)))
+        x = x.view(x.size(0),-1)
+        x = self.activation(self.fc0(x))
+        x = self.activation(self.fc1(x))
+        x = self.fc2(x)
+        x = x.squeeze(1)
+        return x
+class MMCNN_SUM(nn.Module):
+    def __init__(self, ecg_channels=12):
+        super(MMCNN_SUM, self).__init__()
+        # ECG processing Layers
+        self.name = "MMCNN_SUM"
+        self.conv1 = Conv1d_layer(ecg_channels, 16, 7)
+        self.pool1 = nn.MaxPool1d(2, 2)
+        self.conv2 = Conv1d_layer(16, 32, 5)
+        self.pool2 = nn.MaxPool1d(2, 2)
+        self.conv3 = Conv1d_layer(32, 48, 3)
+        self.pool3 = nn.MaxPool1d(2, 2)
+        self.fc0 = nn.Linear(5856, 512)
+        self.fc1 = nn.Linear(512, 128)
+        self.fc2 = nn.Linear(128, 5)
+        # Clinical Notes Processing Layers
+        self.fc_emb = nn.Linear(768, 128)
+        self.norm = nn.LayerNorm(128)
+        self.activation = nn.ReLU()
+    def forward(self, x, notes):
+        # ECG Processing
+        x = self.pool1(self.activation(self.conv1(x)))
+        x = self.pool2(self.activation(self.conv2(x)))
+        x = self.pool3(self.activation(self.conv3(x)))
+        x = x.view(x.size(0),-1)
+        x = self.activation(self.fc0(x))
+        x = self.activation(self.fc1(x))
+        # Notes Processing
+        notes = notes.view(notes.size(0),-1)
+        notes = self.activation(self.fc_emb(notes))
+        x = self.fc2(self.norm(x + notes))
+        x = x.squeeze(1)
+        return x
+class MMCNN_CAT(nn.Module):
+    def __init__(self, ecg_channels=12):
+        super(MMCNN_CAT, self).__init__()
+        # ECG processing Layers
+        self.name = "MMCNN_CAT"
+        self.conv1 = nn.Conv1d(ecg_channels, 16, 7)
+        self.pool1 = nn.MaxPool1d(2, 2)
+        self.conv2 = nn.Conv1d(16, 32, 5)
+        self.pool2 = nn.MaxPool1d(2, 2)
+        self.conv3 = nn.Conv1d(32, 48, 3)
+        self.pool3 = nn.MaxPool1d(2, 2)
+        self.fc0 = nn.Linear(5856, 512)
+        self.fc1 = nn.Linear(512, 128)
+        self.fc2 = nn.Linear(256, 5)
+        # Clinical Notes Processing Layers
+        self.fc_emb = nn.Linear(768, 128)
+        self.norm = nn.LayerNorm(128)
+        self.activation = nn.ReLU()
+    def forward(self, x, notes):
+        # ECG Processing
+        x = self.pool1(self.activation(self.conv1(x)))
+        x = self.pool2(self.activation(self.conv2(x)))
+        x = self.pool3(self.activation(self.conv3(x)))
+        x = x.view(x.size(0),-1)
+        x = self.activation(self.fc0(x))
+        x = self.activation(self.fc1(x))
+        # Notes Processing
+        notes = notes.view(notes.size(0),-1)
+        notes = self.activation(self.fc_emb(notes))
+        x = self.fc2(torch.cat((x,notes),dim=1))
+        x = x.squeeze(1)
+        return x
+class MMCNN_ATT(nn.Module):
+    def __init__(self, ecg_channels=12):
+        super(MMCNN_ATT, self).__init__()
+        # ECG processing Layers
+        self.name = "MMCNN_ATT"
+        self.conv1 = nn.Conv1d(ecg_channels, 16, 7)
+        self.pool1 = nn.MaxPool1d(2, 2)
+        self.conv2 = nn.Conv1d(16, 32, 5)
+        self.pool2 = nn.MaxPool1d(2, 2)
+        self.conv3 = nn.Conv1d(32, 48, 3)
+        self.pool3 = nn.MaxPool1d(2, 2)
+        self.fc0 = nn.Linear(5856, 512)
+        self.fc1 = nn.Linear(512, 128)
+        self.fc2 = nn.Linear(128, 5)
+        # Clinical Notes Processing Layers
+        self.fc_emb = nn.Linear(768, 128)
+        self.norm1 = nn.LayerNorm(128)
+        self.norm2 = nn.LayerNorm(128)
+        self.attention = nn.MultiheadAttention(128, 8, batch_first=True)
+        self.activation = nn.ReLU()
+    def forward(self, x, notes):
+        # ECG Processing
+        x = self.pool1(self.activation(self.conv1(x)))
+        x = self.pool2(self.activation(self.conv2(x)))
+        x = self.pool3(self.activation(self.conv3(x)))
+        x = x.view(x.size(0),-1)
+        x = self.activation(self.fc0(x))
+        x = self.activation(self.fc1(x))
+        x = self.norm1(x)
+        # Notes Processing
+        notes = notes.view(notes.size(0),-1)
+        notes = self.activation(self.fc_emb(notes))
+        notes = self.norm2(notes)
+        notes=notes.unsqueeze(1)
+        x=x.unsqueeze(1)
+        x,_= self.attention(notes, x, x)
+        x = self.fc2(x)
+        x = x.squeeze(1)
+        return x
+class MMCNN_SUM_ATT(nn.Module):
+    def __init__(self, ecg_channels=12):
+        super(MMCNN_SUM_ATT, self).__init__()
+        # ECG processing Layers
+        self.name = "MMCNN_SUM_ATT"
+        self.conv1 = nn.Conv1d(ecg_channels, 16, 7)
+        self.pool1 = nn.MaxPool1d(2, 2)
+        self.conv2 = nn.Conv1d(16, 32, 5)
+        self.pool2 = nn.MaxPool1d(2, 2)
+        self.conv3 = nn.Conv1d(32, 48, 3)
+        self.pool3 = nn.MaxPool1d(2, 2)
+        self.fc0 = nn.Linear(5856, 512)
+        self.fc1 = nn.Linear(512, 128)
+        self.fc2 = nn.Linear(128, 5)
+        # Clinical Notes Processing Layers
+        self.fc_emb = nn.Linear(768, 128)
+        self.norm = nn.LayerNorm(128)
+        self.attention = nn.MultiheadAttention(128, 8, batch_first=True)
+        self.activation = nn.ReLU()
+    def forward(self, x, notes):
+        # ECG Processing
+        x = self.pool1(self.activation(self.conv1(x)))
+        x = self.pool2(self.activation(self.conv2(x)))
+        x = self.pool3(self.activation(self.conv3(x)))
+        x = x.view(x.size(0),-1)
+        x = self.activation(self.fc0(x))
+        x = self.activation(self.fc1(x))
+        # Notes Processing
+        notes = notes.view(notes.size(0),-1)
+        notes = self.activation(self.fc_emb(notes))
+        x = self.norm(x + notes)
+        x=x.unsqueeze(1)
+        # print(x.shape)
+        x,_= self.attention(x, x, x)
+        x = self.fc2(x)
+        x = x.squeeze(1)
+        return x
+# When run as a script, print a torchinfo summary of the plain CNN for a
+# single input with 12 channels and 1000 samples.
+if __name__ == "__main__":
+    model = CNN()
+    # Alternative: summarise a single convolution block instead.
+    # model = Conv1d_layer(12, 16, 7)
+    summary(model, input_size = (1, 12, 1000))

models/RNN.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import torch
+import torch.nn as nn
+class RNN(nn.Module):
+    def __init__(self, input_dim=12, hidden_dim=64, num_layers=2, num_classes=5, cuda=True, device='cuda'):
+        super(RNN, self).__init__()
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.device = device
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=self.hidden_dim,
+                            num_layers=self.num_layers, batch_first=True)
+        self.fc1 = nn.Linear(self.hidden_dim, self.hidden_dim)
+        self.fc2 = nn.Linear(self.hidden_dim, num_classes)
+        self.relu = nn.ReLU()
+    def forward(self, x, notes):
+        h = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)
+        c = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)
+        nn.init.xavier_normal_(h)
+        nn.init.xavier_normal_(c)
+        h = h.to(self.device)
+        c = c.to(self.device)
+        x = x.to(self.device)
+        output, _ = self.lstm(x, (h, c))
+        out = self.fc2(self.relu(self.fc1(output[:, -1, :])))
+        return out
+class MMRNN(nn.ModuleList):
+    def __init__(self, input_dim=12, hidden_dim=64, num_layers=2, num_classes=5, embed_size=768, device="cuda"):
+        super(MMRNN, self).__init__()
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.device = device
+        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=self.hidden_dim,
+                            num_layers=self.num_layers, batch_first=True)
+        self.fc1 = nn.Linear(self.hidden_dim, embed_size)
+        self.fc2 = nn.Linear(embed_size, num_classes)
+        self.lnorm_out = nn.LayerNorm(embed_size)
+        self.lnorm_embed = nn.LayerNorm(embed_size)
+    def forward(self, x, note):
+        h = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)
+        c = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)
+        nn.init.xavier_normal_(h)
+        nn.init.xavier_normal_(c)
+        h = h.to(self.device)
+        c = c.to(self.device)
+        x = x.to(self.device)
+        note = note.to(self.device)
+        output, _ = self.lstm(x, (h, c))
+        # Take last hidden state
+        out = self.fc1(output[:, -1, :])
+        note = self.lnorm_embed(note)
+        out = self.lnorm_out(out)
+        out = note + out
+        out = self.fc2(out)
+        return out.squeeze(1)

models/__pycache__/CNN.cpython-39.pyc ADDED Viewed

Binary file (6.49 kB). View file

models/__pycache__/RNN.cpython-39.pyc ADDED Viewed

Binary file (2.34 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio==3.25.0
+langdetect==1.0.9
+matplotlib==3.6.3
+numpy==1.24.2
+pandas==1.5.3
+PyWavelets==1.4.1
+scikit_learn==1.2.1
+torch==1.12.1
+torchinfo==1.7.2
+torchvision==0.13.1
+tqdm==4.64.1
+transformers==4.28.1
+wfdb==4.1.0

utils/RNN_utils.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import torch
+import matplotlib.pyplot as plt
+from tqdm.autonotebook import tqdm
+import pywt
+import os
+def display_eval(epoch, epochs, tlength, global_step, tcorrect, tsamples, t_valid_samples, average_train_loss, average_valid_loss, total_acc_val):
+    """Write one progress line with training and validation metrics.
+    Args:
+        epoch: Zero-based epoch index (shown one-based).
+        epochs: Total number of epochs.
+        tlength: Step count per epoch; the step total shown is ``epochs*tlength``.
+        global_step: Current global step counter.
+        tcorrect: Number of correct training predictions so far.
+        tsamples: Number of training samples seen so far.
+        t_valid_samples: Number of validation samples.
+        average_train_loss: Mean training loss to report.
+        average_valid_loss: Mean validation loss to report.
+        total_acc_val: Number of correct validation predictions.
+    """
+    # tqdm.write is used instead of print for the progress line.
+    tqdm.write(
+        f'Epoch: [{epoch + 1}/{epochs}], Step [{global_step}/{epochs*tlength}] | Train Loss: {average_train_loss: .3f} \
+        | Train Accuracy: {tcorrect / tsamples: .3f} \
+        | Val Loss: {average_valid_loss: .3f} \
+        | Val Accuracy: {total_acc_val / t_valid_samples: .3f}')
+def save_model(model, optimizer, valid_loss, epoch, path='model.pt'):
+    torch.save({'valid_loss': valid_loss,
+                'model_state_dict': model.state_dict(),
+                'epoch': epoch + 1,
+                'optimizer': optimizer.state_dict()
+                }, path)
+    tqdm.write(f'Model saved to ==> {path}')
+def save_metrics(train_loss_list, valid_loss_list, global_steps_list, path='metrics.pt'):
+    torch.save({'train_loss_list': train_loss_list,
+                'valid_loss_list': valid_loss_list,
+                'global_steps_list': global_steps_list,
+                }, path)
+def plot_losses(metrics_save_name='metrics', save_dir='./'):
+    path = f'{save_dir}metrics_{metrics_save_name}.pt'
+    state = torch.load(path)
+    train_loss_list = state['train_loss_list']
+    valid_loss_list = state['valid_loss_list']
+    global_steps_list = state['global_steps_list']
+    plt.plot(global_steps_list, train_loss_list, label='Train')
+    plt.plot(global_steps_list, valid_loss_list, label='Valid')
+    plt.xlabel('Global Steps')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()
+def train_RNN(epochs, train_loader, valid_loader, model, loss_fn, optimizer, eval_every=0.25, best_valid_loss=float("Inf"), device='cuda', model_save_name='', save_dir='./'):
+    """Train ``model`` with wavelet-denoised inputs and periodic validation.
+    Every batch is decomposed with a ``db4`` wavelet at level 3 along axis 1,
+    hard-thresholded, and reconstructed before it reaches the model. A
+    prediction counts as correct when the label at the arg-max index is 1.
+    Args:
+        epochs: Number of passes over ``train_loader``.
+        train_loader: Loader yielding ``(images, labels, notes)`` batches.
+        valid_loader: Loader with the same batch structure, for validation.
+        model: Module called as ``model(images, notes)``.
+        loss_fn: Loss called as ``loss_fn(output, labels.float())``.
+        optimizer: Optimizer stepped once per batch.
+        eval_every: Fraction of the training set between evaluations.
+        best_valid_loss: Starting threshold for checkpointing.
+        device: Device the tensors are moved to.
+        model_save_name: Name suffix for checkpoint and metrics files.
+        save_dir: Directory prefix, concatenated as-is.
+    Returns:
+        The trained ``model``.
+    """
+    model.train()
+    running_loss = 0.0
+    valid_running_loss = 0.0
+    global_step = 0
+    train_loss_list = []
+    valid_loss_list = []
+    global_steps_list = []
+    wavelet = 'db4'
+    level = 3
+    for epoch in tqdm(range(epochs)):
+        # Training statistics restart at every epoch.
+        running_loss = 0.0
+        t_correct = 0
+        t_samples = 0
+        for images, labels, notes in train_loader:
+            optimizer.zero_grad()
+            # Wavelet denoising: the threshold is 0.1 times the median
+            # magnitude of the last coefficient array, and it is applied to
+            # every coefficient array.
+            coeffs = pywt.wavedec(images, wavelet, level=level, axis=1)
+            threshold = 0.1 * \
+                torch.median(torch.abs(torch.from_numpy(coeffs[-1])))
+            denoised_coeffs = [pywt.threshold(
+                data=c, mode='hard', value=threshold) for c in coeffs]
+            images = pywt.waverec(denoised_coeffs, wavelet, axis=1)
+            images = torch.tensor(images).float().to(device)
+            labels = labels.to(device)
+            notes = notes.to(device)
+            output = model(images, notes)
+            loss = loss_fn(output, labels.float())
+            # Weight the batch loss by batch size so the average is per sample.
+            running_loss += loss.item()*len(labels)
+            loss.backward()
+            # global_step counts samples, not batches.
+            global_step += 1*len(images)
+            optimizer.step()
+            values, indices = torch.max(output, dim=1)
+            t_correct += sum(1 for s, i in enumerate(indices)
+                             if labels[s][i] == 1)
+            t_samples += len(indices)
+            # Evaluate when the sample counter has just passed a multiple of
+            # eval_every * dataset size.
+            # NOTE(review): the modulus is zero if that product truncates to 0 - confirm callers.
+            if (global_step % (int(eval_every*len(train_loader.dataset)))) < train_loader.batch_size:
+                model.eval()
+                valid_running_loss = 0.0
+                total_acc_val = 0
+                with torch.no_grad():
+                    for images, labels, notes in valid_loader:
+                        # Same denoising as for the training batches.
+                        coeffs = pywt.wavedec(
+                            images, wavelet, level=level, axis=1)
+                        threshold = 0.1 * \
+                            torch.median(
+                                torch.abs(torch.from_numpy(coeffs[-1])))
+                        denoised_coeffs = [pywt.threshold(
+                            data=c, mode='hard', value=threshold) for c in coeffs]
+                        images = pywt.waverec(denoised_coeffs, wavelet, axis=1)
+                        images = torch.tensor(images).float().to(device)
+                        labels = labels.to(device)
+                        notes = notes.to(device)
+                        output = model(images, notes)
+                        loss = loss_fn(output, labels.float()).item()
+                        valid_running_loss += loss*len(images)
+                        values, indices = torch.max(output, dim=1)
+                        total_acc_val += sum(1 for s,
+                                             i in enumerate(indices) if labels[s][i] == 1)
+                # Record per-sample averages for this evaluation point.
+                average_train_loss = running_loss / t_samples
+                average_valid_loss = valid_running_loss / \
+                    len(valid_loader.dataset)
+                train_loss_list.append(average_train_loss)
+                valid_loss_list.append(average_valid_loss)
+                global_steps_list.append(global_step)
+                display_eval(epoch, epochs, len(train_loader.dataset), global_step, t_correct, t_samples, len(
+                    valid_loader.dataset), average_train_loss, average_valid_loss, total_acc_val)
+                # Switch back to training mode after validation.
+                model.train()
+                # Checkpoint only when the validation loss improves.
+                if best_valid_loss > average_valid_loss:
+                    best_valid_loss = average_valid_loss
+                    save_model(model, optimizer, best_valid_loss, epoch,
+                               path=f'{save_dir}model_{model_save_name}.pt')
+                    save_metrics(train_loss_list, valid_loss_list,
+                                 global_steps_list, path=f'{save_dir}metrics_{model_save_name}.pt')
+    # Always persist the full metric history at the end.
+    save_metrics(train_loss_list, valid_loss_list, global_steps_list,
+                 path=f'{save_dir}metrics_{model_save_name}.pt')
+    print("Training complete.")
+    return model
+def evaluate_RNN(model, test_loader, device="cuda"):
+    model.eval()
+    y_pred = []
+    y_true = []
+    wavelet = 'db4'
+    level = 3
+    total_acc_test = 0
+    with torch.no_grad():
+        for images, labels, notes in test_loader:
+            coeffs = pywt.wavedec(images, wavelet, level=level, axis=1)
+            threshold = 0.1 * \
+                torch.median(torch.abs(torch.from_numpy(coeffs[-1])))
+            denoised_coeffs = [pywt.threshold(
+                data=c, mode='hard', value=threshold) for c in coeffs]
+            images = pywt.waverec(denoised_coeffs, wavelet, axis=1)
+            images = torch.tensor(images).float().to(device)
+            labels = labels.to(device)
+            notes = notes.to(device)
+            output = model(images, notes)
+            values, indices = torch.max(output, dim=1)
+            y_pred.extend(indices.tolist())
+            y_true.extend(labels.tolist())
+            total_acc_test += sum(1 for s,
+                                  i in enumerate(indices) if labels[s][i] == 1)
+    test_accuracy = total_acc_test / len(test_loader.dataset)
+    print(f'Test Accuracy: {test_accuracy: .3f}')
+    return test_accuracy
+def rename_with_acc(save_name, save_dir, acc):
+    acc = round(acc*100)
+    # Rename model
+    new_model_name = f'{save_dir}model_{save_name}_acc_{acc}.pt'
+    new_metrics_name = f'{save_dir}metrics_{save_name}_acc_{acc}.pt'
+    if os.path.isfile(new_model_name):
+        os.remove(new_model_name)
+    if os.path.isfile(new_metrics_name):
+        os.remove(new_metrics_name)
+    os.rename(f'{save_dir}model_{save_name}.pt',
+              f'{save_dir}model_{save_name}_acc_{acc}.pt')
+    # Rename metrics
+    os.rename(f'{save_dir}metrics_{save_name}.pt',
+              f'{save_dir}metrics_{save_name}_acc_{acc}.pt')

utils/__pycache__/RNN_utils.cpython-39.pyc ADDED Viewed

Binary file (5.71 kB). View file

utils/__pycache__/helper_functions.cpython-39.pyc ADDED Viewed

Binary file (2.91 kB). View file

utils/__pycache__/trainer.cpython-39.pyc ADDED Viewed

Binary file (3.28 kB). View file

utils/helper_functions.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import torch
+def define_optimizer(model, lr, alpha):
+    # Define optimizer
+    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=alpha)
+    optimizer.zero_grad()
+    return optimizer
+def tuple_of_tensors_to_tensor(tuple_of_tensors):
+    return  torch.stack(list(tuple_of_tensors), dim=0)
+def predict(model, inputs, notes, device):
+    outputs = model.forward(inputs, notes)
+    predicted = torch.sigmoid(outputs)
+    predicted = (predicted>0.5).float()
+    return outputs, predicted
+def display_train(epoch, num_epochs, i, model, correct, total, loss, train_loader, valid_loader, device):
+    print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Train Loss: {loss.item():.4f}')
+    train_accuracy = correct/total
+    print(f'Epoch [{epoch+1}/{num_epochs}], Train Accuracy: {train_accuracy:.4f}')
+    valid_loss, valid_accuracy = eval_valid(model, valid_loader, epoch, num_epochs, device)
+    return train_accuracy, valid_accuracy, valid_loss
+def eval_valid(model, valid_loader, epoch, num_epochs, device):
+    # Compute model train accuracy on test after all samples have been seen using test samples
+    model.eval()
+    with torch.no_grad():
+        correct = 0
+        total = 0
+        running_loss = 0
+        for inputs, labels, notes in valid_loader:
+            # Get images and labels from test loader
+            inputs = inputs.transpose(1,2).float().to(device)
+            labels = labels.float().to(device)
+            notes = notes.to(device)
+            # Forward pass and predict class using max
+            # outputs = model(inputs)
+            outputs, predicted = predict(model, inputs, notes, device) #torch.max(outputs.data, 1)
+            loss = torch.nn.functional.binary_cross_entropy_with_logits(outputs, labels)
+            running_loss += loss.item()*len(labels)
+            # Check if predicted class matches label and count numbler of correct predictions
+            total += labels.size(0)
+            #TODO: change acc criteria
+            # correct += torch.nn.functional.cosine_similarity(labels,predicted).sum().item() # (predicted == labels).sum().item()
+            values, indices = torch.max(outputs,dim=1)
+            correct += sum(1 for s, i in enumerate(indices)
+                             if labels[s][i] == 1)
+    # Compute final accuracy and display
+    valid_accuracy = correct/total
+    validation_loss = running_loss/total
+    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Accuracy: {valid_accuracy:.4f}, Validation Loss: {validation_loss:.4f}')
+    return validation_loss, valid_accuracy
+def eval_test(model, test_loader, device):
+    # Compute model test accuracy on test after training
+    model.eval()
+    with torch.no_grad():
+        correct = 0
+        total = 0
+        for inputs, labels, notes in test_loader:
+            # Get images and labels from test loader
+            inputs = inputs.transpose(1,2).float().to(device)
+            labels = labels.float().to(device)
+            notes = notes.to(device)
+            # Forward pass and predict class using max
+            # outputs = model(inputs)
+            outputs, predicted = predict(model, inputs, notes, device)#torch.max(outputs.data, 1)
+            # Check if predicted class matches label and count numbler of correct predictions
+            total += labels.size(0)
+            #TODO: change acc criteria
+            # correct += torch.nn.functional.cosine_similarity(labels,predicted).sum().item() # (predicted == labels).sum().item()
+            values, indices = torch.max(outputs,dim=1)
+            correct += sum(1 for s, i in enumerate(indices)
+                             if labels[s][i] == 1)
+    # Compute final accuracy and display
+    test_accuracy = correct/total
+    print(f'Ended Training, Test Accuracy: {test_accuracy:.4f}')
+    return test_accuracy

utils/trainer.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import torch
+from .helper_functions import define_optimizer, predict, display_train, eval_test
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+def save_model(model, optimizer, valid_loss, epoch, path='model.pt'):
+    torch.save({'valid_loss': valid_loss,
+                'model_state_dict': model.state_dict(),
+                'epoch': epoch + 1,
+                'optimizer': optimizer.state_dict()
+                }, path)
+    tqdm.write(f'Model saved to ==> {path}')
+def save_metrics(train_loss_list, valid_loss_list, global_steps_list, path='metrics.pt'):
+    torch.save({'train_loss_list': train_loss_list,
+                'valid_loss_list': valid_loss_list,
+                'global_steps_list': global_steps_list,
+                }, path)
+def plot_losses(metrics_save_name='metrics', save_dir='./'):
+    path = f'{save_dir}metrics_{metrics_save_name}.pt'
+    state = torch.load(path)
+    train_loss_list = state['train_loss_list']
+    valid_loss_list = state['valid_loss_list']
+    global_steps_list = state['global_steps_list']
+    plt.plot(global_steps_list, train_loss_list, label='Train')
+    plt.plot(global_steps_list, valid_loss_list, label='Valid')
+    plt.xlabel('Global Steps')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()
+def trainer(model, train_loader, test_loader, valid_loader, num_epochs = 10, lr = 0.01, alpha = 0.99, eval_interval = 10, model_save_name='', save_dir='./'):
+    # Use GPU if available, else use CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(device)
+    # History for train acc, test acc
+    train_accs = []
+    valid_accs = []
+    global_step = 0
+    train_loss_list = []
+    valid_loss_list = []
+    global_steps_list = []
+    best_valid_loss = float("inf")
+    # Define optimizer
+    optimizer = define_optimizer(model, lr, alpha)
+    # Training model
+    for epoch in range(num_epochs):
+        # Go trough all samples in train dataset
+        model.train()
+        running_loss = 0
+        correct = 0
+        total = 0
+        for i, (inputs, labels, notes) in enumerate(train_loader):
+            # Get from dataloader and send to device
+            inputs = inputs.transpose(1,2).float().to(device)
+            # print(labels.shape)
+            labels = labels.float().to(device)
+            notes = notes.to(device)
+            # print(labels.shape)
+            # Forward pass
+            outputs, predicted = predict(model, inputs, notes, device)
+            # print(predicted.shape, labels.shape)
+            # Check if predicted class matches label and count numbler of correct predictions
+            total += labels.size(0)
+            #TODO: change acc criteria
+            # correct += torch.nn.functional.cosine_similarity(labels,predicted).sum().item()  #(predicted == labels).sum().item()
+            values, indices = torch.max(outputs,dim=1)
+            correct += sum(1 for s, i in enumerate(indices)
+                             if labels[s][i] == 1)
+            # Compute loss
+            # we use outputs before softmax function to the cross_entropy loss
+            loss = torch.nn.functional.binary_cross_entropy_with_logits(outputs, labels)
+            running_loss += loss.item()*len(labels)
+            global_step += 1*len(inputs)
+            # Backward and optimize
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+            # Display losses over iterations and evaluate on validation set
+            if (i+1) % eval_interval == 0:
+                train_accuracy, valid_accuracy, valid_loss = display_train(epoch, num_epochs, i, model, \
+                                                               correct, total, loss, \
+                                                               train_loader, valid_loader, device)
+                average_train_loss = running_loss / total
+                # average_valid_loss = valid_loss
+                train_loss_list.append(average_train_loss)
+                valid_loss_list.append(valid_loss)
+                global_steps_list.append(global_step)
+                if valid_loss < best_valid_loss:
+                    best_valid_loss = valid_loss
+                    save_model(model, optimizer, best_valid_loss, epoch, path=f'{save_dir}model_{model_save_name}.pt')
+                    save_metrics(train_loss_list, valid_loss_list, global_steps_list, path=f'{save_dir}metrics_{model_save_name}.pt')
+                    # torch.save(model.state_dict(),  f'./ckpt_mid/{model.name}_best_lr_{lr}.pt')
+        if(len(train_loader)%eval_interval!=0):
+            train_accuracy, valid_accuracy, valid_loss = display_train(epoch, num_epochs, i, model, \
+                                                                    correct, total, loss, \
+                                                                    train_loader, valid_loader, device)
+            average_train_loss = running_loss / total
+            # average_valid_loss = valid_loss/len(valid_loader.dataset)
+            train_loss_list.append(average_train_loss)
+            valid_loss_list.append(valid_loss)
+            global_steps_list.append(global_step)
+            if valid_loss < best_valid_loss:
+                best_valid_loss = valid_loss
+                save_model(model, optimizer, best_valid_loss, epoch, path=f'{save_dir}model_{model_save_name}.pt')
+                save_metrics(train_loss_list, valid_loss_list, global_steps_list, path=f'{save_dir}metrics_{model_save_name}.pt')
+                # torch.save(model.state_dict(),  f'./ckpt_mid/{model.name}_best_lr_{lr}.pt')
+        # Append accuracies to list at the end of each iteration
+        train_accs.append(train_accuracy)
+        valid_accs.append(valid_accuracy)
+        # torch.save(model.state_dict(), f'./ckpt_mid/{model.name}_epoch_{epoch}_lr_{lr}.pt')
+    save_metrics(train_loss_list, valid_loss_list, global_steps_list,
+                 path=f'{save_dir}metrics_{model_save_name}.pt')
+    # Load best_model
+    checkpoint = torch.load(f'{save_dir}model_{model_save_name}.pt')
+    model.load_state_dict(checkpoint['model_state_dict'])
+    # Evaluate on test after training has completed
+    test_acc = eval_test(model, test_loader, device)
+    # Return
+    return train_accs, valid_accs, test_acc