import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.nn import MultiheadAttention


class attentionLayer(nn.Module):
    """Cross-attention block: multi-head attention followed by a position-wise
    feed-forward network, each with a residual connection and layer
    normalization (post-norm, as in the original Transformer)."""

    def __init__(self, d_model, nhead, dropout=0.1):
        super(attentionLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        # Position-wise feed-forward network with a 4x hidden expansion.
        self.linear1 = nn.Linear(d_model, d_model * 4)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_model * 4, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = F.relu

|
    def forward(self, src, tar, adjust=False, attn_mask=None):
        # MultiheadAttention defaults to (seq_len, batch, d_model) layout,
        # so convert from batch-first inputs.
        src = src.transpose(0, 1)
        tar = tar.transpose(0, 1)

        if adjust:
            # src attends to tar (query=src, key/value=tar).
            src2 = self.self_attn(src, tar, tar, attn_mask=attn_mask, key_padding_mask=None)[0]
        else:
            # tar attends to src (query=tar, key/value=src).
            src2 = self.self_attn(tar, src, src, attn_mask=attn_mask, key_padding_mask=None)[0]
        # Residual connection onto src: src and tar are expected to share the
        # same sequence length.
        src = src + self.dropout1(src2)
        src = self.norm1(src)

        # Position-wise feed-forward sub-layer with residual and normalization.
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)

        # Back to batch-first layout.
        src = src.transpose(0, 1)
        return src
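

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal smoke test assuming batch-first inputs of shape
# (batch, seq_len, d_model); src and tar are given the same sequence length
# because the residual connection adds the attention output onto src.
# The hyperparameters and tensor shapes below are arbitrary examples.
if __name__ == "__main__":
    layer = attentionLayer(d_model=128, nhead=8, dropout=0.1)
    src = torch.randn(2, 10, 128)  # e.g. features from one modality
    tar = torch.randn(2, 10, 128)  # e.g. features from another modality
    out = layer(src, tar)          # default adjust=False: tar attends over src
    print(out.shape)               # expected: torch.Size([2, 10, 128])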