MohamedRashad's picture
chore: Add requirements for shakkala and kaldialign
bcc0c7f
import torch.nn as nn
from transformer import *
class Transformer(nn.Module):
def __init__(self, src_pad_idx, trg_pad_idx, enc_voc_size, dec_voc_size, d_model, n_head, max_len,
ffn_hidden, n_layers, drop_prob, learnable_pos_emb=True):
super().__init__()
self.src_pad_idx = src_pad_idx
self.trg_pad_idx = trg_pad_idx
self.encoder = Encoder(d_model=d_model,
n_head=n_head,
max_len=max_len,
ffn_hidden=ffn_hidden,
enc_voc_size=enc_voc_size,
drop_prob=drop_prob,
n_layers=n_layers,
padding_idx=src_pad_idx,
learnable_pos_emb=learnable_pos_emb)
self.decoder = nn.Linear(d_model, dec_voc_size)
def get_device(self):
return next(self.parameters()).device
def forward(self, src):
device = self.get_device()
src_mask = self.make_pad_mask(src, src, self.src_pad_idx, self.src_pad_idx).to(device)
enc_src = self.encoder(src, src_mask)
output = self.decoder(enc_src)
return output
def make_pad_mask(self, q, k, q_pad_idx, k_pad_idx):
len_q, len_k = q.size(1), k.size(1)
# batch_size x 1 x 1 x len_k
k = k.ne(k_pad_idx).unsqueeze(1).unsqueeze(2)
# batch_size x 1 x len_q x len_k
k = k.repeat(1, 1, len_q, 1)
# batch_size x 1 x len_q x 1
q = q.ne(q_pad_idx).unsqueeze(1).unsqueeze(3)
# batch_size x 1 x len_q x len_k
q = q.repeat(1, 1, 1, len_k)
mask = k & q
return mask
def make_no_peak_mask(self, q, k):
len_q, len_k = q.size(1), k.size(1)
# len_q x len_k
mask = torch.tril(torch.ones(len_q, len_k)).type(torch.BoolTensor)
return mask