|
import torch |
|
import torch.nn as nn |
|
import torch.nn.functional as F |
|
|
|
import random |
|
|
|
class MarginLoss(nn.Module):
    """Pairwise loss tying score differences to scaled similarity differences.

    For every batch element, two sets of ``num_samples`` candidate indices
    are drawn uniformly with replacement.  For each sampled pair ``(i, j)``
    the loss compares ``beta * (sim_i - sim_j)`` with ``score_i - score_j``,
    where ``sim`` is the similarity between a candidate and the target.

    Args:
        similarity_fct: Callable invoked as ``similarity_fct(candidates,
            targets)``; its result must broadcast against the gathered
            sequence scores.
        beta: Scale applied to the similarity difference.
        num_samples: Number of index pairs drawn per batch element.
    """

    def __init__(self, similarity_fct, beta=0.1, num_samples=20):
        super().__init__()
        self.beta = beta
        self.similarity_fct = similarity_fct
        self.num_samples = num_samples

    def forward(self, input_ids, target_ids, sequence_scores):
        """Return the summed pairwise loss over the batch.

        Args:
            input_ids: Per-element candidates; ``input_ids[b]`` is indexed
                along its first dimension (assumed ``(C, L)`` -- TODO confirm).
            target_ids: Per-element targets; ``target_ids[b]`` is tiled
                ``num_samples`` times along a new leading dimension.
            sequence_scores: Per-element scores; ``sequence_scores[b]`` is
                indexed with the same sampled candidate indices.

        Returns:
            The accumulated loss.  For an empty batch this is the Python
            float ``0.0``; otherwise it is whatever the tensor arithmetic
            yields (a scalar tensor for tensor inputs).
        """
        batch_size = len(input_ids)
        loss = 0.0

        for b in range(batch_size):
            candidates = input_ids[b]
            scores = sequence_scores[b]
            num_candidates = candidates.shape[0]

            # Uniform sampling with replacement.  The two draws are
            # independent, so a pair may contain the same candidate twice.
            weights = torch.ones(num_candidates) / num_candidates
            pos_indices = torch.multinomial(
                weights, self.num_samples, replacement=True
            )
            neg_indices = torch.multinomial(
                weights, self.num_samples, replacement=True
            )

            # One copy of the target per sampled candidate, shared by both
            # similarity calls.
            targets = target_ids[b].unsqueeze(0).repeat(self.num_samples, 1)
            pos_sim = self.similarity_fct(candidates[pos_indices], targets)
            neg_sim = self.similarity_fct(candidates[neg_indices], targets)

            pos_scores = scores[pos_indices]
            neg_scores = scores[neg_indices]

            # loss_j is the negation of loss_i, so the two hinge terms
            # together penalise the absolute mismatch in either direction.
            loss_i = self.beta * (pos_sim - neg_sim) - pos_scores + neg_scores
            loss_j = self.beta * (neg_sim - pos_sim) - neg_scores + pos_scores

            loss += torch.sum(torch.relu(loss_i)) + torch.sum(torch.relu(loss_j))

        return loss
|
|
|
|
|
|
|
class KLRegularization(nn.Module):
    """KL-divergence penalty against the output of a reference model.

    Args:
        model_ref: Callable invoked as ``model_ref(decoder_input_ids=...,
            **kwargs)``; the returned object must expose ``.logits``.
    """

    def __init__(self, model_ref):
        super().__init__()

        self.kl_loss = nn.KLDivLoss(reduction="batchmean")
        # NOTE(review): if model_ref is an nn.Module, this assignment registers
        # it as a submodule, so its parameters and train/eval mode follow this
        # module -- confirm that is intended.
        self.model_ref = model_ref

    def forward(self, inputs_ids, scores, targets_ids, **kwargs):
        """Return ``KLDivLoss(scores, softmax(reference logits))``.

        Args:
            inputs_ids: Passed to the reference model as ``decoder_input_ids``.
            scores: First argument of ``nn.KLDivLoss``, which by default
                expects log-probabilities -- assumes the caller supplies
                log-softmax outputs; TODO confirm.
            targets_ids: Unused here; presumably kept so the signature matches
                the other regularizers in this file.
            **kwargs: Forwarded unchanged to the reference model.

        Returns:
            The ``batchmean``-reduced KL divergence.
        """
        # The reference model only provides a target distribution, so no
        # gradient is tracked through it.
        with torch.no_grad():
            reference_output = self.model_ref(decoder_input_ids=inputs_ids, **kwargs)
            reference_probs = F.softmax(reference_output.logits, dim=-1)

        return self.kl_loss(scores, reference_probs)
|
|
|
class CERegularization(nn.Module):
    """Negative log-likelihood penalty of the scores against the targets."""

    def __init__(self):
        super().__init__()

        self.nll_loss = nn.NLLLoss()

    def forward(self, inputs_ids, scores, targets_ids, **kwargs):
        """Return ``nn.NLLLoss()(scores, targets_ids)``.

        Args:
            inputs_ids: Unused here; presumably kept so the signature matches
                the other regularizers in this file.
            scores: Input to ``nn.NLLLoss`` -- assumes log-probabilities with
                the class dimension at index 1; TODO confirm against caller.
            targets_ids: Target class indices.
            **kwargs: Accepted and ignored.

        Returns:
            The loss under ``nn.NLLLoss``'s default (mean) reduction.
        """
        return self.nll_loss(scores, targets_ids)
|
|