# File size: 2,177 Bytes
# 5486f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import torch
import torch.nn as nn
import torch.nn.functional as F

import random

class MarginLoss(nn.Module):
    """Pairwise margin loss over randomly sampled candidate pairs.

    For every batch element, ``num_samples`` pairs of candidate indices are
    drawn uniformly with replacement. For each pair the quantity
    ``beta * (sim_a - sim_b) - (score_a - score_b)`` is computed and both it
    and its negation are passed through a ReLU, so each pair contributes the
    absolute gap between the scaled similarity difference and the score
    difference. Contributions are summed over pairs and batch elements (no
    averaging).

    Args:
        similarity_fct: Callable ``(candidates, targets) -> similarities``.
            It is called with the sampled candidate rows and the target tiled
            to ``num_samples`` rows, and must return one value per row.
        beta: Scale applied to the similarity difference.
        num_samples: Number of index pairs drawn per batch element.
    """

    def __init__(self, similarity_fct, beta=0.1, num_samples=20):
        super().__init__()
        self.beta = beta
        self.similarity_fct = similarity_fct
        self.num_samples = num_samples

    def forward(self, input_ids, target_ids, sequence_scores):
        """Return the summed pairwise margin loss for a batch.

        Args:
            input_ids: Per-batch-element candidates; ``input_ids[b]`` is
                indexed along its first dimension by the sampled indices.
            target_ids: Per-batch-element target; ``target_ids[b]`` is tiled
                into ``num_samples`` rows (presumably a 1-D tensor of token
                ids -- confirm against the caller).
            sequence_scores: Per-batch-element candidate scores;
                ``sequence_scores[b]`` is indexed with the same sampled
                indices as ``input_ids[b]``.

        Returns:
            A scalar tensor holding the total loss. An empty batch yields a
            zero scalar tensor.
        """
        if len(input_ids) == 0:
            # Keep the return type a tensor even when there is nothing to sum,
            # so callers can treat the result uniformly (e.g. ``.item()``).
            return torch.zeros((), device=getattr(sequence_scores, "device", None))

        loss = 0.0
        for b, candidates in enumerate(input_ids):
            scores = sequence_scores[b]
            num_candidates = candidates.shape[0]

            # Uniform sampling with replacement. The two draws are kept in the
            # same order as before so seeded runs reproduce the same pairs.
            weights = torch.ones(num_candidates) / num_candidates
            pos_indices = torch.multinomial(weights, self.num_samples, replacement=True)
            neg_indices = torch.multinomial(weights, self.num_samples, replacement=True)

            # The tiled target is identical for both similarity calls, so
            # build it once per batch element.
            targets = target_ids[b].unsqueeze(0).repeat(self.num_samples, 1)
            pos_sim = self.similarity_fct(candidates[pos_indices], targets)
            neg_sim = self.similarity_fct(candidates[neg_indices], targets)

            pos_scores = scores[pos_indices]
            neg_scores = scores[neg_indices]

            # Symmetric hinge: loss_j is the negation of loss_i, so exactly
            # one of the two ReLU terms is active for any non-zero gap.
            loss_i = self.beta * (pos_sim - neg_sim) - pos_scores + neg_scores
            loss_j = self.beta * (neg_sim - pos_sim) - neg_scores + pos_scores

            loss += torch.sum(torch.relu(loss_i)) + torch.sum(torch.relu(loss_j))

        return loss



class KLRegularization(nn.Module):
    """KL-divergence penalty against the output distribution of a reference model.

    The reference model is called without gradient tracking, its logits are
    turned into probabilities with a softmax over the last dimension, and the
    result is used as the target of ``nn.KLDivLoss(reduction="batchmean")``.
    """

    def __init__(self, model_ref):
        """Store the reference model and build the KL criterion.

        Args:
            model_ref: Callable accepting ``decoder_input_ids`` (plus any extra
                keyword arguments) and returning an object with a ``logits``
                attribute.
        """
        super().__init__()

        self.kl_loss = nn.KLDivLoss(reduction="batchmean")
        self.model_ref = model_ref

    def forward(self, inputs_ids, scores, targets_ids, **kwargs):
        """Return the KL divergence between ``scores`` and the reference distribution.

        Args:
            inputs_ids: Passed to the reference model as ``decoder_input_ids``.
            scores: Input to the KL criterion. NOTE(review): ``nn.KLDivLoss``
                expects log-probabilities here -- confirm the caller applies
                ``log_softmax`` first.
            targets_ids: Unused; accepted so the signature matches the other
                regularizers in this file.
            **kwargs: Forwarded unchanged to the reference model.
        """
        # The reference model only provides a fixed target distribution, so no
        # gradient should flow through it.
        with torch.no_grad():
            reference_output = self.model_ref(decoder_input_ids=inputs_ids, **kwargs)
            reference_probs = reference_output.logits.softmax(dim=-1)

        divergence = self.kl_loss(scores, reference_probs)
        return divergence

class CERegularization(nn.Module):
    """Negative log-likelihood penalty of the scores against the target ids."""

    def __init__(self):
        """Build the ``nn.NLLLoss`` criterion with its default settings."""
        super().__init__()

        self.nll_loss = nn.NLLLoss()

    def forward(self, inputs_ids, scores, targets_ids, **kwargs):
        """Return ``nn.NLLLoss`` of ``scores`` with respect to ``targets_ids``.

        Args:
            inputs_ids: Unused; accepted so the signature matches the other
                regularizers in this file.
            scores: Input to the criterion. NOTE(review): ``nn.NLLLoss``
                expects log-probabilities with the class dimension at index 1
                -- confirm the layout the caller provides.
            targets_ids: Class indices used as the target.
            **kwargs: Ignored.
        """
        criterion = self.nll_loss
        penalty = criterion(scores, targets_ids)
        return penalty