File size: 5,331 Bytes
d31084e
 
ef7d81f
d31084e
 
 
 
 
 
 
 
 
 
 
 
 
 
ef7d81f
4b1312f
ef7d81f
d31084e
ef7d81f
4b1312f
ef7d81f
d31084e
ef7d81f
4b1312f
ef7d81f
d31084e
ef7d81f
4b1312f
ef7d81f
4b3871e
ef7d81f
83716b7
ef7d81f
d31084e
ef7d81f
83716b7
ef7d81f
c260988
b7cf562
 
 
 
 
 
 
 
 
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
ef7d81f
b7cf562
2775ced
 
 
 
 
 
 
 
 
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
2775ced
ef7d81f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59d4418
cb993e1
47f1cac
27050c6
222924b
553d60b
59d4418
553d60b
f0ee7a8
553d60b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d68deeb
553d60b
8b0d86f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
---
# Model card metadata (YAML front matter): language, license, tags, and the
# model-index holding the evaluation results listed below.
language: en
license: mit
tags:
- summarization
# One model entry; its `results` list has one item per dataset/split evaluated.
model-index:
- name: SamuelAllen123/t5-efficient-large-nl36_fine_tune_sum_V2
  results:
  # Summarization results on the samsum dataset (config samsum), test split:
  # four ROUGE variants, loss, and gen_len.
  # NOTE(review): these metrics are marked verified but carry no verifyToken,
  # unlike the scientific_papers entry in this file -- confirm this is intended.
  - task:
      type: summarization
      name: Summarization
    dataset:
      name: samsum
      type: samsum
      config: samsum
      split: test
    metrics:
    - type: rouge
      value: 50.5049
      name: ROUGE-1
      verified: true
    - type: rouge
      value: 25.6469
      name: ROUGE-2
      verified: true
    - type: rouge
      value: 41.7544
      name: ROUGE-L
      verified: true
    - type: rouge
      value: 46.2055
      name: ROUGE-LSUM
      verified: true
    - type: loss
      value: 1.5158178806304932
      name: loss
      verified: true
    - type: gen_len
      value: 24.0342
      name: gen_len
      verified: true
  # Summarization results on the cnn_dailymail dataset (config 3.0.0), test
  # split: four ROUGE variants, loss, and gen_len.
  # NOTE(review): these metrics are marked verified but carry no verifyToken,
  # unlike the scientific_papers entry in this file -- confirm this is intended.
  - task:
      type: summarization
      name: Summarization
    dataset:
      name: cnn_dailymail
      type: cnn_dailymail
      config: 3.0.0
      split: test
    metrics:
    - type: rouge
      value: 34.4055
      name: ROUGE-1
      verified: true
    - type: rouge
      value: 14.127
      name: ROUGE-2
      verified: true
    - type: rouge
      value: 24.3353
      name: ROUGE-L
      verified: true
    - type: rouge
      value: 31.6582
      name: ROUGE-LSUM
      verified: true
    - type: loss
      value: 2.4456119537353516
      name: loss
      verified: true
    - type: gen_len
      value: 45.928
      name: gen_len
      verified: true
  # Summarization results on the samsum dataset (config samsum), train split:
  # four ROUGE variants, loss, and gen_len.
  # NOTE(review): these metrics are marked verified but carry no verifyToken,
  # unlike the scientific_papers entry in this file -- confirm this is intended.
  - task:
      type: summarization
      name: Summarization
    dataset:
      name: samsum
      type: samsum
      config: samsum
      split: train
    metrics:
    - type: rouge
      value: 54.933
      name: ROUGE-1
      verified: true
    - type: rouge
      value: 31.7965
      name: ROUGE-2
      verified: true
    - type: rouge
      value: 47.0057
      name: ROUGE-L
      verified: true
    - type: rouge
      value: 51.2027
      name: ROUGE-LSUM
      verified: true
    - type: loss
      value: 1.130684494972229
      name: loss
      verified: true
    - type: gen_len
      value: 23.7989
      name: gen_len
      verified: true
  # Summarization results on the scientific_papers dataset (config pubmed),
  # train split: four ROUGE variants, loss, and gen_len.
  # Every metric here carries a verifyToken next to `verified: true`; the
  # tokens are opaque strings and must be kept byte-for-byte.
  - task:
      type: summarization
      name: Summarization
    dataset:
      name: scientific_papers
      type: scientific_papers
      config: pubmed
      split: train
    metrics:
    - type: rouge
      value: 23.6698
      name: ROUGE-1
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTg4OTMwYjkyNmU1ZjdmN2Q4MWE4YzFkZjUyMDZhNDNhYjBkODg3ZjI5NDQxMTcyNDUyMzkwNDZlNjNhZGRiOSIsInZlcnNpb24iOjF9.0kRK7iA642z0YWAH81v1_-pil6TyM3bezGfZtqGev5O7AgGkxzfQaIDNhkVVvVIJdUPJFD7L36XyLx3AWO5BCQ
    - type: rouge
      value: 7.5691
      name: ROUGE-2
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2Q2MDc1ZjZlYjRmZDZkNjY3MmFhODAzZWUwZjA1M2RlZGUwYTY2ZjM2ZTM1NzQ3YjAxMDFiMWZlMGMwNTgyOCIsInZlcnNpb24iOjF9._Y59aEEGLn0Ij622V8Rwljp-h4uTuCfoPgJdvMN6GvCyKRzwugHo8tedfTpbTAb6cicjiWjKvKurqXTjpw1KAw
    - type: rouge
      value: 15.6071
      name: ROUGE-L
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNjMwM2Q2ODYwZWE4MzNhNDNlNzlhNjU2NGUxYjlhNDM3MzM5MmJjNzU4YTYxNzI4ZmQ3YzQ1YjMzMDZkMTQ4ZCIsInZlcnNpb24iOjF9.zyfiVsuCEXCTkGAqNxCZ8hTKVxAE0JmJRbNZ04HoBi7qYFB13_7JTB6tOvAEH34W-2yvpOs4cBsFqtXg7RvnCA
    - type: rouge
      value: 21.4565
      name: ROUGE-LSUM
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTE4MjVlZjI5NDBkZjRmODA3MmIzY2I0YWUwZjEyMzYwNjFjNTY3N2NjMmY3ZThlODBjN2VhZWZlODliZmEyZSIsInZlcnNpb24iOjF9.RFZbr5R9cJtrhzWMKys62fiBxKv8MYe6_115NBjEZ6wOwzVih5SdJE8r2EK-1wdCMF_jLGPYQvZ-zyj3KHGWCw
    - type: loss
      value: 3.9369945526123047
      name: loss
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTc3MzMwYTg5OWIyZGQxNGJlYzExNTY0MjUyY2M5M2NiOGQ2ODI0MWFiMzJjYWY4ZGNkZmY2MmUyZjVjODRiYSIsInZlcnNpb24iOjF9.iDxSfTwZRV5VboHLjF4a47kPXagG7bY78WIejIM37ykpksXxVYssZlmK6UxtkEmZuWypqbQjz6oOjTjy6x3tDQ
    - type: gen_len
      value: 65.9987
      name: gen_len
      verified: true
      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiODdmYzFiNzU3N2VlMWMyMGEwZmFkZmExZWRlN2NjNWI3ZGJjNmYzMWExYWM5MWY2MzJkMmY0ZGE2NjFjMjRjYyIsInZlcnNpb24iOjF9.3ByM1s1Ux-PDBBnf6i3FUtFLzpmZXcikIfrsR3vTIi9567r789Wm8sW81blFHNfnST-ZHQxPKJOuv4ho8S4eCg
---
*THESE ARE NOT SELF-REPORTED VALUES FOR THE LEADERBOARD. I HAVE NO CLUE WHY IT'S BROKEN; CHECK THE PULL REQUEST.*

Use the model for summarization without adding "summarize" to the start of the input string.

Trained on the SAMSum train split.

Parameters for training:

```python
no_decay = ["bias", "LayerNorm.weight", "layer_norm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]

lr = 0.00005
optimizer = torch.optim.RAdam(optimizer_grouped_parameters, lr=lr)

lr_scheduler = get_scheduler(
        name="linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=50005)
```

Training ran for only 10K steps with a batch size of 10.

If you want more info, feel free to message me or email me at:
[email protected]