Commit 674b430 by mohdelgaar
Parent: e048c03

Update layout and samples

app.py CHANGED
@@ -3,60 +3,367 @@ import spacy
 # nltk.download('wordnet')
 # spacy.cli.download('en_core_web_sm')
 
-from const import name_map
-from demo import run_gradio
+import torch
+import joblib, json
+import numpy as np
+import pandas as pd
+import gradio as gr
+from const import used_indices, name_map
 from model import get_model
 from options import parse_args
-import numpy as np
 from transformers import T5Tokenizer
-import torch
-import joblib
+from compute_lng import compute_lng
 
 
 def process_examples(samples, full_names):
-    for i in range(len(samples)):
-        sample = samples[i]
-        input_text = tokenizer.decode(sample['sentence1_input_ids'], skip_special_tokens=True)
-        ling1 = scaler.inverse_transform([sample['sentence1_ling']])[0]
-        ling2 = scaler.inverse_transform([sample['sentence2_ling']])[0]
-        ling = pd.DataFrame({'Index': full_names, 'Source': ling1, 'Target': ling2})
-        samples[i] = [input_text, ling]
-    return list(samples)
+    processed = []
+    for sample in samples:
+        processed.append([
+            sample['sentence1'],
+            pd.DataFrame({'Index': full_names, 'Source': sample['sentence1_ling'], 'Target': sample['sentence2_ling']})
+        ])
+    return processed
 
 args, args_list, lng_names = parse_args(ckpt='./ckpt/model.pt')
-print(args)
-exit()
 
 tokenizer = T5Tokenizer.from_pretrained(args.model_name)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-full_names = [name_map[x] for x in lng_names]
-# samples = joblib.load('assets/samples.bin')
-# examples = process_examples(samples, full_names)
-# ling_collection = np.load('assets/ling_collection.npy')
+lng_names = [name_map[x] for x in lng_names]
+examples = json.load(open('assets/examples.json'))
+examples = process_examples(examples, lng_names)
 
+stats = json.load(open('assets/stats.json'))
+
+ling_collection = np.load('assets/ling_collection.npy')
 scaler = joblib.load('assets/scaler.bin')
+scale_ratio = np.load('assets/ratios.npy')
+
 model, ling_disc, sem_emb = get_model(args, tokenizer, device)
 
 state = torch.load(args.ckpt, map_location=torch.device('cpu'))
 model.load_state_dict(state['model'], strict=True)
 model.eval()
-print(model is not None, ling_disc is not None, sem_emb is not None)
-exit()
-
-if args.disc_type == 't5':
-    state = torch.load(args.disc_ckpt)
-    if 'model' in state:
-        ling_disc.load_state_dict(state['model'], strict=False)
-    else:
-        ling_disc.load_state_dict(state, strict=False)
 ling_disc.eval()
 
 state = torch.load(args.sem_ckpt)
-if 'model' in state:
-    sem_emb.load_state_dict(state['model'], strict=False)
-else:
-    sem_emb.load_state_dict(state, strict=False)
+sem_emb.load_state_dict(state['model'], strict=True)
 sem_emb.eval()
 
-run_gradio(model, tokenizer, scaler, ling_collection, examples, full_names)
+device = model.backbone.device
+
+############# Start demo code
+def round_ling(x):
+    is_int = stats['is_int']
+    mins = stats['min']
+    maxs = stats['max']
+    for i in range(len(x)):
+        # if is_int[i]:
+        #     x[i] = round(x[i])
+        # else:
+        #     x[i] = round(x[i], 3)
+        x[i] = round(x[i], 3)
+    return np.clip(x, mins, maxs)
+
+def visibility(mode):
+    if mode == 0:
+        vis_group = group1
+    elif mode == 1:
+        vis_group = group2
+    elif mode == 2:
+        vis_group = group3
+
+    output = [gr.update(value=''), gr.update(value='')]
+    for component in components:
+        if component in vis_group:
+            output.append(gr.update(visible=True))
+        else:
+            output.append(gr.update(visible=False))
+    return output
+
+def generate(sent1, ling):
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling1 = scaler.transform([ling['Source']])
+    ling2 = scaler.transform([ling['Target']])
+    inputs = {'sentence1_input_ids': input_ids,
+              'sentence1_ling': torch.tensor(ling1).float().to(device),
+              'sentence2_ling': torch.tensor(ling2).float().to(device),
+              'sentence1_attention_mask': torch.ones_like(input_ids)}
+    preds = []
+    with torch.no_grad():
+        pred = model.infer(inputs).cpu().numpy()
+    pred = tokenizer.batch_decode(pred,
+                                  skip_special_tokens=True)[0]
+
+    return pred
+
+def generate_with_feedback(sent1, ling, approx):
+    if sent1 == '':
+        return ['Please input a source text.', '']
+
+
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
+    inputs = {
+        'sentence1_input_ids': input_ids,
+        'sentence2_ling': ling2,
+        'sentence1_attention_mask': torch.ones_like(input_ids)
+    }
+
+    pred, (pred_text, interpolations) = model.infer_with_feedback_BP(ling_disc, sem_emb, inputs, tokenizer)
+
+    interpolation = '-- ' + '\n-- '.join(interpolations)
+    return [pred_text, interpolation]
+
+def generate_random(sent1, ling, count, approx):
+    preds, interpolations = [], []
+    for c in range(count):
+        idx = np.random.randint(0, len(ling_collection))
+        ling_ex = ling_collection[idx]
+        ling['Target'] = ling_ex
+        pred, interpolation = generate_with_feedback(sent1, ling, approx)
+        preds.append(pred)
+        interpolations.append(interpolation)
+    return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
+
+def estimate_gen(sent1, sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    gen = generate_with_feedback(sent1, ling, approx)
+    results = gen + [ling]
+
+    return results
+
+def estimate_tgt(sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    return ling
+
+def estimate_src(sent1, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent1))[used_indices]
+    else:
+        raise ValueError()
+
+    ling['Source'] = ling_pred
+    return ling
+
+def rand_target(ling):
+    ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
+    return ling
+
+def rand_ex_target(ling):
+    idx = np.random.randint(0, len(ling_collection))
+    ling_ex = ling_collection[idx]
+    ling['Target'] = ling_ex
+    return ling
+
+def copy(ling):
+    ling['Target'] = ling['Source']
+    return ling
+
+def add(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] + scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+def sub(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] - scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+title = """
+<h1 style="text-align: center;">Controlled Paraphrase Generation with Linguistic Feature Control</h1>
+
+<p style="font-size:1.2em;">This system utilizes an encoder-decoder model to generate text with controlled complexity, guided by 40 linguistic complexity indices.
+The model can generate diverse paraphrases of a given sentence, each adjusted to maintain consistent meaning while varying
+in linguistic complexity according to the desired level.</p>
+<p style="font-size:1.2em;">It is important to note that not all index combinations are feasible (e.g., a sentence of "length" 5 with 10 "unique words").
+To ensure high-quality outputs, our approach interpolates the embeddings of linguistic indices to identify the closest,
+achievable set of indices for the given target.</p>
+"""
+
+guide = """
+You may use the system in one of the following ways:
+
+**Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
+of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
+and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
+
+**Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
+sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
+sentence along with another sentence (which will serve only to extract linguistic indices for the
+paraphrase generation). Then, click "Generate."
+
+**Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
+generated text. We provide a set of tools for manual adjustment of the desired linguistic complexity of
+the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
+generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
+These tools are designed for experimental use and require the user to possess linguistic expertise for
+effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
+indices." Once indices are entered, click "Generate."
+
+
+Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
+in quality control of the generation). Approximate computation is significantly faster.
+
+Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
+
+Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentence,
+the indices of the source sentence, and a sample set of target linguistic indices.
+
+Please make your choice below.
+
+"""
+
+sent1 = gr.Textbox(label='Source text')
+ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
+        headers=['Index', 'Source', 'Target'],
+        datatype=['str', 'number', 'number'], visible=False)
+css = """
+#guide span.svelte-1w6vloh {font-size: 22px !important; font-weight: 600 !important}
+#mode span.svelte-1gfkn6j {font-size: 18px !important; font-weight: 600 !important}
+#mode {border: 0px; box-shadow: none}
+#mode .block {padding: 0px}
+
+div.gradio-container {color: black}
+div.form {background: inherit}
+
+body {
+    --text-sm: 12px;
+    --text-md: 16px;
+    --text-lg: 18px;
+    --input-text-size: 16px;
+    --section-text-size: 16px;
+    --input-background: --neutral-50;
+}
+
+.separator {
+    width: 100%;
+    height: 3px; /* Adjust the height for boldness */
+    background-color: #000; /* Adjust the color as needed */
+    margin: 20px 0; /* Adjust the margin as needed */
+}
+"""
+
+with gr.Blocks(
+        theme=gr.themes.Default(
+            spacing_size=gr.themes.sizes.spacing_md,
+            text_size=gr.themes.sizes.text_md,
+        ),
+        css=css) as demo:
+    gr.Image('assets/logo.png', height=100, container=False, show_download_button=False)
+    gr.Markdown(title)
+    with gr.Accordion("🚀 Quick Start Guide", open=False, elem_id='guide'):
+        gr.Markdown(guide)
+
+    with gr.Group(elem_classes='separator'):
+        pass
+    with gr.Group(elem_id='mode'):
+        mode = gr.Radio(
+            value='Randomized Paraphrase Generation',
+            label='How would you like to use this system?',
+            type="index",
+            choices=['🔄 Randomized Paraphrase Generation',
+                '⚖️ Complexity-Matched Paraphrasing',
+                '🎛️ Manual Linguistic Control'],
+        )
+    with gr.Accordion("⚙️ Advanced Options", open=False):
+        approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
+                choices=['Use approximate computation of linguistic indices (faster)',
+                    'Use exact computation of linguistic indices'], container=False, show_label=False)
+        control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
+
+    with gr.Accordion("📑 Examples...", open=False):
+        gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
+
+    with gr.Row():
+        sent1.render()
+        with gr.Column():
+            sent2 = gr.Textbox(label='Generated text')
+            interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
+    with gr.Group(elem_classes='separator'):
+        pass
+    #####################
+    with gr.Row():
+        generate_random_btn = gr.Button("Generate",
+                variant='primary', scale=1, visible=True)
+        count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
+        # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
+        # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
+    #####################
+    with gr.Row():
+        estimate_gen_btn = gr.Button("Generate",
+                variant='primary',
+                scale=1, visible=False)
+        sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
+    #####################
+    generate_btn = gr.Button("Generate", variant='primary', visible=False)
+    with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
+        with gr.Row():
+            estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
+            sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
+            estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
+            # rand_btn = gr.Button("Random target")
+            rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
+            copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
+        with gr.Row():
+            sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
+            add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
+        ling.render()
+    #####################
+
+    estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
+    estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
+    # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
+    estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
+    # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
+    rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
+    copy_btn.click(copy, inputs=[ling], outputs=[ling])
+    generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
+    generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
+            outputs=[sent2, interpolation, ling])
+    # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
+    # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
+    add_btn.click(add, inputs=[ling], outputs=[ling])
+    sub_btn.click(sub, inputs=[ling], outputs=[ling])
+
+    group1 = [generate_random_btn, count]
+    group2 = [estimate_gen_btn, sent_ling_gen]
+    group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
+    components = group1 + group2 + group3
+    mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
+    control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
+            outputs=[interpolation])
+
+print('Finished loading')
+demo.launch(share=True)
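
A note on the new app.py: every linguistic-index vector crosses a scaler boundary, with `scaler.transform` applied before vectors reach the model and `scaler.inverse_transform` applied to discriminator outputs before they are shown to the user. A minimal sketch of that round trip, with a freshly fitted `StandardScaler` standing in for the object shipped in assets/scaler.bin (whose actual type is not shown in this commit):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# Stand-in for joblib.load('assets/scaler.bin'); the real scaler's type is an assumption.
scaler = StandardScaler().fit(np.random.rand(100, 40))  # 40 linguistic indices

raw = np.random.rand(1, 40)                   # one row of raw index values
scaled = scaler.transform(raw)                # what sentence1_ling / sentence2_ling feed the model
restored = scaler.inverse_transform(scaled)   # what the dataframe shows the user
assert np.allclose(raw, restored)
```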
assets/ling_collection.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10
+size 320128
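
This entry and the other binary additions below are Git LFS pointer files rather than the payloads themselves: `oid` is the SHA-256 of the real blob and `size` its byte length. Once the LFS objects are pulled, a local copy can be checked against its pointer, for example:

```python
import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    """SHA-256 of a file, streamed in 1 MiB chunks."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Should match the oid recorded in the pointer above.
assert lfs_oid('assets/ling_collection.npy') == 'b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10'
```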
assets/logo.png ADDED
assets/ratios.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc60ebcd53fd467fd7f3c9e9652fb9364285e2833325b6ab46b1c86e2e136b3a
+size 448
assets/samples.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5de4fd9314c1df65f14187cc13fb07300b3a359f57c9bd69ab834ef6148a8368
+size 80651
assets/scaler.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3dc9e74494b2049672441b0587cd73bae605b271941528ea585672bf48d1a84
+size 1414
assets/stats.json ADDED
@@ -0,0 +1,3 @@
+{"min": [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.25, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -12.951, 0.004],
+ "max": [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 23.0, 100.0],
+ "is_int": [true, true, true, true, true, true, true, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false]}
ckpt/ling_disc/checkpoint-41000/config.json ADDED
@@ -0,0 +1,120 @@
+{
+  "_name_or_path": "microsoft/deberta-v3-small",
+  "architectures": [
+    "DebertaReplacedTokenizer"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15",
+    "16": "LABEL_16",
+    "17": "LABEL_17",
+    "18": "LABEL_18",
+    "19": "LABEL_19",
+    "20": "LABEL_20",
+    "21": "LABEL_21",
+    "22": "LABEL_22",
+    "23": "LABEL_23",
+    "24": "LABEL_24",
+    "25": "LABEL_25",
+    "26": "LABEL_26",
+    "27": "LABEL_27",
+    "28": "LABEL_28",
+    "29": "LABEL_29",
+    "30": "LABEL_30",
+    "31": "LABEL_31",
+    "32": "LABEL_32",
+    "33": "LABEL_33",
+    "34": "LABEL_34",
+    "35": "LABEL_35",
+    "36": "LABEL_36",
+    "37": "LABEL_37",
+    "38": "LABEL_38",
+    "39": "LABEL_39"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_16": 16,
+    "LABEL_17": 17,
+    "LABEL_18": 18,
+    "LABEL_19": 19,
+    "LABEL_2": 2,
+    "LABEL_20": 20,
+    "LABEL_21": 21,
+    "LABEL_22": 22,
+    "LABEL_23": 23,
+    "LABEL_24": 24,
+    "LABEL_25": 25,
+    "LABEL_26": 26,
+    "LABEL_27": 27,
+    "LABEL_28": 28,
+    "LABEL_29": 29,
+    "LABEL_3": 3,
+    "LABEL_30": 30,
+    "LABEL_31": 31,
+    "LABEL_32": 32,
+    "LABEL_33": 33,
+    "LABEL_34": 34,
+    "LABEL_35": 35,
+    "LABEL_36": 36,
+    "LABEL_37": 37,
+    "LABEL_38": 38,
+    "LABEL_39": 39,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
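
The config above repurposes microsoft/deberta-v3-small as a 40-output regression model ("problem_type": "regression" with labels 0-39), matching the 40 linguistic complexity indices the demo controls. A quick way to confirm those two facts from the checkpoint directory, assuming transformers is installed:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained('ckpt/ling_disc/checkpoint-41000')
assert config.num_labels == 40              # one output per linguistic index
assert config.problem_type == 'regression'  # regression head, not classification
```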
ckpt/ling_disc/checkpoint-41000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
+size 275252064
ckpt/ling_disc/checkpoint-41000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dce4669eb4c8d092887dca957afda50838e0d8821093ac6ec80dfc38c786041
+size 550568634
ckpt/ling_disc/checkpoint-41000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b153bd123a079b6d0ee9f3616a0498be47197aca1c9c7764282514bc91fdc08d
+size 14244
ckpt/ling_disc/checkpoint-41000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7600c7adf0e16517c635d7b3eee259739a7966140efc08f2afff26d19bb4fb29
+size 1064
ckpt/ling_disc/checkpoint-41000/special_tokens_map.json ADDED
@@ -0,0 +1,119 @@
+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
ckpt/ling_disc/checkpoint-41000/spiece.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656
ckpt/ling_disc/checkpoint-41000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpt/ling_disc/checkpoint-41000/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 512,
934
+ "pad_token": "</s>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }
ckpt/ling_disc/checkpoint-41000/trainer_state.json ADDED
@@ -0,0 +1,636 @@
1
+ {
2
+ "best_metric": 0.05535305291414261,
3
+ "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
4
+ "epoch": 29.306647605432453,
5
+ "eval_steps": 1000,
6
+ "global_step": 41000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.71,
13
+ "grad_norm": 0.855617344379425,
14
+ "learning_rate": 1.1913271384322135e-05,
15
+ "loss": 0.9117,
16
+ "step": 1000
17
+ },
18
+ {
19
+ "epoch": 0.71,
20
+ "eval_loss": 0.6742472052574158,
21
+ "eval_runtime": 27.0595,
22
+ "eval_samples_per_second": 1111.549,
23
+ "eval_steps_per_second": 5.58,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 1.43,
28
+ "grad_norm": 4.203719139099121,
29
+ "learning_rate": 2.382654276864427e-05,
30
+ "loss": 0.4114,
31
+ "step": 2000
32
+ },
33
+ {
34
+ "epoch": 1.43,
35
+ "eval_loss": 0.3266257345676422,
36
+ "eval_runtime": 26.9318,
37
+ "eval_samples_per_second": 1116.822,
38
+ "eval_steps_per_second": 5.607,
39
+ "step": 2000
40
+ },
41
+ {
42
+ "epoch": 2.14,
43
+ "grad_norm": 3.1638591289520264,
44
+ "learning_rate": 3.57398141529664e-05,
45
+ "loss": 0.2624,
46
+ "step": 3000
47
+ },
48
+ {
49
+ "epoch": 2.14,
50
+ "eval_loss": 0.24602766335010529,
51
+ "eval_runtime": 27.0604,
52
+ "eval_samples_per_second": 1111.512,
53
+ "eval_steps_per_second": 5.58,
54
+ "step": 3000
55
+ },
56
+ {
57
+ "epoch": 2.86,
58
+ "grad_norm": 1.7417826652526855,
59
+ "learning_rate": 4.765308553728854e-05,
60
+ "loss": 0.2002,
61
+ "step": 4000
62
+ },
63
+ {
64
+ "epoch": 2.86,
65
+ "eval_loss": 0.1770436018705368,
66
+ "eval_runtime": 26.8812,
67
+ "eval_samples_per_second": 1118.922,
68
+ "eval_steps_per_second": 5.617,
69
+ "step": 4000
70
+ },
71
+ {
72
+ "epoch": 3.57,
73
+ "grad_norm": 1.1299816370010376,
74
+ "learning_rate": 4.893707145315437e-05,
75
+ "loss": 0.1635,
76
+ "step": 5000
77
+ },
78
+ {
79
+ "epoch": 3.57,
80
+ "eval_loss": 0.14757415652275085,
81
+ "eval_runtime": 26.7857,
82
+ "eval_samples_per_second": 1122.914,
83
+ "eval_steps_per_second": 5.637,
84
+ "step": 5000
85
+ },
86
+ {
87
+ "epoch": 4.29,
88
+ "grad_norm": 1.210856556892395,
89
+ "learning_rate": 4.761337463267413e-05,
90
+ "loss": 0.1404,
91
+ "step": 6000
92
+ },
93
+ {
94
+ "epoch": 4.29,
95
+ "eval_loss": 0.12851941585540771,
96
+ "eval_runtime": 26.9893,
97
+ "eval_samples_per_second": 1114.44,
98
+ "eval_steps_per_second": 5.595,
99
+ "step": 6000
100
+ },
101
+ {
102
+ "epoch": 5.0,
103
+ "grad_norm": 2.0565412044525146,
104
+ "learning_rate": 4.62896778121939e-05,
105
+ "loss": 0.1263,
106
+ "step": 7000
107
+ },
108
+ {
109
+ "epoch": 5.0,
110
+ "eval_loss": 0.12228666245937347,
111
+ "eval_runtime": 26.7363,
112
+ "eval_samples_per_second": 1124.987,
113
+ "eval_steps_per_second": 5.648,
114
+ "step": 7000
115
+ },
116
+ {
117
+ "epoch": 5.72,
118
+ "grad_norm": 1.8667607307434082,
119
+ "learning_rate": 4.496598099171366e-05,
120
+ "loss": 0.1127,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 5.72,
125
+ "eval_loss": 0.11036147177219391,
126
+ "eval_runtime": 26.7509,
127
+ "eval_samples_per_second": 1124.375,
128
+ "eval_steps_per_second": 5.645,
129
+ "step": 8000
130
+ },
131
+ {
132
+ "epoch": 6.43,
133
+ "grad_norm": 0.7492337226867676,
134
+ "learning_rate": 4.364228417123342e-05,
135
+ "loss": 0.1059,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 6.43,
140
+ "eval_loss": 0.10317497700452805,
141
+ "eval_runtime": 27.0158,
142
+ "eval_samples_per_second": 1113.349,
143
+ "eval_steps_per_second": 5.589,
144
+ "step": 9000
145
+ },
146
+ {
147
+ "epoch": 7.15,
148
+ "grad_norm": 0.7611485123634338,
149
+ "learning_rate": 4.231858735075319e-05,
150
+ "loss": 0.0993,
151
+ "step": 10000
152
+ },
153
+ {
154
+ "epoch": 7.15,
155
+ "eval_loss": 0.10284282267093658,
156
+ "eval_runtime": 26.795,
157
+ "eval_samples_per_second": 1122.524,
158
+ "eval_steps_per_second": 5.635,
159
+ "step": 10000
160
+ },
161
+ {
162
+ "epoch": 7.86,
163
+ "grad_norm": 0.5870215892791748,
164
+ "learning_rate": 4.099489053027295e-05,
165
+ "loss": 0.0887,
166
+ "step": 11000
167
+ },
168
+ {
169
+ "epoch": 7.86,
170
+ "eval_loss": 0.09789762645959854,
171
+ "eval_runtime": 26.8453,
172
+ "eval_samples_per_second": 1120.419,
173
+ "eval_steps_per_second": 5.625,
174
+ "step": 11000
175
+ },
176
+ {
177
+ "epoch": 8.58,
178
+ "grad_norm": 0.48922085762023926,
179
+ "learning_rate": 3.9671193709792706e-05,
180
+ "loss": 0.0842,
181
+ "step": 12000
182
+ },
183
+ {
184
+ "epoch": 8.58,
185
+ "eval_loss": 0.09349656105041504,
186
+ "eval_runtime": 26.8273,
187
+ "eval_samples_per_second": 1121.172,
188
+ "eval_steps_per_second": 5.629,
189
+ "step": 12000
190
+ },
191
+ {
192
+ "epoch": 9.29,
193
+ "grad_norm": 0.4252859354019165,
194
+ "learning_rate": 3.8347496889312476e-05,
195
+ "loss": 0.0793,
196
+ "step": 13000
197
+ },
198
+ {
199
+ "epoch": 9.29,
200
+ "eval_loss": 0.09415590018033981,
201
+ "eval_runtime": 25.9362,
202
+ "eval_samples_per_second": 1159.693,
203
+ "eval_steps_per_second": 5.822,
204
+ "step": 13000
205
+ },
206
+ {
207
+ "epoch": 10.01,
208
+ "grad_norm": 0.44548505544662476,
209
+ "learning_rate": 3.702380006883224e-05,
210
+ "loss": 0.076,
211
+ "step": 14000
212
+ },
213
+ {
214
+ "epoch": 10.01,
215
+ "eval_loss": 0.08913980424404144,
216
+ "eval_runtime": 26.7379,
217
+ "eval_samples_per_second": 1124.919,
218
+ "eval_steps_per_second": 5.647,
219
+ "step": 14000
220
+ },
221
+ {
222
+ "epoch": 10.72,
223
+ "grad_norm": 0.2965373694896698,
224
+ "learning_rate": 3.5700103248352e-05,
225
+ "loss": 0.0714,
226
+ "step": 15000
227
+ },
228
+ {
229
+ "epoch": 10.72,
230
+ "eval_loss": 0.08456840366125107,
231
+ "eval_runtime": 26.787,
232
+ "eval_samples_per_second": 1122.857,
233
+ "eval_steps_per_second": 5.637,
234
+ "step": 15000
235
+ },
236
+ {
237
+ "epoch": 11.44,
238
+ "grad_norm": 0.3205694854259491,
239
+ "learning_rate": 3.437640642787176e-05,
240
+ "loss": 0.0677,
241
+ "step": 16000
242
+ },
243
+ {
244
+ "epoch": 11.44,
245
+ "eval_loss": 0.07863688468933105,
246
+ "eval_runtime": 26.8242,
247
+ "eval_samples_per_second": 1121.299,
248
+ "eval_steps_per_second": 5.629,
249
+ "step": 16000
250
+ },
251
+ {
252
+ "epoch": 12.15,
253
+ "grad_norm": 0.2736203670501709,
254
+ "learning_rate": 3.3052709607391525e-05,
255
+ "loss": 0.0636,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 12.15,
260
+ "eval_loss": 0.07664181292057037,
261
+ "eval_runtime": 26.7818,
262
+ "eval_samples_per_second": 1123.077,
263
+ "eval_steps_per_second": 5.638,
264
+ "step": 17000
265
+ },
266
+ {
267
+ "epoch": 12.87,
268
+ "grad_norm": 0.25644680857658386,
269
+ "learning_rate": 3.172901278691129e-05,
270
+ "loss": 0.0618,
271
+ "step": 18000
272
+ },
273
+ {
274
+ "epoch": 12.87,
275
+ "eval_loss": 0.07351888716220856,
276
+ "eval_runtime": 26.8445,
277
+ "eval_samples_per_second": 1120.453,
278
+ "eval_steps_per_second": 5.625,
279
+ "step": 18000
280
+ },
281
+ {
282
+ "epoch": 13.58,
283
+ "grad_norm": 0.2748676538467407,
284
+ "learning_rate": 3.0405315966431053e-05,
285
+ "loss": 0.0584,
286
+ "step": 19000
287
+ },
288
+ {
289
+ "epoch": 13.58,
290
+ "eval_loss": 0.07314006239175797,
291
+ "eval_runtime": 26.8333,
292
+ "eval_samples_per_second": 1120.921,
293
+ "eval_steps_per_second": 5.627,
294
+ "step": 19000
295
+ },
296
+ {
297
+ "epoch": 14.3,
298
+ "grad_norm": 0.30235132575035095,
299
+ "learning_rate": 2.9081619145950812e-05,
300
+ "loss": 0.057,
301
+ "step": 20000
302
+ },
303
+ {
304
+ "epoch": 14.3,
305
+ "eval_loss": 0.07568340748548508,
306
+ "eval_runtime": 27.0109,
307
+ "eval_samples_per_second": 1113.55,
308
+ "eval_steps_per_second": 5.59,
309
+ "step": 20000
310
+ },
311
+ {
312
+ "epoch": 15.01,
313
+ "grad_norm": 0.2508692145347595,
314
+ "learning_rate": 2.7757922325470574e-05,
315
+ "loss": 0.0558,
316
+ "step": 21000
317
+ },
318
+ {
319
+ "epoch": 15.01,
320
+ "eval_loss": 0.07675843685865402,
321
+ "eval_runtime": 26.9026,
322
+ "eval_samples_per_second": 1118.032,
323
+ "eval_steps_per_second": 5.613,
324
+ "step": 21000
325
+ },
326
+ {
327
+ "epoch": 15.73,
328
+ "grad_norm": 0.3341030478477478,
329
+ "learning_rate": 2.643422550499034e-05,
330
+ "loss": 0.0533,
331
+ "step": 22000
332
+ },
333
+ {
334
+ "epoch": 15.73,
335
+ "eval_loss": 0.07339715212583542,
336
+ "eval_runtime": 26.8727,
337
+ "eval_samples_per_second": 1119.278,
338
+ "eval_steps_per_second": 5.619,
339
+ "step": 22000
340
+ },
341
+ {
342
+ "epoch": 16.44,
343
+ "grad_norm": 0.30433303117752075,
344
+ "learning_rate": 2.51105286845101e-05,
345
+ "loss": 0.0516,
346
+ "step": 23000
347
+ },
348
+ {
349
+ "epoch": 16.44,
350
+ "eval_loss": 0.0694783553481102,
351
+ "eval_runtime": 26.8551,
352
+ "eval_samples_per_second": 1120.012,
353
+ "eval_steps_per_second": 5.623,
354
+ "step": 23000
355
+ },
356
+ {
357
+ "epoch": 17.16,
358
+ "grad_norm": 0.39424875378608704,
359
+ "learning_rate": 2.378683186402986e-05,
360
+ "loss": 0.049,
361
+ "step": 24000
362
+ },
363
+ {
364
+ "epoch": 17.16,
365
+ "eval_loss": 0.06750107556581497,
366
+ "eval_runtime": 26.9045,
367
+ "eval_samples_per_second": 1117.954,
368
+ "eval_steps_per_second": 5.612,
369
+ "step": 24000
370
+ },
371
+ {
372
+ "epoch": 17.87,
373
+ "grad_norm": 0.29526183009147644,
374
+ "learning_rate": 2.2463135043549627e-05,
375
+ "loss": 0.0478,
376
+ "step": 25000
377
+ },
378
+ {
379
+ "epoch": 17.87,
380
+ "eval_loss": 0.06841529905796051,
381
+ "eval_runtime": 26.9131,
382
+ "eval_samples_per_second": 1117.597,
383
+ "eval_steps_per_second": 5.611,
384
+ "step": 25000
385
+ },
386
+ {
387
+ "epoch": 18.58,
388
+ "grad_norm": 0.2802821099758148,
389
+ "learning_rate": 2.113943822306939e-05,
390
+ "loss": 0.0472,
391
+ "step": 26000
392
+ },
393
+ {
394
+ "epoch": 18.58,
395
+ "eval_loss": 0.0680340975522995,
396
+ "eval_runtime": 26.8442,
397
+ "eval_samples_per_second": 1120.467,
398
+ "eval_steps_per_second": 5.625,
399
+ "step": 26000
400
+ },
401
+ {
402
+ "epoch": 19.3,
403
+ "grad_norm": 0.198490172624588,
404
+ "learning_rate": 1.9815741402589152e-05,
405
+ "loss": 0.0445,
406
+ "step": 27000
407
+ },
408
+ {
409
+ "epoch": 19.3,
410
+ "eval_loss": 0.059882719069719315,
411
+ "eval_runtime": 26.9691,
412
+ "eval_samples_per_second": 1115.275,
413
+ "eval_steps_per_second": 5.599,
414
+ "step": 27000
415
+ },
416
+ {
417
+ "epoch": 20.01,
418
+ "grad_norm": 0.3383251130580902,
419
+ "learning_rate": 1.8492044582108914e-05,
420
+ "loss": 0.0435,
421
+ "step": 28000
422
+ },
423
+ {
424
+ "epoch": 20.01,
425
+ "eval_loss": 0.06356318295001984,
426
+ "eval_runtime": 26.8538,
427
+ "eval_samples_per_second": 1120.066,
428
+ "eval_steps_per_second": 5.623,
429
+ "step": 28000
430
+ },
431
+ {
432
+ "epoch": 20.73,
433
+ "grad_norm": 0.16571784019470215,
434
+ "learning_rate": 1.7168347761628677e-05,
435
+ "loss": 0.0419,
436
+ "step": 29000
437
+ },
438
+ {
439
+ "epoch": 20.73,
440
+ "eval_loss": 0.06056862324476242,
441
+ "eval_runtime": 27.0748,
442
+ "eval_samples_per_second": 1110.924,
443
+ "eval_steps_per_second": 5.577,
444
+ "step": 29000
445
+ },
446
+ {
447
+ "epoch": 21.44,
448
+ "grad_norm": 0.19518467783927917,
449
+ "learning_rate": 1.584465094114844e-05,
450
+ "loss": 0.0409,
451
+ "step": 30000
452
+ },
453
+ {
454
+ "epoch": 21.44,
455
+ "eval_loss": 0.06490638852119446,
456
+ "eval_runtime": 26.8481,
457
+ "eval_samples_per_second": 1120.301,
458
+ "eval_steps_per_second": 5.624,
459
+ "step": 30000
460
+ },
461
+ {
462
+ "epoch": 22.16,
463
+ "grad_norm": 0.15420591831207275,
464
+ "learning_rate": 1.4520954120668203e-05,
465
+ "loss": 0.0397,
466
+ "step": 31000
467
+ },
468
+ {
469
+ "epoch": 22.16,
470
+ "eval_loss": 0.05918469280004501,
471
+ "eval_runtime": 26.8143,
472
+ "eval_samples_per_second": 1121.713,
473
+ "eval_steps_per_second": 5.631,
474
+ "step": 31000
475
+ },
476
+ {
477
+ "epoch": 22.87,
478
+ "grad_norm": 0.26854997873306274,
479
+ "learning_rate": 1.3197257300187965e-05,
480
+ "loss": 0.0387,
481
+ "step": 32000
482
+ },
483
+ {
484
+ "epoch": 22.87,
485
+ "eval_loss": 0.06144551932811737,
486
+ "eval_runtime": 26.8852,
487
+ "eval_samples_per_second": 1118.757,
488
+ "eval_steps_per_second": 5.616,
489
+ "step": 32000
490
+ },
491
+ {
492
+ "epoch": 23.59,
493
+ "grad_norm": 0.17430314421653748,
494
+ "learning_rate": 1.1873560479707728e-05,
495
+ "loss": 0.0373,
496
+ "step": 33000
497
+ },
498
+ {
499
+ "epoch": 23.59,
500
+ "eval_loss": 0.06159648299217224,
501
+ "eval_runtime": 26.7887,
502
+ "eval_samples_per_second": 1122.785,
503
+ "eval_steps_per_second": 5.637,
504
+ "step": 33000
505
+ },
506
+ {
507
+ "epoch": 24.3,
508
+ "grad_norm": 0.14911049604415894,
509
+ "learning_rate": 1.054986365922749e-05,
510
+ "loss": 0.0369,
511
+ "step": 34000
512
+ },
513
+ {
514
+ "epoch": 24.3,
515
+ "eval_loss": 0.05931873992085457,
516
+ "eval_runtime": 26.8571,
517
+ "eval_samples_per_second": 1119.926,
518
+ "eval_steps_per_second": 5.622,
519
+ "step": 34000
520
+ },
521
+ {
522
+ "epoch": 25.02,
523
+ "grad_norm": 0.13620807230472565,
524
+ "learning_rate": 9.226166838747254e-06,
525
+ "loss": 0.0361,
526
+ "step": 35000
527
+ },
528
+ {
529
+ "epoch": 25.02,
530
+ "eval_loss": 0.05695568770170212,
531
+ "eval_runtime": 26.8966,
532
+ "eval_samples_per_second": 1118.283,
533
+ "eval_steps_per_second": 5.614,
534
+ "step": 35000
535
+ },
536
+ {
537
+ "epoch": 25.73,
538
+ "grad_norm": 0.13764438033103943,
539
+ "learning_rate": 7.902470018267017e-06,
540
+ "loss": 0.0349,
541
+ "step": 36000
542
+ },
543
+ {
544
+ "epoch": 25.73,
545
+ "eval_loss": 0.05707501247525215,
546
+ "eval_runtime": 26.986,
547
+ "eval_samples_per_second": 1114.578,
548
+ "eval_steps_per_second": 5.595,
549
+ "step": 36000
550
+ },
551
+ {
552
+ "epoch": 26.45,
553
+ "grad_norm": 0.2389635145664215,
554
+ "learning_rate": 6.578773197786779e-06,
555
+ "loss": 0.0343,
556
+ "step": 37000
557
+ },
558
+ {
559
+ "epoch": 26.45,
560
+ "eval_loss": 0.0577365942299366,
561
+ "eval_runtime": 26.9903,
562
+ "eval_samples_per_second": 1114.401,
563
+ "eval_steps_per_second": 5.595,
564
+ "step": 37000
565
+ },
566
+ {
567
+ "epoch": 27.16,
568
+ "grad_norm": 0.15828461945056915,
569
+ "learning_rate": 5.255076377306542e-06,
570
+ "loss": 0.034,
571
+ "step": 38000
572
+ },
573
+ {
574
+ "epoch": 27.16,
575
+ "eval_loss": 0.05767366662621498,
576
+ "eval_runtime": 27.1454,
577
+ "eval_samples_per_second": 1108.035,
578
+ "eval_steps_per_second": 5.563,
579
+ "step": 38000
580
+ },
581
+ {
582
+ "epoch": 27.88,
583
+ "grad_norm": 0.1059570387005806,
584
+ "learning_rate": 3.9313795568263045e-06,
585
+ "loss": 0.0332,
586
+ "step": 39000
587
+ },
588
+ {
589
+ "epoch": 27.88,
590
+ "eval_loss": 0.056225307285785675,
591
+ "eval_runtime": 26.9534,
592
+ "eval_samples_per_second": 1115.928,
593
+ "eval_steps_per_second": 5.602,
594
+ "step": 39000
595
+ },
596
+ {
597
+ "epoch": 28.59,
598
+ "grad_norm": 0.1975150853395462,
599
+ "learning_rate": 2.6076827363460673e-06,
600
+ "loss": 0.0329,
601
+ "step": 40000
602
+ },
603
+ {
604
+ "epoch": 28.59,
605
+ "eval_loss": 0.05555161088705063,
606
+ "eval_runtime": 27.1187,
607
+ "eval_samples_per_second": 1109.122,
608
+ "eval_steps_per_second": 5.568,
609
+ "step": 40000
610
+ },
611
+ {
612
+ "epoch": 29.31,
613
+ "grad_norm": 0.1037423312664032,
614
+ "learning_rate": 1.28398591586583e-06,
615
+ "loss": 0.0319,
616
+ "step": 41000
617
+ },
618
+ {
619
+ "epoch": 29.31,
620
+ "eval_loss": 0.05535305291414261,
621
+ "eval_runtime": 26.8353,
622
+ "eval_samples_per_second": 1120.838,
623
+ "eval_steps_per_second": 5.627,
624
+ "step": 41000
625
+ }
626
+ ],
627
+ "logging_steps": 1000,
628
+ "max_steps": 41970,
629
+ "num_input_tokens_seen": 0,
630
+ "num_train_epochs": 30,
631
+ "save_steps": 1000,
632
+ "total_flos": 3.270624085088659e+16,
633
+ "train_batch_size": 200,
634
+ "trial_name": null,
635
+ "trial_params": null
636
+ }
ckpt/ling_disc/checkpoint-41000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
+ size 4984
ckpt/ling_disc/config.json ADDED
@@ -0,0 +1,120 @@
+ {
+ "_name_or_path": "microsoft/deberta-v3-small",
+ "architectures": [
+ "DebertaReplacedTokenizer"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_19": 19,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_21": 21,
+ "LABEL_22": 22,
+ "LABEL_23": 23,
+ "LABEL_24": 24,
+ "LABEL_25": 25,
+ "LABEL_26": 26,
+ "LABEL_27": 27,
+ "LABEL_28": 28,
+ "LABEL_29": 29,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_5": 5,
+ "LABEL_6": 6,
+ "LABEL_7": 7,
+ "LABEL_8": 8,
+ "LABEL_9": 9
+ },
+ "layer_norm_eps": 1e-07,
+ "max_position_embeddings": 512,
+ "max_relative_positions": -1,
+ "model_type": "deberta-v2",
+ "norm_rel_ebd": "layer_norm",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 6,
+ "pad_token_id": 0,
+ "pooler_dropout": 0,
+ "pooler_hidden_act": "gelu",
+ "pooler_hidden_size": 768,
+ "pos_att_type": [
+ "p2c",
+ "c2p"
+ ],
+ "position_biased_input": false,
+ "position_buckets": 256,
+ "problem_type": "regression",
+ "relative_attention": true,
+ "share_att_key": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.3",
+ "type_vocab_size": 0,
+ "vocab_size": 128100
+ }
ckpt/ling_disc/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
+ size 275252064
ckpt/ling_disc/scaler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1dbad9723e37379b55bb6d7300abf6ad705f320bd599ca7f583e574f4a26f4a4
+ size 1575
ckpt/ling_disc/special_tokens_map.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>"
+ ],
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
ckpt/ling_disc/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
ckpt/ling_disc/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
ckpt/ling_disc/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32000": {
+ "content": "<extra_id_99>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32001": {
+ "content": "<extra_id_98>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32002": {
+ "content": "<extra_id_97>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32003": {
+ "content": "<extra_id_96>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32004": {
+ "content": "<extra_id_95>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32005": {
+ "content": "<extra_id_94>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32006": {
+ "content": "<extra_id_93>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32007": {
+ "content": "<extra_id_92>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32008": {
+ "content": "<extra_id_91>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32009": {
+ "content": "<extra_id_90>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32010": {
+ "content": "<extra_id_89>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32011": {
+ "content": "<extra_id_88>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32012": {
+ "content": "<extra_id_87>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32013": {
+ "content": "<extra_id_86>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32014": {
+ "content": "<extra_id_85>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32015": {
+ "content": "<extra_id_84>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32016": {
+ "content": "<extra_id_83>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32017": {
+ "content": "<extra_id_82>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32018": {
+ "content": "<extra_id_81>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32019": {
+ "content": "<extra_id_80>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32020": {
+ "content": "<extra_id_79>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32021": {
+ "content": "<extra_id_78>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32022": {
+ "content": "<extra_id_77>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32023": {
+ "content": "<extra_id_76>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32024": {
+ "content": "<extra_id_75>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32025": {
+ "content": "<extra_id_74>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32026": {
+ "content": "<extra_id_73>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32027": {
+ "content": "<extra_id_72>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32028": {
+ "content": "<extra_id_71>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32029": {
+ "content": "<extra_id_70>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32030": {
+ "content": "<extra_id_69>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32031": {
+ "content": "<extra_id_68>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32032": {
+ "content": "<extra_id_67>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32033": {
+ "content": "<extra_id_66>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32034": {
+ "content": "<extra_id_65>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32035": {
+ "content": "<extra_id_64>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32036": {
+ "content": "<extra_id_63>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32037": {
+ "content": "<extra_id_62>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32038": {
+ "content": "<extra_id_61>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32039": {
+ "content": "<extra_id_60>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32040": {
+ "content": "<extra_id_59>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32041": {
+ "content": "<extra_id_58>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32042": {
+ "content": "<extra_id_57>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32043": {
+ "content": "<extra_id_56>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32044": {
+ "content": "<extra_id_55>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32045": {
+ "content": "<extra_id_54>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32046": {
+ "content": "<extra_id_53>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32047": {
+ "content": "<extra_id_52>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32048": {
+ "content": "<extra_id_51>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32049": {
+ "content": "<extra_id_50>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32050": {
+ "content": "<extra_id_49>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32051": {
+ "content": "<extra_id_48>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32052": {
+ "content": "<extra_id_47>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32053": {
+ "content": "<extra_id_46>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32054": {
+ "content": "<extra_id_45>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32055": {
+ "content": "<extra_id_44>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32056": {
+ "content": "<extra_id_43>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32057": {
+ "content": "<extra_id_42>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32058": {
+ "content": "<extra_id_41>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32059": {
+ "content": "<extra_id_40>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32060": {
+ "content": "<extra_id_39>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32061": {
+ "content": "<extra_id_38>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32062": {
+ "content": "<extra_id_37>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32063": {
+ "content": "<extra_id_36>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32064": {
+ "content": "<extra_id_35>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32065": {
+ "content": "<extra_id_34>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32066": {
+ "content": "<extra_id_33>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32067": {
+ "content": "<extra_id_32>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32068": {
+ "content": "<extra_id_31>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32069": {
+ "content": "<extra_id_30>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32070": {
+ "content": "<extra_id_29>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32071": {
+ "content": "<extra_id_28>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32072": {
+ "content": "<extra_id_27>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32073": {
+ "content": "<extra_id_26>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32074": {
+ "content": "<extra_id_25>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32075": {
+ "content": "<extra_id_24>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32076": {
+ "content": "<extra_id_23>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32077": {
+ "content": "<extra_id_22>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32078": {
+ "content": "<extra_id_21>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32079": {
+ "content": "<extra_id_20>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32080": {
+ "content": "<extra_id_19>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32081": {
+ "content": "<extra_id_18>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32082": {
+ "content": "<extra_id_17>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32083": {
+ "content": "<extra_id_16>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32084": {
+ "content": "<extra_id_15>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32085": {
+ "content": "<extra_id_14>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32086": {
+ "content": "<extra_id_13>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32087": {
+ "content": "<extra_id_12>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32088": {
+ "content": "<extra_id_11>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32089": {
+ "content": "<extra_id_10>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32090": {
+ "content": "<extra_id_9>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32091": {
+ "content": "<extra_id_8>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32092": {
+ "content": "<extra_id_7>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32093": {
+ "content": "<extra_id_6>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32094": {
+ "content": "<extra_id_5>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32095": {
+ "content": "<extra_id_4>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32096": {
+ "content": "<extra_id_3>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32097": {
+ "content": "<extra_id_2>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32098": {
+ "content": "<extra_id_1>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32099": {
+ "content": "<extra_id_0>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<extra_id_0>",
+ "<extra_id_1>",
+ "<extra_id_2>",
+ "<extra_id_3>",
+ "<extra_id_4>",
+ "<extra_id_5>",
+ "<extra_id_6>",
+ "<extra_id_7>",
+ "<extra_id_8>",
+ "<extra_id_9>",
+ "<extra_id_10>",
+ "<extra_id_11>",
+ "<extra_id_12>",
+ "<extra_id_13>",
+ "<extra_id_14>",
+ "<extra_id_15>",
+ "<extra_id_16>",
+ "<extra_id_17>",
+ "<extra_id_18>",
+ "<extra_id_19>",
+ "<extra_id_20>",
+ "<extra_id_21>",
+ "<extra_id_22>",
+ "<extra_id_23>",
+ "<extra_id_24>",
+ "<extra_id_25>",
+ "<extra_id_26>",
+ "<extra_id_27>",
+ "<extra_id_28>",
+ "<extra_id_29>",
+ "<extra_id_30>",
+ "<extra_id_31>",
+ "<extra_id_32>",
+ "<extra_id_33>",
+ "<extra_id_34>",
+ "<extra_id_35>",
+ "<extra_id_36>",
+ "<extra_id_37>",
+ "<extra_id_38>",
+ "<extra_id_39>",
+ "<extra_id_40>",
+ "<extra_id_41>",
+ "<extra_id_42>",
+ "<extra_id_43>",
+ "<extra_id_44>",
+ "<extra_id_45>",
+ "<extra_id_46>",
+ "<extra_id_47>",
+ "<extra_id_48>",
+ "<extra_id_49>",
+ "<extra_id_50>",
+ "<extra_id_51>",
+ "<extra_id_52>",
+ "<extra_id_53>",
+ "<extra_id_54>",
+ "<extra_id_55>",
+ "<extra_id_56>",
+ "<extra_id_57>",
+ "<extra_id_58>",
+ "<extra_id_59>",
+ "<extra_id_60>",
+ "<extra_id_61>",
+ "<extra_id_62>",
+ "<extra_id_63>",
+ "<extra_id_64>",
+ "<extra_id_65>",
+ "<extra_id_66>",
+ "<extra_id_67>",
+ "<extra_id_68>",
+ "<extra_id_69>",
+ "<extra_id_70>",
+ "<extra_id_71>",
+ "<extra_id_72>",
+ "<extra_id_73>",
+ "<extra_id_74>",
+ "<extra_id_75>",
+ "<extra_id_76>",
+ "<extra_id_77>",
+ "<extra_id_78>",
+ "<extra_id_79>",
+ "<extra_id_80>",
+ "<extra_id_81>",
+ "<extra_id_82>",
+ "<extra_id_83>",
+ "<extra_id_84>",
+ "<extra_id_85>",
+ "<extra_id_86>",
+ "<extra_id_87>",
+ "<extra_id_88>",
+ "<extra_id_89>",
+ "<extra_id_90>",
+ "<extra_id_91>",
+ "<extra_id_92>",
+ "<extra_id_93>",
+ "<extra_id_94>",
+ "<extra_id_95>",
+ "<extra_id_96>",
+ "<extra_id_97>",
+ "<extra_id_98>",
+ "<extra_id_99>"
+ ],
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "</s>",
+ "extra_ids": 100,
+ "model_max_length": 512,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "T5Tokenizer",
+ "unk_token": "<unk>"
+ }
ckpt/ling_disc/trainer_state.json ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.05535305291414261,
3
+ "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
4
+ "epoch": 30.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 41970,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.71,
13
+ "grad_norm": 0.855617344379425,
14
+ "learning_rate": 1.1913271384322135e-05,
15
+ "loss": 0.9117,
16
+ "step": 1000
17
+ },
18
+ {
19
+ "epoch": 0.71,
20
+ "eval_loss": 0.6742472052574158,
21
+ "eval_runtime": 27.0595,
22
+ "eval_samples_per_second": 1111.549,
23
+ "eval_steps_per_second": 5.58,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 1.43,
28
+ "grad_norm": 4.203719139099121,
29
+ "learning_rate": 2.382654276864427e-05,
30
+ "loss": 0.4114,
31
+ "step": 2000
32
+ },
33
+ {
34
+ "epoch": 1.43,
35
+ "eval_loss": 0.3266257345676422,
36
+ "eval_runtime": 26.9318,
37
+ "eval_samples_per_second": 1116.822,
38
+ "eval_steps_per_second": 5.607,
39
+ "step": 2000
40
+ },
41
+ {
42
+ "epoch": 2.14,
43
+ "grad_norm": 3.1638591289520264,
44
+ "learning_rate": 3.57398141529664e-05,
45
+ "loss": 0.2624,
46
+ "step": 3000
47
+ },
48
+ {
49
+ "epoch": 2.14,
50
+ "eval_loss": 0.24602766335010529,
51
+ "eval_runtime": 27.0604,
52
+ "eval_samples_per_second": 1111.512,
53
+ "eval_steps_per_second": 5.58,
54
+ "step": 3000
55
+ },
56
+ {
57
+ "epoch": 2.86,
58
+ "grad_norm": 1.7417826652526855,
59
+ "learning_rate": 4.765308553728854e-05,
60
+ "loss": 0.2002,
61
+ "step": 4000
62
+ },
63
+ {
64
+ "epoch": 2.86,
65
+ "eval_loss": 0.1770436018705368,
66
+ "eval_runtime": 26.8812,
67
+ "eval_samples_per_second": 1118.922,
68
+ "eval_steps_per_second": 5.617,
69
+ "step": 4000
70
+ },
71
+ {
72
+ "epoch": 3.57,
73
+ "grad_norm": 1.1299816370010376,
74
+ "learning_rate": 4.893707145315437e-05,
75
+ "loss": 0.1635,
76
+ "step": 5000
77
+ },
78
+ {
79
+ "epoch": 3.57,
80
+ "eval_loss": 0.14757415652275085,
81
+ "eval_runtime": 26.7857,
82
+ "eval_samples_per_second": 1122.914,
83
+ "eval_steps_per_second": 5.637,
84
+ "step": 5000
85
+ },
86
+ {
87
+ "epoch": 4.29,
88
+ "grad_norm": 1.210856556892395,
89
+ "learning_rate": 4.761337463267413e-05,
90
+ "loss": 0.1404,
91
+ "step": 6000
92
+ },
93
+ {
94
+ "epoch": 4.29,
95
+ "eval_loss": 0.12851941585540771,
96
+ "eval_runtime": 26.9893,
97
+ "eval_samples_per_second": 1114.44,
98
+ "eval_steps_per_second": 5.595,
99
+ "step": 6000
100
+ },
101
+ {
102
+ "epoch": 5.0,
103
+ "grad_norm": 2.0565412044525146,
104
+ "learning_rate": 4.62896778121939e-05,
105
+ "loss": 0.1263,
106
+ "step": 7000
107
+ },
108
+ {
109
+ "epoch": 5.0,
110
+ "eval_loss": 0.12228666245937347,
111
+ "eval_runtime": 26.7363,
112
+ "eval_samples_per_second": 1124.987,
113
+ "eval_steps_per_second": 5.648,
114
+ "step": 7000
115
+ },
116
+ {
117
+ "epoch": 5.72,
118
+ "grad_norm": 1.8667607307434082,
119
+ "learning_rate": 4.496598099171366e-05,
120
+ "loss": 0.1127,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 5.72,
125
+ "eval_loss": 0.11036147177219391,
126
+ "eval_runtime": 26.7509,
127
+ "eval_samples_per_second": 1124.375,
128
+ "eval_steps_per_second": 5.645,
129
+ "step": 8000
130
+ },
131
+ {
132
+ "epoch": 6.43,
133
+ "grad_norm": 0.7492337226867676,
134
+ "learning_rate": 4.364228417123342e-05,
135
+ "loss": 0.1059,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 6.43,
140
+ "eval_loss": 0.10317497700452805,
141
+ "eval_runtime": 27.0158,
142
+ "eval_samples_per_second": 1113.349,
143
+ "eval_steps_per_second": 5.589,
144
+ "step": 9000
145
+ },
146
+ {
147
+ "epoch": 7.15,
148
+ "grad_norm": 0.7611485123634338,
149
+ "learning_rate": 4.231858735075319e-05,
150
+ "loss": 0.0993,
151
+ "step": 10000
152
+ },
153
+ {
154
+ "epoch": 7.15,
155
+ "eval_loss": 0.10284282267093658,
156
+ "eval_runtime": 26.795,
157
+ "eval_samples_per_second": 1122.524,
158
+ "eval_steps_per_second": 5.635,
159
+ "step": 10000
160
+ },
161
+ {
162
+ "epoch": 7.86,
163
+ "grad_norm": 0.5870215892791748,
164
+ "learning_rate": 4.099489053027295e-05,
165
+ "loss": 0.0887,
166
+ "step": 11000
167
+ },
168
+ {
169
+ "epoch": 7.86,
170
+ "eval_loss": 0.09789762645959854,
171
+ "eval_runtime": 26.8453,
172
+ "eval_samples_per_second": 1120.419,
173
+ "eval_steps_per_second": 5.625,
174
+ "step": 11000
175
+ },
176
+ {
177
+ "epoch": 8.58,
178
+ "grad_norm": 0.48922085762023926,
179
+ "learning_rate": 3.9671193709792706e-05,
180
+ "loss": 0.0842,
181
+ "step": 12000
182
+ },
183
+ {
184
+ "epoch": 8.58,
185
+ "eval_loss": 0.09349656105041504,
186
+ "eval_runtime": 26.8273,
187
+ "eval_samples_per_second": 1121.172,
188
+ "eval_steps_per_second": 5.629,
189
+ "step": 12000
190
+ },
191
+ {
192
+ "epoch": 9.29,
193
+ "grad_norm": 0.4252859354019165,
194
+ "learning_rate": 3.8347496889312476e-05,
195
+ "loss": 0.0793,
196
+ "step": 13000
197
+ },
198
+ {
199
+ "epoch": 9.29,
200
+ "eval_loss": 0.09415590018033981,
201
+ "eval_runtime": 25.9362,
202
+ "eval_samples_per_second": 1159.693,
203
+ "eval_steps_per_second": 5.822,
204
+ "step": 13000
205
+ },
206
+ {
207
+ "epoch": 10.01,
208
+ "grad_norm": 0.44548505544662476,
209
+ "learning_rate": 3.702380006883224e-05,
210
+ "loss": 0.076,
211
+ "step": 14000
212
+ },
213
+ {
214
+ "epoch": 10.01,
215
+ "eval_loss": 0.08913980424404144,
216
+ "eval_runtime": 26.7379,
217
+ "eval_samples_per_second": 1124.919,
218
+ "eval_steps_per_second": 5.647,
219
+ "step": 14000
220
+ },
221
+ {
222
+ "epoch": 10.72,
223
+ "grad_norm": 0.2965373694896698,
224
+ "learning_rate": 3.5700103248352e-05,
225
+ "loss": 0.0714,
226
+ "step": 15000
227
+ },
228
+ {
229
+ "epoch": 10.72,
230
+ "eval_loss": 0.08456840366125107,
231
+ "eval_runtime": 26.787,
232
+ "eval_samples_per_second": 1122.857,
233
+ "eval_steps_per_second": 5.637,
234
+ "step": 15000
235
+ },
236
+ {
237
+ "epoch": 11.44,
238
+ "grad_norm": 0.3205694854259491,
239
+ "learning_rate": 3.437640642787176e-05,
240
+ "loss": 0.0677,
241
+ "step": 16000
242
+ },
243
+ {
244
+ "epoch": 11.44,
245
+ "eval_loss": 0.07863688468933105,
246
+ "eval_runtime": 26.8242,
247
+ "eval_samples_per_second": 1121.299,
248
+ "eval_steps_per_second": 5.629,
249
+ "step": 16000
250
+ },
251
+ {
252
+ "epoch": 12.15,
253
+ "grad_norm": 0.2736203670501709,
254
+ "learning_rate": 3.3052709607391525e-05,
255
+ "loss": 0.0636,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 12.15,
260
+ "eval_loss": 0.07664181292057037,
261
+ "eval_runtime": 26.7818,
262
+ "eval_samples_per_second": 1123.077,
263
+ "eval_steps_per_second": 5.638,
264
+ "step": 17000
265
+ },
266
+ {
267
+ "epoch": 12.87,
268
+ "grad_norm": 0.25644680857658386,
269
+ "learning_rate": 3.172901278691129e-05,
270
+ "loss": 0.0618,
271
+ "step": 18000
272
+ },
273
+ {
274
+ "epoch": 12.87,
275
+ "eval_loss": 0.07351888716220856,
276
+ "eval_runtime": 26.8445,
277
+ "eval_samples_per_second": 1120.453,
278
+ "eval_steps_per_second": 5.625,
279
+ "step": 18000
280
+ },
281
+ {
282
+ "epoch": 13.58,
283
+ "grad_norm": 0.2748676538467407,
284
+ "learning_rate": 3.0405315966431053e-05,
285
+ "loss": 0.0584,
286
+ "step": 19000
287
+ },
288
+ {
289
+ "epoch": 13.58,
290
+ "eval_loss": 0.07314006239175797,
291
+ "eval_runtime": 26.8333,
292
+ "eval_samples_per_second": 1120.921,
293
+ "eval_steps_per_second": 5.627,
294
+ "step": 19000
295
+ },
296
+ {
297
+ "epoch": 14.3,
298
+ "grad_norm": 0.30235132575035095,
299
+ "learning_rate": 2.9081619145950812e-05,
300
+ "loss": 0.057,
301
+ "step": 20000
302
+ },
303
+ {
304
+ "epoch": 14.3,
305
+ "eval_loss": 0.07568340748548508,
306
+ "eval_runtime": 27.0109,
307
+ "eval_samples_per_second": 1113.55,
308
+ "eval_steps_per_second": 5.59,
309
+ "step": 20000
310
+ },
311
+ {
312
+ "epoch": 15.01,
313
+ "grad_norm": 0.2508692145347595,
314
+ "learning_rate": 2.7757922325470574e-05,
315
+ "loss": 0.0558,
316
+ "step": 21000
317
+ },
318
+ {
319
+ "epoch": 15.01,
320
+ "eval_loss": 0.07675843685865402,
321
+ "eval_runtime": 26.9026,
322
+ "eval_samples_per_second": 1118.032,
323
+ "eval_steps_per_second": 5.613,
324
+ "step": 21000
325
+ },
326
+ {
327
+ "epoch": 15.73,
328
+ "grad_norm": 0.3341030478477478,
329
+ "learning_rate": 2.643422550499034e-05,
330
+ "loss": 0.0533,
331
+ "step": 22000
332
+ },
333
+ {
334
+ "epoch": 15.73,
335
+ "eval_loss": 0.07339715212583542,
336
+ "eval_runtime": 26.8727,
337
+ "eval_samples_per_second": 1119.278,
338
+ "eval_steps_per_second": 5.619,
339
+ "step": 22000
340
+ },
341
+ {
342
+ "epoch": 16.44,
343
+ "grad_norm": 0.30433303117752075,
344
+ "learning_rate": 2.51105286845101e-05,
345
+ "loss": 0.0516,
346
+ "step": 23000
347
+ },
348
+ {
349
+ "epoch": 16.44,
350
+ "eval_loss": 0.0694783553481102,
351
+ "eval_runtime": 26.8551,
352
+ "eval_samples_per_second": 1120.012,
353
+ "eval_steps_per_second": 5.623,
354
+ "step": 23000
355
+ },
356
+ {
357
+ "epoch": 17.16,
358
+ "grad_norm": 0.39424875378608704,
359
+ "learning_rate": 2.378683186402986e-05,
360
+ "loss": 0.049,
361
+ "step": 24000
362
+ },
363
+ {
364
+ "epoch": 17.16,
365
+ "eval_loss": 0.06750107556581497,
366
+ "eval_runtime": 26.9045,
367
+ "eval_samples_per_second": 1117.954,
368
+ "eval_steps_per_second": 5.612,
369
+ "step": 24000
370
+ },
371
+ {
372
+ "epoch": 17.87,
373
+ "grad_norm": 0.29526183009147644,
374
+ "learning_rate": 2.2463135043549627e-05,
375
+ "loss": 0.0478,
376
+ "step": 25000
377
+ },
378
+ {
379
+ "epoch": 17.87,
380
+ "eval_loss": 0.06841529905796051,
381
+ "eval_runtime": 26.9131,
382
+ "eval_samples_per_second": 1117.597,
383
+ "eval_steps_per_second": 5.611,
384
+ "step": 25000
385
+ },
386
+ {
387
+ "epoch": 18.58,
388
+ "grad_norm": 0.2802821099758148,
389
+ "learning_rate": 2.113943822306939e-05,
390
+ "loss": 0.0472,
391
+ "step": 26000
392
+ },
393
+ {
394
+ "epoch": 18.58,
395
+ "eval_loss": 0.0680340975522995,
396
+ "eval_runtime": 26.8442,
397
+ "eval_samples_per_second": 1120.467,
398
+ "eval_steps_per_second": 5.625,
399
+ "step": 26000
400
+ },
401
+ {
402
+ "epoch": 19.3,
403
+ "grad_norm": 0.198490172624588,
404
+ "learning_rate": 1.9815741402589152e-05,
405
+ "loss": 0.0445,
406
+ "step": 27000
407
+ },
408
+ {
409
+ "epoch": 19.3,
410
+ "eval_loss": 0.059882719069719315,
411
+ "eval_runtime": 26.9691,
412
+ "eval_samples_per_second": 1115.275,
413
+ "eval_steps_per_second": 5.599,
414
+ "step": 27000
415
+ },
416
+ {
417
+ "epoch": 20.01,
418
+ "grad_norm": 0.3383251130580902,
419
+ "learning_rate": 1.8492044582108914e-05,
420
+ "loss": 0.0435,
421
+ "step": 28000
422
+ },
423
+ {
424
+ "epoch": 20.01,
425
+ "eval_loss": 0.06356318295001984,
426
+ "eval_runtime": 26.8538,
427
+ "eval_samples_per_second": 1120.066,
428
+ "eval_steps_per_second": 5.623,
429
+ "step": 28000
430
+ },
431
+ {
432
+ "epoch": 20.73,
433
+ "grad_norm": 0.16571784019470215,
434
+ "learning_rate": 1.7168347761628677e-05,
435
+ "loss": 0.0419,
436
+ "step": 29000
437
+ },
438
+ {
439
+ "epoch": 20.73,
440
+ "eval_loss": 0.06056862324476242,
441
+ "eval_runtime": 27.0748,
442
+ "eval_samples_per_second": 1110.924,
443
+ "eval_steps_per_second": 5.577,
444
+ "step": 29000
445
+ },
446
+ {
447
+ "epoch": 21.44,
448
+ "grad_norm": 0.19518467783927917,
449
+ "learning_rate": 1.584465094114844e-05,
450
+ "loss": 0.0409,
451
+ "step": 30000
452
+ },
453
+ {
454
+ "epoch": 21.44,
455
+ "eval_loss": 0.06490638852119446,
456
+ "eval_runtime": 26.8481,
457
+ "eval_samples_per_second": 1120.301,
458
+ "eval_steps_per_second": 5.624,
459
+ "step": 30000
460
+ },
461
+ {
462
+ "epoch": 22.16,
463
+ "grad_norm": 0.15420591831207275,
464
+ "learning_rate": 1.4520954120668203e-05,
465
+ "loss": 0.0397,
466
+ "step": 31000
467
+ },
468
+ {
469
+ "epoch": 22.16,
470
+ "eval_loss": 0.05918469280004501,
471
+ "eval_runtime": 26.8143,
472
+ "eval_samples_per_second": 1121.713,
473
+ "eval_steps_per_second": 5.631,
474
+ "step": 31000
475
+ },
476
+ {
477
+ "epoch": 22.87,
478
+ "grad_norm": 0.26854997873306274,
479
+ "learning_rate": 1.3197257300187965e-05,
480
+ "loss": 0.0387,
481
+ "step": 32000
482
+ },
483
+ {
484
+ "epoch": 22.87,
485
+ "eval_loss": 0.06144551932811737,
486
+ "eval_runtime": 26.8852,
487
+ "eval_samples_per_second": 1118.757,
488
+ "eval_steps_per_second": 5.616,
489
+ "step": 32000
490
+ },
491
+ {
492
+ "epoch": 23.59,
493
+ "grad_norm": 0.17430314421653748,
494
+ "learning_rate": 1.1873560479707728e-05,
495
+ "loss": 0.0373,
496
+ "step": 33000
497
+ },
498
+ {
499
+ "epoch": 23.59,
500
+ "eval_loss": 0.06159648299217224,
501
+ "eval_runtime": 26.7887,
502
+ "eval_samples_per_second": 1122.785,
503
+ "eval_steps_per_second": 5.637,
504
+ "step": 33000
505
+ },
506
+ {
507
+ "epoch": 24.3,
508
+ "grad_norm": 0.14911049604415894,
509
+ "learning_rate": 1.054986365922749e-05,
510
+ "loss": 0.0369,
511
+ "step": 34000
512
+ },
513
+ {
514
+ "epoch": 24.3,
515
+ "eval_loss": 0.05931873992085457,
516
+ "eval_runtime": 26.8571,
517
+ "eval_samples_per_second": 1119.926,
518
+ "eval_steps_per_second": 5.622,
519
+ "step": 34000
520
+ },
521
+ {
522
+ "epoch": 25.02,
523
+ "grad_norm": 0.13620807230472565,
524
+ "learning_rate": 9.226166838747254e-06,
525
+ "loss": 0.0361,
526
+ "step": 35000
527
+ },
528
+ {
529
+ "epoch": 25.02,
530
+ "eval_loss": 0.05695568770170212,
531
+ "eval_runtime": 26.8966,
532
+ "eval_samples_per_second": 1118.283,
533
+ "eval_steps_per_second": 5.614,
534
+ "step": 35000
535
+ },
536
+ {
537
+ "epoch": 25.73,
538
+ "grad_norm": 0.13764438033103943,
539
+ "learning_rate": 7.902470018267017e-06,
540
+ "loss": 0.0349,
541
+ "step": 36000
542
+ },
543
+ {
544
+ "epoch": 25.73,
545
+ "eval_loss": 0.05707501247525215,
546
+ "eval_runtime": 26.986,
547
+ "eval_samples_per_second": 1114.578,
548
+ "eval_steps_per_second": 5.595,
549
+ "step": 36000
550
+ },
551
+ {
552
+ "epoch": 26.45,
553
+ "grad_norm": 0.2389635145664215,
554
+ "learning_rate": 6.578773197786779e-06,
555
+ "loss": 0.0343,
556
+ "step": 37000
557
+ },
558
+ {
559
+ "epoch": 26.45,
560
+ "eval_loss": 0.0577365942299366,
561
+ "eval_runtime": 26.9903,
562
+ "eval_samples_per_second": 1114.401,
563
+ "eval_steps_per_second": 5.595,
564
+ "step": 37000
565
+ },
566
+ {
567
+ "epoch": 27.16,
568
+ "grad_norm": 0.15828461945056915,
569
+ "learning_rate": 5.255076377306542e-06,
570
+ "loss": 0.034,
571
+ "step": 38000
572
+ },
573
+ {
574
+ "epoch": 27.16,
575
+ "eval_loss": 0.05767366662621498,
576
+ "eval_runtime": 27.1454,
577
+ "eval_samples_per_second": 1108.035,
578
+ "eval_steps_per_second": 5.563,
579
+ "step": 38000
580
+ },
581
+ {
582
+ "epoch": 27.88,
583
+ "grad_norm": 0.1059570387005806,
584
+ "learning_rate": 3.9313795568263045e-06,
585
+ "loss": 0.0332,
586
+ "step": 39000
587
+ },
588
+ {
589
+ "epoch": 27.88,
590
+ "eval_loss": 0.056225307285785675,
591
+ "eval_runtime": 26.9534,
592
+ "eval_samples_per_second": 1115.928,
593
+ "eval_steps_per_second": 5.602,
594
+ "step": 39000
595
+ },
596
+ {
597
+ "epoch": 28.59,
598
+ "grad_norm": 0.1975150853395462,
599
+ "learning_rate": 2.6076827363460673e-06,
600
+ "loss": 0.0329,
601
+ "step": 40000
602
+ },
603
+ {
604
+ "epoch": 28.59,
605
+ "eval_loss": 0.05555161088705063,
606
+ "eval_runtime": 27.1187,
607
+ "eval_samples_per_second": 1109.122,
608
+ "eval_steps_per_second": 5.568,
609
+ "step": 40000
610
+ },
611
+ {
612
+ "epoch": 29.31,
613
+ "grad_norm": 0.1037423312664032,
614
+ "learning_rate": 1.28398591586583e-06,
615
+ "loss": 0.0319,
616
+ "step": 41000
617
+ },
618
+ {
619
+ "epoch": 29.31,
620
+ "eval_loss": 0.05535305291414261,
621
+ "eval_runtime": 26.8353,
622
+ "eval_samples_per_second": 1120.838,
623
+ "eval_steps_per_second": 5.627,
624
+ "step": 41000
625
+ },
626
+ {
627
+ "epoch": 30.0,
628
+ "step": 41970,
629
+ "total_flos": 3.347206753110317e+16,
630
+ "train_loss": 0.09860551060169176,
631
+ "train_runtime": 13103.021,
632
+ "train_samples_per_second": 640.368,
633
+ "train_steps_per_second": 3.203
634
+ }
635
+ ],
636
+ "logging_steps": 1000,
637
+ "max_steps": 41970,
638
+ "num_input_tokens_seen": 0,
639
+ "num_train_epochs": 30,
640
+ "save_steps": 1000,
641
+ "total_flos": 3.347206753110317e+16,
642
+ "train_batch_size": 200,
643
+ "trial_name": null,
644
+ "trial_params": null
645
+ }
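
This trainer_state.json follows the standard Hugging Face Trainer schema: a log_history list of train/eval entries plus top-level run settings. A minimal sketch for pulling the best eval checkpoint out of such a file; the path is assumed from the neighboring ckpt/ling_disc entries, and the script is illustrative rather than part of the repo:

import json

# Illustrative: find the best-performing eval step in a HF trainer_state.json.
# Path assumed from the surrounding ckpt/ling_disc files in this commit.
with open('ckpt/ling_disc/trainer_state.json') as f:
    state = json.load(f)

evals = [e for e in state['log_history'] if 'eval_loss' in e]
best = min(evals, key=lambda e: e['eval_loss'])
print(f"best eval_loss {best['eval_loss']:.5f} at step {best['step']}")
# For the steps shown above this would report step 41000 (~0.05535).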
ckpt/ling_disc/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
+ size 4984
ckpt/model.json ADDED
@@ -0,0 +1,82 @@
+ {
+     "data": "ling_conversion",
+     "data_sources": ["qqp", "mrpc", "stsb"],
+     "data_type": "text",
+     "kld_annealing": "cyclic",
+     "lingpred_annealing": "mono",
+     "ling_embed_type": "one-layer",
+     "combine_weight": 1,
+     "alpha_kld": 1,
+     "alpha_lingpred": 1,
+     "alpha_sem": 1,
+     "max_grad_norm": 10,
+     "sem_loss_tao": 0.5,
+     "sem_loss_eps": 1,
+     "ckpt": "./ckpt/model.pt",
+     "disc_type": "deberta",
+     "disc_ckpt": "./ckpt/ling_disc",
+     "sem_ckpt": "./ckpt/sem_emb.pt",
+     "lng_ids": null,
+     "lng_ids_idx": null,
+     "model_name": "google/flan-t5-base",
+     "aim_exp": "lingconv-0606",
+     "sem_loss_type": "dedicated",
+     "combine_method": "decoder_add_first",
+     "train_log": 200,
+     "val_log": 2000,
+     "batch_size": 80,
+     "eval_batch_size": 200,
+     "max_eval_samples": 1000,
+     "test_batch_size": 1,
+     "hidden_dim": 500,
+     "latent_dim": 150,
+     "lng_dim": 40,
+     "disc_lng_dim": 40,
+     "use_lora": false,
+     "lora_r": 64,
+     "gpu": "4",
+     "epochs": 20,
+     "grad_accumulation": 1,
+     "n_ica": 10,
+     "max_length": 200,
+     "total_steps": null,
+     "kld_const": 1,
+     "lr": 0.001,
+     "kl_weight": 0.1,
+     "weight_decay": 0.01,
+     "ling_dropout": 0.1,
+     "predict_fn": "logs/test.txt",
+     "save_predict": false,
+     "use_ica": false,
+     "pretrain_gen": false,
+     "pretrain_sem": false,
+     "pretrain_disc": false,
+     "linggen_type": "none",
+     "linggen_input": "s+l",
+     "aug_same": false,
+     "ling_vae": false,
+     "process_lingpred": false,
+     "fudge_lambda": 1.0,
+     "use_lingpred": false,
+     "ling2_only": true,
+     "cycle_loss": false,
+     "disc_loss": false,
+     "sem_loss": false,
+     "sim_loss": false,
+     "optuna": false,
+     "debug": false,
+     "demo": false,
+     "fudge": false,
+     "out_fn": "logs/default",
+     "eval_only": false,
+     "predict_with_feedback": false,
+     "feedback_param": "s",
+     "eval_ling": false,
+     "seed": 0,
+     "major_arg": 0,
+     "quantize_lng": false,
+     "quant_nbins": 20,
+     "src_lng": "ling",
+     "to_restore": [],
+     "disc_steps": 0
+ }
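
ckpt/model.json is a dump of the argparse namespace the checkpoint was trained with; options.parse_args restores it at load time. A hedged sketch of inspecting it directly with plain json (not the repo's own loading path):

import json

# Illustrative inspection of the saved run configuration.
with open('ckpt/model.json') as f:
    cfg = json.load(f)

print(cfg['model_name'])   # 'google/flan-t5-base' backbone
print(cfg['disc_type'])    # 'deberta' linguistic discriminator
print(cfg['lng_dim'])      # 40 linguistic indices per sentence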
ckpt/model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a675026d23bf857c796e00fda67b500e4cc13b43db030b08fdfaef14823fbe42
+ size 2971737146
ckpt/sem_emb.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c58f890cb0121eacf8ac99d2fac53e2962f457d8c02e0b6386a4b3e342ac10c
+ size 1315675291
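
These .pt entries (like training_args.bin above) are Git LFS pointer stubs, recording only the spec version, sha256 oid, and byte size; a clone without LFS support yields three-line text files in place of the ~3 GB and ~1.3 GB checkpoints. A small sketch, assuming these paths, for detecting that situation before loading:

# Illustrative helper: detect whether a file is still a Git LFS pointer stub.
def is_lfs_pointer(path):
    with open(path, 'rb') as f:
        return f.read(40).startswith(b'version https://git-lfs.github.com/spec')

for p in ('ckpt/model.pt', 'ckpt/sem_emb.pt'):
    if is_lfs_pointer(p):
        print(f'{p} is an LFS stub; fetch the real weights with git-lfs first')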
compute_lng.py CHANGED
@@ -4,38 +4,6 @@ import lftk
  import spacy
  nlp = spacy.load("en_core_web_sm")
 
- def extract_lingfeat(text):
-     from lingfeat import extractor
-     LingFeat = extractor.pass_text(text)
-     LingFeat.preprocess()
-
-     d = {}
-     d.update(LingFeat.WoKF_())   # Wikipedia Knowledge Features
-     d.update(LingFeat.WBKF_())   # WeeBit Corpus Knowledge Features
-     d.update(LingFeat.OSKF_())   # OneStopEng Corpus Knowledge Features
-
-     # Discourse (Disco) Features
-     d.update(LingFeat.EnDF_())   # Entity Density Features
-     d.update(LingFeat.EnGF_())   # Entity Grid Features
-
-     # Syntactic (Synta) Features
-     # d.update(LingFeat.PhrF_())  # Noun/Verb/Adj/Adv/... Phrasal Features (logging stanza)
-     # d.update(LingFeat.TrSF_())  # (Parse) Tree Structural Features (logging stanza)
-     d.update(LingFeat.POSF_())   # Noun/Verb/Adj/Adv/... Part-of-Speech Features
-
-     # Lexico Semantic (LxSem) Features
-     d.update(LingFeat.TTRF_())   # Type Token Ratio Features
-     d.update(LingFeat.VarF_())   # Noun/Verb/Adj/Adv Variation Features
-     d.update(LingFeat.PsyF_())   # Psycholinguistic Difficulty of Words (AoA Kuperman)
-     d.update(LingFeat.WorF_())   # Word Familiarity from Frequency Count (SubtlexUS)
-
-     # Shallow Traditional (ShTra) Features
-     d.update(LingFeat.ShaF_())   # Shallow Features (e.g. avg number of tokens)
-     d.update(LingFeat.TraF_())   # Traditional Formulas
-
-     return list(d.values())
-
-
  def extract_lftk(text):
      if text == '':
          return [0.] * 220
@@ -45,12 +13,9 @@ def extract_lftk(text):
      feats = LFTK.extract()
      return list(feats.values())
 
- def compute_lng(text, shortcut = False):
+ def compute_lng(text):
      lca_feats = lca(text)
-     if shortcut:
-         sca_feats = [0] * 23
-     else:
-         sca_feats = sca(text)
+     sca_feats = sca(text)
      lftk = extract_lftk(text)
      all_feats = lca_feats + sca_feats + lftk
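
With the shortcut path gone, compute_lng always runs the full LCA + SCA + LFTK pipeline. A hedged usage sketch (assumes the lca/sca helpers imported at the top of this module resolve):

# Illustrative call; the return value is one flat feature list in the
# concatenation order used above: lca + sca + lftk.
from compute_lng import compute_lng

feats = compute_lng('The quick brown fox jumps over the lazy dog.')
# sca contributes 23 values and LFTK 220 (per the constants in this file);
# lca's share is defined in its own module.
print(len(feats))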
demo.py DELETED
@@ -1,371 +0,0 @@
- def run_gradio(model, tokenizer, scaler, ling_collection, examples=None, lng_names=None, M=None):
-     import numpy as np
-     import torch
-     from datetime import datetime
-     from compute_lng import compute_lng
-     import gradio as gr
-     m = np.load('assets/m.npy')
-     m = -1/m
-     m[m == -np.inf] = 0
-     m /= 100
-     device = model.backbone.device
-
-     def visibility(mode):
-         if mode == 0:
-             vis_group = group1
-         elif mode == 1:
-             vis_group = group2
-         elif mode == 2:
-             vis_group = group3
-
-         output = [gr.update(value=''), gr.update(value='')]
-         for component in components:
-             if component in vis_group:
-                 output.append(gr.update(visible=True))
-             else:
-                 output.append(gr.update(visible=False))
-         return output
-
-     def generate(sent1, ling):
-         input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-         ling1 = scaler.transform([ling['Source']])
-         ling2 = scaler.transform([ling['Target']])
-         inputs = {'sentence1_input_ids': input_ids,
-                   'sentence1_ling': torch.tensor(ling1).float().to(device),
-                   'sentence2_ling': torch.tensor(ling2).float().to(device),
-                   'sentence1_attention_mask': torch.ones_like(input_ids)}
-         preds = []
-         with torch.no_grad():
-             pred = model.infer(inputs).cpu().numpy()
-         pred = tokenizer.batch_decode(pred,
-                                       skip_special_tokens=True)[0]
-
-         return pred
-
-     def generate_with_feedbacks(sent1, ling):
-         preds = []
-         eta = 0.1
-         input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-         ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-         ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-         ling1_embed = model.ling_embed(ling1)
-         ling2_embed = model.ling_embed(ling2)
-         cur_ling = ling1_embed + eta * (ling2_embed - ling1_embed)
-         inputs = {'sentence1_input_ids': input_ids,
-                   'sent1_ling_embed': ling1_embed,
-                   'sent2_ling_embed': ling2_embed,
-                   'sentence1_attention_mask': torch.ones_like(input_ids)}
-         converged = False
-         c = 0
-         while not converged:
-             with torch.no_grad():
-                 pred = model.infer(inputs)
-             inputs_pred = inputs.copy()
-             inputs_pred.update({'input_ids': pred,
-                                 'attention_mask': torch.ones_like(pred)})
-             ling_pred = model.ling_disc(**inputs_pred)
-             ling_pred_embed = model.ling_embed(ling_pred)
-
-             if len(interpolations) == 0 or pred != interpolations[-1]:
-                 interpolations.append(pred)
-
-             diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-             scale = torch.norm(cur_ling)/torch.norm(ling2)
-
-             # print(f'Diff: {diff.item():.3f} / Scale: ({scale.item():.3f})>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-             if diff < 1e-5 or c >= 50:
-                 converged = True
-             else:
-                 # cur_ling = cur_ling + eta * (ling2_embed - ling_pred_embed)
-                 inputs.update({
-                     'sentence1_input_ids': pred,
-                     # 'sent2_ling_embed': ling2_embed,
-                     'sentence1_attention_mask': torch.ones_like(pred)
-                 })
-             c += 1
-
-         pred = tokenizer.batch_decode(pred.cpu().numpy(),
-                                       skip_special_tokens=True)[0]
-
-         return pred
-     def generate_with_feedback(sent1, ling, approx):
-         if sent1 == '':
-             return ['Please input a source text.', '']
-         preds = []
-         interpolations = []
-         input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-         ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-         ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-         ling1_embed = model.ling_embed(ling1)
-         ling2_embed = model.ling_embed(ling2)
-         inputs = {'sentence1_input_ids': input_ids,
-                   'sent1_ling_embed': ling1_embed,
-                   'sent2_ling_embed': ling2_embed,
-                   'sentence1_attention_mask': torch.ones_like(input_ids)}
-         converged = False
-         c = 0
-         eta = 0.3
-         while not converged:
-             with torch.no_grad():
-                 pred = model.infer(inputs)
-             inputs_pred = inputs.copy()
-             inputs_pred.update({'input_ids': pred,
-                                 'attention_mask': torch.ones_like(pred)})
-             pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                                                skip_special_tokens=True)[0]
-             if 'approximate' in approx:
-                 ling_pred = model.ling_disc(**inputs_pred)
-             elif 'exact' in approx:
-                 ling_pred = compute_lng(pred_text)
-                 ling_pred = scaler.transform([ling_pred])[0]
-                 ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-             else:
-                 raise ValueError()
-             ling_pred_embed = model.ling_embed(ling_pred)
-
-             if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                 interpolations.append(pred_text)
-
-             diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-
-             # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-             if diff < 10 or c >= 50:
-                 converged = True
-             else:
-                 ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                 inputs.update({'sent2_ling_embed': ling2_embed})
-             c += 1
-
-
-         interpolation = '-- ' + '\n-- '.join(interpolations)
-         return [pred_text, interpolation]
-
-     def generate_random(sent1, ling, count, approx):
-         preds, interpolations = [], []
-         for c in range(count):
-             idx = np.random.randint(0, len(ling_collection))
-             ling_ex = ling_collection[idx]
-             ling['Target'] = ling_ex
-             pred, interpolation = generate_with_feedback(sent1, ling, approx)
-             preds.append(pred)
-             interpolations.append(interpolation)
-         return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
-
-     def estimate_gen(sent1, sent2, ling, approx):
-         if 'approximate' in approx:
-             input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-             with torch.no_grad():
-                 ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-             ling_pred = scaler.inverse_transform(ling_pred)[0]
-         elif 'exact' in approx:
-             ling_pred = compute_lng(sent2)
-         else:
-             raise ValueError()
-
-         ling['Target'] = ling_pred
-         gen = generate_with_feedback(sent1, ling, approx)
-         results = gen + [ling]
-
-         return results
-
-     def estimate_tgt(sent2, ling, approx):
-         if 'approximate' in approx:
-             input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-             with torch.no_grad():
-                 ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-             ling_pred = scaler.inverse_transform(ling_pred)[0]
-         elif 'exact' in approx:
-             ling_pred = compute_lng(sent2)
-         else:
-             raise ValueError()
-
-         ling['Target'] = ling_pred
-         return ling
-
-     def estimate_src(sent1, ling, approx):
-         if 'approximate' in approx:
-             input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-             with torch.no_grad():
-                 ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-             ling_pred = scaler.inverse_transform(ling_pred)[0]
-         elif 'exact' in approx:
-             ling_pred = compute_lng(sent1)
-         else:
-             raise ValueError()
-
-         ling['Source'] = ling_pred
-         return ling
-
-     def rand_target(ling):
-         ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
-         return ling
-
-     def rand_ex_target(ling):
-         idx = np.random.randint(0, len(examples))
-         ling_ex = examples[idx][1]
-         ling['Target'] = ling_ex['Target']
-         return ling
-
-     def copy(ling):
-         ling['Target'] = ling['Source']
-         return ling
-
-     def add_noise(ling):
-         x = scaler.transform([ling['Target']])
-         x += np.random.randn(*ling['Target'].shape)
-         x = scaler.inverse_transform(x)[0]
-         ling['Target'] = x
-         return ling
-
-     def add(ling):
-         x = scaler.transform([ling['Target']])
-         x += m
-         x = scaler.inverse_transform(x)[0]
-         ling['Target'] = x
-         return ling
-
-     def sub(ling):
-         x = scaler.transform([ling['Target']])
-         x -= m
-         x = scaler.inverse_transform(x)[0]
-         ling['Target'] = x
-         return ling
-
-     # title = ''
-     # for i, model in enumerate(models):
-     #     if i > 0:
-     #         title += '\n'
-     #     title += f"model ({i})\n\tUsing VAE = {model.args.ling_vae}\n\tUsing ICA = {model.args.use_ica}\n\tNumber of features = {model.args.lng_dim if not model.args.use_ica else model.args.n_ica}"
-     title = """
- # LingConv: A System for Controlled Linguistic Conversion
-
- ## Description
-
- This system is an encoder-decoder model for complexity controlled text generation, guided by 241
- linguistic complexity indices as key attributes. Given a sentence and a desired level of linguistic
- complexity, the model can generate diverse paraphrases that maintain consistent meaning, adjusted for
- different linguistic complexity levels. However, it's important to note that not all index combinations are
- feasible (such as requesting a sentence of "length" 5 with 10 "unique words"). To ensure high quality
- outputs, our approach interpolates the embedding of linguistic indices to locate the most closely matched,
- achievable set of indices for the given target.
- """
-
-     guide = """
- You may use the system in on of the following ways:
-
- **Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
- of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
- and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
-
- **Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
- sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
- sentence along with another sentence (which will serve only to extract linguistic indices for the
- paraphrase generation). Then, click "Generate."
-
- **Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
- generated text. We provided a set of tools for manual adjustments of the desired linguistic complexity of
- the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
- generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
- These tools are designed for experimental use and require the user to possess linguistic expertise for
- effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
- indices." Once indices are entered, click "Generate."
-
-
- Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
- in quality control of the genration). Approximate computation is significantly faster.
-
- Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
-
- Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentences,
- the indices of the source sentences, and a sample set of target linguistic indices.
-
- Please make your choice below.
-
- """
-
-     sent1 = gr.Textbox(label='Source text')
-     ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
-                         headers=['Index', 'Source', 'Target'],
-                         datatype=['str', 'number', 'number'], visible=False)
-     css = """
- #guide span.svelte-s1r2yt {font-size: 22px !important;
-                            font-weight: 600 !important}
- """
-     with gr.Blocks(css=css) as demo:
-         gr.Markdown(title)
-         with gr.Accordion("Quick Start Guide", open=False, elem_id='guide'):
-             gr.Markdown(guide)
-
-         mode = gr.Radio(value='Randomized Paraphrase Generation',
-                         label='How would you like to use this system?',
-                         type="index",
-                         choices=['Randomized Paraphrase Generation',
-                                  'Complexity-Matched Paraphrasing', 'Manual Linguistic Control'])
-         approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
-                           choices=['Use approximate computation of linguistic indices (faster)',
-                                    'Use exact computation of linguistic indices'], container=False, show_label=False)
-         control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
-
-         with gr.Accordion("Examples...", open=False):
-             gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
-
-         with gr.Row():
-             sent1.render()
-             with gr.Column():
-                 sent2 = gr.Textbox(label='Generated text')
-                 interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
-         #####################
-         with gr.Row():
-             generate_random_btn = gr.Button("Generate",
-                                             variant='primary', scale=1, visible=True)
-             count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
-             # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
-             # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
-             # add_noise_btn = gr.Button('Add noise to target linguistic indices')
-         #####################
-         with gr.Row():
-             estimate_gen_btn = gr.Button("Generate",
-                                          variant='primary',
-                                          scale=1, visible=False)
-             sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
-         #####################
-         generate_btn = gr.Button("Generate", variant='primary', visible=False)
-         with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
-             with gr.Row():
-                 estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
-                 sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
-                 estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
-                 # rand_btn = gr.Button("Random target")
-                 rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
-                 copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
-             with gr.Row():
-                 add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
-                 sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
-             ling.render()
-         #####################
-
-         estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
-         estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
-         # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
-         estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
-         # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
-         rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
-         copy_btn.click(copy, inputs=[ling], outputs=[ling])
-         generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
-         generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
-                                   outputs=[sent2, interpolation, ling])
-         # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
-         # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
-         add_btn.click(add, inputs=[ling], outputs=[ling])
-         sub_btn.click(sub, inputs=[ling], outputs=[ling])
-         # add_noise_btn.click(add_noise, inputs=[ling], outputs=[ling])
-
-         group1 = [generate_random_btn, count]
-         group2 = [estimate_gen_btn, sent_ling_gen]
-         group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
-         components = group1 + group2 + group3
-         mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
-         control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
-                                      outputs=[interpolation])
-
-     demo.launch(share=True)
lftk_ids.csv ADDED
@@ -0,0 +1,221 @@
+ key,name,formulation,domain,family,language
+ t_word,total_number_of_words,foundation,surface,wordsent,general
+ t_stopword,total_number_of_stop_words,foundation,surface,wordsent,general
+ t_punct,total_number_of_puntuations,foundation,syntax,wordsent,general
+ t_syll,total_number_of_syllables,foundation,surface,wordsent,en
+ t_syll2,total_number_of_words_more_than_two_syllables,foundation,surface,wordsent,en
+ t_syll3,total_number_of_words_more_than_three_syllables,foundation,surface,wordsent,en
+ t_uword,total_number_of_unique_words,foundation,surface,wordsent,general
+ t_sent,total_number_of_sentences,foundation,surface,wordsent,general
+ t_char,total_number_of_characters,foundation,surface,wordsent,general
+ a_word_ps,average_number_of_words_per_sentence,derivation,surface,avgwordsent,general
+ a_char_ps,average_number_of_characters_per_sentence,derivation,surface,avgwordsent,general
+ a_char_pw,average_number_of_characters_per_word,derivation,surface,avgwordsent,general
+ a_syll_ps,average_number_of_syllables_per_sentence,derivation,surface,avgwordsent,en
+ a_syll_pw,average_number_of_syllables_per_word,derivation,surface,avgwordsent,en
+ a_stopword_ps,average_number_of_stop_words_per_sentence,derivation,surface,avgwordsent,en
+ a_stopword_pw,average_number_of_stop_words_per_word,derivation,surface,avgwordsent,en
+ t_kup,total_kuperman_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+ t_bry,total_brysbaert_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+ t_subtlex_us_zipf,total_subtlex_us_zipf_of_words,foundation,lexico-semantics,worddiff,en
+ a_kup_pw,average_kuperman_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+ a_bry_pw,average_brysbaert_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+ a_kup_ps,average_kuperman_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+ a_bry_ps,average_brysbaert_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+ a_subtlex_us_zipf_pw,average_subtlex_us_zipf_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+ a_subtlex_us_zipf_ps,average_subtlex_us_zipf_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+ t_n_ent,total_number_of_named_entities,foundation,discourse,entity,general
+ t_n_ent_person,total_number_of_named_entities_person,foundation,discourse,entity,en
+ t_n_ent_norp,total_number_of_named_entities_norp,foundation,discourse,entity,en
+ t_n_ent_fac,total_number_of_named_entities_fac,foundation,discourse,entity,en
+ t_n_ent_org,total_number_of_named_entities_org,foundation,discourse,entity,en
+ t_n_ent_gpe,total_number_of_named_entities_gpe,foundation,discourse,entity,en
+ t_n_ent_loc,total_number_of_named_entities_loc,foundation,discourse,entity,en
+ t_n_ent_product,total_number_of_named_entities_product,foundation,discourse,entity,en
+ t_n_ent_event,total_number_of_named_entities_event,foundation,discourse,entity,en
+ t_n_ent_art,total_number_of_named_entities_art,foundation,discourse,entity,en
+ t_n_ent_law,total_number_of_named_entities_law,foundation,discourse,entity,en
+ t_n_ent_language,total_number_of_named_entities_language,foundation,discourse,entity,en
+ t_n_ent_date,total_number_of_named_entities_date,foundation,discourse,entity,en
+ t_n_ent_time,total_number_of_named_entities_time,foundation,discourse,entity,en
+ t_n_ent_percent,total_number_of_named_entities_percent,foundation,discourse,entity,en
+ t_n_ent_money,total_number_of_named_entities_money,foundation,discourse,entity,en
+ t_n_ent_quantity,total_number_of_named_entities_quantity,foundation,discourse,entity,en
+ t_n_ent_ordinal,total_number_of_named_entities_ordinal,foundation,discourse,entity,en
+ t_n_ent_cardinal,total_number_of_named_entities_cardinal,foundation,discourse,entity,en
+ a_n_ent_pw,average_number_of_named_entities_per_word,derivation,discourse,avgentity,general
+ a_n_ent_person_pw,average_number_of_named_entities_person_per_word,derivation,discourse,avgentity,en
+ a_n_ent_norp_pw,average_number_of_named_entities_norp_per_word,derivation,discourse,avgentity,en
+ a_n_ent_fac_pw,average_number_of_named_entities_fac_per_word,derivation,discourse,avgentity,en
+ a_n_ent_org_pw,average_number_of_named_entities_org_per_word,derivation,discourse,avgentity,en
+ a_n_ent_gpe_pw,average_number_of_named_entities_gpe_per_word,derivation,discourse,avgentity,en
+ a_n_ent_loc_pw,average_number_of_named_entities_loc_per_word,derivation,discourse,avgentity,en
+ a_n_ent_product_pw,average_number_of_named_entities_product_per_word,derivation,discourse,avgentity,en
+ a_n_ent_event_pw,average_number_of_named_entities_event_per_word,derivation,discourse,avgentity,en
+ a_n_ent_art_pw,average_number_of_named_entities_art_per_word,derivation,discourse,avgentity,en
+ a_n_ent_law_pw,average_number_of_named_entities_law_per_word,derivation,discourse,avgentity,en
+ a_n_ent_language_pw,average_number_of_named_entities_language_per_word,derivation,discourse,avgentity,en
+ a_n_ent_date_pw,average_number_of_named_entities_date_per_word,derivation,discourse,avgentity,en
+ a_n_ent_time_pw,average_number_of_named_entities_time_per_word,derivation,discourse,avgentity,en
+ a_n_ent_percent_pw,average_number_of_named_entities_percent_per_word,derivation,discourse,avgentity,en
+ a_n_ent_money_pw,average_number_of_named_entities_money_per_word,derivation,discourse,avgentity,en
+ a_n_ent_quantity_pw,average_number_of_named_entities_quantity_per_word,derivation,discourse,avgentity,en
+ a_n_ent_ordinal_pw,average_number_of_named_entities_ordinal_per_word,derivation,discourse,avgentity,en
+ a_n_ent_cardinal_pw,average_number_of_named_entities_cardinal_per_word,derivation,discourse,avgentity,en
+ a_n_ent_ps,average_number_of_named_entities_per_sentence,derivation,discourse,avgentity,general
+ a_n_ent_person_ps,average_number_of_named_entities_person_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_norp_ps,average_number_of_named_entities_norp_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_fac_ps,average_number_of_named_entities_fac_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_org_ps,average_number_of_named_entities_org_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_gpe_ps,average_number_of_named_entities_gpe_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_loc_ps,average_number_of_named_entities_loc_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_product_ps,average_number_of_named_entities_product_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_event_ps,average_number_of_named_entities_event_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_art_ps,average_number_of_named_entities_art_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_law_ps,average_number_of_named_entities_law_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_language_ps,average_number_of_named_entities_language_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_date_ps,average_number_of_named_entities_date_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_time_ps,average_number_of_named_entities_time_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_percent_ps,average_number_of_named_entities_percent_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_money_ps,average_number_of_named_entities_money_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_quantity_ps,average_number_of_named_entities_quantity_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_ordinal_ps,average_number_of_named_entities_ordinal_per_sentence,derivation,discourse,avgentity,en
+ a_n_ent_cardinal_ps,average_number_of_named_entities_cardinal_per_sentence,derivation,discourse,avgentity,en
+ simp_adj_var,simple_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_adp_var,simple_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_adv_var,simple_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_aux_var,simple_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_cconj_var,simple_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_det_var,simple_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_intj_var,simple_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_noun_var,simple_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_num_var,simple_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_part_var,simple_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_pron_var,simple_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_propn_var,simple_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_punct_var,simple_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_sconj_var,simple_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_sym_var,simple_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_verb_var,simple_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_space_var,simple_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_adj_var,root_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_adp_var,root_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_adv_var,root_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_aux_var,root_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_cconj_var,root_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_det_var,root_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_intj_var,root_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_noun_var,root_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_num_var,root_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_part_var,root_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_pron_var,root_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_propn_var,root_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_punct_var,root_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_sconj_var,root_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_sym_var,root_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_verb_var,root_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ root_space_var,root_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_adj_var,corrected_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_adp_var,corrected_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_adv_var,corrected_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_aux_var,corrected_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_cconj_var,corrected_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_det_var,corrected_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_intj_var,corrected_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_noun_var,corrected_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_num_var,corrected_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_part_var,corrected_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_pron_var,corrected_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_propn_var,corrected_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_punct_var,corrected_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_sconj_var,corrected_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_sym_var,corrected_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_verb_var,corrected_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+ corr_space_var,corrected_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+ simp_ttr,simple_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+ root_ttr,root_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+ corr_ttr,corrected_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+ bilog_ttr,bilogarithmic_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+ uber_ttr,uber_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+ simp_ttr_no_lem,simple_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+ root_ttr_no_lem,root_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+ corr_ttr_no_lem,corrected_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+ bilog_ttr_no_lem,bilogarithmic_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+ uber_ttr_no_lem,uber_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+ n_adj,total_number_of_adjectives,foundation,syntax,partofspeech,general
+ n_adp,total_number_of_adpositions,foundation,syntax,partofspeech,general
+ n_adv,total_number_of_adverbs,foundation,syntax,partofspeech,general
+ n_aux,total_number_of_auxiliaries,foundation,syntax,partofspeech,general
+ n_cconj,total_number_of_coordinating_conjunctions,foundation,syntax,partofspeech,general
+ n_det,total_number_of_determiners,foundation,syntax,partofspeech,general
+ n_intj,total_number_of_interjections,foundation,syntax,partofspeech,general
+ n_noun,total_number_of_nouns,foundation,syntax,partofspeech,general
+ n_num,total_number_of_numerals,foundation,syntax,partofspeech,general
+ n_part,total_number_of_particles,foundation,syntax,partofspeech,general
+ n_pron,total_number_of_pronouns,foundation,syntax,partofspeech,general
+ n_propn,total_number_of_proper_nouns,foundation,syntax,partofspeech,general
+ n_punct,total_number_of_punctuations,foundation,syntax,partofspeech,general
+ n_sconj,total_number_of_subordinating_conjunctions,foundation,syntax,partofspeech,general
+ n_sym,total_number_of_symbols,foundation,syntax,partofspeech,general
+ n_verb,total_number_of_verbs,foundation,syntax,partofspeech,general
+ n_space,total_number_of_spaces,foundation,syntax,partofspeech,general
+ n_uadj,total_number_of_unique_adjectives,foundation,syntax,partofspeech,general
+ n_uadp,total_number_of_unique_adpositions,foundation,syntax,partofspeech,general
+ n_uadv,total_number_of_unique_adverbs,foundation,syntax,partofspeech,general
+ n_uaux,total_number_of_unique_auxiliaries,foundation,syntax,partofspeech,general
+ n_ucconj,total_number_of_unique_coordinating_conjunctions,foundation,syntax,partofspeech,general
+ n_udet,total_number_of_unique_determiners,foundation,syntax,partofspeech,general
+ n_uintj,total_number_of_unique_interjections,foundation,syntax,partofspeech,general
+ n_unoun,total_number_of_unique_nouns,foundation,syntax,partofspeech,general
+ n_unum,total_number_of_unique_numerals,foundation,syntax,partofspeech,general
+ n_upart,total_number_of_unique_particles,foundation,syntax,partofspeech,general
+ n_upron,total_number_of_unique_pronouns,foundation,syntax,partofspeech,general
+ n_upropn,total_number_of_unique_proper_nouns,foundation,syntax,partofspeech,general
+ n_upunct,total_number_of_unique_punctuations,foundation,syntax,partofspeech,general
+ n_usconj,total_number_of_unique_subordinating_conjunctions,foundation,syntax,partofspeech,general
+ n_usym,total_number_of_unique_symbols,foundation,syntax,partofspeech,general
+ n_uverb,total_number_of_unique_verbs,foundation,syntax,partofspeech,general
+ n_uspace,total_number_of_unique_spaces,foundation,syntax,partofspeech,general
+ a_adj_pw,average_number_of_adjectives_per_word,derivation,syntax,avgpartofspeech,general
+ a_adp_pw,average_number_of_adpositions_per_word,derivation,syntax,avgpartofspeech,general
+ a_adv_pw,average_number_of_adverbs_per_word,derivation,syntax,avgpartofspeech,general
+ a_aux_pw,average_number_of_auxiliaries_per_word,derivation,syntax,avgpartofspeech,general
+ a_cconj_pw,average_number_of_coordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+ a_det_pw,average_number_of_determiners_per_word,derivation,syntax,avgpartofspeech,general
+ a_intj_pw,average_number_of_interjections_per_word,derivation,syntax,avgpartofspeech,general
+ a_noun_pw,average_number_of_nouns_per_word,derivation,syntax,avgpartofspeech,general
+ a_num_pw,average_number_of_numerals_per_word,derivation,syntax,avgpartofspeech,general
+ a_part_pw,average_number_of_particles_per_word,derivation,syntax,avgpartofspeech,general
+ a_pron_pw,average_number_of_pronouns_per_word,derivation,syntax,avgpartofspeech,general
+ a_propn_pw,average_number_of_proper_nouns_per_word,derivation,syntax,avgpartofspeech,general
+ a_punct_pw,average_number_of_punctuations_per_word,derivation,syntax,avgpartofspeech,general
+ a_sconj_pw,average_number_of_subordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+ a_sym_pw,average_number_of_symbols_per_word,derivation,syntax,avgpartofspeech,general
+ a_verb_pw,average_number_of_verbs_per_word,derivation,syntax,avgpartofspeech,general
+ a_space_pw,average_number_of_spaces_per_word,derivation,syntax,avgpartofspeech,general
+ a_adj_ps,average_number_of_adjectives_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_adp_ps,average_number_of_adpositions_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_adv_ps,average_number_of_adverbs_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_aux_ps,average_number_of_auxiliaries_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_cconj_ps,average_number_of_coordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_det_ps,average_number_of_determiners_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_intj_ps,average_number_of_interjections_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_noun_ps,average_number_of_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_num_ps,average_number_of_numerals_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_part_ps,average_number_of_particles_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_pron_ps,average_number_of_pronouns_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_propn_ps,average_number_of_proper_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_punct_ps,average_number_of_punctuations_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_sconj_ps,average_number_of_subordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_sym_ps,average_number_of_symbols_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_verb_ps,average_number_of_verbs_per_sentence,derivation,syntax,avgpartofspeech,general
+ a_space_ps,average_number_of_spaces_per_sentence,derivation,syntax,avgpartofspeech,general
+ fkre,flesch_kincaid_reading_ease,derivation,surface,readformula,en
+ fkgl,flesch_kincaid_grade_level,derivation,surface,readformula,en
+ fogi,gunning_fog_index,derivation,surface,readformula,en
+ smog,smog_index,derivation,surface,readformula,en
+ cole,coleman_liau_index,derivation,surface,readformula,en
+ auto,automated_readability_index,derivation,surface,readformula,en
+ rt_fast,reading_time_for_fast_readers,derivation,surface,readtimeformula,en
+ rt_average,reading_time_for_average_readers,derivation,surface,readtimeformula,en
+ rt_slow,reading_time_for_slow_readers,derivation,surface,readtimeformula,en
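
lftk_ids.csv catalogs the 220 LFTK indices together with their formulation/domain/family taxonomy. A quick pandas sketch for slicing it (illustrative, not repo code):

import pandas as pd

# Illustrative: summarize the LFTK feature taxonomy shipped with the Space.
ids = pd.read_csv('lftk_ids.csv')
print(ids.groupby('domain')['key'].count())                  # features per domain
print(ids[ids['family'] == 'readformula']['key'].tolist())   # fkre, fkgl, ...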
model.py CHANGED
@@ -504,7 +504,7 @@ class EncoderDecoderVAE(nn.Module):
          dec_output, _ = self.infer_with_cache(batch)
          return dec_output
 
-     def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer, scaler):
+     def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer):
          from torch.autograd import grad
          interpolations = []
          def line_search():
@@ -519,8 +519,6 @@ class EncoderDecoderVAE(nn.Module):
              new_loss, pred = get_loss(param_)
              max_len = pred.shape[1]
              lens = torch.where(pred == self.eos_token_id, 1, 0).argmax(-1) + 1
-             # if lens.item() == 1:
-             #     patience -= 1
              batch.update({
                  'sentence2_input_ids': pred,
                  'sentence2_attention_mask': sequence_mask(lens, max_len = max_len)
@@ -528,8 +526,6 @@ class EncoderDecoderVAE(nn.Module):
              sem_prob = torch.sigmoid(sem_emb(**batch)).item()
              # if sem_prob <= 0.1:
              #     patience -= 1
-             # f.write(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
-             # print(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
              if new_loss < loss and sem_prob >= 0.90 and lens.item() > 1:
                  return param_
              eta *= 2.25
@@ -565,18 +561,11 @@ class EncoderDecoderVAE(nn.Module):
          elif self.args.feedback_param == 'logits':
              logits = self.infer_with_cache(batch)[1]['scores']
              param = torch.nn.Parameter(logits, requires_grad = True)
-         f = open(self.args.fb_log, 'a') if self.args.fb_log else None
          target_np = batch['sentence2_ling'][0].cpu().numpy()
          while True:
              loss, pred = get_loss(param)
              pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
                                                 skip_special_tokens=True)[0]
-             if f:
-                 # from compute_lng import compute_lng
-                 # lng_pred = scaler.transform(np.array([compute_lng(pred_text)])[:,used_indices])[0]
-                 # real_loss = np.mean((lng_pred - target_np)**2)
-                 # f.write(f'Loss: {loss.item():.2f}\tReal loss:{real_loss:.2f}\t{pred_text}\n')
-                 f.write(f'*** [{loss.item():.2f}], {pred_text}\n')
              interpolations.append(pred_text)
              if loss < 1:
                  break
@@ -585,65 +574,8 @@ class EncoderDecoderVAE(nn.Module):
              param = line_search()
              if param is False:
                  break
-         if f:
-             f.write(f'[return] {pred_text}\n\n')
-             f.close()
          return pred, [pred_text, interpolations]
 
-     def infer_with_feedback(self, ling_disc, batch, tokenizer, scaler, approx=False):
-         interpolations = []
-         converged = False
-         c = 0
-         eta = 0.3
-         use_embed = True
-         if use_embed:
-             ling1_embed = self.ling_embed(batch['sentence1_ling'])
-             ling2_embed = self.ling_embed(batch['sentence2_ling'])
-             batch.update({
-                 'sent1_ling_embed': ling1_embed,
-                 'sent2_ling_embed': ling2_embed,
-             })
-         else:
-             ling2 = batch['sentence2_ling']
-             ling2_orig = batch['sentence2_ling'].clone()
-         while not converged:
-             with torch.no_grad():
-                 pred = self.infer(batch)
-             inputs_pred = batch.copy()
-             inputs_pred.update({'input_ids': pred,
-                                 'attention_mask': torch.ones_like(pred)})
-             pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                                                skip_special_tokens=True)[0]
-             if approx:
-                 ling_pred = ling_disc(**inputs_pred)
-             else:
-                 ling_pred = compute_lng(pred_text)
-                 ling_pred = scaler.transform([ling_pred])[0]
-                 ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-             if use_embed:
-                 ling_pred_embed = self.ling_embed(ling_pred)
-                 # diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-             # else:
-             diff = torch.mean((ling2_orig - ling_pred)**2)
-
-             # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-             if diff < 1e-1 or c == 6:
-                 converged = True
-             elif use_embed:
-                 ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                 batch.update({'sent2_ling_embed': ling2_embed})
-             else:
-                 ling2 = ling2 + eta * (ling_pred - ling2)
-                 batch.update({'sentence2_ling': ling2})
-
-             c += 1
-
-             if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                 interpolations.append(pred_text)
-
-         return [pred_text, interpolations]
-
  def set_grad(module, state):
      if module is not None:
          for p in module.parameters():
@@ -694,3 +626,42 @@ class LingDiscPipeline():
          with torch.no_grad():
              ling_pred = self.model(input_ids=inputs.input_ids.cuda())
          return ling_pred
+
+ def get_model(args, tokenizer, device):
+     if args.pretrain_disc or args.disc_loss or args.disc_ckpt:
+         ling_disc = LingDisc(args.model_name, args.disc_type, args.disc_ckpt).to(device)
+     else:
+         ling_disc = None
+     if args.linggen_type != 'none':
+         ling_gen = LingGenerator(args).to(device)
+     if args.sem_loss or args.sem_ckpt:
+         if args.sem_loss_type == 'shared':
+             sem_emb = seld.backbone.encoder
+         elif args.sem_loss_type == 'dedicated':
+             sem_emb = SemEmb(T5EncoderModel.from_pretrained('google/flan-t5-base'), tokenizer.eos_token_id).to(device)
+         else:
+             raise NotImplementedError('Semantic loss type')
+     else:
+         sem_emb = None
+
+     if not args.pretrain_disc:
+         model = EncoderDecoderVAE(args, tokenizer.pad_token_id, tokenizer.eos_token_id).to(device)
+         if args.use_lora:
+             target_modules = ["Attention.k", "Attention.q", "Attention.v", "Attention.o", "lm_head", "wi_0", "wi_1", "wo"]
+             target_modules = '|'.join(f'(.*{module})' for module in target_modules)
+             target_modules = f'backbone.({target_modules})'
+             config = LoraConfig(
+                 r=args.lora_r,
+                 lora_alpha=args.lora_r * 2,
+                 target_modules=target_modules,
+                 lora_dropout=0.1,
+                 bias="lora_only",
+                 modules_to_save=['ling_embed'],
+             )
+             model = get_peft_model(model, config)
+             model.print_trainable_parameters()
+     else:
+         model = ling_disc
+
+     return model, ling_disc, sem_emb
+
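
The new module-level get_model assembles the generator, linguistic discriminator, and semantic-similarity encoder from a single args namespace. A hedged sketch of a call site under the shipped ckpt/model.json (disc_ckpt and sem_ckpt set, pretrain_disc false, sem_loss_type 'dedicated', so all three components come back non-trivial):

import torch
from transformers import T5Tokenizer
from model import get_model
from options import parse_args

# Sketch only: parse_args(ckpt=...) restores the saved namespace from
# ckpt/model.json, then get_model builds the three components.
args, args_list, lng_names = parse_args(ckpt='./ckpt/model.pt')
tokenizer = T5Tokenizer.from_pretrained(args.model_name)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# model -> EncoderDecoderVAE, ling_disc -> LingDisc, sem_emb -> SemEmb;
# each still needs its weights loaded from ckpt/model.pt, ckpt/ling_disc
# and ckpt/sem_emb.pt before use.
model, ling_disc, sem_emb = get_model(args, tokenizer, device)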
options.py CHANGED
@@ -1,9 +1,10 @@
+ import os, json
  import argparse
+ import numpy as np
  from datetime import datetime
- from const import lca_names, sca_names, lingfeat_names
- import os, json
+ from const import lftkplus_names
  from copy import deepcopy
- import numpy as np
+
 
  def parse_args(ckpt=None):
      parser = argparse.ArgumentParser()
@@ -97,8 +98,6 @@ def parse_args(ckpt=None):
 
      major_arg = args.major_arg
      to_restore = [
-         'total_steps','major_arg','gpu','demo', 'eval_only', 'save_predict', 'predict_fn', 'fudge', 'predict_with_feedback',
-         'feedback_param', 'fb_log', 'data_dir', 'data', 'disc_ckpt', 'disc_type', 'sem_ckpt', 'fudge_lambda', 'test_batch_size', 'src_lng'
      ] + args.to_restore
      to_restore = {k: args.__dict__[k] for k in to_restore}
 
@@ -130,7 +129,7 @@ def parse_args(ckpt=None):
      args.__dict__.update(to_restore)
      args.ckpt = ckpt
 
-     lng_names = lca_names + sca_names + lingfeat_names
+     lng_names = lftkplus_names
      for i in range(len(args_list)):
          if args_list[i].lng_ids or args_list[i].lng_ids_idx:
              if args_list[i].lng_ids_idx: