Spaces
mohdelgaar committed
Commit 674b430 • Parent(s): e048c03
Update layout and samples
Browse files
- app.py +340 -33
- assets/ling_collection.npy +3 -0
- assets/logo.png +0 -0
- assets/ratios.npy +3 -0
- assets/samples.bin +3 -0
- assets/scaler.bin +3 -0
- assets/stats.json +3 -0
- ckpt/ling_disc/checkpoint-41000/config.json +120 -0
- ckpt/ling_disc/checkpoint-41000/model.safetensors +3 -0
- ckpt/ling_disc/checkpoint-41000/optimizer.pt +3 -0
- ckpt/ling_disc/checkpoint-41000/rng_state.pth +3 -0
- ckpt/ling_disc/checkpoint-41000/scheduler.pt +3 -0
- ckpt/ling_disc/checkpoint-41000/special_tokens_map.json +119 -0
- ckpt/ling_disc/checkpoint-41000/spiece.model +3 -0
- ckpt/ling_disc/checkpoint-41000/tokenizer.json +0 -0
- ckpt/ling_disc/checkpoint-41000/tokenizer_config.json +938 -0
- ckpt/ling_disc/checkpoint-41000/trainer_state.json +636 -0
- ckpt/ling_disc/checkpoint-41000/training_args.bin +3 -0
- ckpt/ling_disc/config.json +120 -0
- ckpt/ling_disc/model.safetensors +3 -0
- ckpt/ling_disc/scaler.bin +3 -0
- ckpt/ling_disc/special_tokens_map.json +119 -0
- ckpt/ling_disc/spiece.model +3 -0
- ckpt/ling_disc/tokenizer.json +0 -0
- ckpt/ling_disc/tokenizer_config.json +938 -0
- ckpt/ling_disc/trainer_state.json +645 -0
- ckpt/ling_disc/training_args.bin +3 -0
- ckpt/model.json +82 -0
- ckpt/model.pt +3 -0
- ckpt/sem_emb.pt +3 -0
- compute_lng.py +2 -37
- demo.py +0 -371
- lftk_ids.csv +221 -0
- model.py +40 -69
- options.py +5 -6
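
The diffstat above mixes code changes with Git LFS payloads, so downloading app.py alone is not enough to run the Space at this revision. Below is a minimal sketch of fetching the whole tree with huggingface_hub; the Space id is not shown on this page, so `mohdelgaar/<space-name>` is a placeholder you would replace with the real repo id.

```python
from huggingface_hub import snapshot_download

# Hypothetical repo id (the Space name is not part of this page).
# `revision` pins the checkout to this commit, LFS assets included.
local_dir = snapshot_download(repo_id="mohdelgaar/<space-name>",
                              repo_type="space",
                              revision="674b430")
print(local_dir)
```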
app.py
CHANGED
@@ -3,60 +3,367 @@ import spacy
 # nltk.download('wordnet')
 # spacy.cli.download('en_core_web_sm')
 
-
-
+import torch
+import joblib, json
+import numpy as np
+import pandas as pd
+import gradio as gr
+from const import used_indices, name_map
 from model import get_model
 from options import parse_args
-import numpy as np
 from transformers import T5Tokenizer
-import torch
-import joblib
+from compute_lng import compute_lng
 
 
 def process_examples(samples, full_names):
-
-
-
-
-
-
-
-    return list(samples)
+    processed = []
+    for sample in samples:
+        processed.append([
+            sample['sentence1'],
+            pd.DataFrame({'Index': full_names, 'Source': sample['sentence1_ling'], 'Target': sample['sentence2_ling']})
+        ])
+    return processed
 
 args, args_list, lng_names = parse_args(ckpt='./ckpt/model.pt')
-print(args)
-exit()
 
 tokenizer = T5Tokenizer.from_pretrained(args.model_name)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-
-
-
-# ling_collection = np.load('assets/ling_collection.npy')
+lng_names = [name_map[x] for x in lng_names]
+examples = json.load(open('assets/examples.json'))
+examples = process_examples(examples, lng_names)
 
+stats = json.load(open('assets/stats.json'))
+
+ling_collection = np.load('assets/ling_collection.npy')
 scaler = joblib.load('assets/scaler.bin')
+scale_ratio = np.load('assets/ratios.npy')
+
 model, ling_disc, sem_emb = get_model(args, tokenizer, device)
 
 state = torch.load(args.ckpt, map_location=torch.device('cpu'))
 model.load_state_dict(state['model'], strict=True)
 model.eval()
-print(model is not None, ling_disc is not None, sem_emb is not None)
-exit()
-
-if args.disc_type == 't5':
-    state = torch.load(args.disc_ckpt)
-    if 'model' in state:
-        ling_disc.load_state_dict(state['model'], strict=False)
-    else:
-        ling_disc.load_state_dict(state, strict=False)
 ling_disc.eval()
 
 state = torch.load(args.sem_ckpt)
-if 'model' in state:
-    sem_emb.load_state_dict(state['model'], strict=False)
-else:
-    sem_emb.load_state_dict(state, strict=False)
+sem_emb.load_state_dict(state['model'], strict=True)
 sem_emb.eval()
 
-
+device = model.backbone.device
+
+############# Start demo code
+def round_ling(x):
+    is_int = stats['is_int']
+    mins = stats['min']
+    maxs = stats['max']
+    for i in range(len(x)):
+        # if is_int[i]:
+        #     x[i] = round(x[i])
+        # else:
+        #     x[i] = round(x[i], 3)
+        x[i] = round(x[i], 3)
+    return np.clip(x, mins, maxs)
+
+def visibility(mode):
+    if mode == 0:
+        vis_group = group1
+    elif mode == 1:
+        vis_group = group2
+    elif mode == 2:
+        vis_group = group3
+
+    output = [gr.update(value=''), gr.update(value='')]
+    for component in components:
+        if component in vis_group:
+            output.append(gr.update(visible=True))
+        else:
+            output.append(gr.update(visible=False))
+    return output
+
+def generate(sent1, ling):
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling1 = scaler.transform([ling['Source']])
+    ling2 = scaler.transform([ling['Target']])
+    inputs = {'sentence1_input_ids': input_ids,
+              'sentence1_ling': torch.tensor(ling1).float().to(device),
+              'sentence2_ling': torch.tensor(ling2).float().to(device),
+              'sentence1_attention_mask': torch.ones_like(input_ids)}
+    preds = []
+    with torch.no_grad():
+        pred = model.infer(inputs).cpu().numpy()
+        pred = tokenizer.batch_decode(pred,
+                                      skip_special_tokens=True)[0]
+
+    return pred
+
+def generate_with_feedback(sent1, ling, approx):
+    if sent1 == '':
+        return ['Please input a source text.', '']
+
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
+    inputs = {
+        'sentence1_input_ids': input_ids,
+        'sentence2_ling': ling2,
+        'sentence1_attention_mask': torch.ones_like(input_ids)
+    }
+
+    pred, (pred_text, interpolations) = model.infer_with_feedback_BP(ling_disc, sem_emb, inputs, tokenizer)
+
+    interpolation = '-- ' + '\n-- '.join(interpolations)
+    return [pred_text, interpolation]
+
+def generate_random(sent1, ling, count, approx):
+    preds, interpolations = [], []
+    for c in range(count):
+        idx = np.random.randint(0, len(ling_collection))
+        ling_ex = ling_collection[idx]
+        ling['Target'] = ling_ex
+        pred, interpolation = generate_with_feedback(sent1, ling, approx)
+        preds.append(pred)
+        interpolations.append(interpolation)
+    return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
+
+def estimate_gen(sent1, sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    gen = generate_with_feedback(sent1, ling, approx)
+    results = gen + [ling]
+
+    return results
+
+def estimate_tgt(sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    return ling
+
+def estimate_src(sent1, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent1))[used_indices]
+    else:
+        raise ValueError()
+
+    ling['Source'] = ling_pred
+    return ling
+
+def rand_target(ling):
+    ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
+    return ling
+
+def rand_ex_target(ling):
+    idx = np.random.randint(0, len(ling_collection))
+    ling_ex = ling_collection[idx]
+    ling['Target'] = ling_ex
+    return ling
+
+def copy(ling):
+    ling['Target'] = ling['Source']
+    return ling
+
+def add(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] + scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+def sub(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] - scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+title = """
+<h1 style="text-align: center;">Controlled Paraphrase Generation with Linguistic Feature Control</h1>
+
+<p style="font-size:1.2em;">This system utilizes an encoder-decoder model to generate text with controlled complexity, guided by 40 linguistic complexity indices.
+The model can generate diverse paraphrases of a given sentence, each adjusted to maintain consistent meaning while varying
+in linguistic complexity according to the desired level.</p>
+<p style="font-size:1.2em;">It is important to note that not all index combinations are feasible (e.g., a sentence of "length" 5 with 10 "unique words").
+To ensure high-quality outputs, our approach interpolates the embeddings of linguistic indices to identify the closest,
+achievable set of indices for the given target.</p>
+"""
+
+guide = """
+You may use the system in one of the following ways:
+
+**Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
+of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
+and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
+
+**Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
+sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
+sentence along with another sentence (which will serve only to extract linguistic indices for the
+paraphrase generation). Then, click "Generate."
+
+**Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
+generated text. We provide a set of tools for manual adjustment of the desired linguistic complexity of
+the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
+generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
+These tools are designed for experimental use and require the user to possess linguistic expertise for
+effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
+indices." Once indices are entered, click "Generate."
+
+
+Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
+in quality control of the generation). Approximate computation is significantly faster.
+
+Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
+
+Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentence,
+the indices of the source sentence, and a sample set of target linguistic indices.
+
+Please make your choice below.
+
+"""
+
+sent1 = gr.Textbox(label='Source text')
+ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
+                    headers=['Index', 'Source', 'Target'],
+                    datatype=['str', 'number', 'number'], visible=False)
+css = """
+#guide span.svelte-1w6vloh {font-size: 22px !important; font-weight: 600 !important}
+#mode span.svelte-1gfkn6j {font-size: 18px !important; font-weight: 600 !important}
+#mode {border: 0px; box-shadow: none}
+#mode .block {padding: 0px}
+
+div.gradio-container {color: black}
+div.form {background: inherit}
+
+body {
+    --text-sm: 12px;
+    --text-md: 16px;
+    --text-lg: 18px;
+    --input-text-size: 16px;
+    --section-text-size: 16px;
+    --input-background: --neutral-50;
+}
+
+.separator {
+    width: 100%;
+    height: 3px; /* Adjust the height for boldness */
+    background-color: #000; /* Adjust the color as needed */
+    margin: 20px 0; /* Adjust the margin as needed */
+}
+"""
+
+with gr.Blocks(
+        theme=gr.themes.Default(
+            spacing_size=gr.themes.sizes.spacing_md,
+            text_size=gr.themes.sizes.text_md,
+        ),
+        css=css) as demo:
+    gr.Image('assets/logo.png', height=100, container=False, show_download_button=False)
+    gr.Markdown(title)
+    with gr.Accordion("🚀 Quick Start Guide", open=False, elem_id='guide'):
+        gr.Markdown(guide)
+
+    with gr.Group(elem_classes='separator'):
+        pass
+    with gr.Group(elem_id='mode'):
+        mode = gr.Radio(
+            value='Randomized Paraphrase Generation',
+            label='How would you like to use this system?',
+            type="index",
+            choices=['🔄 Randomized Paraphrase Generation',
+                     '⚖️ Complexity-Matched Paraphrasing',
+                     '🎛️ Manual Linguistic Control'],
+        )
+    with gr.Accordion("⚙️ Advanced Options", open=False):
+        approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
+                          choices=['Use approximate computation of linguistic indices (faster)',
+                                   'Use exact computation of linguistic indices'], container=False, show_label=False)
+        control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
+
+    with gr.Accordion("📑 Examples...", open=False):
+        gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
+
+    with gr.Row():
+        sent1.render()
+        with gr.Column():
+            sent2 = gr.Textbox(label='Generated text')
+            interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
+    with gr.Group(elem_classes='separator'):
+        pass
+    #####################
+    with gr.Row():
+        generate_random_btn = gr.Button("Generate",
+                                        variant='primary', scale=1, visible=True)
+        count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
+        # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
+        # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
+    #####################
+    with gr.Row():
+        estimate_gen_btn = gr.Button("Generate",
+                                     variant='primary',
+                                     scale=1, visible=False)
+        sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
+    #####################
+    generate_btn = gr.Button("Generate", variant='primary', visible=False)
+    with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
+        with gr.Row():
+            estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
+            sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
+            estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
+            # rand_btn = gr.Button("Random target")
+            rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
+            copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
+        with gr.Row():
+            sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
+            add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
+        ling.render()
+    #####################
+
+    estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
+    estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
+    # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
+    estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
+    # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
+    rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
+    copy_btn.click(copy, inputs=[ling], outputs=[ling])
+    generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
+    generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
+                              outputs=[sent2, interpolation, ling])
+    # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
+    # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
+    add_btn.click(add, inputs=[ling], outputs=[ling])
+    sub_btn.click(sub, inputs=[ling], outputs=[ling])
+
+    group1 = [generate_random_btn, count]
+    group2 = [estimate_gen_btn, sent_ling_gen]
+    group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
+    components = group1 + group2 + group3
+    mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
+    control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
+                                 outputs=[interpolation])
+
+print('Finished loading')
+demo.launch(share=True)
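
All of the new callbacks share one convention: `ling` is a three-column table ('Index', 'Source', 'Target'), only the 'Target' column steers generation, and `approx` chooses between the `ling_disc` regressor and the exact `compute_lng` features. Below is a minimal sketch of driving the pipeline without the Gradio UI, assuming the globals defined in app.py above (`lng_names`, `estimate_tgt`, `generate_with_feedback`) are in scope; the input sentences are made up.

```python
import pandas as pd

# Build the same table the gr.Dataframe component would hand to a callback.
ling = pd.DataFrame({'Index': lng_names,
                     'Source': [0.0] * len(lng_names),
                     'Target': [0.0] * len(lng_names)})

approx = 'Use approximate computation of linguistic indices (faster)'

# Fill Target from a reference sentence (Complexity-Matched Paraphrasing).
ling = estimate_tgt('The reference sentence to imitate.', ling, approx)

# Paraphrase the source, steering its indices toward Target.
pred_text, interpolation = generate_with_feedback(
    'The sentence we want rewritten.', ling, approx)
print(pred_text)
```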
assets/ling_collection.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10
+size 320128
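
This three-line stanza is not the array itself but a Git LFS pointer: `oid` is the SHA-256 digest of the real payload and `size` its byte count, which is how the 320 KB .npy file stays out of the git history. A small sketch for checking a fetched asset against its pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of a file, streamed in chunks; the digest Git LFS stores as `oid`."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Matches the pointer above once LFS has materialized the file:
# b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10
print(lfs_oid('assets/ling_collection.npy'))
```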
assets/logo.png
ADDED
assets/ratios.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc60ebcd53fd467fd7f3c9e9652fb9364285e2833325b6ab46b1c86e2e136b3a
+size 448
assets/samples.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5de4fd9314c1df65f14187cc13fb07300b3a359f57c9bd69ab834ef6148a8368
+size 80651
assets/scaler.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3dc9e74494b2049672441b0587cd73bae605b271941528ea585672bf48d1a84
+size 1414
assets/stats.json
ADDED
@@ -0,0 +1,3 @@
+{"min": [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.25, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -12.951, 0.004],
+ "max": [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 23.0, 100.0],
+ "is_int": [true, true, true, true, true, true, true, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false]}
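
These three parallel arrays give, for each of the 40 linguistic indices, its observed minimum, maximum, and whether it is integer-valued; `round_ling` in app.py above clips every requested target into this envelope. A short sketch of that clipping, mirroring `round_ling` (the `target` vector here is made up):

```python
import json
import numpy as np

stats = json.load(open('assets/stats.json'))

def clip_to_stats(x):
    # Same post-processing as round_ling in app.py: round to 3 decimals,
    # then clip each index into its observed [min, max] range.
    x = np.round(np.asarray(x, dtype=float), 3)
    return np.clip(x, stats['min'], stats['max'])

target = np.full(40, 150.0)          # deliberately out of range
print(clip_to_stats(target)[0])      # 100.0: pulled back to that index's max
print(clip_to_stats(target)[38])     # 23.0: each index has its own cap
```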
ckpt/ling_disc/checkpoint-41000/config.json
ADDED
@@ -0,0 +1,120 @@
+{
+  "_name_or_path": "microsoft/deberta-v3-small",
+  "architectures": [
+    "DebertaReplacedTokenizer"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
    ... (entries "2" through "38" continue the same pattern) ...
+    "39": "LABEL_39"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
    ... (entries for "LABEL_2" through "LABEL_38" continue the same pattern) ...
+    "LABEL_39": 39
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
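
The config describes a 6-layer deberta-v2 encoder run in regression mode with 40 outputs, one per linguistic index, i.e. the `ling_disc` discriminator that app.py uses for approximate index estimation. The `architectures` entry, `DebertaReplacedTokenizer`, is a custom class from this repo (loaded via `get_model`), so `AutoModel` will not resolve it by name; the config itself, though, loads with stock transformers:

```python
from transformers import AutoConfig

# Reads config.json from the local checkpoint directory.
cfg = AutoConfig.from_pretrained('ckpt/ling_disc')

print(cfg.model_type)     # deberta-v2
print(cfg.problem_type)   # regression
print(cfg.num_labels)     # 40: one regression target per linguistic index
```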
ckpt/ling_disc/checkpoint-41000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
+size 275252064
ckpt/ling_disc/checkpoint-41000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dce4669eb4c8d092887dca957afda50838e0d8821093ac6ec80dfc38c786041
+size 550568634
ckpt/ling_disc/checkpoint-41000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b153bd123a079b6d0ee9f3616a0498be47197aca1c9c7764282514bc91fdc08d
+size 14244
ckpt/ling_disc/checkpoint-41000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7600c7adf0e16517c635d7b3eee259739a7966140efc08f2afff26d19bb4fb29
+size 1064
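
Together with model.safetensors, the optimizer, scheduler, and RNG-state files make checkpoint-41000 a complete Hugging Face Trainer checkpoint rather than inference-only weights. A sketch, assuming `trainer` is a `transformers.Trainer` rebuilt with the same model, data, and arguments as the original run:

```python
# Resumes optimizer, LR schedule, and RNG state exactly at step 41000.
trainer.train(resume_from_checkpoint='ckpt/ling_disc/checkpoint-41000')
```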
ckpt/ling_disc/checkpoint-41000/special_tokens_map.json
ADDED
@@ -0,0 +1,119 @@
+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
    ... ("<extra_id_2>" through "<extra_id_98>" continue the same pattern) ...
+    "<extra_id_99>"
+  ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
ckpt/ling_disc/checkpoint-41000/spiece.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656
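
spiece.model is a T5-style SentencePiece vocabulary (note the 100 `<extra_id_*>` sentinels in the surrounding token maps), which fits both the custom `DebertaReplacedTokenizer` architecture name and app.py loading a `T5Tokenizer` rather than DeBERTa's own tokenizer. A sketch, assuming the checkpoint directory is used as a local tokenizer path:

```python
from transformers import T5Tokenizer

# The directory bundles spiece.model, tokenizer_config.json and
# special_tokens_map.json, so it loads like any local T5 tokenizer.
tok = T5Tokenizer.from_pretrained('ckpt/ling_disc/checkpoint-41000')
print(tok.tokenize('Controlled paraphrase generation with linguistic features.'))
```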
ckpt/ling_disc/checkpoint-41000/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
ckpt/ling_disc/checkpoint-41000/tokenizer_config.json
ADDED
@@ -0,0 +1,938 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<pad>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "</s>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "<unk>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"32000": {
|
28 |
+
"content": "<extra_id_99>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"32001": {
|
36 |
+
"content": "<extra_id_98>",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
},
|
43 |
+
"32002": {
|
44 |
+
"content": "<extra_id_97>",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": true
|
50 |
+
},
|
51 |
+
"32003": {
|
52 |
+
"content": "<extra_id_96>",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": false,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": true
|
58 |
+
},
|
59 |
+
"32004": {
|
60 |
+
"content": "<extra_id_95>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": false,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": true
|
66 |
+
},
|
67 |
+
"32005": {
|
68 |
+
"content": "<extra_id_94>",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": false,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": true
|
74 |
+
},
|
75 |
+
"32006": {
|
76 |
+
"content": "<extra_id_93>",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": false,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": true
|
82 |
+
},
|
83 |
+
"32007": {
|
84 |
+
"content": "<extra_id_92>",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": false,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": true
|
90 |
+
},
|
91 |
+
"32008": {
|
92 |
+
"content": "<extra_id_91>",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": false,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": true
|
98 |
+
},
|
99 |
+
"32009": {
|
100 |
+
"content": "<extra_id_90>",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": false,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": true
|
106 |
+
},
|
107 |
+
"32010": {
|
108 |
+
"content": "<extra_id_89>",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": false,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": true
|
114 |
+
},
|
115 |
+
"32011": {
|
116 |
+
"content": "<extra_id_88>",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": false,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": true
|
122 |
+
},
|
123 |
+
"32012": {
|
124 |
+
"content": "<extra_id_87>",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": false,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": true
|
130 |
+
},
|
131 |
+
"32013": {
|
132 |
+
"content": "<extra_id_86>",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": false,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": true
|
138 |
+
},
|
139 |
+
"32014": {
|
140 |
+
"content": "<extra_id_85>",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": false,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": true
|
146 |
+
},
|
147 |
+
"32015": {
|
148 |
+
"content": "<extra_id_84>",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": false,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": true
|
154 |
+
},
|
155 |
+
"32016": {
|
156 |
+
"content": "<extra_id_83>",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": false,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": true
|
162 |
+
},
|
163 |
+
"32017": {
|
164 |
+
"content": "<extra_id_82>",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": false,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": true
|
170 |
+
},
|
171 |
+
"32018": {
|
172 |
+
"content": "<extra_id_81>",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": false,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": true
|
178 |
+
},
|
179 |
+
"32019": {
|
180 |
+
"content": "<extra_id_80>",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": false,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": true
|
186 |
+
},
|
187 |
+
"32020": {
|
188 |
+
"content": "<extra_id_79>",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": false,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": true
|
194 |
+
},
|
195 |
+
"32021": {
|
196 |
+
"content": "<extra_id_78>",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": false,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": true
|
202 |
+
},
|
203 |
+
"32022": {
|
204 |
+
"content": "<extra_id_77>",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": false,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": true
|
210 |
+
},
|
211 |
+
"32023": {
|
212 |
+
"content": "<extra_id_76>",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": false,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": true
|
218 |
+
},
|
219 |
+
"32024": {
|
220 |
+
"content": "<extra_id_75>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"32025": {
|
228 |
+
"content": "<extra_id_74>",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"32026": {
|
236 |
+
"content": "<extra_id_73>",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"32027": {
|
244 |
+
"content": "<extra_id_72>",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"32028": {
|
252 |
+
"content": "<extra_id_71>",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"32029": {
|
260 |
+
"content": "<extra_id_70>",
|
261 |
+
"lstrip": false,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"32030": {
|
268 |
+
"content": "<extra_id_69>",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": false,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": true
|
274 |
+
},
|
275 |
+
"32031": {
|
276 |
+
"content": "<extra_id_68>",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": false,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": true
|
282 |
+
},
|
283 |
+
"32032": {
|
284 |
+
"content": "<extra_id_67>",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": false,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": true
|
290 |
+
},
|
291 |
+
"32033": {
|
292 |
+
"content": "<extra_id_66>",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": false,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": true
|
298 |
+
},
|
299 |
+
"32034": {
|
300 |
+
"content": "<extra_id_65>",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": false,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": true
|
306 |
+
},
|
307 |
+
"32035": {
|
308 |
+
"content": "<extra_id_64>",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": false,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": true
|
314 |
+
},
|
315 |
+
"32036": {
|
316 |
+
"content": "<extra_id_63>",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": false,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": true
|
322 |
+
},
|
323 |
+
"32037": {
|
324 |
+
"content": "<extra_id_62>",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": false,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": true
|
330 |
+
},
|
331 |
+
"32038": {
|
332 |
+
"content": "<extra_id_61>",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": false,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": true
|
338 |
+
},
|
339 |
+
"32039": {
|
340 |
+
"content": "<extra_id_60>",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": false,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": true
|
346 |
+
},
|
347 |
+
"32040": {
|
348 |
+
"content": "<extra_id_59>",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": false,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": true
|
354 |
+
},
|
355 |
+
"32041": {
|
356 |
+
"content": "<extra_id_58>",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": false,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": true
|
362 |
+
},
|
363 |
+
"32042": {
|
364 |
+
"content": "<extra_id_57>",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": false,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": true
|
370 |
+
},
|
371 |
+
"32043": {
|
372 |
+
"content": "<extra_id_56>",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": false,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": true
|
378 |
+
},
|
379 |
+
"32044": {
|
380 |
+
"content": "<extra_id_55>",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": false,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": true
|
386 |
+
},
|
387 |
+
"32045": {
|
388 |
+
"content": "<extra_id_54>",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": false,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": true
|
394 |
+
},
|
395 |
+
"32046": {
|
396 |
+
"content": "<extra_id_53>",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": false,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": true
|
402 |
+
},
|
403 |
+
"32047": {
|
404 |
+
"content": "<extra_id_52>",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": false,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": true
|
410 |
+
},
|
411 |
+
"32048": {
|
412 |
+
"content": "<extra_id_51>",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": false,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": true
|
418 |
+
},
|
419 |
+
"32049": {
|
420 |
+
"content": "<extra_id_50>",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": false,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": true
|
426 |
+
},
|
427 |
+
"32050": {
|
428 |
+
"content": "<extra_id_49>",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": false,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": true
|
434 |
+
},
|
435 |
+
"32051": {
|
436 |
+
"content": "<extra_id_48>",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": false,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": true
|
442 |
+
},
|
443 |
+
"32052": {
|
444 |
+
"content": "<extra_id_47>",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": false,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": true
|
450 |
+
},
|
451 |
+
"32053": {
|
452 |
+
"content": "<extra_id_46>",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": false,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": true
|
458 |
+
},
|
459 |
+
"32054": {
|
460 |
+
"content": "<extra_id_45>",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": false,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": true
|
466 |
+
},
|
467 |
+
"32055": {
|
468 |
+
"content": "<extra_id_44>",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": false,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": true
|
474 |
+
},
|
475 |
+
"32056": {
|
476 |
+
"content": "<extra_id_43>",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": false,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": true
|
482 |
+
},
|
483 |
+
"32057": {
|
484 |
+
"content": "<extra_id_42>",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": false,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": true
|
490 |
+
},
|
491 |
+
"32058": {
|
492 |
+
"content": "<extra_id_41>",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": false,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": true
|
498 |
+
},
|
499 |
+
"32059": {
|
500 |
+
"content": "<extra_id_40>",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": false,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": true
|
506 |
+
},
|
507 |
+
"32060": {
|
508 |
+
"content": "<extra_id_39>",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": false,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": true
|
514 |
+
},
|
515 |
+
"32061": {
|
516 |
+
"content": "<extra_id_38>",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": false,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": true
|
522 |
+
},
|
523 |
+
"32062": {
|
524 |
+
"content": "<extra_id_37>",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": false,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": true
|
530 |
+
},
|
531 |
+
"32063": {
|
532 |
+
"content": "<extra_id_36>",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": false,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": true
|
538 |
+
},
|
539 |
+
"32064": {
|
540 |
+
"content": "<extra_id_35>",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": false,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": true
|
546 |
+
},
|
547 |
+
"32065": {
|
548 |
+
"content": "<extra_id_34>",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": false,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": true
|
554 |
+
},
|
555 |
+
"32066": {
|
556 |
+
"content": "<extra_id_33>",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": false,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": true
|
562 |
+
},
|
563 |
+
"32067": {
|
564 |
+
"content": "<extra_id_32>",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": false,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": true
|
570 |
+
},
|
571 |
+
"32068": {
|
572 |
+
"content": "<extra_id_31>",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": false,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": true
|
578 |
+
},
|
579 |
+
"32069": {
|
580 |
+
"content": "<extra_id_30>",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": false,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": true
|
586 |
+
},
|
587 |
+
"32070": {
|
588 |
+
"content": "<extra_id_29>",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": false,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": true
|
594 |
+
},
|
595 |
+
"32071": {
|
596 |
+
"content": "<extra_id_28>",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": false,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": true
|
602 |
+
},
|
603 |
+
"32072": {
|
604 |
+
"content": "<extra_id_27>",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": false,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": true
|
610 |
+
},
|
611 |
+
"32073": {
|
612 |
+
"content": "<extra_id_26>",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": false,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": true
|
618 |
+
},
|
619 |
+
"32074": {
|
620 |
+
"content": "<extra_id_25>",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": false,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": true
|
626 |
+
},
|
627 |
+
"32075": {
|
628 |
+
"content": "<extra_id_24>",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": false,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": true
|
634 |
+
},
|
635 |
+
"32076": {
|
636 |
+
"content": "<extra_id_23>",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": false,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": true
|
642 |
+
},
|
643 |
+
"32077": {
|
644 |
+
"content": "<extra_id_22>",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": false,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": true
|
650 |
+
},
|
651 |
+
"32078": {
|
652 |
+
"content": "<extra_id_21>",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": false,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": true
|
658 |
+
},
|
659 |
+
"32079": {
|
660 |
+
"content": "<extra_id_20>",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": false,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": true
|
666 |
+
},
|
667 |
+
"32080": {
|
668 |
+
"content": "<extra_id_19>",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": false,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": true
|
674 |
+
},
|
675 |
+
"32081": {
|
676 |
+
"content": "<extra_id_18>",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": false,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": true
|
682 |
+
},
|
683 |
+
"32082": {
|
684 |
+
"content": "<extra_id_17>",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": false,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": true
|
690 |
+
},
|
691 |
+
"32083": {
|
692 |
+
"content": "<extra_id_16>",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": false,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": true
|
698 |
+
},
|
699 |
+
"32084": {
|
700 |
+
"content": "<extra_id_15>",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": false,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": true
|
706 |
+
},
|
707 |
+
"32085": {
|
708 |
+
"content": "<extra_id_14>",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": false,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": true
|
714 |
+
},
|
715 |
+
"32086": {
|
716 |
+
"content": "<extra_id_13>",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": false,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": true
|
722 |
+
},
|
723 |
+
"32087": {
|
724 |
+
"content": "<extra_id_12>",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": false,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": true
|
730 |
+
},
|
731 |
+
"32088": {
|
732 |
+
"content": "<extra_id_11>",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": false,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": true
|
738 |
+
},
|
739 |
+
"32089": {
|
740 |
+
"content": "<extra_id_10>",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": false,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": true
|
746 |
+
},
|
747 |
+
"32090": {
|
748 |
+
"content": "<extra_id_9>",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": false,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": true
|
754 |
+
},
|
755 |
+
"32091": {
|
756 |
+
"content": "<extra_id_8>",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": false,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": true
|
762 |
+
},
|
763 |
+
"32092": {
|
764 |
+
"content": "<extra_id_7>",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": false,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": true
|
770 |
+
},
|
771 |
+
"32093": {
|
772 |
+
"content": "<extra_id_6>",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": false,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": true
|
778 |
+
},
|
779 |
+
"32094": {
|
780 |
+
"content": "<extra_id_5>",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": false,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": true
|
786 |
+
},
|
787 |
+
"32095": {
|
788 |
+
"content": "<extra_id_4>",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": false,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": true
|
794 |
+
},
|
795 |
+
"32096": {
|
796 |
+
"content": "<extra_id_3>",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": false,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": true
|
802 |
+
},
|
803 |
+
"32097": {
|
804 |
+
"content": "<extra_id_2>",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": false,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": true
|
810 |
+
},
|
811 |
+
"32098": {
|
812 |
+
"content": "<extra_id_1>",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": false,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": true
|
818 |
+
},
|
819 |
+
"32099": {
|
820 |
+
"content": "<extra_id_0>",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": false,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": true
|
826 |
+
}
|
827 |
+
},
|
828 |
+
"additional_special_tokens": [
|
829 |
+
"<extra_id_0>",
|
830 |
+
"<extra_id_1>",
|
831 |
+
"<extra_id_2>",
|
832 |
+
"<extra_id_3>",
|
833 |
+
"<extra_id_4>",
|
834 |
+
"<extra_id_5>",
|
835 |
+
"<extra_id_6>",
|
836 |
+
"<extra_id_7>",
|
837 |
+
"<extra_id_8>",
|
838 |
+
"<extra_id_9>",
|
839 |
+
"<extra_id_10>",
|
840 |
+
"<extra_id_11>",
|
841 |
+
"<extra_id_12>",
|
842 |
+
"<extra_id_13>",
|
843 |
+
"<extra_id_14>",
|
844 |
+
"<extra_id_15>",
|
845 |
+
"<extra_id_16>",
|
846 |
+
"<extra_id_17>",
|
847 |
+
"<extra_id_18>",
|
848 |
+
"<extra_id_19>",
|
849 |
+
"<extra_id_20>",
|
850 |
+
"<extra_id_21>",
|
851 |
+
"<extra_id_22>",
|
852 |
+
"<extra_id_23>",
|
853 |
+
"<extra_id_24>",
|
854 |
+
"<extra_id_25>",
|
855 |
+
"<extra_id_26>",
|
856 |
+
"<extra_id_27>",
|
857 |
+
"<extra_id_28>",
|
858 |
+
"<extra_id_29>",
|
859 |
+
"<extra_id_30>",
|
860 |
+
"<extra_id_31>",
|
861 |
+
"<extra_id_32>",
|
862 |
+
"<extra_id_33>",
|
863 |
+
"<extra_id_34>",
|
864 |
+
"<extra_id_35>",
|
865 |
+
"<extra_id_36>",
|
866 |
+
"<extra_id_37>",
|
867 |
+
"<extra_id_38>",
|
868 |
+
"<extra_id_39>",
|
869 |
+
"<extra_id_40>",
|
870 |
+
"<extra_id_41>",
|
871 |
+
"<extra_id_42>",
|
872 |
+
"<extra_id_43>",
|
873 |
+
"<extra_id_44>",
|
874 |
+
"<extra_id_45>",
|
875 |
+
"<extra_id_46>",
|
876 |
+
"<extra_id_47>",
|
877 |
+
"<extra_id_48>",
|
878 |
+
"<extra_id_49>",
|
879 |
+
"<extra_id_50>",
|
880 |
+
"<extra_id_51>",
|
881 |
+
"<extra_id_52>",
|
882 |
+
"<extra_id_53>",
|
883 |
+
"<extra_id_54>",
|
884 |
+
"<extra_id_55>",
|
885 |
+
"<extra_id_56>",
|
886 |
+
"<extra_id_57>",
|
887 |
+
"<extra_id_58>",
|
888 |
+
"<extra_id_59>",
|
889 |
+
"<extra_id_60>",
|
890 |
+
"<extra_id_61>",
|
891 |
+
"<extra_id_62>",
|
892 |
+
"<extra_id_63>",
|
893 |
+
"<extra_id_64>",
|
894 |
+
"<extra_id_65>",
|
895 |
+
"<extra_id_66>",
|
896 |
+
"<extra_id_67>",
|
897 |
+
"<extra_id_68>",
|
898 |
+
"<extra_id_69>",
|
899 |
+
"<extra_id_70>",
|
900 |
+
"<extra_id_71>",
|
901 |
+
"<extra_id_72>",
|
902 |
+
"<extra_id_73>",
|
903 |
+
"<extra_id_74>",
|
904 |
+
"<extra_id_75>",
|
905 |
+
"<extra_id_76>",
|
906 |
+
"<extra_id_77>",
|
907 |
+
"<extra_id_78>",
|
908 |
+
"<extra_id_79>",
|
909 |
+
"<extra_id_80>",
|
910 |
+
"<extra_id_81>",
|
911 |
+
"<extra_id_82>",
|
912 |
+
"<extra_id_83>",
|
913 |
+
"<extra_id_84>",
|
914 |
+
"<extra_id_85>",
|
915 |
+
"<extra_id_86>",
|
916 |
+
"<extra_id_87>",
|
917 |
+
"<extra_id_88>",
|
918 |
+
"<extra_id_89>",
|
919 |
+
"<extra_id_90>",
|
920 |
+
"<extra_id_91>",
|
921 |
+
"<extra_id_92>",
|
922 |
+
"<extra_id_93>",
|
923 |
+
"<extra_id_94>",
|
924 |
+
"<extra_id_95>",
|
925 |
+
"<extra_id_96>",
|
926 |
+
"<extra_id_97>",
|
927 |
+
"<extra_id_98>",
|
928 |
+
"<extra_id_99>"
|
929 |
+
],
|
930 |
+
"clean_up_tokenization_spaces": true,
|
931 |
+
"eos_token": "</s>",
|
932 |
+
"extra_ids": 100,
|
933 |
+
"model_max_length": 512,
|
934 |
+
"pad_token": "</s>",
|
935 |
+
"sp_model_kwargs": {},
|
936 |
+
"tokenizer_class": "T5Tokenizer",
|
937 |
+
"unk_token": "<unk>"
|
938 |
+
}
|
ckpt/ling_disc/checkpoint-41000/trainer_state.json
ADDED
@@ -0,0 +1,636 @@
{
  "best_metric": 0.05535305291414261,
  "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
  "epoch": 29.306647605432453,
  "eval_steps": 1000,
  "global_step": 41000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.71, "grad_norm": 0.855617344379425, "learning_rate": 1.1913271384322135e-05, "loss": 0.9117, "step": 1000 },
    { "epoch": 0.71, "eval_loss": 0.6742472052574158, "eval_runtime": 27.0595, "eval_samples_per_second": 1111.549, "eval_steps_per_second": 5.58, "step": 1000 },
    { "epoch": 1.43, "grad_norm": 4.203719139099121, "learning_rate": 2.382654276864427e-05, "loss": 0.4114, "step": 2000 },
    { "epoch": 1.43, "eval_loss": 0.3266257345676422, "eval_runtime": 26.9318, "eval_samples_per_second": 1116.822, "eval_steps_per_second": 5.607, "step": 2000 },
    { "epoch": 2.14, "grad_norm": 3.1638591289520264, "learning_rate": 3.57398141529664e-05, "loss": 0.2624, "step": 3000 },
    { "epoch": 2.14, "eval_loss": 0.24602766335010529, "eval_runtime": 27.0604, "eval_samples_per_second": 1111.512, "eval_steps_per_second": 5.58, "step": 3000 },
    { "epoch": 2.86, "grad_norm": 1.7417826652526855, "learning_rate": 4.765308553728854e-05, "loss": 0.2002, "step": 4000 },
    { "epoch": 2.86, "eval_loss": 0.1770436018705368, "eval_runtime": 26.8812, "eval_samples_per_second": 1118.922, "eval_steps_per_second": 5.617, "step": 4000 },
    { "epoch": 3.57, "grad_norm": 1.1299816370010376, "learning_rate": 4.893707145315437e-05, "loss": 0.1635, "step": 5000 },
    { "epoch": 3.57, "eval_loss": 0.14757415652275085, "eval_runtime": 26.7857, "eval_samples_per_second": 1122.914, "eval_steps_per_second": 5.637, "step": 5000 },
    { "epoch": 4.29, "grad_norm": 1.210856556892395, "learning_rate": 4.761337463267413e-05, "loss": 0.1404, "step": 6000 },
    { "epoch": 4.29, "eval_loss": 0.12851941585540771, "eval_runtime": 26.9893, "eval_samples_per_second": 1114.44, "eval_steps_per_second": 5.595, "step": 6000 },
    { "epoch": 5.0, "grad_norm": 2.0565412044525146, "learning_rate": 4.62896778121939e-05, "loss": 0.1263, "step": 7000 },
    { "epoch": 5.0, "eval_loss": 0.12228666245937347, "eval_runtime": 26.7363, "eval_samples_per_second": 1124.987, "eval_steps_per_second": 5.648, "step": 7000 },
    { "epoch": 5.72, "grad_norm": 1.8667607307434082, "learning_rate": 4.496598099171366e-05, "loss": 0.1127, "step": 8000 },
    { "epoch": 5.72, "eval_loss": 0.11036147177219391, "eval_runtime": 26.7509, "eval_samples_per_second": 1124.375, "eval_steps_per_second": 5.645, "step": 8000 },
    { "epoch": 6.43, "grad_norm": 0.7492337226867676, "learning_rate": 4.364228417123342e-05, "loss": 0.1059, "step": 9000 },
    { "epoch": 6.43, "eval_loss": 0.10317497700452805, "eval_runtime": 27.0158, "eval_samples_per_second": 1113.349, "eval_steps_per_second": 5.589, "step": 9000 },
    { "epoch": 7.15, "grad_norm": 0.7611485123634338, "learning_rate": 4.231858735075319e-05, "loss": 0.0993, "step": 10000 },
    { "epoch": 7.15, "eval_loss": 0.10284282267093658, "eval_runtime": 26.795, "eval_samples_per_second": 1122.524, "eval_steps_per_second": 5.635, "step": 10000 },
    { "epoch": 7.86, "grad_norm": 0.5870215892791748, "learning_rate": 4.099489053027295e-05, "loss": 0.0887, "step": 11000 },
    { "epoch": 7.86, "eval_loss": 0.09789762645959854, "eval_runtime": 26.8453, "eval_samples_per_second": 1120.419, "eval_steps_per_second": 5.625, "step": 11000 },
    { "epoch": 8.58, "grad_norm": 0.48922085762023926, "learning_rate": 3.9671193709792706e-05, "loss": 0.0842, "step": 12000 },
    { "epoch": 8.58, "eval_loss": 0.09349656105041504, "eval_runtime": 26.8273, "eval_samples_per_second": 1121.172, "eval_steps_per_second": 5.629, "step": 12000 },
    { "epoch": 9.29, "grad_norm": 0.4252859354019165, "learning_rate": 3.8347496889312476e-05, "loss": 0.0793, "step": 13000 },
    { "epoch": 9.29, "eval_loss": 0.09415590018033981, "eval_runtime": 25.9362, "eval_samples_per_second": 1159.693, "eval_steps_per_second": 5.822, "step": 13000 },
    { "epoch": 10.01, "grad_norm": 0.44548505544662476, "learning_rate": 3.702380006883224e-05, "loss": 0.076, "step": 14000 },
    { "epoch": 10.01, "eval_loss": 0.08913980424404144, "eval_runtime": 26.7379, "eval_samples_per_second": 1124.919, "eval_steps_per_second": 5.647, "step": 14000 },
    { "epoch": 10.72, "grad_norm": 0.2965373694896698, "learning_rate": 3.5700103248352e-05, "loss": 0.0714, "step": 15000 },
    { "epoch": 10.72, "eval_loss": 0.08456840366125107, "eval_runtime": 26.787, "eval_samples_per_second": 1122.857, "eval_steps_per_second": 5.637, "step": 15000 },
    { "epoch": 11.44, "grad_norm": 0.3205694854259491, "learning_rate": 3.437640642787176e-05, "loss": 0.0677, "step": 16000 },
    { "epoch": 11.44, "eval_loss": 0.07863688468933105, "eval_runtime": 26.8242, "eval_samples_per_second": 1121.299, "eval_steps_per_second": 5.629, "step": 16000 },
    { "epoch": 12.15, "grad_norm": 0.2736203670501709, "learning_rate": 3.3052709607391525e-05, "loss": 0.0636, "step": 17000 },
    { "epoch": 12.15, "eval_loss": 0.07664181292057037, "eval_runtime": 26.7818, "eval_samples_per_second": 1123.077, "eval_steps_per_second": 5.638, "step": 17000 },
    { "epoch": 12.87, "grad_norm": 0.25644680857658386, "learning_rate": 3.172901278691129e-05, "loss": 0.0618, "step": 18000 },
    { "epoch": 12.87, "eval_loss": 0.07351888716220856, "eval_runtime": 26.8445, "eval_samples_per_second": 1120.453, "eval_steps_per_second": 5.625, "step": 18000 },
    { "epoch": 13.58, "grad_norm": 0.2748676538467407, "learning_rate": 3.0405315966431053e-05, "loss": 0.0584, "step": 19000 },
    { "epoch": 13.58, "eval_loss": 0.07314006239175797, "eval_runtime": 26.8333, "eval_samples_per_second": 1120.921, "eval_steps_per_second": 5.627, "step": 19000 },
    { "epoch": 14.3, "grad_norm": 0.30235132575035095, "learning_rate": 2.9081619145950812e-05, "loss": 0.057, "step": 20000 },
    { "epoch": 14.3, "eval_loss": 0.07568340748548508, "eval_runtime": 27.0109, "eval_samples_per_second": 1113.55, "eval_steps_per_second": 5.59, "step": 20000 },
    { "epoch": 15.01, "grad_norm": 0.2508692145347595, "learning_rate": 2.7757922325470574e-05, "loss": 0.0558, "step": 21000 },
    { "epoch": 15.01, "eval_loss": 0.07675843685865402, "eval_runtime": 26.9026, "eval_samples_per_second": 1118.032, "eval_steps_per_second": 5.613, "step": 21000 },
    { "epoch": 15.73, "grad_norm": 0.3341030478477478, "learning_rate": 2.643422550499034e-05, "loss": 0.0533, "step": 22000 },
    { "epoch": 15.73, "eval_loss": 0.07339715212583542, "eval_runtime": 26.8727, "eval_samples_per_second": 1119.278, "eval_steps_per_second": 5.619, "step": 22000 },
    { "epoch": 16.44, "grad_norm": 0.30433303117752075, "learning_rate": 2.51105286845101e-05, "loss": 0.0516, "step": 23000 },
    { "epoch": 16.44, "eval_loss": 0.0694783553481102, "eval_runtime": 26.8551, "eval_samples_per_second": 1120.012, "eval_steps_per_second": 5.623, "step": 23000 },
    { "epoch": 17.16, "grad_norm": 0.39424875378608704, "learning_rate": 2.378683186402986e-05, "loss": 0.049, "step": 24000 },
    { "epoch": 17.16, "eval_loss": 0.06750107556581497, "eval_runtime": 26.9045, "eval_samples_per_second": 1117.954, "eval_steps_per_second": 5.612, "step": 24000 },
    { "epoch": 17.87, "grad_norm": 0.29526183009147644, "learning_rate": 2.2463135043549627e-05, "loss": 0.0478, "step": 25000 },
    { "epoch": 17.87, "eval_loss": 0.06841529905796051, "eval_runtime": 26.9131, "eval_samples_per_second": 1117.597, "eval_steps_per_second": 5.611, "step": 25000 },
    { "epoch": 18.58, "grad_norm": 0.2802821099758148, "learning_rate": 2.113943822306939e-05, "loss": 0.0472, "step": 26000 },
    { "epoch": 18.58, "eval_loss": 0.0680340975522995, "eval_runtime": 26.8442, "eval_samples_per_second": 1120.467, "eval_steps_per_second": 5.625, "step": 26000 },
    { "epoch": 19.3, "grad_norm": 0.198490172624588, "learning_rate": 1.9815741402589152e-05, "loss": 0.0445, "step": 27000 },
    { "epoch": 19.3, "eval_loss": 0.059882719069719315, "eval_runtime": 26.9691, "eval_samples_per_second": 1115.275, "eval_steps_per_second": 5.599, "step": 27000 },
    { "epoch": 20.01, "grad_norm": 0.3383251130580902, "learning_rate": 1.8492044582108914e-05, "loss": 0.0435, "step": 28000 },
    { "epoch": 20.01, "eval_loss": 0.06356318295001984, "eval_runtime": 26.8538, "eval_samples_per_second": 1120.066, "eval_steps_per_second": 5.623, "step": 28000 },
    { "epoch": 20.73, "grad_norm": 0.16571784019470215, "learning_rate": 1.7168347761628677e-05, "loss": 0.0419, "step": 29000 },
    { "epoch": 20.73, "eval_loss": 0.06056862324476242, "eval_runtime": 27.0748, "eval_samples_per_second": 1110.924, "eval_steps_per_second": 5.577, "step": 29000 },
    { "epoch": 21.44, "grad_norm": 0.19518467783927917, "learning_rate": 1.584465094114844e-05, "loss": 0.0409, "step": 30000 },
    { "epoch": 21.44, "eval_loss": 0.06490638852119446, "eval_runtime": 26.8481, "eval_samples_per_second": 1120.301, "eval_steps_per_second": 5.624, "step": 30000 },
    { "epoch": 22.16, "grad_norm": 0.15420591831207275, "learning_rate": 1.4520954120668203e-05, "loss": 0.0397, "step": 31000 },
    { "epoch": 22.16, "eval_loss": 0.05918469280004501, "eval_runtime": 26.8143, "eval_samples_per_second": 1121.713, "eval_steps_per_second": 5.631, "step": 31000 },
    { "epoch": 22.87, "grad_norm": 0.26854997873306274, "learning_rate": 1.3197257300187965e-05, "loss": 0.0387, "step": 32000 },
    { "epoch": 22.87, "eval_loss": 0.06144551932811737, "eval_runtime": 26.8852, "eval_samples_per_second": 1118.757, "eval_steps_per_second": 5.616, "step": 32000 },
    { "epoch": 23.59, "grad_norm": 0.17430314421653748, "learning_rate": 1.1873560479707728e-05, "loss": 0.0373, "step": 33000 },
    { "epoch": 23.59, "eval_loss": 0.06159648299217224, "eval_runtime": 26.7887, "eval_samples_per_second": 1122.785, "eval_steps_per_second": 5.637, "step": 33000 },
    { "epoch": 24.3, "grad_norm": 0.14911049604415894, "learning_rate": 1.054986365922749e-05, "loss": 0.0369, "step": 34000 },
    { "epoch": 24.3, "eval_loss": 0.05931873992085457, "eval_runtime": 26.8571, "eval_samples_per_second": 1119.926, "eval_steps_per_second": 5.622, "step": 34000 },
    { "epoch": 25.02, "grad_norm": 0.13620807230472565, "learning_rate": 9.226166838747254e-06, "loss": 0.0361, "step": 35000 },
    { "epoch": 25.02, "eval_loss": 0.05695568770170212, "eval_runtime": 26.8966, "eval_samples_per_second": 1118.283, "eval_steps_per_second": 5.614, "step": 35000 },
    { "epoch": 25.73, "grad_norm": 0.13764438033103943, "learning_rate": 7.902470018267017e-06, "loss": 0.0349, "step": 36000 },
    { "epoch": 25.73, "eval_loss": 0.05707501247525215, "eval_runtime": 26.986, "eval_samples_per_second": 1114.578, "eval_steps_per_second": 5.595, "step": 36000 },
    { "epoch": 26.45, "grad_norm": 0.2389635145664215, "learning_rate": 6.578773197786779e-06, "loss": 0.0343, "step": 37000 },
    { "epoch": 26.45, "eval_loss": 0.0577365942299366, "eval_runtime": 26.9903, "eval_samples_per_second": 1114.401, "eval_steps_per_second": 5.595, "step": 37000 },
    { "epoch": 27.16, "grad_norm": 0.15828461945056915, "learning_rate": 5.255076377306542e-06, "loss": 0.034, "step": 38000 },
    { "epoch": 27.16, "eval_loss": 0.05767366662621498, "eval_runtime": 27.1454, "eval_samples_per_second": 1108.035, "eval_steps_per_second": 5.563, "step": 38000 },
    { "epoch": 27.88, "grad_norm": 0.1059570387005806, "learning_rate": 3.9313795568263045e-06, "loss": 0.0332, "step": 39000 },
    { "epoch": 27.88, "eval_loss": 0.056225307285785675, "eval_runtime": 26.9534, "eval_samples_per_second": 1115.928, "eval_steps_per_second": 5.602, "step": 39000 },
    { "epoch": 28.59, "grad_norm": 0.1975150853395462, "learning_rate": 2.6076827363460673e-06, "loss": 0.0329, "step": 40000 },
    { "epoch": 28.59, "eval_loss": 0.05555161088705063, "eval_runtime": 27.1187, "eval_samples_per_second": 1109.122, "eval_steps_per_second": 5.568, "step": 40000 },
    { "epoch": 29.31, "grad_norm": 0.1037423312664032, "learning_rate": 1.28398591586583e-06, "loss": 0.0319, "step": 41000 },
    { "epoch": 29.31, "eval_loss": 0.05535305291414261, "eval_runtime": 26.8353, "eval_samples_per_second": 1120.838, "eval_steps_per_second": 5.627, "step": 41000 }
  ],
  "logging_steps": 1000,
  "max_steps": 41970,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 1000,
  "total_flos": 3.270624085088659e+16,
  "train_batch_size": 200,
  "trial_name": null,
  "trial_params": null
}
ckpt/ling_disc/checkpoint-41000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
size 4984
ckpt/ling_disc/config.json
ADDED
@@ -0,0 +1,120 @@
{
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [ "DebertaReplacedTokenizer" ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2", "3": "LABEL_3", "4": "LABEL_4", "5": "LABEL_5", "6": "LABEL_6", "7": "LABEL_7", "8": "LABEL_8", "9": "LABEL_9",
    "10": "LABEL_10", "11": "LABEL_11", "12": "LABEL_12", "13": "LABEL_13", "14": "LABEL_14", "15": "LABEL_15", "16": "LABEL_16", "17": "LABEL_17", "18": "LABEL_18", "19": "LABEL_19",
    "20": "LABEL_20", "21": "LABEL_21", "22": "LABEL_22", "23": "LABEL_23", "24": "LABEL_24", "25": "LABEL_25", "26": "LABEL_26", "27": "LABEL_27", "28": "LABEL_28", "29": "LABEL_29",
    "30": "LABEL_30", "31": "LABEL_31", "32": "LABEL_32", "33": "LABEL_33", "34": "LABEL_34", "35": "LABEL_35", "36": "LABEL_36", "37": "LABEL_37", "38": "LABEL_38", "39": "LABEL_39"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0, "LABEL_1": 1, "LABEL_10": 10, "LABEL_11": 11, "LABEL_12": 12, "LABEL_13": 13, "LABEL_14": 14, "LABEL_15": 15, "LABEL_16": 16, "LABEL_17": 17,
    "LABEL_18": 18, "LABEL_19": 19, "LABEL_2": 2, "LABEL_20": 20, "LABEL_21": 21, "LABEL_22": 22, "LABEL_23": 23, "LABEL_24": 24, "LABEL_25": 25, "LABEL_26": 26,
    "LABEL_27": 27, "LABEL_28": 28, "LABEL_29": 29, "LABEL_3": 3, "LABEL_30": 30, "LABEL_31": 31, "LABEL_32": 32, "LABEL_33": 33, "LABEL_34": 34, "LABEL_35": 35,
    "LABEL_36": 36, "LABEL_37": 37, "LABEL_38": 38, "LABEL_39": 39, "LABEL_4": 4, "LABEL_5": 5, "LABEL_6": 6, "LABEL_7": 7, "LABEL_8": 8, "LABEL_9": 9
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [ "p2c", "c2p" ],
  "position_biased_input": false,
  "position_buckets": 256,
  "problem_type": "regression",
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "type_vocab_size": 0,
  "vocab_size": 128100
}
ckpt/ling_disc/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
size 275252064
ckpt/ling_disc/scaler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1dbad9723e37379b55bb6d7300abf6ad705f320bd599ca7f583e574f4a26f4a4
size 1575
ckpt/ling_disc/special_tokens_map.json
ADDED
@@ -0,0 +1,119 @@
{
  "additional_special_tokens": [
    "<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>",
    "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>",
    "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>",
    "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>",
    "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>",
    "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>",
    "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>",
    "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>",
    "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>",
    "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"
  ],
  "eos_token": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "pad_token": "</s>",
  "unk_token": { "content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }
}
ckpt/ling_disc/spiece.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
size 791656
ckpt/ling_disc/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff
ckpt/ling_disc/tokenizer_config.json
ADDED
@@ -0,0 +1,938 @@
{
  "added_tokens_decoder": {
    "0": { "content": "<pad>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "1": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "2": { "content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32000": { "content": "<extra_id_99>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32001": { "content": "<extra_id_98>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32002": { "content": "<extra_id_97>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32003": { "content": "<extra_id_96>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32004": { "content": "<extra_id_95>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32005": { "content": "<extra_id_94>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32006": { "content": "<extra_id_93>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32007": { "content": "<extra_id_92>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32008": { "content": "<extra_id_91>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32009": { "content": "<extra_id_90>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32010": { "content": "<extra_id_89>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32011": { "content": "<extra_id_88>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32012": { "content": "<extra_id_87>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32013": { "content": "<extra_id_86>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32014": { "content": "<extra_id_85>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32015": { "content": "<extra_id_84>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32016": { "content": "<extra_id_83>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32017": { "content": "<extra_id_82>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32018": { "content": "<extra_id_81>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32019": { "content": "<extra_id_80>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32020": { "content": "<extra_id_79>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32021": { "content": "<extra_id_78>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32022": { "content": "<extra_id_77>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32023": { "content": "<extra_id_76>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32024": { "content": "<extra_id_75>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32025": { "content": "<extra_id_74>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32026": { "content": "<extra_id_73>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32027": { "content": "<extra_id_72>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32028": { "content": "<extra_id_71>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32029": { "content": "<extra_id_70>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32030": { "content": "<extra_id_69>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32031": { "content": "<extra_id_68>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32032": { "content": "<extra_id_67>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32033": { "content": "<extra_id_66>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32034": { "content": "<extra_id_65>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32035": { "content": "<extra_id_64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32036": { "content": "<extra_id_63>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32037": { "content": "<extra_id_62>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32038": { "content": "<extra_id_61>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32039": { "content": "<extra_id_60>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32040": { "content": "<extra_id_59>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32041": { "content": "<extra_id_58>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32042": { "content": "<extra_id_57>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32043": { "content": "<extra_id_56>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32044": { "content": "<extra_id_55>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32045": { "content": "<extra_id_54>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32046": { "content": "<extra_id_53>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32047": { "content": "<extra_id_52>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32048": { "content": "<extra_id_51>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32049": { "content": "<extra_id_50>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32050": { "content": "<extra_id_49>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32051": { "content": "<extra_id_48>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32052": { "content": "<extra_id_47>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32053": { "content": "<extra_id_46>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32054": { "content": "<extra_id_45>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32055": { "content": "<extra_id_44>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32056": { "content": "<extra_id_43>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32057": { "content": "<extra_id_42>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32058": { "content": "<extra_id_41>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32059": { "content": "<extra_id_40>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32060": { "content": "<extra_id_39>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32061": { "content": "<extra_id_38>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32062": { "content": "<extra_id_37>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32063": { "content": "<extra_id_36>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32064": { "content": "<extra_id_35>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32065": { "content": "<extra_id_34>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32066": { "content": "<extra_id_33>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32067": { "content": "<extra_id_32>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32068": { "content": "<extra_id_31>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32069": { "content": "<extra_id_30>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32070": { "content": "<extra_id_29>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32071": { "content": "<extra_id_28>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32072": { "content": "<extra_id_27>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32073": { "content": "<extra_id_26>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32074": { "content": "<extra_id_25>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32075": { "content": "<extra_id_24>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32076": { "content": "<extra_id_23>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32077": { "content": "<extra_id_22>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32078": { "content": "<extra_id_21>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32079": { "content": "<extra_id_20>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32080": { "content": "<extra_id_19>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32081": { "content": "<extra_id_18>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32082": { "content": "<extra_id_17>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32083": { "content": "<extra_id_16>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32084": { "content": "<extra_id_15>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32085": { "content": "<extra_id_14>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32086": { "content": "<extra_id_13>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32087": { "content": "<extra_id_12>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32088": { "content": "<extra_id_11>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32089": { "content": "<extra_id_10>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32090": { "content": "<extra_id_9>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32091": { "content": "<extra_id_8>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32092": { "content": "<extra_id_7>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32093": { "content": "<extra_id_6>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32094": { "content": "<extra_id_5>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32095": { "content": "<extra_id_4>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32096": { "content": "<extra_id_3>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32097": { "content": "<extra_id_2>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32098": { "content": "<extra_id_1>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32099": { "content": "<extra_id_0>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }
  },
  "additional_special_tokens": [
    "<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>",
    "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>",
    "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>",
+
"<extra_id_29>",
|
859 |
+
"<extra_id_30>",
|
860 |
+
"<extra_id_31>",
|
861 |
+
"<extra_id_32>",
|
862 |
+
"<extra_id_33>",
|
863 |
+
"<extra_id_34>",
|
864 |
+
"<extra_id_35>",
|
865 |
+
"<extra_id_36>",
|
866 |
+
"<extra_id_37>",
|
867 |
+
"<extra_id_38>",
|
868 |
+
"<extra_id_39>",
|
869 |
+
"<extra_id_40>",
|
870 |
+
"<extra_id_41>",
|
871 |
+
"<extra_id_42>",
|
872 |
+
"<extra_id_43>",
|
873 |
+
"<extra_id_44>",
|
874 |
+
"<extra_id_45>",
|
875 |
+
"<extra_id_46>",
|
876 |
+
"<extra_id_47>",
|
877 |
+
"<extra_id_48>",
|
878 |
+
"<extra_id_49>",
|
879 |
+
"<extra_id_50>",
|
880 |
+
"<extra_id_51>",
|
881 |
+
"<extra_id_52>",
|
882 |
+
"<extra_id_53>",
|
883 |
+
"<extra_id_54>",
|
884 |
+
"<extra_id_55>",
|
885 |
+
"<extra_id_56>",
|
886 |
+
"<extra_id_57>",
|
887 |
+
"<extra_id_58>",
|
888 |
+
"<extra_id_59>",
|
889 |
+
"<extra_id_60>",
|
890 |
+
"<extra_id_61>",
|
891 |
+
"<extra_id_62>",
|
892 |
+
"<extra_id_63>",
|
893 |
+
"<extra_id_64>",
|
894 |
+
"<extra_id_65>",
|
895 |
+
"<extra_id_66>",
|
896 |
+
"<extra_id_67>",
|
897 |
+
"<extra_id_68>",
|
898 |
+
"<extra_id_69>",
|
899 |
+
"<extra_id_70>",
|
900 |
+
"<extra_id_71>",
|
901 |
+
"<extra_id_72>",
|
902 |
+
"<extra_id_73>",
|
903 |
+
"<extra_id_74>",
|
904 |
+
"<extra_id_75>",
|
905 |
+
"<extra_id_76>",
|
906 |
+
"<extra_id_77>",
|
907 |
+
"<extra_id_78>",
|
908 |
+
"<extra_id_79>",
|
909 |
+
"<extra_id_80>",
|
910 |
+
"<extra_id_81>",
|
911 |
+
"<extra_id_82>",
|
912 |
+
"<extra_id_83>",
|
913 |
+
"<extra_id_84>",
|
914 |
+
"<extra_id_85>",
|
915 |
+
"<extra_id_86>",
|
916 |
+
"<extra_id_87>",
|
917 |
+
"<extra_id_88>",
|
918 |
+
"<extra_id_89>",
|
919 |
+
"<extra_id_90>",
|
920 |
+
"<extra_id_91>",
|
921 |
+
"<extra_id_92>",
|
922 |
+
"<extra_id_93>",
|
923 |
+
"<extra_id_94>",
|
924 |
+
"<extra_id_95>",
|
925 |
+
"<extra_id_96>",
|
926 |
+
"<extra_id_97>",
|
927 |
+
"<extra_id_98>",
|
928 |
+
"<extra_id_99>"
|
929 |
+
],
|
930 |
+
"clean_up_tokenization_spaces": true,
|
931 |
+
"eos_token": "</s>",
|
932 |
+
"extra_ids": 100,
|
933 |
+
"model_max_length": 512,
|
934 |
+
"pad_token": "</s>",
|
935 |
+
"sp_model_kwargs": {},
|
936 |
+
"tokenizer_class": "T5Tokenizer",
|
937 |
+
"unk_token": "<unk>"
|
938 |
+
}
|
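The config above registers T5's 100 sentinel tokens (`<extra_id_0>` through `<extra_id_99>`) as special tokens at the top of the vocabulary. A minimal sanity-check sketch (not part of the commit; it assumes only the checkpoint directory shown above):

```python
from transformers import T5Tokenizer

# Load the tokenizer saved in this checkpoint directory.
tok = T5Tokenizer.from_pretrained("ckpt/ling_disc")

# Sentinels are marked "special": true, so they are dropped when decoding
# with skip_special_tokens=True.
print(tok.convert_tokens_to_ids("<extra_id_0>"))    # 32099, per the mapping above
print(tok.eos_token, tok.pad_token, tok.unk_token)  # </s> </s> <unk>
```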
ckpt/ling_disc/trainer_state.json
ADDED
@@ -0,0 +1,645 @@
+{
+  "best_metric": 0.05535305291414261,
+  "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
+  "epoch": 30.0,
+  "eval_steps": 1000,
+  "global_step": 41970,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.71, "grad_norm": 0.855617344379425, "learning_rate": 1.1913271384322135e-05, "loss": 0.9117, "step": 1000},
+    {"epoch": 0.71, "eval_loss": 0.6742472052574158, "eval_runtime": 27.0595, "eval_samples_per_second": 1111.549, "eval_steps_per_second": 5.58, "step": 1000},
+    {"epoch": 1.43, "grad_norm": 4.203719139099121, "learning_rate": 2.382654276864427e-05, "loss": 0.4114, "step": 2000},
+    {"epoch": 1.43, "eval_loss": 0.3266257345676422, "eval_runtime": 26.9318, "eval_samples_per_second": 1116.822, "eval_steps_per_second": 5.607, "step": 2000},
+    {"epoch": 2.14, "grad_norm": 3.1638591289520264, "learning_rate": 3.57398141529664e-05, "loss": 0.2624, "step": 3000},
+    {"epoch": 2.14, "eval_loss": 0.24602766335010529, "eval_runtime": 27.0604, "eval_samples_per_second": 1111.512, "eval_steps_per_second": 5.58, "step": 3000},
+    {"epoch": 2.86, "grad_norm": 1.7417826652526855, "learning_rate": 4.765308553728854e-05, "loss": 0.2002, "step": 4000},
+    {"epoch": 2.86, "eval_loss": 0.1770436018705368, "eval_runtime": 26.8812, "eval_samples_per_second": 1118.922, "eval_steps_per_second": 5.617, "step": 4000},
+    {"epoch": 3.57, "grad_norm": 1.1299816370010376, "learning_rate": 4.893707145315437e-05, "loss": 0.1635, "step": 5000},
+    {"epoch": 3.57, "eval_loss": 0.14757415652275085, "eval_runtime": 26.7857, "eval_samples_per_second": 1122.914, "eval_steps_per_second": 5.637, "step": 5000},
+    {"epoch": 4.29, "grad_norm": 1.210856556892395, "learning_rate": 4.761337463267413e-05, "loss": 0.1404, "step": 6000},
+    {"epoch": 4.29, "eval_loss": 0.12851941585540771, "eval_runtime": 26.9893, "eval_samples_per_second": 1114.44, "eval_steps_per_second": 5.595, "step": 6000},
+    {"epoch": 5.0, "grad_norm": 2.0565412044525146, "learning_rate": 4.62896778121939e-05, "loss": 0.1263, "step": 7000},
+    {"epoch": 5.0, "eval_loss": 0.12228666245937347, "eval_runtime": 26.7363, "eval_samples_per_second": 1124.987, "eval_steps_per_second": 5.648, "step": 7000},
+    {"epoch": 5.72, "grad_norm": 1.8667607307434082, "learning_rate": 4.496598099171366e-05, "loss": 0.1127, "step": 8000},
+    {"epoch": 5.72, "eval_loss": 0.11036147177219391, "eval_runtime": 26.7509, "eval_samples_per_second": 1124.375, "eval_steps_per_second": 5.645, "step": 8000},
+    {"epoch": 6.43, "grad_norm": 0.7492337226867676, "learning_rate": 4.364228417123342e-05, "loss": 0.1059, "step": 9000},
+    {"epoch": 6.43, "eval_loss": 0.10317497700452805, "eval_runtime": 27.0158, "eval_samples_per_second": 1113.349, "eval_steps_per_second": 5.589, "step": 9000},
+    {"epoch": 7.15, "grad_norm": 0.7611485123634338, "learning_rate": 4.231858735075319e-05, "loss": 0.0993, "step": 10000},
+    {"epoch": 7.15, "eval_loss": 0.10284282267093658, "eval_runtime": 26.795, "eval_samples_per_second": 1122.524, "eval_steps_per_second": 5.635, "step": 10000},
+    {"epoch": 7.86, "grad_norm": 0.5870215892791748, "learning_rate": 4.099489053027295e-05, "loss": 0.0887, "step": 11000},
+    {"epoch": 7.86, "eval_loss": 0.09789762645959854, "eval_runtime": 26.8453, "eval_samples_per_second": 1120.419, "eval_steps_per_second": 5.625, "step": 11000},
+    {"epoch": 8.58, "grad_norm": 0.48922085762023926, "learning_rate": 3.9671193709792706e-05, "loss": 0.0842, "step": 12000},
+    {"epoch": 8.58, "eval_loss": 0.09349656105041504, "eval_runtime": 26.8273, "eval_samples_per_second": 1121.172, "eval_steps_per_second": 5.629, "step": 12000},
+    {"epoch": 9.29, "grad_norm": 0.4252859354019165, "learning_rate": 3.8347496889312476e-05, "loss": 0.0793, "step": 13000},
+    {"epoch": 9.29, "eval_loss": 0.09415590018033981, "eval_runtime": 25.9362, "eval_samples_per_second": 1159.693, "eval_steps_per_second": 5.822, "step": 13000},
+    {"epoch": 10.01, "grad_norm": 0.44548505544662476, "learning_rate": 3.702380006883224e-05, "loss": 0.076, "step": 14000},
+    {"epoch": 10.01, "eval_loss": 0.08913980424404144, "eval_runtime": 26.7379, "eval_samples_per_second": 1124.919, "eval_steps_per_second": 5.647, "step": 14000},
+    {"epoch": 10.72, "grad_norm": 0.2965373694896698, "learning_rate": 3.5700103248352e-05, "loss": 0.0714, "step": 15000},
+    {"epoch": 10.72, "eval_loss": 0.08456840366125107, "eval_runtime": 26.787, "eval_samples_per_second": 1122.857, "eval_steps_per_second": 5.637, "step": 15000},
+    {"epoch": 11.44, "grad_norm": 0.3205694854259491, "learning_rate": 3.437640642787176e-05, "loss": 0.0677, "step": 16000},
+    {"epoch": 11.44, "eval_loss": 0.07863688468933105, "eval_runtime": 26.8242, "eval_samples_per_second": 1121.299, "eval_steps_per_second": 5.629, "step": 16000},
+    {"epoch": 12.15, "grad_norm": 0.2736203670501709, "learning_rate": 3.3052709607391525e-05, "loss": 0.0636, "step": 17000},
+    {"epoch": 12.15, "eval_loss": 0.07664181292057037, "eval_runtime": 26.7818, "eval_samples_per_second": 1123.077, "eval_steps_per_second": 5.638, "step": 17000},
+    {"epoch": 12.87, "grad_norm": 0.25644680857658386, "learning_rate": 3.172901278691129e-05, "loss": 0.0618, "step": 18000},
+    {"epoch": 12.87, "eval_loss": 0.07351888716220856, "eval_runtime": 26.8445, "eval_samples_per_second": 1120.453, "eval_steps_per_second": 5.625, "step": 18000},
+    {"epoch": 13.58, "grad_norm": 0.2748676538467407, "learning_rate": 3.0405315966431053e-05, "loss": 0.0584, "step": 19000},
+    {"epoch": 13.58, "eval_loss": 0.07314006239175797, "eval_runtime": 26.8333, "eval_samples_per_second": 1120.921, "eval_steps_per_second": 5.627, "step": 19000},
+    {"epoch": 14.3, "grad_norm": 0.30235132575035095, "learning_rate": 2.9081619145950812e-05, "loss": 0.057, "step": 20000},
+    {"epoch": 14.3, "eval_loss": 0.07568340748548508, "eval_runtime": 27.0109, "eval_samples_per_second": 1113.55, "eval_steps_per_second": 5.59, "step": 20000},
+    {"epoch": 15.01, "grad_norm": 0.2508692145347595, "learning_rate": 2.7757922325470574e-05, "loss": 0.0558, "step": 21000},
+    {"epoch": 15.01, "eval_loss": 0.07675843685865402, "eval_runtime": 26.9026, "eval_samples_per_second": 1118.032, "eval_steps_per_second": 5.613, "step": 21000},
+    {"epoch": 15.73, "grad_norm": 0.3341030478477478, "learning_rate": 2.643422550499034e-05, "loss": 0.0533, "step": 22000},
+    {"epoch": 15.73, "eval_loss": 0.07339715212583542, "eval_runtime": 26.8727, "eval_samples_per_second": 1119.278, "eval_steps_per_second": 5.619, "step": 22000},
+    {"epoch": 16.44, "grad_norm": 0.30433303117752075, "learning_rate": 2.51105286845101e-05, "loss": 0.0516, "step": 23000},
+    {"epoch": 16.44, "eval_loss": 0.0694783553481102, "eval_runtime": 26.8551, "eval_samples_per_second": 1120.012, "eval_steps_per_second": 5.623, "step": 23000},
+    {"epoch": 17.16, "grad_norm": 0.39424875378608704, "learning_rate": 2.378683186402986e-05, "loss": 0.049, "step": 24000},
+    {"epoch": 17.16, "eval_loss": 0.06750107556581497, "eval_runtime": 26.9045, "eval_samples_per_second": 1117.954, "eval_steps_per_second": 5.612, "step": 24000},
+    {"epoch": 17.87, "grad_norm": 0.29526183009147644, "learning_rate": 2.2463135043549627e-05, "loss": 0.0478, "step": 25000},
+    {"epoch": 17.87, "eval_loss": 0.06841529905796051, "eval_runtime": 26.9131, "eval_samples_per_second": 1117.597, "eval_steps_per_second": 5.611, "step": 25000},
+    {"epoch": 18.58, "grad_norm": 0.2802821099758148, "learning_rate": 2.113943822306939e-05, "loss": 0.0472, "step": 26000},
+    {"epoch": 18.58, "eval_loss": 0.0680340975522995, "eval_runtime": 26.8442, "eval_samples_per_second": 1120.467, "eval_steps_per_second": 5.625, "step": 26000},
+    {"epoch": 19.3, "grad_norm": 0.198490172624588, "learning_rate": 1.9815741402589152e-05, "loss": 0.0445, "step": 27000},
+    {"epoch": 19.3, "eval_loss": 0.059882719069719315, "eval_runtime": 26.9691, "eval_samples_per_second": 1115.275, "eval_steps_per_second": 5.599, "step": 27000},
+    {"epoch": 20.01, "grad_norm": 0.3383251130580902, "learning_rate": 1.8492044582108914e-05, "loss": 0.0435, "step": 28000},
+    {"epoch": 20.01, "eval_loss": 0.06356318295001984, "eval_runtime": 26.8538, "eval_samples_per_second": 1120.066, "eval_steps_per_second": 5.623, "step": 28000},
+    {"epoch": 20.73, "grad_norm": 0.16571784019470215, "learning_rate": 1.7168347761628677e-05, "loss": 0.0419, "step": 29000},
+    {"epoch": 20.73, "eval_loss": 0.06056862324476242, "eval_runtime": 27.0748, "eval_samples_per_second": 1110.924, "eval_steps_per_second": 5.577, "step": 29000},
+    {"epoch": 21.44, "grad_norm": 0.19518467783927917, "learning_rate": 1.584465094114844e-05, "loss": 0.0409, "step": 30000},
+    {"epoch": 21.44, "eval_loss": 0.06490638852119446, "eval_runtime": 26.8481, "eval_samples_per_second": 1120.301, "eval_steps_per_second": 5.624, "step": 30000},
+    {"epoch": 22.16, "grad_norm": 0.15420591831207275, "learning_rate": 1.4520954120668203e-05, "loss": 0.0397, "step": 31000},
+    {"epoch": 22.16, "eval_loss": 0.05918469280004501, "eval_runtime": 26.8143, "eval_samples_per_second": 1121.713, "eval_steps_per_second": 5.631, "step": 31000},
+    {"epoch": 22.87, "grad_norm": 0.26854997873306274, "learning_rate": 1.3197257300187965e-05, "loss": 0.0387, "step": 32000},
+    {"epoch": 22.87, "eval_loss": 0.06144551932811737, "eval_runtime": 26.8852, "eval_samples_per_second": 1118.757, "eval_steps_per_second": 5.616, "step": 32000},
+    {"epoch": 23.59, "grad_norm": 0.17430314421653748, "learning_rate": 1.1873560479707728e-05, "loss": 0.0373, "step": 33000},
+    {"epoch": 23.59, "eval_loss": 0.06159648299217224, "eval_runtime": 26.7887, "eval_samples_per_second": 1122.785, "eval_steps_per_second": 5.637, "step": 33000},
+    {"epoch": 24.3, "grad_norm": 0.14911049604415894, "learning_rate": 1.054986365922749e-05, "loss": 0.0369, "step": 34000},
+    {"epoch": 24.3, "eval_loss": 0.05931873992085457, "eval_runtime": 26.8571, "eval_samples_per_second": 1119.926, "eval_steps_per_second": 5.622, "step": 34000},
+    {"epoch": 25.02, "grad_norm": 0.13620807230472565, "learning_rate": 9.226166838747254e-06, "loss": 0.0361, "step": 35000},
+    {"epoch": 25.02, "eval_loss": 0.05695568770170212, "eval_runtime": 26.8966, "eval_samples_per_second": 1118.283, "eval_steps_per_second": 5.614, "step": 35000},
+    {"epoch": 25.73, "grad_norm": 0.13764438033103943, "learning_rate": 7.902470018267017e-06, "loss": 0.0349, "step": 36000},
+    {"epoch": 25.73, "eval_loss": 0.05707501247525215, "eval_runtime": 26.986, "eval_samples_per_second": 1114.578, "eval_steps_per_second": 5.595, "step": 36000},
+    {"epoch": 26.45, "grad_norm": 0.2389635145664215, "learning_rate": 6.578773197786779e-06, "loss": 0.0343, "step": 37000},
+    {"epoch": 26.45, "eval_loss": 0.0577365942299366, "eval_runtime": 26.9903, "eval_samples_per_second": 1114.401, "eval_steps_per_second": 5.595, "step": 37000},
+    {"epoch": 27.16, "grad_norm": 0.15828461945056915, "learning_rate": 5.255076377306542e-06, "loss": 0.034, "step": 38000},
+    {"epoch": 27.16, "eval_loss": 0.05767366662621498, "eval_runtime": 27.1454, "eval_samples_per_second": 1108.035, "eval_steps_per_second": 5.563, "step": 38000},
+    {"epoch": 27.88, "grad_norm": 0.1059570387005806, "learning_rate": 3.9313795568263045e-06, "loss": 0.0332, "step": 39000},
+    {"epoch": 27.88, "eval_loss": 0.056225307285785675, "eval_runtime": 26.9534, "eval_samples_per_second": 1115.928, "eval_steps_per_second": 5.602, "step": 39000},
+    {"epoch": 28.59, "grad_norm": 0.1975150853395462, "learning_rate": 2.6076827363460673e-06, "loss": 0.0329, "step": 40000},
+    {"epoch": 28.59, "eval_loss": 0.05555161088705063, "eval_runtime": 27.1187, "eval_samples_per_second": 1109.122, "eval_steps_per_second": 5.568, "step": 40000},
+    {"epoch": 29.31, "grad_norm": 0.1037423312664032, "learning_rate": 1.28398591586583e-06, "loss": 0.0319, "step": 41000},
+    {"epoch": 29.31, "eval_loss": 0.05535305291414261, "eval_runtime": 26.8353, "eval_samples_per_second": 1120.838, "eval_steps_per_second": 5.627, "step": 41000},
+    {"epoch": 30.0, "step": 41970, "total_flos": 3.347206753110317e+16, "train_loss": 0.09860551060169176, "train_runtime": 13103.021, "train_samples_per_second": 640.368, "train_steps_per_second": 3.203}
+  ],
+  "logging_steps": 1000,
+  "max_steps": 41970,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 1000,
+  "total_flos": 3.347206753110317e+16,
+  "train_batch_size": 200,
+  "trial_name": null,
+  "trial_params": null
+}
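For reference, `log_history` above is a flat list that interleaves training-loss and eval entries; a short sketch (assuming only the file just shown) to pull the two curves apart:

```python
import json

with open("ckpt/ling_disc/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(state["best_metric"])  # 0.05535305291414261
print(evals[-1])             # (41000, 0.05535305291414261)
```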
ckpt/ling_disc/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
+size 4984
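`training_args.bin` is the pickled `transformers.TrainingArguments` object the Trainer saves next to a run; a hedged sketch for inspecting it (recent PyTorch versions need `weights_only=False` to unpickle arbitrary objects):

```python
import torch

# Restores the TrainingArguments used for the discriminator run above.
args = torch.load("ckpt/ling_disc/training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```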
ckpt/model.json
ADDED
@@ -0,0 +1,82 @@
+{
+  "data": "ling_conversion",
+  "data_sources": ["qqp", "mrpc", "stsb"],
+  "data_type": "text",
+  "kld_annealing": "cyclic",
+  "lingpred_annealing": "mono",
+  "ling_embed_type": "one-layer",
+  "combine_weight": 1,
+  "alpha_kld": 1,
+  "alpha_lingpred": 1,
+  "alpha_sem": 1,
+  "max_grad_norm": 10,
+  "sem_loss_tao": 0.5,
+  "sem_loss_eps": 1,
+  "ckpt": "./ckpt/model.pt",
+  "disc_type": "deberta",
+  "disc_ckpt": "./ckpt/ling_disc",
+  "sem_ckpt": "./ckpt/sem_emb.pt",
+  "lng_ids": null,
+  "lng_ids_idx": null,
+  "model_name": "google/flan-t5-base",
+  "aim_exp": "lingconv-0606",
+  "sem_loss_type": "dedicated",
+  "combine_method": "decoder_add_first",
+  "train_log": 200,
+  "val_log": 2000,
+  "batch_size": 80,
+  "eval_batch_size": 200,
+  "max_eval_samples": 1000,
+  "test_batch_size": 1,
+  "hidden_dim": 500,
+  "latent_dim": 150,
+  "lng_dim": 40,
+  "disc_lng_dim": 40,
+  "use_lora": false,
+  "lora_r": 64,
+  "gpu": "4",
+  "epochs": 20,
+  "grad_accumulation": 1,
+  "n_ica": 10,
+  "max_length": 200,
+  "total_steps": null,
+  "kld_const": 1,
+  "lr": 0.001,
+  "kl_weight": 0.1,
+  "weight_decay": 0.01,
+  "ling_dropout": 0.1,
+  "predict_fn": "logs/test.txt",
+  "save_predict": false,
+  "use_ica": false,
+  "pretrain_gen": false,
+  "pretrain_sem": false,
+  "pretrain_disc": false,
+  "linggen_type": "none",
+  "linggen_input": "s+l",
+  "aug_same": false,
+  "ling_vae": false,
+  "process_lingpred": false,
+  "fudge_lambda": 1.0,
+  "use_lingpred": false,
+  "ling2_only": true,
+  "cycle_loss": false,
+  "disc_loss": false,
+  "sem_loss": false,
+  "sim_loss": false,
+  "optuna": false,
+  "debug": false,
+  "demo": false,
+  "fudge": false,
+  "out_fn": "logs/default",
+  "eval_only": false,
+  "predict_with_feedback": false,
+  "feedback_param": "s",
+  "eval_ling": false,
+  "seed": 0,
+  "major_arg": 0,
+  "quantize_lng": false,
+  "quant_nbins": 20,
+  "src_lng": "ling",
+  "to_restore": [],
+  "disc_steps": 0
+}
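`ckpt/model.json` snapshots the run options (the same field names that `options.py` parses); a small sketch, assuming only this file, to restore them as a namespace:

```python
import json
from argparse import Namespace

with open("ckpt/model.json") as f:
    saved_args = Namespace(**json.load(f))

print(saved_args.model_name)  # google/flan-t5-base
print(saved_args.lng_dim)     # 40 linguistic indices
```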
ckpt/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a675026d23bf857c796e00fda67b500e4cc13b43db030b08fdfaef14823fbe42
+size 2971737146
ckpt/sem_emb.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c58f890cb0121eacf8ac99d2fac53e2962f457d8c02e0b6386a4b3e342ac10c
+size 1315675291
compute_lng.py
CHANGED
@@ -4,38 +4,6 @@ import lftk
 import spacy
 nlp = spacy.load("en_core_web_sm")
 
-def extract_lingfeat(text):
-    from lingfeat import extractor
-    LingFeat = extractor.pass_text(text)
-    LingFeat.preprocess()
-
-    d = {}
-    d.update(LingFeat.WoKF_()) # Wikipedia Knowledge Features
-    d.update(LingFeat.WBKF_()) # WeeBit Corpus Knowledge Features
-    d.update(LingFeat.OSKF_()) # OneStopEng Corpus Knowledge Features
-
-    # Discourse (Disco) Features
-    d.update(LingFeat.EnDF_()) # Entity Density Features
-    d.update(LingFeat.EnGF_()) # Entity Grid Features
-
-    # Syntactic (Synta) Features
-    # d.update(LingFeat.PhrF_()) # Noun/Verb/Adj/Adv/... Phrasal Features (logging stanza)
-    # d.update(LingFeat.TrSF_()) # (Parse) Tree Structural Features (logging stanza)
-    d.update(LingFeat.POSF_()) # Noun/Verb/Adj/Adv/... Part-of-Speech Features
-
-    # Lexico Semantic (LxSem) Features
-    d.update(LingFeat.TTRF_()) # Type Token Ratio Features
-    d.update(LingFeat.VarF_()) # Noun/Verb/Adj/Adv Variation Features
-    d.update(LingFeat.PsyF_()) # Psycholinguistic Difficulty of Words (AoA Kuperman)
-    d.update(LingFeat.WorF_()) # Word Familiarity from Frequency Count (SubtlexUS)
-
-    # Shallow Traditional (ShTra) Features
-    d.update(LingFeat.ShaF_()) # Shallow Features (e.g. avg number of tokens)
-    d.update(LingFeat.TraF_()) # Traditional Formulas
-
-    return list(d.values())
-
-
 def extract_lftk(text):
     if text == '':
         return [0.] * 220
@@ -45,12 +13,9 @@
     feats = LFTK.extract()
     return list(feats.values())
 
-def compute_lng(text
+def compute_lng(text):
     lca_feats = lca(text)
-
-        sca_feats = [0] * 23
-    else:
-        sca_feats = sca(text)
+    sca_feats = sca(text)
     lftk = extract_lftk(text)
     all_feats = lca_feats + sca_feats + lftk
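The elided middle of `extract_lftk` builds a spaCy doc and hands it to LFTK; a sketch of that call path using LFTK's documented `Extractor` API (the doc-construction lines are an assumption, since the diff hunk skips them):

```python
import lftk
import spacy

nlp = spacy.load("en_core_web_sm")

def extract_lftk(text):
    if text == '':
        return [0.] * 220
    doc = nlp(text)                  # assumed: spaCy doc built from the input
    LFTK = lftk.Extractor(docs=doc)  # LFTK's documented entry point
    feats = LFTK.extract()           # dict keyed like lftk_ids.csv below
    return list(feats.values())
```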
demo.py
DELETED
@@ -1,371 +0,0 @@
-def run_gradio(model, tokenizer, scaler, ling_collection, examples=None, lng_names=None, M=None):
-    import numpy as np
-    import torch
-    from datetime import datetime
-    from compute_lng import compute_lng
-    import gradio as gr
-    m = np.load('assets/m.npy')
-    m = -1/m
-    m[m == -np.inf] = 0
-    m /= 100
-    device = model.backbone.device
-
-    def visibility(mode):
-        if mode == 0:
-            vis_group = group1
-        elif mode == 1:
-            vis_group = group2
-        elif mode == 2:
-            vis_group = group3
-
-        output = [gr.update(value=''), gr.update(value='')]
-        for component in components:
-            if component in vis_group:
-                output.append(gr.update(visible=True))
-            else:
-                output.append(gr.update(visible=False))
-        return output
-
-    def generate(sent1, ling):
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = scaler.transform([ling['Source']])
-        ling2 = scaler.transform([ling['Target']])
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sentence1_ling': torch.tensor(ling1).float().to(device),
-                  'sentence2_ling': torch.tensor(ling2).float().to(device),
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        preds = []
-        with torch.no_grad():
-            pred = model.infer(inputs).cpu().numpy()
-            pred = tokenizer.batch_decode(pred,
-                    skip_special_tokens=True)[0]
-
-        return pred
-
-    def generate_with_feedbacks(sent1, ling):
-        preds = []
-        eta = 0.1
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-        ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-        ling1_embed = model.ling_embed(ling1)
-        ling2_embed = model.ling_embed(ling2)
-        cur_ling = ling1_embed + eta * (ling2_embed - ling1_embed)
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sent1_ling_embed': ling1_embed,
-                  'sent2_ling_embed': ling2_embed,
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        converged = False
-        c = 0
-        while not converged:
-            with torch.no_grad():
-                pred = model.infer(inputs)
-            inputs_pred = inputs.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            ling_pred = model.ling_disc(**inputs_pred)
-            ling_pred_embed = model.ling_embed(ling_pred)
-
-            if len(interpolations) == 0 or pred != interpolations[-1]:
-                interpolations.append(pred)
-
-            diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-            scale = torch.norm(cur_ling)/torch.norm(ling2)
-
-            # print(f'Diff: {diff.item():.3f} / Scale: ({scale.item():.3f})>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 1e-5 or c >= 50:
-                converged = True
-            else:
-                # cur_ling = cur_ling + eta * (ling2_embed - ling_pred_embed)
-                inputs.update({
-                    'sentence1_input_ids': pred,
-                    # 'sent2_ling_embed': ling2_embed,
-                    'sentence1_attention_mask': torch.ones_like(pred)
-                })
-            c += 1
-
-        pred = tokenizer.batch_decode(pred.cpu().numpy(),
-                skip_special_tokens=True)[0]
-
-        return pred
-    def generate_with_feedback(sent1, ling, approx):
-        if sent1 == '':
-            return ['Please input a source text.', '']
-        preds = []
-        interpolations = []
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-        ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-        ling1_embed = model.ling_embed(ling1)
-        ling2_embed = model.ling_embed(ling2)
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sent1_ling_embed': ling1_embed,
-                  'sent2_ling_embed': ling2_embed,
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        converged = False
-        c = 0
-        eta = 0.3
-        while not converged:
-            with torch.no_grad():
-                pred = model.infer(inputs)
-            inputs_pred = inputs.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                    skip_special_tokens=True)[0]
-            if 'approximate' in approx:
-                ling_pred = model.ling_disc(**inputs_pred)
-            elif 'exact' in approx:
-                ling_pred = compute_lng(pred_text)
-                ling_pred = scaler.transform([ling_pred])[0]
-                ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-            else:
-                raise ValueError()
-            ling_pred_embed = model.ling_embed(ling_pred)
-
-            if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                interpolations.append(pred_text)
-
-            diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-
-            # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 10 or c >= 50:
-                converged = True
-            else:
-                ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                inputs.update({'sent2_ling_embed': ling2_embed})
-            c += 1
-
-
-        interpolation = '-- ' + '\n-- '.join(interpolations)
-        return [pred_text, interpolation]
-
-    def generate_random(sent1, ling, count, approx):
-        preds, interpolations = [], []
-        for c in range(count):
-            idx = np.random.randint(0, len(ling_collection))
-            ling_ex = ling_collection[idx]
-            ling['Target'] = ling_ex
-            pred, interpolation = generate_with_feedback(sent1, ling, approx)
-            preds.append(pred)
-            interpolations.append(interpolation)
-        return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
-
-    def estimate_gen(sent1, sent2, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent2)
-        else:
-            raise ValueError()
-
-        ling['Target'] = ling_pred
-        gen = generate_with_feedback(sent1, ling, approx)
-        results = gen + [ling]
-
-        return results
-
-    def estimate_tgt(sent2, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent2)
-        else:
-            raise ValueError()
-
-        ling['Target'] = ling_pred
-        return ling
-
-    def estimate_src(sent1, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent1)
-        else:
-            raise ValueError()
-
-        ling['Source'] = ling_pred
-        return ling
-
-    def rand_target(ling):
-        ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
-        return ling
-
-    def rand_ex_target(ling):
-        idx = np.random.randint(0, len(examples))
-        ling_ex = examples[idx][1]
-        ling['Target'] = ling_ex['Target']
-        return ling
-
-    def copy(ling):
-        ling['Target'] = ling['Source']
-        return ling
-
-    def add_noise(ling):
-        x = scaler.transform([ling['Target']])
-        x += np.random.randn(*ling['Target'].shape)
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    def add(ling):
-        x = scaler.transform([ling['Target']])
-        x += m
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    def sub(ling):
-        x = scaler.transform([ling['Target']])
-        x -= m
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    # title = ''
-    # for i, model in enumerate(models):
-    #     if i > 0:
-    #         title += '\n'
-    #     title += f"model ({i})\n\tUsing VAE = {model.args.ling_vae}\n\tUsing ICA = {model.args.use_ica}\n\tNumber of features = {model.args.lng_dim if not model.args.use_ica else model.args.n_ica}"
-    title = """
-    # LingConv: A System for Controlled Linguistic Conversion
-
-    ## Description
-
-    This system is an encoder-decoder model for complexity controlled text generation, guided by 241
-    linguistic complexity indices as key attributes. Given a sentence and a desired level of linguistic
-    complexity, the model can generate diverse paraphrases that maintain consistent meaning, adjusted for
-    different linguistic complexity levels. However, it's important to note that not all index combinations are
-    feasible (such as requesting a sentence of "length" 5 with 10 "unique words"). To ensure high quality
-    outputs, our approach interpolates the embedding of linguistic indices to locate the most closely matched,
-    achievable set of indices for the given target.
-    """
-
-    guide = """
-    You may use the system in on of the following ways:
-
-    **Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
-    of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
-    and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
-
-    **Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
-    sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
-    sentence along with another sentence (which will serve only to extract linguistic indices for the
-    paraphrase generation). Then, click "Generate."
-
-    **Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
-    generated text. We provided a set of tools for manual adjustments of the desired linguistic complexity of
-    the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
-    generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
-    These tools are designed for experimental use and require the user to possess linguistic expertise for
-    effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
-    indices." Once indices are entered, click "Generate."
-
-
-    Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
-    in quality control of the genration). Approximate computation is significantly faster.
-
-    Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
-
-    Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentences,
-    the indices of the source sentences, and a sample set of target linguistic indices.
-
-    Please make your choice below.
-
-    """
-
-    sent1 = gr.Textbox(label='Source text')
-    ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
-            headers=['Index', 'Source', 'Target'],
-            datatype=['str', 'number', 'number'], visible=False)
-    css = """
-    #guide span.svelte-s1r2yt {font-size: 22px !important;
-                               font-weight: 600 !important}
-    """
-    with gr.Blocks(css=css) as demo:
-        gr.Markdown(title)
-        with gr.Accordion("Quick Start Guide", open=False, elem_id='guide'):
-            gr.Markdown(guide)
-
-        mode = gr.Radio(value='Randomized Paraphrase Generation',
-                label='How would you like to use this system?',
-                type="index",
-                choices=['Randomized Paraphrase Generation',
-                    'Complexity-Matched Paraphrasing', 'Manual Linguistic Control'])
-        approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
-                choices=['Use approximate computation of linguistic indices (faster)',
-                    'Use exact computation of linguistic indices'], container=False, show_label=False)
-        control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
-
-        with gr.Accordion("Examples...", open=False):
-            gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
-
-        with gr.Row():
-            sent1.render()
-            with gr.Column():
-                sent2 = gr.Textbox(label='Generated text')
-                interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
-        #####################
-        with gr.Row():
-            generate_random_btn = gr.Button("Generate",
-                    variant='primary', scale=1, visible=True)
-            count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
-            # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
-            # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
-            # add_noise_btn = gr.Button('Add noise to target linguistic indices')
-        #####################
-        with gr.Row():
-            estimate_gen_btn = gr.Button("Generate",
-                    variant='primary',
-                    scale=1, visible=False)
-            sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
-        #####################
-        generate_btn = gr.Button("Generate", variant='primary', visible=False)
-        with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
-            with gr.Row():
-                estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
-                sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
-                estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
-            # rand_btn = gr.Button("Random target")
-            rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
-            copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
-            with gr.Row():
-                add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
-                sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
-            ling.render()
-        #####################
-
-        estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
-        estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
-        # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
-        estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
-        # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
-        rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
-        copy_btn.click(copy, inputs=[ling], outputs=[ling])
-        generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
-        generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
-                outputs=[sent2, interpolation, ling])
-        # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
-        # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
-        add_btn.click(add, inputs=[ling], outputs=[ling])
-        sub_btn.click(sub, inputs=[ling], outputs=[ling])
-        # add_noise_btn.click(add_noise, inputs=[ling], outputs=[ling])
-
-        group1 = [generate_random_btn, count]
-        group2 = [estimate_gen_btn, sent_ling_gen]
-        group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
-        components = group1 + group2 + group3
-        mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
-        control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
-                outputs=[interpolation])
-
-    demo.launch(share=True)
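Distilled, the quality-control loop in the deleted `generate_with_feedback` is: generate, re-estimate the output's linguistic embedding with the discriminator, and, if it is still far from the request, pull the requested embedding toward what was actually realized. A minimal sketch of just that loop (`model.infer`, `model.ling_disc`, and `model.ling_embed` are this repo's methods; the tolerances mirror the deleted code):

```python
import torch

def feedback_loop(model, tokenizer, inputs, tgt_embed, eta=0.3, tol=10, max_iter=50):
    for _ in range(max_iter):
        with torch.no_grad():
            pred = model.infer(inputs)                        # generate candidate ids
        ling_pred = model.ling_disc(input_ids=pred,
                                    attention_mask=torch.ones_like(pred))
        pred_embed = model.ling_embed(ling_pred)              # indices actually realized
        if torch.mean((tgt_embed - pred_embed) ** 2) < tol:   # close enough: stop
            break
        tgt_embed = tgt_embed + eta * (pred_embed - tgt_embed)  # relax the target
        inputs['sent2_ling_embed'] = tgt_embed
    return tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]
```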
lftk_ids.csv
ADDED
@@ -0,0 +1,221 @@
@@ -0,0 +1,221 @@
+key,name,formulation,domain,family,language
+t_word,total_number_of_words,foundation,surface,wordsent,general
+t_stopword,total_number_of_stop_words,foundation,surface,wordsent,general
+t_punct,total_number_of_puntuations,foundation,syntax,wordsent,general
+t_syll,total_number_of_syllables,foundation,surface,wordsent,en
+t_syll2,total_number_of_words_more_than_two_syllables,foundation,surface,wordsent,en
+t_syll3,total_number_of_words_more_than_three_syllables,foundation,surface,wordsent,en
+t_uword,total_number_of_unique_words,foundation,surface,wordsent,general
+t_sent,total_number_of_sentences,foundation,surface,wordsent,general
+t_char,total_number_of_characters,foundation,surface,wordsent,general
+a_word_ps,average_number_of_words_per_sentence,derivation,surface,avgwordsent,general
+a_char_ps,average_number_of_characters_per_sentence,derivation,surface,avgwordsent,general
+a_char_pw,average_number_of_characters_per_word,derivation,surface,avgwordsent,general
+a_syll_ps,average_number_of_syllables_per_sentence,derivation,surface,avgwordsent,en
+a_syll_pw,average_number_of_syllables_per_word,derivation,surface,avgwordsent,en
+a_stopword_ps,average_number_of_stop_words_per_sentence,derivation,surface,avgwordsent,en
+a_stopword_pw,average_number_of_stop_words_per_word,derivation,surface,avgwordsent,en
+t_kup,total_kuperman_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+t_bry,total_brysbaert_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+t_subtlex_us_zipf,total_subtlex_us_zipf_of_words,foundation,lexico-semantics,worddiff,en
+a_kup_pw,average_kuperman_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_bry_pw,average_brysbaert_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_kup_ps,average_kuperman_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+a_bry_ps,average_brysbaert_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+a_subtlex_us_zipf_pw,average_subtlex_us_zipf_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_subtlex_us_zipf_ps,average_subtlex_us_zipf_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+t_n_ent,total_number_of_named_entities,foundation,discourse,entity,general
+t_n_ent_person,total_number_of_named_entities_person,foundation,discourse,entity,en
+t_n_ent_norp,total_number_of_named_entities_norp,foundation,discourse,entity,en
+t_n_ent_fac,total_number_of_named_entities_fac,foundation,discourse,entity,en
+t_n_ent_org,total_number_of_named_entities_org,foundation,discourse,entity,en
+t_n_ent_gpe,total_number_of_named_entities_gpe,foundation,discourse,entity,en
+t_n_ent_loc,total_number_of_named_entities_loc,foundation,discourse,entity,en
+t_n_ent_product,total_number_of_named_entities_product,foundation,discourse,entity,en
+t_n_ent_event,total_number_of_named_entities_event,foundation,discourse,entity,en
+t_n_ent_art,total_number_of_named_entities_art,foundation,discourse,entity,en
+t_n_ent_law,total_number_of_named_entities_law,foundation,discourse,entity,en
+t_n_ent_language,total_number_of_named_entities_language,foundation,discourse,entity,en
+t_n_ent_date,total_number_of_named_entities_date,foundation,discourse,entity,en
+t_n_ent_time,total_number_of_named_entities_time,foundation,discourse,entity,en
+t_n_ent_percent,total_number_of_named_entities_percent,foundation,discourse,entity,en
+t_n_ent_money,total_number_of_named_entities_money,foundation,discourse,entity,en
+t_n_ent_quantity,total_number_of_named_entities_quantity,foundation,discourse,entity,en
+t_n_ent_ordinal,total_number_of_named_entities_ordinal,foundation,discourse,entity,en
+t_n_ent_cardinal,total_number_of_named_entities_cardinal,foundation,discourse,entity,en
+a_n_ent_pw,average_number_of_named_entities_per_word,derivation,discourse,avgentity,general
+a_n_ent_person_pw,average_number_of_named_entities_person_per_word,derivation,discourse,avgentity,en
+a_n_ent_norp_pw,average_number_of_named_entities_norp_per_word,derivation,discourse,avgentity,en
+a_n_ent_fac_pw,average_number_of_named_entities_fac_per_word,derivation,discourse,avgentity,en
+a_n_ent_org_pw,average_number_of_named_entities_org_per_word,derivation,discourse,avgentity,en
+a_n_ent_gpe_pw,average_number_of_named_entities_gpe_per_word,derivation,discourse,avgentity,en
+a_n_ent_loc_pw,average_number_of_named_entities_loc_per_word,derivation,discourse,avgentity,en
+a_n_ent_product_pw,average_number_of_named_entities_product_per_word,derivation,discourse,avgentity,en
+a_n_ent_event_pw,average_number_of_named_entities_event_per_word,derivation,discourse,avgentity,en
+a_n_ent_art_pw,average_number_of_named_entities_art_per_word,derivation,discourse,avgentity,en
+a_n_ent_law_pw,average_number_of_named_entities_law_per_word,derivation,discourse,avgentity,en
+a_n_ent_language_pw,average_number_of_named_entities_language_per_word,derivation,discourse,avgentity,en
+a_n_ent_date_pw,average_number_of_named_entities_date_per_word,derivation,discourse,avgentity,en
+a_n_ent_time_pw,average_number_of_named_entities_time_per_word,derivation,discourse,avgentity,en
+a_n_ent_percent_pw,average_number_of_named_entities_percent_per_word,derivation,discourse,avgentity,en
+a_n_ent_money_pw,average_number_of_named_entities_money_per_word,derivation,discourse,avgentity,en
+a_n_ent_quantity_pw,average_number_of_named_entities_quantity_per_word,derivation,discourse,avgentity,en
+a_n_ent_ordinal_pw,average_number_of_named_entities_ordinal_per_word,derivation,discourse,avgentity,en
+a_n_ent_cardinal_pw,average_number_of_named_entities_cardinal_per_word,derivation,discourse,avgentity,en
+a_n_ent_ps,average_number_of_named_entities_per_sentence,derivation,discourse,avgentity,general
+a_n_ent_person_ps,average_number_of_named_entities_person_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_norp_ps,average_number_of_named_entities_norp_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_fac_ps,average_number_of_named_entities_fac_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_org_ps,average_number_of_named_entities_org_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_gpe_ps,average_number_of_named_entities_gpe_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_loc_ps,average_number_of_named_entities_loc_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_product_ps,average_number_of_named_entities_product_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_event_ps,average_number_of_named_entities_event_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_art_ps,average_number_of_named_entities_art_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_law_ps,average_number_of_named_entities_law_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_language_ps,average_number_of_named_entities_language_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_date_ps,average_number_of_named_entities_date_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_time_ps,average_number_of_named_entities_time_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_percent_ps,average_number_of_named_entities_percent_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_money_ps,average_number_of_named_entities_money_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_quantity_ps,average_number_of_named_entities_quantity_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_ordinal_ps,average_number_of_named_entities_ordinal_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_cardinal_ps,average_number_of_named_entities_cardinal_per_sentence,derivation,discourse,avgentity,en
+simp_adj_var,simple_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_adp_var,simple_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_adv_var,simple_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_aux_var,simple_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_cconj_var,simple_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_det_var,simple_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_intj_var,simple_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_noun_var,simple_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_num_var,simple_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_part_var,simple_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_pron_var,simple_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_propn_var,simple_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_punct_var,simple_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_sconj_var,simple_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_sym_var,simple_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_verb_var,simple_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_space_var,simple_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adj_var,root_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adp_var,root_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adv_var,root_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+root_aux_var,root_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+root_cconj_var,root_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_det_var,root_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+root_intj_var,root_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+root_noun_var,root_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_num_var,root_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+root_part_var,root_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+root_pron_var,root_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_propn_var,root_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_punct_var,root_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+root_sconj_var,root_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_sym_var,root_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+root_verb_var,root_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+root_space_var,root_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adj_var,corrected_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adp_var,corrected_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adv_var,corrected_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_aux_var,corrected_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_cconj_var,corrected_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_det_var,corrected_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_intj_var,corrected_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_noun_var,corrected_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_num_var,corrected_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_part_var,corrected_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_pron_var,corrected_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_propn_var,corrected_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_punct_var,corrected_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_sconj_var,corrected_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_sym_var,corrected_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_verb_var,corrected_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_space_var,corrected_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_ttr,simple_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+root_ttr,root_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+corr_ttr,corrected_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+bilog_ttr,bilogarithmic_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+uber_ttr,uber_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+simp_ttr_no_lem,simple_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+root_ttr_no_lem,root_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+corr_ttr_no_lem,corrected_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+bilog_ttr_no_lem,bilogarithmic_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+uber_ttr_no_lem,uber_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+n_adj,total_number_of_adjectives,foundation,syntax,partofspeech,general
+n_adp,total_number_of_adpositions,foundation,syntax,partofspeech,general
+n_adv,total_number_of_adverbs,foundation,syntax,partofspeech,general
+n_aux,total_number_of_auxiliaries,foundation,syntax,partofspeech,general
+n_cconj,total_number_of_coordinating_conjunctions,foundation,syntax,partofspeech,general
+n_det,total_number_of_determiners,foundation,syntax,partofspeech,general
+n_intj,total_number_of_interjections,foundation,syntax,partofspeech,general
+n_noun,total_number_of_nouns,foundation,syntax,partofspeech,general
+n_num,total_number_of_numerals,foundation,syntax,partofspeech,general
+n_part,total_number_of_particles,foundation,syntax,partofspeech,general
+n_pron,total_number_of_pronouns,foundation,syntax,partofspeech,general
+n_propn,total_number_of_proper_nouns,foundation,syntax,partofspeech,general
+n_punct,total_number_of_punctuations,foundation,syntax,partofspeech,general
+n_sconj,total_number_of_subordinating_conjunctions,foundation,syntax,partofspeech,general
+n_sym,total_number_of_symbols,foundation,syntax,partofspeech,general
+n_verb,total_number_of_verbs,foundation,syntax,partofspeech,general
+n_space,total_number_of_spaces,foundation,syntax,partofspeech,general
+n_uadj,total_number_of_unique_adjectives,foundation,syntax,partofspeech,general
+n_uadp,total_number_of_unique_adpositions,foundation,syntax,partofspeech,general
+n_uadv,total_number_of_unique_adverbs,foundation,syntax,partofspeech,general
+n_uaux,total_number_of_unique_auxiliaries,foundation,syntax,partofspeech,general
+n_ucconj,total_number_of_unique_coordinating_conjunctions,foundation,syntax,partofspeech,general
+n_udet,total_number_of_unique_determiners,foundation,syntax,partofspeech,general
+n_uintj,total_number_of_unique_interjections,foundation,syntax,partofspeech,general
+n_unoun,total_number_of_unique_nouns,foundation,syntax,partofspeech,general
+n_unum,total_number_of_unique_numerals,foundation,syntax,partofspeech,general
+n_upart,total_number_of_unique_particles,foundation,syntax,partofspeech,general
+n_upron,total_number_of_unique_pronouns,foundation,syntax,partofspeech,general
+n_upropn,total_number_of_unique_proper_nouns,foundation,syntax,partofspeech,general
+n_upunct,total_number_of_unique_punctuations,foundation,syntax,partofspeech,general
+n_usconj,total_number_of_unique_subordinating_conjunctions,foundation,syntax,partofspeech,general
+n_usym,total_number_of_unique_symbols,foundation,syntax,partofspeech,general
+n_uverb,total_number_of_unique_verbs,foundation,syntax,partofspeech,general
+n_uspace,total_number_of_unique_spaces,foundation,syntax,partofspeech,general
+a_adj_pw,average_number_of_adjectives_per_word,derivation,syntax,avgpartofspeech,general
+a_adp_pw,average_number_of_adpositions_per_word,derivation,syntax,avgpartofspeech,general
+a_adv_pw,average_number_of_adverbs_per_word,derivation,syntax,avgpartofspeech,general
+a_aux_pw,average_number_of_auxiliaries_per_word,derivation,syntax,avgpartofspeech,general
+a_cconj_pw,average_number_of_coordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+a_det_pw,average_number_of_determiners_per_word,derivation,syntax,avgpartofspeech,general
+a_intj_pw,average_number_of_interjections_per_word,derivation,syntax,avgpartofspeech,general
+a_noun_pw,average_number_of_nouns_per_word,derivation,syntax,avgpartofspeech,general
+a_num_pw,average_number_of_numerals_per_word,derivation,syntax,avgpartofspeech,general
+a_part_pw,average_number_of_particles_per_word,derivation,syntax,avgpartofspeech,general
+a_pron_pw,average_number_of_pronouns_per_word,derivation,syntax,avgpartofspeech,general
+a_propn_pw,average_number_of_proper_nouns_per_word,derivation,syntax,avgpartofspeech,general
+a_punct_pw,average_number_of_punctuations_per_word,derivation,syntax,avgpartofspeech,general
+a_sconj_pw,average_number_of_subordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+a_sym_pw,average_number_of_symbols_per_word,derivation,syntax,avgpartofspeech,general
+a_verb_pw,average_number_of_verbs_per_word,derivation,syntax,avgpartofspeech,general
+a_space_pw,average_number_of_spaces_per_word,derivation,syntax,avgpartofspeech,general
+a_adj_ps,average_number_of_adjectives_per_sentence,derivation,syntax,avgpartofspeech,general
+a_adp_ps,average_number_of_adpositions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_adv_ps,average_number_of_adverbs_per_sentence,derivation,syntax,avgpartofspeech,general
+a_aux_ps,average_number_of_auxiliaries_per_sentence,derivation,syntax,avgpartofspeech,general
+a_cconj_ps,average_number_of_coordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_det_ps,average_number_of_determiners_per_sentence,derivation,syntax,avgpartofspeech,general
+a_intj_ps,average_number_of_interjections_per_sentence,derivation,syntax,avgpartofspeech,general
+a_noun_ps,average_number_of_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_num_ps,average_number_of_numerals_per_sentence,derivation,syntax,avgpartofspeech,general
+a_part_ps,average_number_of_particles_per_sentence,derivation,syntax,avgpartofspeech,general
+a_pron_ps,average_number_of_pronouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_propn_ps,average_number_of_proper_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_punct_ps,average_number_of_punctuations_per_sentence,derivation,syntax,avgpartofspeech,general
+a_sconj_ps,average_number_of_subordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_sym_ps,average_number_of_symbols_per_sentence,derivation,syntax,avgpartofspeech,general
+a_verb_ps,average_number_of_verbs_per_sentence,derivation,syntax,avgpartofspeech,general
+a_space_ps,average_number_of_spaces_per_sentence,derivation,syntax,avgpartofspeech,general
+fkre,flesch_kincaid_reading_ease,derivation,surface,readformula,en
+fkgl,flesch_kincaid_grade_level,derivation,surface,readformula,en
+fogi,gunning_fog_index,derivation,surface,readformula,en
+smog,smog_index,derivation,surface,readformula,en
+cole,coleman_liau_index,derivation,surface,readformula,en
+auto,automated_readability_index,derivation,surface,readformula,en
+rt_fast,reading_time_for_fast_readers,derivation,surface,readtimeformula,en
+rt_average,reading_time_for_average_readers,derivation,surface,readtimeformula,en
+rt_slow,reading_time_for_slow_readers,derivation,surface,readtimeformula,en
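Note: the 221 lines above are the new lftk_ids.csv feature index, a header row plus 220 handcrafted linguistic features, each tagged with its formulation (foundation vs. derivation), domain, family, and language scope. As a minimal sketch of how such an index file could be consumed downstream (the pandas code below is illustrative and not part of this commit; only the file name and column names come from the diff):

import pandas as pd

# Load the feature index added by this commit (header + 220 feature rows).
ids = pd.read_csv('lftk_ids.csv')

# Keys in file order, e.g. to label 220-dimensional feature vectors.
feature_keys = ids['key'].tolist()

# The taxonomy columns support simple filtering, e.g. language-agnostic
# surface features only.
subset = ids[(ids['language'] == 'general') & (ids['domain'] == 'surface')]
print(len(ids), len(subset), feature_keys[:3])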
model.py
CHANGED
@@ -504,7 +504,7 @@ class EncoderDecoderVAE(nn.Module):
         dec_output, _ = self.infer_with_cache(batch)
         return dec_output
 
-    def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer
+    def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer):
         from torch.autograd import grad
         interpolations = []
         def line_search():
@@ -519,8 +519,6 @@
             new_loss, pred = get_loss(param_)
             max_len = pred.shape[1]
             lens = torch.where(pred == self.eos_token_id, 1, 0).argmax(-1) + 1
-            # if lens.item() == 1:
-            #     patience -= 1
             batch.update({
                 'sentence2_input_ids': pred,
                 'sentence2_attention_mask': sequence_mask(lens, max_len = max_len)
@@ -528,8 +526,6 @@
             sem_prob = torch.sigmoid(sem_emb(**batch)).item()
             # if sem_prob <= 0.1:
             #     patience -= 1
-            # f.write(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
-            # print(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
             if new_loss < loss and sem_prob >= 0.90 and lens.item() > 1:
                 return param_
             eta *= 2.25
@@ -565,18 +561,11 @@
         elif self.args.feedback_param == 'logits':
             logits = self.infer_with_cache(batch)[1]['scores']
             param = torch.nn.Parameter(logits, requires_grad = True)
-        f = open(self.args.fb_log, 'a') if self.args.fb_log else None
         target_np = batch['sentence2_ling'][0].cpu().numpy()
         while True:
             loss, pred = get_loss(param)
             pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
                     skip_special_tokens=True)[0]
-            if f:
-                # from compute_lng import compute_lng
-                # lng_pred = scaler.transform(np.array([compute_lng(pred_text)])[:,used_indices])[0]
-                # real_loss = np.mean((lng_pred - target_np)**2)
-                # f.write(f'Loss: {loss.item():.2f}\tReal loss:{real_loss:.2f}\t{pred_text}\n')
-                f.write(f'*** [{loss.item():.2f}], {pred_text}\n')
             interpolations.append(pred_text)
             if loss < 1:
                 break
@@ -585,65 +574,8 @@
             param = line_search()
             if param is False:
                 break
-        if f:
-            f.write(f'[return] {pred_text}\n\n')
-            f.close()
         return pred, [pred_text, interpolations]
 
-    def infer_with_feedback(self, ling_disc, batch, tokenizer, scaler, approx=False):
-        interpolations = []
-        converged = False
-        c = 0
-        eta = 0.3
-        use_embed = True
-        if use_embed:
-            ling1_embed = self.ling_embed(batch['sentence1_ling'])
-            ling2_embed = self.ling_embed(batch['sentence2_ling'])
-            batch.update({
-                'sent1_ling_embed': ling1_embed,
-                'sent2_ling_embed': ling2_embed,
-            })
-        else:
-            ling2 = batch['sentence2_ling']
-        ling2_orig = batch['sentence2_ling'].clone()
-        while not converged:
-            with torch.no_grad():
-                pred = self.infer(batch)
-            inputs_pred = batch.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                    skip_special_tokens=True)[0]
-            if approx:
-                ling_pred = ling_disc(**inputs_pred)
-            else:
-                ling_pred = compute_lng(pred_text)
-                ling_pred = scaler.transform([ling_pred])[0]
-                ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-            if use_embed:
-                ling_pred_embed = self.ling_embed(ling_pred)
-            # diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-            # else:
-            diff = torch.mean((ling2_orig - ling_pred)**2)
-
-
-            # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 1e-1 or c == 6:
-                converged = True
-            elif use_embed:
-                ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                batch.update({'sent2_ling_embed': ling2_embed})
-            else:
-                ling2 = ling2 + eta * (ling_pred - ling2)
-                batch.update({'sentence2_ling': ling2})
-
-            c += 1
-
-            if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                interpolations.append(pred_text)
-
-            return [pred_text, interpolations]
-
 def set_grad(module, state):
     if module is not None:
         for p in module.parameters():
@@ -694,3 +626,42 @@ class LingDiscPipeline():
         with torch.no_grad():
             ling_pred = self.model(input_ids=inputs.input_ids.cuda())
         return ling_pred
+
+def get_model(args, tokenizer, device):
+    if args.pretrain_disc or args.disc_loss or args.disc_ckpt:
+        ling_disc = LingDisc(args.model_name, args.disc_type, args.disc_ckpt).to(device)
+    else:
+        ling_disc = None
+    if args.linggen_type != 'none':
+        ling_gen = LingGenerator(args).to(device)
+    if args.sem_loss or args.sem_ckpt:
+        if args.sem_loss_type == 'shared':
+            sem_emb = seld.backbone.encoder
+        elif args.sem_loss_type == 'dedicated':
+            sem_emb = SemEmb(T5EncoderModel.from_pretrained('google/flan-t5-base'), tokenizer.eos_token_id).to(device)
+        else:
+            raise NotImplementedError('Semantic loss type')
+    else:
+        sem_emb = None
+
+    if not args.pretrain_disc:
+        model = EncoderDecoderVAE(args, tokenizer.pad_token_id, tokenizer.eos_token_id).to(device)
+        if args.use_lora:
+            target_modules = ["Attention.k", "Attention.q", "Attention.v", "Attention.o", "lm_head", "wi_0", "wi_1", "wo"]
+            target_modules = '|'.join(f'(.*{module})' for module in target_modules)
+            target_modules = f'backbone.({target_modules})'
+            config = LoraConfig(
+                r=args.lora_r,
+                lora_alpha=args.lora_r * 2,
+                target_modules=target_modules,
+                lora_dropout=0.1,
+                bias="lora_only",
+                modules_to_save=['ling_embed'],
+            )
+            model = get_peft_model(model, config)
+            model.print_trainable_parameters()
+    else:
+        model = ling_disc
+
+    return model, ling_disc, sem_emb
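Note: the surviving infer_with_feedback_BP path above treats the linguistic embedding (or the decoder logits) as a differentiable parameter. Its line search grows the step size eta by a factor of 2.25 until the loss drops while the semantic-probability gate stays at or above 0.90, and signals failure by returning False. A minimal self-contained sketch of that pattern (get_loss and accept are stand-ins for the model-specific pieces; nothing below is verbatim from model.py):

import torch

def line_search_step(param, get_loss, accept, eta0=1e-3, growth=2.25, max_tries=8):
    # Gradient of the current loss with respect to the feedback parameter.
    loss = get_loss(param)
    (grad,) = torch.autograd.grad(loss, param)
    eta = eta0
    for _ in range(max_tries):
        candidate = (param - eta * grad).detach().requires_grad_(True)
        # Accept only if the loss drops AND the auxiliary gate passes,
        # mirroring `new_loss < loss and sem_prob >= 0.90` above.
        if get_loss(candidate) < loss and accept(candidate):
            return candidate
        eta *= growth  # same geometric growth as `eta *= 2.25`
    return False  # mirrors the `param is False` failure check

# Toy usage: pull a 2-d point toward the origin.
p = torch.nn.Parameter(torch.tensor([3.0, -2.0]))
new_p = line_search_step(p, lambda q: (q ** 2).sum(), lambda q: bool(torch.isfinite(q).all()))
print(new_p)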
options.py
CHANGED
@@ -1,9 +1,10 @@
+import os, json
 import argparse
+import numpy as np
 from datetime import datetime
-from const import
-import os, json
+from const import lftkplus_names
 from copy import deepcopy
-
+
 
 def parse_args(ckpt=None):
     parser = argparse.ArgumentParser()
@@ -97,8 +98,6 @@ def parse_args(ckpt=None):
 
     major_arg = args.major_arg
     to_restore = [
-        'total_steps','major_arg','gpu','demo', 'eval_only', 'save_predict', 'predict_fn', 'fudge', 'predict_with_feedback',
-        'feedback_param', 'fb_log', 'data_dir', 'data', 'disc_ckpt', 'disc_type', 'sem_ckpt', 'fudge_lambda', 'test_batch_size', 'src_lng'
     ] + args.to_restore
     to_restore = {k: args.__dict__[k] for k in to_restore}
 
@@ -130,7 +129,7 @@
     args.__dict__.update(to_restore)
     args.ckpt = ckpt
 
-    lng_names =
+    lng_names = lftkplus_names
     for i in range(len(args_list)):
        if args_list[i].lng_ids or args_list[i].lng_ids_idx:
            if args_list[i].lng_ids_idx:
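Note: the to_restore machinery that this commit trims works by snapshotting runtime-only CLI options, overlaying the checkpoint's saved arguments, and then writing the snapshot back so live flags win; with the hard-coded whitelist deleted, only args.to_restore controls what survives. A minimal sketch of that overlay pattern (hypothetical names; only the snapshot/update idiom mirrors parse_args above):

import argparse

def merge_ckpt_args(args, ckpt_args, to_restore):
    # Snapshot runtime-only options before they are overwritten.
    preserved = {k: args.__dict__[k] for k in to_restore}
    args.__dict__.update(ckpt_args)   # adopt training-time settings
    args.__dict__.update(preserved)   # runtime flags win
    return args

cli = argparse.Namespace(gpu=1, lr=None)
saved = {'lr': 3e-4, 'gpu': 0}
print(merge_ckpt_args(cli, saved, to_restore=['gpu']))  # Namespace(gpu=1, lr=0.0003)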
|