Spaces
mohdelgaar committed
Commit 674b430 • Parent(s): e048c03
Update layout and samples
Browse files
- app.py +340 -33
- assets/ling_collection.npy +3 -0
- assets/logo.png +0 -0
- assets/ratios.npy +3 -0
- assets/samples.bin +3 -0
- assets/scaler.bin +3 -0
- assets/stats.json +3 -0
- ckpt/ling_disc/checkpoint-41000/config.json +120 -0
- ckpt/ling_disc/checkpoint-41000/model.safetensors +3 -0
- ckpt/ling_disc/checkpoint-41000/optimizer.pt +3 -0
- ckpt/ling_disc/checkpoint-41000/rng_state.pth +3 -0
- ckpt/ling_disc/checkpoint-41000/scheduler.pt +3 -0
- ckpt/ling_disc/checkpoint-41000/special_tokens_map.json +119 -0
- ckpt/ling_disc/checkpoint-41000/spiece.model +3 -0
- ckpt/ling_disc/checkpoint-41000/tokenizer.json +0 -0
- ckpt/ling_disc/checkpoint-41000/tokenizer_config.json +938 -0
- ckpt/ling_disc/checkpoint-41000/trainer_state.json +636 -0
- ckpt/ling_disc/checkpoint-41000/training_args.bin +3 -0
- ckpt/ling_disc/config.json +120 -0
- ckpt/ling_disc/model.safetensors +3 -0
- ckpt/ling_disc/scaler.bin +3 -0
- ckpt/ling_disc/special_tokens_map.json +119 -0
- ckpt/ling_disc/spiece.model +3 -0
- ckpt/ling_disc/tokenizer.json +0 -0
- ckpt/ling_disc/tokenizer_config.json +938 -0
- ckpt/ling_disc/trainer_state.json +645 -0
- ckpt/ling_disc/training_args.bin +3 -0
- ckpt/model.json +82 -0
- ckpt/model.pt +3 -0
- ckpt/sem_emb.pt +3 -0
- compute_lng.py +2 -37
- demo.py +0 -371
- lftk_ids.csv +221 -0
- model.py +40 -69
- options.py +5 -6
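
The diffstat above mixes code changes with Git LFS payloads, so downloading app.py alone is not enough to run the Space at this revision. Below is a minimal sketch of fetching the whole tree with huggingface_hub; the Space id is not shown on this page, so `mohdelgaar/<space-name>` is a placeholder you would replace with the real repo id.

```python
from huggingface_hub import snapshot_download

# Hypothetical repo id (the Space name is not part of this page).
# `revision` pins the checkout to this commit, LFS assets included.
local_dir = snapshot_download(repo_id="mohdelgaar/<space-name>",
                              repo_type="space",
                              revision="674b430")
print(local_dir)
```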
app.py
CHANGED
@@ -3,60 +3,367 @@ import spacy
 # nltk.download('wordnet')
 # spacy.cli.download('en_core_web_sm')
 
-
-
+import torch
+import joblib, json
+import numpy as np
+import pandas as pd
+import gradio as gr
+from const import used_indices, name_map
 from model import get_model
 from options import parse_args
-import numpy as np
 from transformers import T5Tokenizer
-import torch
-import joblib
+from compute_lng import compute_lng
 
 
 def process_examples(samples, full_names):
-
-
-
-
-
-
-
-    return list(samples)
+    processed = []
+    for sample in samples:
+        processed.append([
+            sample['sentence1'],
+            pd.DataFrame({'Index': full_names, 'Source': sample['sentence1_ling'], 'Target': sample['sentence2_ling']})
+        ])
+    return processed
 
 args, args_list, lng_names = parse_args(ckpt='./ckpt/model.pt')
-print(args)
-exit()
 
 tokenizer = T5Tokenizer.from_pretrained(args.model_name)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-
-
-
-# ling_collection = np.load('assets/ling_collection.npy')
+lng_names = [name_map[x] for x in lng_names]
+examples = json.load(open('assets/examples.json'))
+examples = process_examples(examples, lng_names)
 
+stats = json.load(open('assets/stats.json'))
+
+ling_collection = np.load('assets/ling_collection.npy')
 scaler = joblib.load('assets/scaler.bin')
+scale_ratio = np.load('assets/ratios.npy')
+
 model, ling_disc, sem_emb = get_model(args, tokenizer, device)
 
 state = torch.load(args.ckpt, map_location=torch.device('cpu'))
 model.load_state_dict(state['model'], strict=True)
 model.eval()
-print(model is not None, ling_disc is not None, sem_emb is not None)
-exit()
-
-if args.disc_type == 't5':
-    state = torch.load(args.disc_ckpt)
-    if 'model' in state:
-        ling_disc.load_state_dict(state['model'], strict=False)
-    else:
-        ling_disc.load_state_dict(state, strict=False)
 ling_disc.eval()
 
 state = torch.load(args.sem_ckpt)
-if 'model' in state:
-    sem_emb.load_state_dict(state['model'], strict=False)
-else:
-    sem_emb.load_state_dict(state, strict=False)
+sem_emb.load_state_dict(state['model'], strict=True)
 sem_emb.eval()
 
-
+device = model.backbone.device
+
+############# Start demo code
+def round_ling(x):
+    is_int = stats['is_int']
+    mins = stats['min']
+    maxs = stats['max']
+    for i in range(len(x)):
+        # if is_int[i]:
+        #     x[i] = round(x[i])
+        # else:
+        #     x[i] = round(x[i], 3)
+        x[i] = round(x[i], 3)
+    return np.clip(x, mins, maxs)
+
+def visibility(mode):
+    if mode == 0:
+        vis_group = group1
+    elif mode == 1:
+        vis_group = group2
+    elif mode == 2:
+        vis_group = group3
+
+    output = [gr.update(value=''), gr.update(value='')]
+    for component in components:
+        if component in vis_group:
+            output.append(gr.update(visible=True))
+        else:
+            output.append(gr.update(visible=False))
+    return output
+
+def generate(sent1, ling):
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling1 = scaler.transform([ling['Source']])
+    ling2 = scaler.transform([ling['Target']])
+    inputs = {'sentence1_input_ids': input_ids,
+              'sentence1_ling': torch.tensor(ling1).float().to(device),
+              'sentence2_ling': torch.tensor(ling2).float().to(device),
+              'sentence1_attention_mask': torch.ones_like(input_ids)}
+    preds = []
+    with torch.no_grad():
+        pred = model.infer(inputs).cpu().numpy()
+        pred = tokenizer.batch_decode(pred,
+                                      skip_special_tokens=True)[0]
+
+    return pred
+
+def generate_with_feedback(sent1, ling, approx):
+    if sent1 == '':
+        return ['Please input a source text.', '']
+
+    input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+    ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
+    inputs = {
+        'sentence1_input_ids': input_ids,
+        'sentence2_ling': ling2,
+        'sentence1_attention_mask': torch.ones_like(input_ids)
+    }
+
+    pred, (pred_text, interpolations) = model.infer_with_feedback_BP(ling_disc, sem_emb, inputs, tokenizer)
+
+    interpolation = '-- ' + '\n-- '.join(interpolations)
+    return [pred_text, interpolation]
+
+def generate_random(sent1, ling, count, approx):
+    preds, interpolations = [], []
+    for c in range(count):
+        idx = np.random.randint(0, len(ling_collection))
+        ling_ex = ling_collection[idx]
+        ling['Target'] = ling_ex
+        pred, interpolation = generate_with_feedback(sent1, ling, approx)
+        preds.append(pred)
+        interpolations.append(interpolation)
+    return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
+
+def estimate_gen(sent1, sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    gen = generate_with_feedback(sent1, ling, approx)
+    results = gen + [ling]
+
+    return results
+
+def estimate_tgt(sent2, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent2))[used_indices]
+    else:
+        raise ValueError()
+
+    ling_pred = round_ling(ling_pred)
+    ling['Target'] = ling_pred
+    return ling
+
+def estimate_src(sent1, ling, approx):
+    if 'approximate' in approx:
+        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
+        with torch.no_grad():
+            ling_pred = ling_disc(input_ids=input_ids).cpu().numpy()
+        ling_pred = scaler.inverse_transform(ling_pred)[0]
+    elif 'exact' in approx:
+        ling_pred = np.array(compute_lng(sent1))[used_indices]
+    else:
+        raise ValueError()
+
+    ling['Source'] = ling_pred
+    return ling
+
+def rand_target(ling):
+    ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
+    return ling
+
+def rand_ex_target(ling):
+    idx = np.random.randint(0, len(ling_collection))
+    ling_ex = ling_collection[idx]
+    ling['Target'] = ling_ex
+    return ling
+
+def copy(ling):
+    ling['Target'] = ling['Source']
+    return ling
+
+def add(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] + scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+def sub(ling):
+    scale_stepsize = np.random.uniform(1.0, 5.0)
+    x = ling['Target'] - scale_stepsize * scale_ratio
+    x = round_ling(x)
+    ling['Target'] = x
+    return ling
+
+title = """
+<h1 style="text-align: center;">Controlled Paraphrase Generation with Linguistic Feature Control</h1>
+
+<p style="font-size:1.2em;">This system utilizes an encoder-decoder model to generate text with controlled complexity, guided by 40 linguistic complexity indices.
+The model can generate diverse paraphrases of a given sentence, each adjusted to maintain consistent meaning while varying
+in linguistic complexity according to the desired level.</p>
+<p style="font-size:1.2em;">It is important to note that not all index combinations are feasible (e.g., a sentence of "length" 5 with 10 "unique words").
+To ensure high-quality outputs, our approach interpolates the embeddings of linguistic indices to identify the closest,
+achievable set of indices for the given target.</p>
+"""
+
+guide = """
+You may use the system in one of the following ways:
+
+**Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
+of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
+and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
+
+**Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
+sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
+sentence along with another sentence (which will serve only to extract linguistic indices for the
+paraphrase generation). Then, click "Generate."
+
+**Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
+generated text. We provide a set of tools for manual adjustment of the desired linguistic complexity of
+the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
+generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
+These tools are designed for experimental use and require the user to possess linguistic expertise for
+effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
+indices." Once indices are entered, click "Generate."
+
+
+Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
+in quality control of the generation). Approximate computation is significantly faster.
+
+Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
+
+Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentence,
+the indices of the source sentence, and a sample set of target linguistic indices.
+
+Please make your choice below.
+
+"""
+
+sent1 = gr.Textbox(label='Source text')
+ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
+                    headers=['Index', 'Source', 'Target'],
+                    datatype=['str', 'number', 'number'], visible=False)
+css = """
+#guide span.svelte-1w6vloh {font-size: 22px !important; font-weight: 600 !important}
+#mode span.svelte-1gfkn6j {font-size: 18px !important; font-weight: 600 !important}
+#mode {border: 0px; box-shadow: none}
+#mode .block {padding: 0px}
+
+div.gradio-container {color: black}
+div.form {background: inherit}
+
+body {
+    --text-sm: 12px;
+    --text-md: 16px;
+    --text-lg: 18px;
+    --input-text-size: 16px;
+    --section-text-size: 16px;
+    --input-background: --neutral-50;
+}
+
+.separator {
+    width: 100%;
+    height: 3px; /* Adjust the height for boldness */
+    background-color: #000; /* Adjust the color as needed */
+    margin: 20px 0; /* Adjust the margin as needed */
+}
+"""
+
+with gr.Blocks(
+        theme=gr.themes.Default(
+            spacing_size=gr.themes.sizes.spacing_md,
+            text_size=gr.themes.sizes.text_md,
+        ),
+        css=css) as demo:
+    gr.Image('assets/logo.png', height=100, container=False, show_download_button=False)
+    gr.Markdown(title)
+    with gr.Accordion("🚀 Quick Start Guide", open=False, elem_id='guide'):
+        gr.Markdown(guide)
+
+    with gr.Group(elem_classes='separator'):
+        pass
+    with gr.Group(elem_id='mode'):
+        mode = gr.Radio(
+            value='Randomized Paraphrase Generation',
+            label='How would you like to use this system?',
+            type="index",
+            choices=['🔄 Randomized Paraphrase Generation',
+                     '⚖️ Complexity-Matched Paraphrasing',
+                     '🎛️ Manual Linguistic Control'],
+        )
+    with gr.Accordion("⚙️ Advanced Options", open=False):
+        approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
+                          choices=['Use approximate computation of linguistic indices (faster)',
+                                   'Use exact computation of linguistic indices'], container=False, show_label=False)
+        control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
+
+    with gr.Accordion("📑 Examples...", open=False):
+        gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
+
+    with gr.Row():
+        sent1.render()
+        with gr.Column():
+            sent2 = gr.Textbox(label='Generated text')
+            interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
+    with gr.Group(elem_classes='separator'):
+        pass
+    #####################
+    with gr.Row():
+        generate_random_btn = gr.Button("Generate",
+                                        variant='primary', scale=1, visible=True)
+        count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
+        # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
+        # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
+    #####################
+    with gr.Row():
+        estimate_gen_btn = gr.Button("Generate",
+                                     variant='primary',
+                                     scale=1, visible=False)
+        sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
+    #####################
+    generate_btn = gr.Button("Generate", variant='primary', visible=False)
+    with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
+        with gr.Row():
+            estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
+            sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
+            estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
+            # rand_btn = gr.Button("Random target")
+            rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
+            copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
+        with gr.Row():
+            sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
+            add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
+        ling.render()
+    #####################
+
+    estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
+    estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
+    # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
+    estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
+    # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
+    rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
+    copy_btn.click(copy, inputs=[ling], outputs=[ling])
+    generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
+    generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
+                              outputs=[sent2, interpolation, ling])
+    # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
+    # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
+    add_btn.click(add, inputs=[ling], outputs=[ling])
+    sub_btn.click(sub, inputs=[ling], outputs=[ling])
+
+    group1 = [generate_random_btn, count]
+    group2 = [estimate_gen_btn, sent_ling_gen]
+    group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
+    components = group1 + group2 + group3
+    mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
+    control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
+                                 outputs=[interpolation])
+
+print('Finished loading')
+demo.launch(share=True)
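
All of the new callbacks share one convention: `ling` is a three-column table ('Index', 'Source', 'Target'), only the 'Target' column steers generation, and `approx` chooses between the `ling_disc` regressor and the exact `compute_lng` features. Below is a minimal sketch of driving the pipeline without the Gradio UI, assuming the globals defined in app.py above (`lng_names`, `estimate_tgt`, `generate_with_feedback`) are in scope; the input sentences are made up.

```python
import pandas as pd

# Build the same table the gr.Dataframe component would hand to a callback.
ling = pd.DataFrame({'Index': lng_names,
                     'Source': [0.0] * len(lng_names),
                     'Target': [0.0] * len(lng_names)})

approx = 'Use approximate computation of linguistic indices (faster)'

# Fill Target from a reference sentence (Complexity-Matched Paraphrasing).
ling = estimate_tgt('The reference sentence to imitate.', ling, approx)

# Paraphrase the source, steering its indices toward Target.
pred_text, interpolation = generate_with_feedback(
    'The sentence we want rewritten.', ling, approx)
print(pred_text)
```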
assets/ling_collection.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10
+size 320128
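
This three-line stanza is not the array itself but a Git LFS pointer: `oid` is the SHA-256 digest of the real payload and `size` its byte count, which is how the 320 KB .npy file stays out of the git history. A small sketch for checking a fetched asset against its pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of a file, streamed in chunks; the digest Git LFS stores as `oid`."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Matches the pointer above once LFS has materialized the file:
# b1755705e1c6e2b40a091b7ec8b147c1e9b7dfac5a7c4f1e3d5ff092223a0a10
print(lfs_oid('assets/ling_collection.npy'))
```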
assets/logo.png
ADDED
assets/ratios.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc60ebcd53fd467fd7f3c9e9652fb9364285e2833325b6ab46b1c86e2e136b3a
+size 448
assets/samples.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5de4fd9314c1df65f14187cc13fb07300b3a359f57c9bd69ab834ef6148a8368
+size 80651
assets/scaler.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3dc9e74494b2049672441b0587cd73bae605b271941528ea585672bf48d1a84
+size 1414
assets/stats.json
ADDED
@@ -0,0 +1,3 @@
+{"min": [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 1.0, 2.0, 1.25, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -12.951, 0.004],
+ "max": [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 23.0, 100.0],
+ "is_int": [true, true, true, true, true, true, true, false, false, false, false, false, false, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false]}
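
These three parallel arrays give, for each of the 40 linguistic indices, its observed minimum, maximum, and whether it is integer-valued; `round_ling` in app.py above clips every requested target into this envelope. A short sketch of that clipping, mirroring `round_ling` (the `target` vector here is made up):

```python
import json
import numpy as np

stats = json.load(open('assets/stats.json'))

def clip_to_stats(x):
    # Same post-processing as round_ling in app.py: round to 3 decimals,
    # then clip each index into its observed [min, max] range.
    x = np.round(np.asarray(x, dtype=float), 3)
    return np.clip(x, stats['min'], stats['max'])

target = np.full(40, 150.0)          # deliberately out of range
print(clip_to_stats(target)[0])      # 100.0: pulled back to that index's max
print(clip_to_stats(target)[38])     # 23.0: each index has its own cap
```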
ckpt/ling_disc/checkpoint-41000/config.json
ADDED
@@ -0,0 +1,120 @@
+{
+  "_name_or_path": "microsoft/deberta-v3-small",
+  "architectures": [
+    "DebertaReplacedTokenizer"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
    ... (entries "2" through "38" continue the same pattern) ...
+    "39": "LABEL_39"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
    ... (entries for "LABEL_2" through "LABEL_38" continue the same pattern) ...
+    "LABEL_39": 39
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
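
The config describes a 6-layer deberta-v2 encoder run in regression mode with 40 outputs, one per linguistic index, i.e. the `ling_disc` discriminator that app.py uses for approximate index estimation. The `architectures` entry, `DebertaReplacedTokenizer`, is a custom class from this repo (loaded via `get_model`), so `AutoModel` will not resolve it by name; the config itself, though, loads with stock transformers:

```python
from transformers import AutoConfig

# Reads config.json from the local checkpoint directory.
cfg = AutoConfig.from_pretrained('ckpt/ling_disc')

print(cfg.model_type)     # deberta-v2
print(cfg.problem_type)   # regression
print(cfg.num_labels)     # 40: one regression target per linguistic index
```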
ckpt/ling_disc/checkpoint-41000/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
+size 275252064
ckpt/ling_disc/checkpoint-41000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dce4669eb4c8d092887dca957afda50838e0d8821093ac6ec80dfc38c786041
+size 550568634
ckpt/ling_disc/checkpoint-41000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b153bd123a079b6d0ee9f3616a0498be47197aca1c9c7764282514bc91fdc08d
+size 14244
ckpt/ling_disc/checkpoint-41000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7600c7adf0e16517c635d7b3eee259739a7966140efc08f2afff26d19bb4fb29
+size 1064
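
Together with model.safetensors, the optimizer, scheduler, and RNG-state files make checkpoint-41000 a complete Hugging Face Trainer checkpoint rather than inference-only weights. A sketch, assuming `trainer` is a `transformers.Trainer` rebuilt with the same model, data, and arguments as the original run:

```python
# Resumes optimizer, LR schedule, and RNG state exactly at step 41000.
trainer.train(resume_from_checkpoint='ckpt/ling_disc/checkpoint-41000')
```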
ckpt/ling_disc/checkpoint-41000/special_tokens_map.json
ADDED
@@ -0,0 +1,119 @@
+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
    ... ("<extra_id_2>" through "<extra_id_98>" continue the same pattern) ...
+    "<extra_id_99>"
+  ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
ckpt/ling_disc/checkpoint-41000/spiece.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656
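
spiece.model is a T5-style SentencePiece vocabulary (note the 100 `<extra_id_*>` sentinels in the surrounding token maps), which fits both the custom `DebertaReplacedTokenizer` architecture name and app.py loading a `T5Tokenizer` rather than DeBERTa's own tokenizer. A sketch, assuming the checkpoint directory is used as a local tokenizer path:

```python
from transformers import T5Tokenizer

# The directory bundles spiece.model, tokenizer_config.json and
# special_tokens_map.json, so it loads like any local T5 tokenizer.
tok = T5Tokenizer.from_pretrained('ckpt/ling_disc/checkpoint-41000')
print(tok.tokenize('Controlled paraphrase generation with linguistic features.'))
```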
ckpt/ling_disc/checkpoint-41000/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
ckpt/ling_disc/checkpoint-41000/tokenizer_config.json
ADDED
@@ -0,0 +1,938 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<pad>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "</s>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "<unk>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"32000": {
|
28 |
+
"content": "<extra_id_99>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"32001": {
|
36 |
+
"content": "<extra_id_98>",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
},
|
43 |
+
"32002": {
|
44 |
+
"content": "<extra_id_97>",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": true
|
50 |
+
},
|
51 |
+
"32003": {
|
52 |
+
"content": "<extra_id_96>",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": false,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": true
|
58 |
+
},
|
59 |
+
"32004": {
|
60 |
+
"content": "<extra_id_95>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": false,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": true
|
66 |
+
},
|
67 |
+
"32005": {
|
68 |
+
"content": "<extra_id_94>",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": false,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": true
|
74 |
+
},
|
75 |
+
"32006": {
|
76 |
+
"content": "<extra_id_93>",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": false,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": true
|
82 |
+
},
|
83 |
+
"32007": {
|
84 |
+
"content": "<extra_id_92>",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": false,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": true
|
90 |
+
},
|
91 |
+
"32008": {
|
92 |
+
"content": "<extra_id_91>",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": false,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": true
|
98 |
+
},
|
99 |
+
"32009": {
|
100 |
+
"content": "<extra_id_90>",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": false,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": true
|
106 |
+
},
|
107 |
+
"32010": {
|
108 |
+
"content": "<extra_id_89>",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": false,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": true
|
114 |
+
},
|
115 |
+
"32011": {
|
116 |
+
"content": "<extra_id_88>",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": false,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": true
|
122 |
+
},
|
123 |
+
"32012": {
|
124 |
+
"content": "<extra_id_87>",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": false,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": true
|
130 |
+
},
|
131 |
+
"32013": {
|
132 |
+
"content": "<extra_id_86>",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": false,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": true
|
138 |
+
},
|
139 |
+
"32014": {
|
140 |
+
"content": "<extra_id_85>",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": false,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": true
|
146 |
+
},
|
147 |
+
"32015": {
|
148 |
+
"content": "<extra_id_84>",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": false,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": true
|
154 |
+
},
|
155 |
+
"32016": {
|
156 |
+
"content": "<extra_id_83>",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": false,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": true
|
162 |
+
},
|
163 |
+
"32017": {
|
164 |
+
"content": "<extra_id_82>",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": false,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": true
|
170 |
+
},
|
171 |
+
"32018": {
|
172 |
+
"content": "<extra_id_81>",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": false,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": true
|
178 |
+
},
|
179 |
+
"32019": {
|
180 |
+
"content": "<extra_id_80>",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": false,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": true
|
186 |
+
},
|
187 |
+
"32020": {
|
188 |
+
"content": "<extra_id_79>",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": false,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": true
|
194 |
+
},
|
195 |
+
"32021": {
|
196 |
+
"content": "<extra_id_78>",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": false,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": true
|
202 |
+
},
|
203 |
+
"32022": {
|
204 |
+
"content": "<extra_id_77>",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": false,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": true
|
210 |
+
},
|
211 |
+
"32023": {
|
212 |
+
"content": "<extra_id_76>",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": false,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": true
|
218 |
+
},
|
219 |
+
"32024": {
|
220 |
+
"content": "<extra_id_75>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"32025": {
|
228 |
+
"content": "<extra_id_74>",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"32026": {
|
236 |
+
"content": "<extra_id_73>",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"32027": {
|
244 |
+
"content": "<extra_id_72>",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"32028": {
|
252 |
+
"content": "<extra_id_71>",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"32029": {
|
260 |
+
"content": "<extra_id_70>",
|
261 |
+
"lstrip": false,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"32030": {
|
268 |
+
"content": "<extra_id_69>",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": false,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": true
|
274 |
+
},
|
275 |
+
"32031": {
|
276 |
+
"content": "<extra_id_68>",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": false,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": true
|
282 |
+
},
|
283 |
+
"32032": {
|
284 |
+
"content": "<extra_id_67>",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": false,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": true
|
290 |
+
},
|
291 |
+
"32033": {
|
292 |
+
"content": "<extra_id_66>",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": false,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": true
|
298 |
+
},
|
299 |
+
"32034": {
|
300 |
+
"content": "<extra_id_65>",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": false,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": true
|
306 |
+
},
|
307 |
+
"32035": {
|
308 |
+
"content": "<extra_id_64>",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": false,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": true
|
314 |
+
},
|
315 |
+
"32036": {
|
316 |
+
"content": "<extra_id_63>",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": false,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": true
|
322 |
+
},
|
323 |
+
"32037": {
|
324 |
+
"content": "<extra_id_62>",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": false,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": true
|
330 |
+
},
|
331 |
+
"32038": {
|
332 |
+
"content": "<extra_id_61>",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": false,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": true
|
338 |
+
},
|
339 |
+
"32039": {
|
340 |
+
"content": "<extra_id_60>",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": false,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": true
|
346 |
+
},
|
347 |
+
"32040": {
|
348 |
+
"content": "<extra_id_59>",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": false,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": true
|
354 |
+
},
|
355 |
+
"32041": {
|
356 |
+
"content": "<extra_id_58>",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": false,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": true
|
362 |
+
},
|
363 |
+
"32042": {
|
364 |
+
"content": "<extra_id_57>",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": false,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": true
|
370 |
+
},
|
371 |
+
"32043": {
|
372 |
+
"content": "<extra_id_56>",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": false,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": true
|
378 |
+
},
|
379 |
+
"32044": {
|
380 |
+
"content": "<extra_id_55>",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": false,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": true
|
386 |
+
},
|
387 |
+
"32045": {
|
388 |
+
"content": "<extra_id_54>",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": false,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": true
|
394 |
+
},
|
395 |
+
"32046": {
|
396 |
+
"content": "<extra_id_53>",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": false,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": true
|
402 |
+
},
|
403 |
+
"32047": {
|
404 |
+
"content": "<extra_id_52>",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": false,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": true
|
410 |
+
},
|
411 |
+
"32048": {
|
412 |
+
"content": "<extra_id_51>",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": false,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": true
|
418 |
+
},
|
419 |
+
"32049": {
|
420 |
+
"content": "<extra_id_50>",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": false,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": true
|
426 |
+
},
|
427 |
+
"32050": {
|
428 |
+
"content": "<extra_id_49>",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": false,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": true
|
434 |
+
},
|
435 |
+
"32051": {
|
436 |
+
"content": "<extra_id_48>",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": false,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": true
|
442 |
+
},
|
443 |
+
"32052": {
|
444 |
+
"content": "<extra_id_47>",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": false,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": true
|
450 |
+
},
|
451 |
+
"32053": {
|
452 |
+
"content": "<extra_id_46>",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": false,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": true
|
458 |
+
},
|
459 |
+
"32054": {
|
460 |
+
"content": "<extra_id_45>",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": false,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": true
|
466 |
+
},
|
467 |
+
"32055": {
|
468 |
+
"content": "<extra_id_44>",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": false,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": true
|
474 |
+
},
|
475 |
+
"32056": {
|
476 |
+
"content": "<extra_id_43>",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": false,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": true
|
482 |
+
},
|
483 |
+
"32057": {
|
484 |
+
"content": "<extra_id_42>",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": false,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": true
|
490 |
+
},
|
491 |
+
"32058": {
|
492 |
+
"content": "<extra_id_41>",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": false,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": true
|
498 |
+
},
|
499 |
+
"32059": {
|
500 |
+
"content": "<extra_id_40>",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": false,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": true
|
506 |
+
},
|
507 |
+
"32060": {
|
508 |
+
"content": "<extra_id_39>",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": false,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": true
|
514 |
+
},
|
515 |
+
"32061": {
|
516 |
+
"content": "<extra_id_38>",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": false,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": true
|
522 |
+
},
|
523 |
+
"32062": {
|
524 |
+
"content": "<extra_id_37>",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": false,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": true
|
530 |
+
},
|
531 |
+
"32063": {
|
532 |
+
"content": "<extra_id_36>",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": false,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": true
|
538 |
+
},
|
539 |
+
"32064": {
|
540 |
+
"content": "<extra_id_35>",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": false,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": true
|
546 |
+
},
|
547 |
+
"32065": {
|
548 |
+
"content": "<extra_id_34>",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": false,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": true
|
554 |
+
},
|
555 |
+
"32066": {
|
556 |
+
"content": "<extra_id_33>",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": false,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": true
|
562 |
+
},
|
563 |
+
"32067": {
|
564 |
+
"content": "<extra_id_32>",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": false,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": true
|
570 |
+
},
|
571 |
+
"32068": {
|
572 |
+
"content": "<extra_id_31>",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": false,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": true
|
578 |
+
},
|
579 |
+
"32069": {
|
580 |
+
"content": "<extra_id_30>",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": false,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": true
|
586 |
+
},
|
587 |
+
"32070": {
|
588 |
+
"content": "<extra_id_29>",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": false,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": true
|
594 |
+
},
|
595 |
+
"32071": {
|
596 |
+
"content": "<extra_id_28>",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": false,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": true
|
602 |
+
},
|
603 |
+
"32072": {
|
604 |
+
"content": "<extra_id_27>",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": false,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": true
|
610 |
+
},
|
611 |
+
"32073": {
|
612 |
+
"content": "<extra_id_26>",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": false,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": true
|
618 |
+
},
|
619 |
+
"32074": {
|
620 |
+
"content": "<extra_id_25>",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": false,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": true
|
626 |
+
},
|
627 |
+
"32075": {
|
628 |
+
"content": "<extra_id_24>",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": false,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": true
|
634 |
+
},
|
635 |
+
"32076": {
|
636 |
+
"content": "<extra_id_23>",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": false,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": true
|
642 |
+
},
|
643 |
+
"32077": {
|
644 |
+
"content": "<extra_id_22>",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": false,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": true
|
650 |
+
},
|
651 |
+
"32078": {
|
652 |
+
"content": "<extra_id_21>",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": false,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": true
|
658 |
+
},
|
659 |
+
"32079": {
|
660 |
+
"content": "<extra_id_20>",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": false,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": true
|
666 |
+
},
|
667 |
+
"32080": {
|
668 |
+
"content": "<extra_id_19>",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": false,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": true
|
674 |
+
},
|
675 |
+
"32081": {
|
676 |
+
"content": "<extra_id_18>",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": false,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": true
|
682 |
+
},
|
683 |
+
"32082": {
|
684 |
+
"content": "<extra_id_17>",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": false,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": true
|
690 |
+
},
|
691 |
+
"32083": {
|
692 |
+
"content": "<extra_id_16>",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": false,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": true
|
698 |
+
},
|
699 |
+
"32084": {
|
700 |
+
"content": "<extra_id_15>",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": false,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": true
|
706 |
+
},
|
707 |
+
"32085": {
|
708 |
+
"content": "<extra_id_14>",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": false,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": true
|
714 |
+
},
|
715 |
+
"32086": {
|
716 |
+
"content": "<extra_id_13>",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": false,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": true
|
722 |
+
},
|
723 |
+
"32087": {
|
724 |
+
"content": "<extra_id_12>",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": false,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": true
|
730 |
+
},
|
731 |
+
"32088": {
|
732 |
+
"content": "<extra_id_11>",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": false,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": true
|
738 |
+
},
|
739 |
+
"32089": {
|
740 |
+
"content": "<extra_id_10>",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": false,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": true
|
746 |
+
},
|
747 |
+
"32090": {
|
748 |
+
"content": "<extra_id_9>",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": false,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": true
|
754 |
+
},
|
755 |
+
"32091": {
|
756 |
+
"content": "<extra_id_8>",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": false,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": true
|
762 |
+
},
|
763 |
+
"32092": {
|
764 |
+
"content": "<extra_id_7>",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": false,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": true
|
770 |
+
},
|
771 |
+
"32093": {
|
772 |
+
"content": "<extra_id_6>",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": false,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": true
|
778 |
+
},
|
779 |
+
"32094": {
|
780 |
+
"content": "<extra_id_5>",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": false,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": true
|
786 |
+
},
|
787 |
+
"32095": {
|
788 |
+
"content": "<extra_id_4>",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": false,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": true
|
794 |
+
},
|
795 |
+
"32096": {
|
796 |
+
"content": "<extra_id_3>",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": false,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": true
|
802 |
+
},
|
803 |
+
"32097": {
|
804 |
+
"content": "<extra_id_2>",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": false,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": true
|
810 |
+
},
|
811 |
+
"32098": {
|
812 |
+
"content": "<extra_id_1>",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": false,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": true
|
818 |
+
},
|
819 |
+
"32099": {
|
820 |
+
"content": "<extra_id_0>",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": false,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": true
|
826 |
+
}
|
827 |
+
},
|
828 |
+
"additional_special_tokens": [
|
829 |
+
"<extra_id_0>",
|
830 |
+
"<extra_id_1>",
|
831 |
+
"<extra_id_2>",
|
832 |
+
"<extra_id_3>",
|
833 |
+
"<extra_id_4>",
|
834 |
+
"<extra_id_5>",
|
835 |
+
"<extra_id_6>",
|
836 |
+
"<extra_id_7>",
|
837 |
+
"<extra_id_8>",
|
838 |
+
"<extra_id_9>",
|
839 |
+
"<extra_id_10>",
|
840 |
+
"<extra_id_11>",
|
841 |
+
"<extra_id_12>",
|
842 |
+
"<extra_id_13>",
|
843 |
+
"<extra_id_14>",
|
844 |
+
"<extra_id_15>",
|
845 |
+
"<extra_id_16>",
|
846 |
+
"<extra_id_17>",
|
847 |
+
"<extra_id_18>",
|
848 |
+
"<extra_id_19>",
|
849 |
+
"<extra_id_20>",
|
850 |
+
"<extra_id_21>",
|
851 |
+
"<extra_id_22>",
|
852 |
+
"<extra_id_23>",
|
853 |
+
"<extra_id_24>",
|
854 |
+
"<extra_id_25>",
|
855 |
+
"<extra_id_26>",
|
856 |
+
"<extra_id_27>",
|
857 |
+
"<extra_id_28>",
|
858 |
+
"<extra_id_29>",
|
859 |
+
"<extra_id_30>",
|
860 |
+
"<extra_id_31>",
|
861 |
+
"<extra_id_32>",
|
862 |
+
"<extra_id_33>",
|
863 |
+
"<extra_id_34>",
|
864 |
+
"<extra_id_35>",
|
865 |
+
"<extra_id_36>",
|
866 |
+
"<extra_id_37>",
|
867 |
+
"<extra_id_38>",
|
868 |
+
"<extra_id_39>",
|
869 |
+
"<extra_id_40>",
|
870 |
+
"<extra_id_41>",
|
871 |
+
"<extra_id_42>",
|
872 |
+
"<extra_id_43>",
|
873 |
+
"<extra_id_44>",
|
874 |
+
"<extra_id_45>",
|
875 |
+
"<extra_id_46>",
|
876 |
+
"<extra_id_47>",
|
877 |
+
"<extra_id_48>",
|
878 |
+
"<extra_id_49>",
|
879 |
+
"<extra_id_50>",
|
880 |
+
"<extra_id_51>",
|
881 |
+
"<extra_id_52>",
|
882 |
+
"<extra_id_53>",
|
883 |
+
"<extra_id_54>",
|
884 |
+
"<extra_id_55>",
|
885 |
+
"<extra_id_56>",
|
886 |
+
"<extra_id_57>",
|
887 |
+
"<extra_id_58>",
|
888 |
+
"<extra_id_59>",
|
889 |
+
"<extra_id_60>",
|
890 |
+
"<extra_id_61>",
|
891 |
+
"<extra_id_62>",
|
892 |
+
"<extra_id_63>",
|
893 |
+
"<extra_id_64>",
|
894 |
+
"<extra_id_65>",
|
895 |
+
"<extra_id_66>",
|
896 |
+
"<extra_id_67>",
|
897 |
+
"<extra_id_68>",
|
898 |
+
"<extra_id_69>",
|
899 |
+
"<extra_id_70>",
|
900 |
+
"<extra_id_71>",
|
901 |
+
"<extra_id_72>",
|
902 |
+
"<extra_id_73>",
|
903 |
+
"<extra_id_74>",
|
904 |
+
"<extra_id_75>",
|
905 |
+
"<extra_id_76>",
|
906 |
+
"<extra_id_77>",
|
907 |
+
"<extra_id_78>",
|
908 |
+
"<extra_id_79>",
|
909 |
+
"<extra_id_80>",
|
910 |
+
"<extra_id_81>",
|
911 |
+
"<extra_id_82>",
|
912 |
+
"<extra_id_83>",
|
913 |
+
"<extra_id_84>",
|
914 |
+
"<extra_id_85>",
|
915 |
+
"<extra_id_86>",
|
916 |
+
"<extra_id_87>",
|
917 |
+
"<extra_id_88>",
|
918 |
+
"<extra_id_89>",
|
919 |
+
"<extra_id_90>",
|
920 |
+
"<extra_id_91>",
|
921 |
+
"<extra_id_92>",
|
922 |
+
"<extra_id_93>",
|
923 |
+
"<extra_id_94>",
|
924 |
+
"<extra_id_95>",
|
925 |
+
"<extra_id_96>",
|
926 |
+
"<extra_id_97>",
|
927 |
+
"<extra_id_98>",
|
928 |
+
"<extra_id_99>"
|
929 |
+
],
|
930 |
+
"clean_up_tokenization_spaces": true,
|
931 |
+
"eos_token": "</s>",
|
932 |
+
"extra_ids": 100,
|
933 |
+
"model_max_length": 512,
|
934 |
+
"pad_token": "</s>",
|
935 |
+
"sp_model_kwargs": {},
|
936 |
+
"tokenizer_class": "T5Tokenizer",
|
937 |
+
"unk_token": "<unk>"
|
938 |
+
}
|
ckpt/ling_disc/checkpoint-41000/trainer_state.json
ADDED
@@ -0,0 +1,636 @@
{
  "best_metric": 0.05535305291414261,
  "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
  "epoch": 29.306647605432453,
  "eval_steps": 1000,
  "global_step": 41000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.71, "grad_norm": 0.855617344379425, "learning_rate": 1.1913271384322135e-05, "loss": 0.9117, "step": 1000 },
    { "epoch": 0.71, "eval_loss": 0.6742472052574158, "eval_runtime": 27.0595, "eval_samples_per_second": 1111.549, "eval_steps_per_second": 5.58, "step": 1000 },
    { "epoch": 1.43, "grad_norm": 4.203719139099121, "learning_rate": 2.382654276864427e-05, "loss": 0.4114, "step": 2000 },
    { "epoch": 1.43, "eval_loss": 0.3266257345676422, "eval_runtime": 26.9318, "eval_samples_per_second": 1116.822, "eval_steps_per_second": 5.607, "step": 2000 },
    { "epoch": 2.14, "grad_norm": 3.1638591289520264, "learning_rate": 3.57398141529664e-05, "loss": 0.2624, "step": 3000 },
    { "epoch": 2.14, "eval_loss": 0.24602766335010529, "eval_runtime": 27.0604, "eval_samples_per_second": 1111.512, "eval_steps_per_second": 5.58, "step": 3000 },
    { "epoch": 2.86, "grad_norm": 1.7417826652526855, "learning_rate": 4.765308553728854e-05, "loss": 0.2002, "step": 4000 },
    { "epoch": 2.86, "eval_loss": 0.1770436018705368, "eval_runtime": 26.8812, "eval_samples_per_second": 1118.922, "eval_steps_per_second": 5.617, "step": 4000 },
    { "epoch": 3.57, "grad_norm": 1.1299816370010376, "learning_rate": 4.893707145315437e-05, "loss": 0.1635, "step": 5000 },
    { "epoch": 3.57, "eval_loss": 0.14757415652275085, "eval_runtime": 26.7857, "eval_samples_per_second": 1122.914, "eval_steps_per_second": 5.637, "step": 5000 },
    { "epoch": 4.29, "grad_norm": 1.210856556892395, "learning_rate": 4.761337463267413e-05, "loss": 0.1404, "step": 6000 },
    { "epoch": 4.29, "eval_loss": 0.12851941585540771, "eval_runtime": 26.9893, "eval_samples_per_second": 1114.44, "eval_steps_per_second": 5.595, "step": 6000 },
    { "epoch": 5.0, "grad_norm": 2.0565412044525146, "learning_rate": 4.62896778121939e-05, "loss": 0.1263, "step": 7000 },
    { "epoch": 5.0, "eval_loss": 0.12228666245937347, "eval_runtime": 26.7363, "eval_samples_per_second": 1124.987, "eval_steps_per_second": 5.648, "step": 7000 },
    { "epoch": 5.72, "grad_norm": 1.8667607307434082, "learning_rate": 4.496598099171366e-05, "loss": 0.1127, "step": 8000 },
    { "epoch": 5.72, "eval_loss": 0.11036147177219391, "eval_runtime": 26.7509, "eval_samples_per_second": 1124.375, "eval_steps_per_second": 5.645, "step": 8000 },
    { "epoch": 6.43, "grad_norm": 0.7492337226867676, "learning_rate": 4.364228417123342e-05, "loss": 0.1059, "step": 9000 },
    { "epoch": 6.43, "eval_loss": 0.10317497700452805, "eval_runtime": 27.0158, "eval_samples_per_second": 1113.349, "eval_steps_per_second": 5.589, "step": 9000 },
    { "epoch": 7.15, "grad_norm": 0.7611485123634338, "learning_rate": 4.231858735075319e-05, "loss": 0.0993, "step": 10000 },
    { "epoch": 7.15, "eval_loss": 0.10284282267093658, "eval_runtime": 26.795, "eval_samples_per_second": 1122.524, "eval_steps_per_second": 5.635, "step": 10000 },
    { "epoch": 7.86, "grad_norm": 0.5870215892791748, "learning_rate": 4.099489053027295e-05, "loss": 0.0887, "step": 11000 },
    { "epoch": 7.86, "eval_loss": 0.09789762645959854, "eval_runtime": 26.8453, "eval_samples_per_second": 1120.419, "eval_steps_per_second": 5.625, "step": 11000 },
    { "epoch": 8.58, "grad_norm": 0.48922085762023926, "learning_rate": 3.9671193709792706e-05, "loss": 0.0842, "step": 12000 },
    { "epoch": 8.58, "eval_loss": 0.09349656105041504, "eval_runtime": 26.8273, "eval_samples_per_second": 1121.172, "eval_steps_per_second": 5.629, "step": 12000 },
    { "epoch": 9.29, "grad_norm": 0.4252859354019165, "learning_rate": 3.8347496889312476e-05, "loss": 0.0793, "step": 13000 },
    { "epoch": 9.29, "eval_loss": 0.09415590018033981, "eval_runtime": 25.9362, "eval_samples_per_second": 1159.693, "eval_steps_per_second": 5.822, "step": 13000 },
    { "epoch": 10.01, "grad_norm": 0.44548505544662476, "learning_rate": 3.702380006883224e-05, "loss": 0.076, "step": 14000 },
    { "epoch": 10.01, "eval_loss": 0.08913980424404144, "eval_runtime": 26.7379, "eval_samples_per_second": 1124.919, "eval_steps_per_second": 5.647, "step": 14000 },
    { "epoch": 10.72, "grad_norm": 0.2965373694896698, "learning_rate": 3.5700103248352e-05, "loss": 0.0714, "step": 15000 },
    { "epoch": 10.72, "eval_loss": 0.08456840366125107, "eval_runtime": 26.787, "eval_samples_per_second": 1122.857, "eval_steps_per_second": 5.637, "step": 15000 },
    { "epoch": 11.44, "grad_norm": 0.3205694854259491, "learning_rate": 3.437640642787176e-05, "loss": 0.0677, "step": 16000 },
    { "epoch": 11.44, "eval_loss": 0.07863688468933105, "eval_runtime": 26.8242, "eval_samples_per_second": 1121.299, "eval_steps_per_second": 5.629, "step": 16000 },
    { "epoch": 12.15, "grad_norm": 0.2736203670501709, "learning_rate": 3.3052709607391525e-05, "loss": 0.0636, "step": 17000 },
    { "epoch": 12.15, "eval_loss": 0.07664181292057037, "eval_runtime": 26.7818, "eval_samples_per_second": 1123.077, "eval_steps_per_second": 5.638, "step": 17000 },
    { "epoch": 12.87, "grad_norm": 0.25644680857658386, "learning_rate": 3.172901278691129e-05, "loss": 0.0618, "step": 18000 },
    { "epoch": 12.87, "eval_loss": 0.07351888716220856, "eval_runtime": 26.8445, "eval_samples_per_second": 1120.453, "eval_steps_per_second": 5.625, "step": 18000 },
    { "epoch": 13.58, "grad_norm": 0.2748676538467407, "learning_rate": 3.0405315966431053e-05, "loss": 0.0584, "step": 19000 },
    { "epoch": 13.58, "eval_loss": 0.07314006239175797, "eval_runtime": 26.8333, "eval_samples_per_second": 1120.921, "eval_steps_per_second": 5.627, "step": 19000 },
    { "epoch": 14.3, "grad_norm": 0.30235132575035095, "learning_rate": 2.9081619145950812e-05, "loss": 0.057, "step": 20000 },
    { "epoch": 14.3, "eval_loss": 0.07568340748548508, "eval_runtime": 27.0109, "eval_samples_per_second": 1113.55, "eval_steps_per_second": 5.59, "step": 20000 },
    { "epoch": 15.01, "grad_norm": 0.2508692145347595, "learning_rate": 2.7757922325470574e-05, "loss": 0.0558, "step": 21000 },
    { "epoch": 15.01, "eval_loss": 0.07675843685865402, "eval_runtime": 26.9026, "eval_samples_per_second": 1118.032, "eval_steps_per_second": 5.613, "step": 21000 },
    { "epoch": 15.73, "grad_norm": 0.3341030478477478, "learning_rate": 2.643422550499034e-05, "loss": 0.0533, "step": 22000 },
    { "epoch": 15.73, "eval_loss": 0.07339715212583542, "eval_runtime": 26.8727, "eval_samples_per_second": 1119.278, "eval_steps_per_second": 5.619, "step": 22000 },
    { "epoch": 16.44, "grad_norm": 0.30433303117752075, "learning_rate": 2.51105286845101e-05, "loss": 0.0516, "step": 23000 },
    { "epoch": 16.44, "eval_loss": 0.0694783553481102, "eval_runtime": 26.8551, "eval_samples_per_second": 1120.012, "eval_steps_per_second": 5.623, "step": 23000 },
    { "epoch": 17.16, "grad_norm": 0.39424875378608704, "learning_rate": 2.378683186402986e-05, "loss": 0.049, "step": 24000 },
    { "epoch": 17.16, "eval_loss": 0.06750107556581497, "eval_runtime": 26.9045, "eval_samples_per_second": 1117.954, "eval_steps_per_second": 5.612, "step": 24000 },
    { "epoch": 17.87, "grad_norm": 0.29526183009147644, "learning_rate": 2.2463135043549627e-05, "loss": 0.0478, "step": 25000 },
    { "epoch": 17.87, "eval_loss": 0.06841529905796051, "eval_runtime": 26.9131, "eval_samples_per_second": 1117.597, "eval_steps_per_second": 5.611, "step": 25000 },
    { "epoch": 18.58, "grad_norm": 0.2802821099758148, "learning_rate": 2.113943822306939e-05, "loss": 0.0472, "step": 26000 },
    { "epoch": 18.58, "eval_loss": 0.0680340975522995, "eval_runtime": 26.8442, "eval_samples_per_second": 1120.467, "eval_steps_per_second": 5.625, "step": 26000 },
    { "epoch": 19.3, "grad_norm": 0.198490172624588, "learning_rate": 1.9815741402589152e-05, "loss": 0.0445, "step": 27000 },
    { "epoch": 19.3, "eval_loss": 0.059882719069719315, "eval_runtime": 26.9691, "eval_samples_per_second": 1115.275, "eval_steps_per_second": 5.599, "step": 27000 },
    { "epoch": 20.01, "grad_norm": 0.3383251130580902, "learning_rate": 1.8492044582108914e-05, "loss": 0.0435, "step": 28000 },
    { "epoch": 20.01, "eval_loss": 0.06356318295001984, "eval_runtime": 26.8538, "eval_samples_per_second": 1120.066, "eval_steps_per_second": 5.623, "step": 28000 },
    { "epoch": 20.73, "grad_norm": 0.16571784019470215, "learning_rate": 1.7168347761628677e-05, "loss": 0.0419, "step": 29000 },
    { "epoch": 20.73, "eval_loss": 0.06056862324476242, "eval_runtime": 27.0748, "eval_samples_per_second": 1110.924, "eval_steps_per_second": 5.577, "step": 29000 },
    { "epoch": 21.44, "grad_norm": 0.19518467783927917, "learning_rate": 1.584465094114844e-05, "loss": 0.0409, "step": 30000 },
    { "epoch": 21.44, "eval_loss": 0.06490638852119446, "eval_runtime": 26.8481, "eval_samples_per_second": 1120.301, "eval_steps_per_second": 5.624, "step": 30000 },
    { "epoch": 22.16, "grad_norm": 0.15420591831207275, "learning_rate": 1.4520954120668203e-05, "loss": 0.0397, "step": 31000 },
    { "epoch": 22.16, "eval_loss": 0.05918469280004501, "eval_runtime": 26.8143, "eval_samples_per_second": 1121.713, "eval_steps_per_second": 5.631, "step": 31000 },
    { "epoch": 22.87, "grad_norm": 0.26854997873306274, "learning_rate": 1.3197257300187965e-05, "loss": 0.0387, "step": 32000 },
    { "epoch": 22.87, "eval_loss": 0.06144551932811737, "eval_runtime": 26.8852, "eval_samples_per_second": 1118.757, "eval_steps_per_second": 5.616, "step": 32000 },
    { "epoch": 23.59, "grad_norm": 0.17430314421653748, "learning_rate": 1.1873560479707728e-05, "loss": 0.0373, "step": 33000 },
    { "epoch": 23.59, "eval_loss": 0.06159648299217224, "eval_runtime": 26.7887, "eval_samples_per_second": 1122.785, "eval_steps_per_second": 5.637, "step": 33000 },
    { "epoch": 24.3, "grad_norm": 0.14911049604415894, "learning_rate": 1.054986365922749e-05, "loss": 0.0369, "step": 34000 },
    { "epoch": 24.3, "eval_loss": 0.05931873992085457, "eval_runtime": 26.8571, "eval_samples_per_second": 1119.926, "eval_steps_per_second": 5.622, "step": 34000 },
    { "epoch": 25.02, "grad_norm": 0.13620807230472565, "learning_rate": 9.226166838747254e-06, "loss": 0.0361, "step": 35000 },
    { "epoch": 25.02, "eval_loss": 0.05695568770170212, "eval_runtime": 26.8966, "eval_samples_per_second": 1118.283, "eval_steps_per_second": 5.614, "step": 35000 },
    { "epoch": 25.73, "grad_norm": 0.13764438033103943, "learning_rate": 7.902470018267017e-06, "loss": 0.0349, "step": 36000 },
    { "epoch": 25.73, "eval_loss": 0.05707501247525215, "eval_runtime": 26.986, "eval_samples_per_second": 1114.578, "eval_steps_per_second": 5.595, "step": 36000 },
    { "epoch": 26.45, "grad_norm": 0.2389635145664215, "learning_rate": 6.578773197786779e-06, "loss": 0.0343, "step": 37000 },
    { "epoch": 26.45, "eval_loss": 0.0577365942299366, "eval_runtime": 26.9903, "eval_samples_per_second": 1114.401, "eval_steps_per_second": 5.595, "step": 37000 },
    { "epoch": 27.16, "grad_norm": 0.15828461945056915, "learning_rate": 5.255076377306542e-06, "loss": 0.034, "step": 38000 },
    { "epoch": 27.16, "eval_loss": 0.05767366662621498, "eval_runtime": 27.1454, "eval_samples_per_second": 1108.035, "eval_steps_per_second": 5.563, "step": 38000 },
    { "epoch": 27.88, "grad_norm": 0.1059570387005806, "learning_rate": 3.9313795568263045e-06, "loss": 0.0332, "step": 39000 },
    { "epoch": 27.88, "eval_loss": 0.056225307285785675, "eval_runtime": 26.9534, "eval_samples_per_second": 1115.928, "eval_steps_per_second": 5.602, "step": 39000 },
    { "epoch": 28.59, "grad_norm": 0.1975150853395462, "learning_rate": 2.6076827363460673e-06, "loss": 0.0329, "step": 40000 },
    { "epoch": 28.59, "eval_loss": 0.05555161088705063, "eval_runtime": 27.1187, "eval_samples_per_second": 1109.122, "eval_steps_per_second": 5.568, "step": 40000 },
    { "epoch": 29.31, "grad_norm": 0.1037423312664032, "learning_rate": 1.28398591586583e-06, "loss": 0.0319, "step": 41000 },
    { "epoch": 29.31, "eval_loss": 0.05535305291414261, "eval_runtime": 26.8353, "eval_samples_per_second": 1120.838, "eval_steps_per_second": 5.627, "step": 41000 }
  ],
  "logging_steps": 1000,
  "max_steps": 41970,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 1000,
  "total_flos": 3.270624085088659e+16,
  "train_batch_size": 200,
  "trial_name": null,
  "trial_params": null
}
ckpt/ling_disc/checkpoint-41000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
size 4984
ckpt/ling_disc/config.json
ADDED
@@ -0,0 +1,120 @@
{
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [ "DebertaReplacedTokenizer" ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2", "3": "LABEL_3", "4": "LABEL_4", "5": "LABEL_5", "6": "LABEL_6", "7": "LABEL_7", "8": "LABEL_8", "9": "LABEL_9",
    "10": "LABEL_10", "11": "LABEL_11", "12": "LABEL_12", "13": "LABEL_13", "14": "LABEL_14", "15": "LABEL_15", "16": "LABEL_16", "17": "LABEL_17", "18": "LABEL_18", "19": "LABEL_19",
    "20": "LABEL_20", "21": "LABEL_21", "22": "LABEL_22", "23": "LABEL_23", "24": "LABEL_24", "25": "LABEL_25", "26": "LABEL_26", "27": "LABEL_27", "28": "LABEL_28", "29": "LABEL_29",
    "30": "LABEL_30", "31": "LABEL_31", "32": "LABEL_32", "33": "LABEL_33", "34": "LABEL_34", "35": "LABEL_35", "36": "LABEL_36", "37": "LABEL_37", "38": "LABEL_38", "39": "LABEL_39"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0, "LABEL_1": 1, "LABEL_10": 10, "LABEL_11": 11, "LABEL_12": 12, "LABEL_13": 13, "LABEL_14": 14, "LABEL_15": 15, "LABEL_16": 16, "LABEL_17": 17,
    "LABEL_18": 18, "LABEL_19": 19, "LABEL_2": 2, "LABEL_20": 20, "LABEL_21": 21, "LABEL_22": 22, "LABEL_23": 23, "LABEL_24": 24, "LABEL_25": 25, "LABEL_26": 26,
    "LABEL_27": 27, "LABEL_28": 28, "LABEL_29": 29, "LABEL_3": 3, "LABEL_30": 30, "LABEL_31": 31, "LABEL_32": 32, "LABEL_33": 33, "LABEL_34": 34, "LABEL_35": 35,
    "LABEL_36": 36, "LABEL_37": 37, "LABEL_38": 38, "LABEL_39": 39, "LABEL_4": 4, "LABEL_5": 5, "LABEL_6": 6, "LABEL_7": 7, "LABEL_8": 8, "LABEL_9": 9
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [ "p2c", "c2p" ],
  "position_biased_input": false,
  "position_buckets": 256,
  "problem_type": "regression",
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "type_vocab_size": 0,
  "vocab_size": 128100
}
ckpt/ling_disc/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15221fd5565118b32b1adf7b42c27cae6a3d8dd32b0ef85473b70bb072964661
size 275252064
ckpt/ling_disc/scaler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1dbad9723e37379b55bb6d7300abf6ad705f320bd599ca7f583e574f4a26f4a4
size 1575
ckpt/ling_disc/special_tokens_map.json
ADDED
@@ -0,0 +1,119 @@
{
  "additional_special_tokens": [
    "<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>",
    "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>",
    "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>",
    "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>",
    "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>",
    "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>",
    "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>",
    "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>",
    "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>",
    "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"
  ],
  "eos_token": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "pad_token": "</s>",
  "unk_token": { "content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }
}
ckpt/ling_disc/spiece.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
size 791656
ckpt/ling_disc/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff
ckpt/ling_disc/tokenizer_config.json
ADDED
@@ -0,0 +1,938 @@
{
  "added_tokens_decoder": {
    "0": { "content": "<pad>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "1": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "2": { "content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32000": { "content": "<extra_id_99>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32001": { "content": "<extra_id_98>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32002": { "content": "<extra_id_97>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32003": { "content": "<extra_id_96>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32004": { "content": "<extra_id_95>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32005": { "content": "<extra_id_94>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32006": { "content": "<extra_id_93>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32007": { "content": "<extra_id_92>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32008": { "content": "<extra_id_91>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32009": { "content": "<extra_id_90>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32010": { "content": "<extra_id_89>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32011": { "content": "<extra_id_88>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32012": { "content": "<extra_id_87>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32013": { "content": "<extra_id_86>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32014": { "content": "<extra_id_85>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32015": { "content": "<extra_id_84>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32016": { "content": "<extra_id_83>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32017": { "content": "<extra_id_82>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32018": { "content": "<extra_id_81>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32019": { "content": "<extra_id_80>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32020": { "content": "<extra_id_79>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32021": { "content": "<extra_id_78>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32022": { "content": "<extra_id_77>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32023": { "content": "<extra_id_76>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32024": { "content": "<extra_id_75>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32025": { "content": "<extra_id_74>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32026": { "content": "<extra_id_73>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32027": { "content": "<extra_id_72>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32028": { "content": "<extra_id_71>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32029": { "content": "<extra_id_70>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32030": { "content": "<extra_id_69>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32031": { "content": "<extra_id_68>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32032": { "content": "<extra_id_67>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32033": { "content": "<extra_id_66>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32034": { "content": "<extra_id_65>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32035": { "content": "<extra_id_64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32036": { "content": "<extra_id_63>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32037": { "content": "<extra_id_62>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32038": { "content": "<extra_id_61>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32039": { "content": "<extra_id_60>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32040": { "content": "<extra_id_59>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32041": { "content": "<extra_id_58>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32042": { "content": "<extra_id_57>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32043": { "content": "<extra_id_56>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32044": { "content": "<extra_id_55>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32045": { "content": "<extra_id_54>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32046": { "content": "<extra_id_53>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32047": { "content": "<extra_id_52>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32048": { "content": "<extra_id_51>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32049": { "content": "<extra_id_50>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32050": { "content": "<extra_id_49>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32051": { "content": "<extra_id_48>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32052": { "content": "<extra_id_47>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32053": { "content": "<extra_id_46>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32054": { "content": "<extra_id_45>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32055": { "content": "<extra_id_44>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32056": { "content": "<extra_id_43>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32057": { "content": "<extra_id_42>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32058": { "content": "<extra_id_41>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32059": { "content": "<extra_id_40>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32060": { "content": "<extra_id_39>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32061": { "content": "<extra_id_38>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32062": { "content": "<extra_id_37>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32063": { "content": "<extra_id_36>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32064": { "content": "<extra_id_35>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32065": { "content": "<extra_id_34>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32066": { "content": "<extra_id_33>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32067": { "content": "<extra_id_32>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32068": { "content": "<extra_id_31>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32069": { "content": "<extra_id_30>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32070": { "content": "<extra_id_29>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32071": { "content": "<extra_id_28>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32072": { "content": "<extra_id_27>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32073": { "content": "<extra_id_26>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32074": { "content": "<extra_id_25>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32075": { "content": "<extra_id_24>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32076": { "content": "<extra_id_23>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32077": { "content": "<extra_id_22>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32078": { "content": "<extra_id_21>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32079": { "content": "<extra_id_20>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32080": { "content": "<extra_id_19>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32081": { "content": "<extra_id_18>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32082": { "content": "<extra_id_17>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32083": { "content": "<extra_id_16>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32084": { "content": "<extra_id_15>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32085": { "content": "<extra_id_14>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32086": { "content": "<extra_id_13>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32087": { "content": "<extra_id_12>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32088": { "content": "<extra_id_11>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32089": { "content": "<extra_id_10>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32090": { "content": "<extra_id_9>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32091": { "content": "<extra_id_8>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32092": { "content": "<extra_id_7>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32093": { "content": "<extra_id_6>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32094": { "content": "<extra_id_5>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32095": { "content": "<extra_id_4>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32096": { "content": "<extra_id_3>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32097": { "content": "<extra_id_2>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32098": { "content": "<extra_id_1>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "32099": { "content": "<extra_id_0>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }
  },
  "additional_special_tokens": [
    "<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>",
    "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>",
    "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>",
+
"<extra_id_29>",
|
859 |
+
"<extra_id_30>",
|
860 |
+
"<extra_id_31>",
|
861 |
+
"<extra_id_32>",
|
862 |
+
"<extra_id_33>",
|
863 |
+
"<extra_id_34>",
|
864 |
+
"<extra_id_35>",
|
865 |
+
"<extra_id_36>",
|
866 |
+
"<extra_id_37>",
|
867 |
+
"<extra_id_38>",
|
868 |
+
"<extra_id_39>",
|
869 |
+
"<extra_id_40>",
|
870 |
+
"<extra_id_41>",
|
871 |
+
"<extra_id_42>",
|
872 |
+
"<extra_id_43>",
|
873 |
+
"<extra_id_44>",
|
874 |
+
"<extra_id_45>",
|
875 |
+
"<extra_id_46>",
|
876 |
+
"<extra_id_47>",
|
877 |
+
"<extra_id_48>",
|
878 |
+
"<extra_id_49>",
|
879 |
+
"<extra_id_50>",
|
880 |
+
"<extra_id_51>",
|
881 |
+
"<extra_id_52>",
|
882 |
+
"<extra_id_53>",
|
883 |
+
"<extra_id_54>",
|
884 |
+
"<extra_id_55>",
|
885 |
+
"<extra_id_56>",
|
886 |
+
"<extra_id_57>",
|
887 |
+
"<extra_id_58>",
|
888 |
+
"<extra_id_59>",
|
889 |
+
"<extra_id_60>",
|
890 |
+
"<extra_id_61>",
|
891 |
+
"<extra_id_62>",
|
892 |
+
"<extra_id_63>",
|
893 |
+
"<extra_id_64>",
|
894 |
+
"<extra_id_65>",
|
895 |
+
"<extra_id_66>",
|
896 |
+
"<extra_id_67>",
|
897 |
+
"<extra_id_68>",
|
898 |
+
"<extra_id_69>",
|
899 |
+
"<extra_id_70>",
|
900 |
+
"<extra_id_71>",
|
901 |
+
"<extra_id_72>",
|
902 |
+
"<extra_id_73>",
|
903 |
+
"<extra_id_74>",
|
904 |
+
"<extra_id_75>",
|
905 |
+
"<extra_id_76>",
|
906 |
+
"<extra_id_77>",
|
907 |
+
"<extra_id_78>",
|
908 |
+
"<extra_id_79>",
|
909 |
+
"<extra_id_80>",
|
910 |
+
"<extra_id_81>",
|
911 |
+
"<extra_id_82>",
|
912 |
+
"<extra_id_83>",
|
913 |
+
"<extra_id_84>",
|
914 |
+
"<extra_id_85>",
|
915 |
+
"<extra_id_86>",
|
916 |
+
"<extra_id_87>",
|
917 |
+
"<extra_id_88>",
|
918 |
+
"<extra_id_89>",
|
919 |
+
"<extra_id_90>",
|
920 |
+
"<extra_id_91>",
|
921 |
+
"<extra_id_92>",
|
922 |
+
"<extra_id_93>",
|
923 |
+
"<extra_id_94>",
|
924 |
+
"<extra_id_95>",
|
925 |
+
"<extra_id_96>",
|
926 |
+
"<extra_id_97>",
|
927 |
+
"<extra_id_98>",
|
928 |
+
"<extra_id_99>"
|
929 |
+
],
|
930 |
+
"clean_up_tokenization_spaces": true,
|
931 |
+
"eos_token": "</s>",
|
932 |
+
"extra_ids": 100,
|
933 |
+
"model_max_length": 512,
|
934 |
+
"pad_token": "</s>",
|
935 |
+
"sp_model_kwargs": {},
|
936 |
+
"tokenizer_class": "T5Tokenizer",
|
937 |
+
"unk_token": "<unk>"
|
938 |
+
}
|
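The config above registers T5's 100 sentinel tokens (`<extra_id_0>` through `<extra_id_99>`) as special tokens at the top of the vocabulary. A minimal sanity-check sketch (not part of the commit; it assumes only the checkpoint directory shown above):

```python
from transformers import T5Tokenizer

# Load the tokenizer saved in this checkpoint directory.
tok = T5Tokenizer.from_pretrained("ckpt/ling_disc")

# Sentinels are marked "special": true, so they are dropped when decoding
# with skip_special_tokens=True.
print(tok.convert_tokens_to_ids("<extra_id_0>"))    # 32099, per the mapping above
print(tok.eos_token, tok.pad_token, tok.unk_token)  # </s> </s> <unk>
```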
ckpt/ling_disc/trainer_state.json
ADDED
@@ -0,0 +1,645 @@
+{
+  "best_metric": 0.05535305291414261,
+  "best_model_checkpoint": "/data/mohamed/checkpoints/ling_disc/deberta-v3-small_flan-t5-base_40/checkpoint-41000",
+  "epoch": 30.0,
+  "eval_steps": 1000,
+  "global_step": 41970,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.71, "grad_norm": 0.855617344379425, "learning_rate": 1.1913271384322135e-05, "loss": 0.9117, "step": 1000},
+    {"epoch": 0.71, "eval_loss": 0.6742472052574158, "eval_runtime": 27.0595, "eval_samples_per_second": 1111.549, "eval_steps_per_second": 5.58, "step": 1000},
+    {"epoch": 1.43, "grad_norm": 4.203719139099121, "learning_rate": 2.382654276864427e-05, "loss": 0.4114, "step": 2000},
+    {"epoch": 1.43, "eval_loss": 0.3266257345676422, "eval_runtime": 26.9318, "eval_samples_per_second": 1116.822, "eval_steps_per_second": 5.607, "step": 2000},
+    {"epoch": 2.14, "grad_norm": 3.1638591289520264, "learning_rate": 3.57398141529664e-05, "loss": 0.2624, "step": 3000},
+    {"epoch": 2.14, "eval_loss": 0.24602766335010529, "eval_runtime": 27.0604, "eval_samples_per_second": 1111.512, "eval_steps_per_second": 5.58, "step": 3000},
+    {"epoch": 2.86, "grad_norm": 1.7417826652526855, "learning_rate": 4.765308553728854e-05, "loss": 0.2002, "step": 4000},
+    {"epoch": 2.86, "eval_loss": 0.1770436018705368, "eval_runtime": 26.8812, "eval_samples_per_second": 1118.922, "eval_steps_per_second": 5.617, "step": 4000},
+    {"epoch": 3.57, "grad_norm": 1.1299816370010376, "learning_rate": 4.893707145315437e-05, "loss": 0.1635, "step": 5000},
+    {"epoch": 3.57, "eval_loss": 0.14757415652275085, "eval_runtime": 26.7857, "eval_samples_per_second": 1122.914, "eval_steps_per_second": 5.637, "step": 5000},
+    {"epoch": 4.29, "grad_norm": 1.210856556892395, "learning_rate": 4.761337463267413e-05, "loss": 0.1404, "step": 6000},
+    {"epoch": 4.29, "eval_loss": 0.12851941585540771, "eval_runtime": 26.9893, "eval_samples_per_second": 1114.44, "eval_steps_per_second": 5.595, "step": 6000},
+    {"epoch": 5.0, "grad_norm": 2.0565412044525146, "learning_rate": 4.62896778121939e-05, "loss": 0.1263, "step": 7000},
+    {"epoch": 5.0, "eval_loss": 0.12228666245937347, "eval_runtime": 26.7363, "eval_samples_per_second": 1124.987, "eval_steps_per_second": 5.648, "step": 7000},
+    {"epoch": 5.72, "grad_norm": 1.8667607307434082, "learning_rate": 4.496598099171366e-05, "loss": 0.1127, "step": 8000},
+    {"epoch": 5.72, "eval_loss": 0.11036147177219391, "eval_runtime": 26.7509, "eval_samples_per_second": 1124.375, "eval_steps_per_second": 5.645, "step": 8000},
+    {"epoch": 6.43, "grad_norm": 0.7492337226867676, "learning_rate": 4.364228417123342e-05, "loss": 0.1059, "step": 9000},
+    {"epoch": 6.43, "eval_loss": 0.10317497700452805, "eval_runtime": 27.0158, "eval_samples_per_second": 1113.349, "eval_steps_per_second": 5.589, "step": 9000},
+    {"epoch": 7.15, "grad_norm": 0.7611485123634338, "learning_rate": 4.231858735075319e-05, "loss": 0.0993, "step": 10000},
+    {"epoch": 7.15, "eval_loss": 0.10284282267093658, "eval_runtime": 26.795, "eval_samples_per_second": 1122.524, "eval_steps_per_second": 5.635, "step": 10000},
+    {"epoch": 7.86, "grad_norm": 0.5870215892791748, "learning_rate": 4.099489053027295e-05, "loss": 0.0887, "step": 11000},
+    {"epoch": 7.86, "eval_loss": 0.09789762645959854, "eval_runtime": 26.8453, "eval_samples_per_second": 1120.419, "eval_steps_per_second": 5.625, "step": 11000},
+    {"epoch": 8.58, "grad_norm": 0.48922085762023926, "learning_rate": 3.9671193709792706e-05, "loss": 0.0842, "step": 12000},
+    {"epoch": 8.58, "eval_loss": 0.09349656105041504, "eval_runtime": 26.8273, "eval_samples_per_second": 1121.172, "eval_steps_per_second": 5.629, "step": 12000},
+    {"epoch": 9.29, "grad_norm": 0.4252859354019165, "learning_rate": 3.8347496889312476e-05, "loss": 0.0793, "step": 13000},
+    {"epoch": 9.29, "eval_loss": 0.09415590018033981, "eval_runtime": 25.9362, "eval_samples_per_second": 1159.693, "eval_steps_per_second": 5.822, "step": 13000},
+    {"epoch": 10.01, "grad_norm": 0.44548505544662476, "learning_rate": 3.702380006883224e-05, "loss": 0.076, "step": 14000},
+    {"epoch": 10.01, "eval_loss": 0.08913980424404144, "eval_runtime": 26.7379, "eval_samples_per_second": 1124.919, "eval_steps_per_second": 5.647, "step": 14000},
+    {"epoch": 10.72, "grad_norm": 0.2965373694896698, "learning_rate": 3.5700103248352e-05, "loss": 0.0714, "step": 15000},
+    {"epoch": 10.72, "eval_loss": 0.08456840366125107, "eval_runtime": 26.787, "eval_samples_per_second": 1122.857, "eval_steps_per_second": 5.637, "step": 15000},
+    {"epoch": 11.44, "grad_norm": 0.3205694854259491, "learning_rate": 3.437640642787176e-05, "loss": 0.0677, "step": 16000},
+    {"epoch": 11.44, "eval_loss": 0.07863688468933105, "eval_runtime": 26.8242, "eval_samples_per_second": 1121.299, "eval_steps_per_second": 5.629, "step": 16000},
+    {"epoch": 12.15, "grad_norm": 0.2736203670501709, "learning_rate": 3.3052709607391525e-05, "loss": 0.0636, "step": 17000},
+    {"epoch": 12.15, "eval_loss": 0.07664181292057037, "eval_runtime": 26.7818, "eval_samples_per_second": 1123.077, "eval_steps_per_second": 5.638, "step": 17000},
+    {"epoch": 12.87, "grad_norm": 0.25644680857658386, "learning_rate": 3.172901278691129e-05, "loss": 0.0618, "step": 18000},
+    {"epoch": 12.87, "eval_loss": 0.07351888716220856, "eval_runtime": 26.8445, "eval_samples_per_second": 1120.453, "eval_steps_per_second": 5.625, "step": 18000},
+    {"epoch": 13.58, "grad_norm": 0.2748676538467407, "learning_rate": 3.0405315966431053e-05, "loss": 0.0584, "step": 19000},
+    {"epoch": 13.58, "eval_loss": 0.07314006239175797, "eval_runtime": 26.8333, "eval_samples_per_second": 1120.921, "eval_steps_per_second": 5.627, "step": 19000},
+    {"epoch": 14.3, "grad_norm": 0.30235132575035095, "learning_rate": 2.9081619145950812e-05, "loss": 0.057, "step": 20000},
+    {"epoch": 14.3, "eval_loss": 0.07568340748548508, "eval_runtime": 27.0109, "eval_samples_per_second": 1113.55, "eval_steps_per_second": 5.59, "step": 20000},
+    {"epoch": 15.01, "grad_norm": 0.2508692145347595, "learning_rate": 2.7757922325470574e-05, "loss": 0.0558, "step": 21000},
+    {"epoch": 15.01, "eval_loss": 0.07675843685865402, "eval_runtime": 26.9026, "eval_samples_per_second": 1118.032, "eval_steps_per_second": 5.613, "step": 21000},
+    {"epoch": 15.73, "grad_norm": 0.3341030478477478, "learning_rate": 2.643422550499034e-05, "loss": 0.0533, "step": 22000},
+    {"epoch": 15.73, "eval_loss": 0.07339715212583542, "eval_runtime": 26.8727, "eval_samples_per_second": 1119.278, "eval_steps_per_second": 5.619, "step": 22000},
+    {"epoch": 16.44, "grad_norm": 0.30433303117752075, "learning_rate": 2.51105286845101e-05, "loss": 0.0516, "step": 23000},
+    {"epoch": 16.44, "eval_loss": 0.0694783553481102, "eval_runtime": 26.8551, "eval_samples_per_second": 1120.012, "eval_steps_per_second": 5.623, "step": 23000},
+    {"epoch": 17.16, "grad_norm": 0.39424875378608704, "learning_rate": 2.378683186402986e-05, "loss": 0.049, "step": 24000},
+    {"epoch": 17.16, "eval_loss": 0.06750107556581497, "eval_runtime": 26.9045, "eval_samples_per_second": 1117.954, "eval_steps_per_second": 5.612, "step": 24000},
+    {"epoch": 17.87, "grad_norm": 0.29526183009147644, "learning_rate": 2.2463135043549627e-05, "loss": 0.0478, "step": 25000},
+    {"epoch": 17.87, "eval_loss": 0.06841529905796051, "eval_runtime": 26.9131, "eval_samples_per_second": 1117.597, "eval_steps_per_second": 5.611, "step": 25000},
+    {"epoch": 18.58, "grad_norm": 0.2802821099758148, "learning_rate": 2.113943822306939e-05, "loss": 0.0472, "step": 26000},
+    {"epoch": 18.58, "eval_loss": 0.0680340975522995, "eval_runtime": 26.8442, "eval_samples_per_second": 1120.467, "eval_steps_per_second": 5.625, "step": 26000},
+    {"epoch": 19.3, "grad_norm": 0.198490172624588, "learning_rate": 1.9815741402589152e-05, "loss": 0.0445, "step": 27000},
+    {"epoch": 19.3, "eval_loss": 0.059882719069719315, "eval_runtime": 26.9691, "eval_samples_per_second": 1115.275, "eval_steps_per_second": 5.599, "step": 27000},
+    {"epoch": 20.01, "grad_norm": 0.3383251130580902, "learning_rate": 1.8492044582108914e-05, "loss": 0.0435, "step": 28000},
+    {"epoch": 20.01, "eval_loss": 0.06356318295001984, "eval_runtime": 26.8538, "eval_samples_per_second": 1120.066, "eval_steps_per_second": 5.623, "step": 28000},
+    {"epoch": 20.73, "grad_norm": 0.16571784019470215, "learning_rate": 1.7168347761628677e-05, "loss": 0.0419, "step": 29000},
+    {"epoch": 20.73, "eval_loss": 0.06056862324476242, "eval_runtime": 27.0748, "eval_samples_per_second": 1110.924, "eval_steps_per_second": 5.577, "step": 29000},
+    {"epoch": 21.44, "grad_norm": 0.19518467783927917, "learning_rate": 1.584465094114844e-05, "loss": 0.0409, "step": 30000},
+    {"epoch": 21.44, "eval_loss": 0.06490638852119446, "eval_runtime": 26.8481, "eval_samples_per_second": 1120.301, "eval_steps_per_second": 5.624, "step": 30000},
+    {"epoch": 22.16, "grad_norm": 0.15420591831207275, "learning_rate": 1.4520954120668203e-05, "loss": 0.0397, "step": 31000},
+    {"epoch": 22.16, "eval_loss": 0.05918469280004501, "eval_runtime": 26.8143, "eval_samples_per_second": 1121.713, "eval_steps_per_second": 5.631, "step": 31000},
+    {"epoch": 22.87, "grad_norm": 0.26854997873306274, "learning_rate": 1.3197257300187965e-05, "loss": 0.0387, "step": 32000},
+    {"epoch": 22.87, "eval_loss": 0.06144551932811737, "eval_runtime": 26.8852, "eval_samples_per_second": 1118.757, "eval_steps_per_second": 5.616, "step": 32000},
+    {"epoch": 23.59, "grad_norm": 0.17430314421653748, "learning_rate": 1.1873560479707728e-05, "loss": 0.0373, "step": 33000},
+    {"epoch": 23.59, "eval_loss": 0.06159648299217224, "eval_runtime": 26.7887, "eval_samples_per_second": 1122.785, "eval_steps_per_second": 5.637, "step": 33000},
+    {"epoch": 24.3, "grad_norm": 0.14911049604415894, "learning_rate": 1.054986365922749e-05, "loss": 0.0369, "step": 34000},
+    {"epoch": 24.3, "eval_loss": 0.05931873992085457, "eval_runtime": 26.8571, "eval_samples_per_second": 1119.926, "eval_steps_per_second": 5.622, "step": 34000},
+    {"epoch": 25.02, "grad_norm": 0.13620807230472565, "learning_rate": 9.226166838747254e-06, "loss": 0.0361, "step": 35000},
+    {"epoch": 25.02, "eval_loss": 0.05695568770170212, "eval_runtime": 26.8966, "eval_samples_per_second": 1118.283, "eval_steps_per_second": 5.614, "step": 35000},
+    {"epoch": 25.73, "grad_norm": 0.13764438033103943, "learning_rate": 7.902470018267017e-06, "loss": 0.0349, "step": 36000},
+    {"epoch": 25.73, "eval_loss": 0.05707501247525215, "eval_runtime": 26.986, "eval_samples_per_second": 1114.578, "eval_steps_per_second": 5.595, "step": 36000},
+    {"epoch": 26.45, "grad_norm": 0.2389635145664215, "learning_rate": 6.578773197786779e-06, "loss": 0.0343, "step": 37000},
+    {"epoch": 26.45, "eval_loss": 0.0577365942299366, "eval_runtime": 26.9903, "eval_samples_per_second": 1114.401, "eval_steps_per_second": 5.595, "step": 37000},
+    {"epoch": 27.16, "grad_norm": 0.15828461945056915, "learning_rate": 5.255076377306542e-06, "loss": 0.034, "step": 38000},
+    {"epoch": 27.16, "eval_loss": 0.05767366662621498, "eval_runtime": 27.1454, "eval_samples_per_second": 1108.035, "eval_steps_per_second": 5.563, "step": 38000},
+    {"epoch": 27.88, "grad_norm": 0.1059570387005806, "learning_rate": 3.9313795568263045e-06, "loss": 0.0332, "step": 39000},
+    {"epoch": 27.88, "eval_loss": 0.056225307285785675, "eval_runtime": 26.9534, "eval_samples_per_second": 1115.928, "eval_steps_per_second": 5.602, "step": 39000},
+    {"epoch": 28.59, "grad_norm": 0.1975150853395462, "learning_rate": 2.6076827363460673e-06, "loss": 0.0329, "step": 40000},
+    {"epoch": 28.59, "eval_loss": 0.05555161088705063, "eval_runtime": 27.1187, "eval_samples_per_second": 1109.122, "eval_steps_per_second": 5.568, "step": 40000},
+    {"epoch": 29.31, "grad_norm": 0.1037423312664032, "learning_rate": 1.28398591586583e-06, "loss": 0.0319, "step": 41000},
+    {"epoch": 29.31, "eval_loss": 0.05535305291414261, "eval_runtime": 26.8353, "eval_samples_per_second": 1120.838, "eval_steps_per_second": 5.627, "step": 41000},
+    {"epoch": 30.0, "step": 41970, "total_flos": 3.347206753110317e+16, "train_loss": 0.09860551060169176, "train_runtime": 13103.021, "train_samples_per_second": 640.368, "train_steps_per_second": 3.203}
+  ],
+  "logging_steps": 1000,
+  "max_steps": 41970,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 1000,
+  "total_flos": 3.347206753110317e+16,
+  "train_batch_size": 200,
+  "trial_name": null,
+  "trial_params": null
+}
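For reference, `log_history` above is a flat list that interleaves training-loss and eval entries; a short sketch (assuming only the file just shown) to pull the two curves apart:

```python
import json

with open("ckpt/ling_disc/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(state["best_metric"])  # 0.05535305291414261
print(evals[-1])             # (41000, 0.05535305291414261)
```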
ckpt/ling_disc/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deb9dc15db671f7ae0b9e7e7bc26ca9e20c0fde45babc266a60753e2b23d6328
+size 4984
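`training_args.bin` is the pickled `transformers.TrainingArguments` object the Trainer saves next to a run; a hedged sketch for inspecting it (recent PyTorch versions need `weights_only=False` to unpickle arbitrary objects):

```python
import torch

# Restores the TrainingArguments used for the discriminator run above.
args = torch.load("ckpt/ling_disc/training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```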
ckpt/model.json
ADDED
@@ -0,0 +1,82 @@
+{
+  "data": "ling_conversion",
+  "data_sources": ["qqp", "mrpc", "stsb"],
+  "data_type": "text",
+  "kld_annealing": "cyclic",
+  "lingpred_annealing": "mono",
+  "ling_embed_type": "one-layer",
+  "combine_weight": 1,
+  "alpha_kld": 1,
+  "alpha_lingpred": 1,
+  "alpha_sem": 1,
+  "max_grad_norm": 10,
+  "sem_loss_tao": 0.5,
+  "sem_loss_eps": 1,
+  "ckpt": "./ckpt/model.pt",
+  "disc_type": "deberta",
+  "disc_ckpt": "./ckpt/ling_disc",
+  "sem_ckpt": "./ckpt/sem_emb.pt",
+  "lng_ids": null,
+  "lng_ids_idx": null,
+  "model_name": "google/flan-t5-base",
+  "aim_exp": "lingconv-0606",
+  "sem_loss_type": "dedicated",
+  "combine_method": "decoder_add_first",
+  "train_log": 200,
+  "val_log": 2000,
+  "batch_size": 80,
+  "eval_batch_size": 200,
+  "max_eval_samples": 1000,
+  "test_batch_size": 1,
+  "hidden_dim": 500,
+  "latent_dim": 150,
+  "lng_dim": 40,
+  "disc_lng_dim": 40,
+  "use_lora": false,
+  "lora_r": 64,
+  "gpu": "4",
+  "epochs": 20,
+  "grad_accumulation": 1,
+  "n_ica": 10,
+  "max_length": 200,
+  "total_steps": null,
+  "kld_const": 1,
+  "lr": 0.001,
+  "kl_weight": 0.1,
+  "weight_decay": 0.01,
+  "ling_dropout": 0.1,
+  "predict_fn": "logs/test.txt",
+  "save_predict": false,
+  "use_ica": false,
+  "pretrain_gen": false,
+  "pretrain_sem": false,
+  "pretrain_disc": false,
+  "linggen_type": "none",
+  "linggen_input": "s+l",
+  "aug_same": false,
+  "ling_vae": false,
+  "process_lingpred": false,
+  "fudge_lambda": 1.0,
+  "use_lingpred": false,
+  "ling2_only": true,
+  "cycle_loss": false,
+  "disc_loss": false,
+  "sem_loss": false,
+  "sim_loss": false,
+  "optuna": false,
+  "debug": false,
+  "demo": false,
+  "fudge": false,
+  "out_fn": "logs/default",
+  "eval_only": false,
+  "predict_with_feedback": false,
+  "feedback_param": "s",
+  "eval_ling": false,
+  "seed": 0,
+  "major_arg": 0,
+  "quantize_lng": false,
+  "quant_nbins": 20,
+  "src_lng": "ling",
+  "to_restore": [],
+  "disc_steps": 0
+}
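`ckpt/model.json` snapshots the run options (the same field names that `options.py` parses); a small sketch, assuming only this file, to restore them as a namespace:

```python
import json
from argparse import Namespace

with open("ckpt/model.json") as f:
    saved_args = Namespace(**json.load(f))

print(saved_args.model_name)  # google/flan-t5-base
print(saved_args.lng_dim)     # 40 linguistic indices
```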
ckpt/model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a675026d23bf857c796e00fda67b500e4cc13b43db030b08fdfaef14823fbe42
+size 2971737146
ckpt/sem_emb.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c58f890cb0121eacf8ac99d2fac53e2962f457d8c02e0b6386a4b3e342ac10c
+size 1315675291
compute_lng.py
CHANGED
@@ -4,38 +4,6 @@ import lftk
 import spacy
 nlp = spacy.load("en_core_web_sm")
 
-def extract_lingfeat(text):
-    from lingfeat import extractor
-    LingFeat = extractor.pass_text(text)
-    LingFeat.preprocess()
-
-    d = {}
-    d.update(LingFeat.WoKF_()) # Wikipedia Knowledge Features
-    d.update(LingFeat.WBKF_()) # WeeBit Corpus Knowledge Features
-    d.update(LingFeat.OSKF_()) # OneStopEng Corpus Knowledge Features
-
-    # Discourse (Disco) Features
-    d.update(LingFeat.EnDF_()) # Entity Density Features
-    d.update(LingFeat.EnGF_()) # Entity Grid Features
-
-    # Syntactic (Synta) Features
-    # d.update(LingFeat.PhrF_()) # Noun/Verb/Adj/Adv/... Phrasal Features (logging stanza)
-    # d.update(LingFeat.TrSF_()) # (Parse) Tree Structural Features (logging stanza)
-    d.update(LingFeat.POSF_()) # Noun/Verb/Adj/Adv/... Part-of-Speech Features
-
-    # Lexico Semantic (LxSem) Features
-    d.update(LingFeat.TTRF_()) # Type Token Ratio Features
-    d.update(LingFeat.VarF_()) # Noun/Verb/Adj/Adv Variation Features
-    d.update(LingFeat.PsyF_()) # Psycholinguistic Difficulty of Words (AoA Kuperman)
-    d.update(LingFeat.WorF_()) # Word Familiarity from Frequency Count (SubtlexUS)
-
-    # Shallow Traditional (ShTra) Features
-    d.update(LingFeat.ShaF_()) # Shallow Features (e.g. avg number of tokens)
-    d.update(LingFeat.TraF_()) # Traditional Formulas
-
-    return list(d.values())
-
-
 def extract_lftk(text):
     if text == '':
         return [0.] * 220
@@ -45,12 +13,9 @@
     feats = LFTK.extract()
     return list(feats.values())
 
-def compute_lng(text
+def compute_lng(text):
     lca_feats = lca(text)
-
-        sca_feats = [0] * 23
-    else:
-        sca_feats = sca(text)
+    sca_feats = sca(text)
     lftk = extract_lftk(text)
     all_feats = lca_feats + sca_feats + lftk
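The elided middle of `extract_lftk` builds a spaCy doc and hands it to LFTK; a sketch of that call path using LFTK's documented `Extractor` API (the doc-construction lines are an assumption, since the diff hunk skips them):

```python
import lftk
import spacy

nlp = spacy.load("en_core_web_sm")

def extract_lftk(text):
    if text == '':
        return [0.] * 220
    doc = nlp(text)                  # assumed: spaCy doc built from the input
    LFTK = lftk.Extractor(docs=doc)  # LFTK's documented entry point
    feats = LFTK.extract()           # dict keyed like lftk_ids.csv below
    return list(feats.values())
```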
demo.py
DELETED
@@ -1,371 +0,0 @@
-def run_gradio(model, tokenizer, scaler, ling_collection, examples=None, lng_names=None, M=None):
-    import numpy as np
-    import torch
-    from datetime import datetime
-    from compute_lng import compute_lng
-    import gradio as gr
-    m = np.load('assets/m.npy')
-    m = -1/m
-    m[m == -np.inf] = 0
-    m /= 100
-    device = model.backbone.device
-
-    def visibility(mode):
-        if mode == 0:
-            vis_group = group1
-        elif mode == 1:
-            vis_group = group2
-        elif mode == 2:
-            vis_group = group3
-
-        output = [gr.update(value=''), gr.update(value='')]
-        for component in components:
-            if component in vis_group:
-                output.append(gr.update(visible=True))
-            else:
-                output.append(gr.update(visible=False))
-        return output
-
-    def generate(sent1, ling):
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = scaler.transform([ling['Source']])
-        ling2 = scaler.transform([ling['Target']])
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sentence1_ling': torch.tensor(ling1).float().to(device),
-                  'sentence2_ling': torch.tensor(ling2).float().to(device),
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        preds = []
-        with torch.no_grad():
-            pred = model.infer(inputs).cpu().numpy()
-            pred = tokenizer.batch_decode(pred,
-                    skip_special_tokens=True)[0]
-
-        return pred
-
-    def generate_with_feedbacks(sent1, ling):
-        preds = []
-        eta = 0.1
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-        ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-        ling1_embed = model.ling_embed(ling1)
-        ling2_embed = model.ling_embed(ling2)
-        cur_ling = ling1_embed + eta * (ling2_embed - ling1_embed)
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sent1_ling_embed': ling1_embed,
-                  'sent2_ling_embed': ling2_embed,
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        converged = False
-        c = 0
-        while not converged:
-            with torch.no_grad():
-                pred = model.infer(inputs)
-            inputs_pred = inputs.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            ling_pred = model.ling_disc(**inputs_pred)
-            ling_pred_embed = model.ling_embed(ling_pred)
-
-            if len(interpolations) == 0 or pred != interpolations[-1]:
-                interpolations.append(pred)
-
-            diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-            scale = torch.norm(cur_ling)/torch.norm(ling2)
-
-            # print(f'Diff: {diff.item():.3f} / Scale: ({scale.item():.3f})>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 1e-5 or c >= 50:
-                converged = True
-            else:
-                # cur_ling = cur_ling + eta * (ling2_embed - ling_pred_embed)
-                inputs.update({
-                    'sentence1_input_ids': pred,
-                    # 'sent2_ling_embed': ling2_embed,
-                    'sentence1_attention_mask': torch.ones_like(pred)
-                })
-            c += 1
-
-        pred = tokenizer.batch_decode(pred.cpu().numpy(),
-                skip_special_tokens=True)[0]
-
-        return pred
-    def generate_with_feedback(sent1, ling, approx):
-        if sent1 == '':
-            return ['Please input a source text.', '']
-        preds = []
-        interpolations = []
-        input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-        ling1 = torch.tensor(scaler.transform([ling['Source']])).float().to(device)
-        ling2 = torch.tensor(scaler.transform([ling['Target']])).float().to(device)
-        ling1_embed = model.ling_embed(ling1)
-        ling2_embed = model.ling_embed(ling2)
-        inputs = {'sentence1_input_ids': input_ids,
-                  'sent1_ling_embed': ling1_embed,
-                  'sent2_ling_embed': ling2_embed,
-                  'sentence1_attention_mask': torch.ones_like(input_ids)}
-        converged = False
-        c = 0
-        eta = 0.3
-        while not converged:
-            with torch.no_grad():
-                pred = model.infer(inputs)
-            inputs_pred = inputs.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                    skip_special_tokens=True)[0]
-            if 'approximate' in approx:
-                ling_pred = model.ling_disc(**inputs_pred)
-            elif 'exact' in approx:
-                ling_pred = compute_lng(pred_text)
-                ling_pred = scaler.transform([ling_pred])[0]
-                ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-            else:
-                raise ValueError()
-            ling_pred_embed = model.ling_embed(ling_pred)
-
-            if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                interpolations.append(pred_text)
-
-            diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-
-            # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 10 or c >= 50:
-                converged = True
-            else:
-                ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                inputs.update({'sent2_ling_embed': ling2_embed})
-            c += 1
-
-
-        interpolation = '-- ' + '\n-- '.join(interpolations)
-        return [pred_text, interpolation]
-
-    def generate_random(sent1, ling, count, approx):
-        preds, interpolations = [], []
-        for c in range(count):
-            idx = np.random.randint(0, len(ling_collection))
-            ling_ex = ling_collection[idx]
-            ling['Target'] = ling_ex
-            pred, interpolation = generate_with_feedback(sent1, ling, approx)
-            preds.append(pred)
-            interpolations.append(interpolation)
-        return '\n***\n'.join(preds), '\n***\n'.join(interpolations), ling
-
-    def estimate_gen(sent1, sent2, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent2)
-        else:
-            raise ValueError()
-
-        ling['Target'] = ling_pred
-        gen = generate_with_feedback(sent1, ling, approx)
-        results = gen + [ling]
-
-        return results
-
-    def estimate_tgt(sent2, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent2, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent2)
-        else:
-            raise ValueError()
-
-        ling['Target'] = ling_pred
-        return ling
-
-    def estimate_src(sent1, ling, approx):
-        if 'approximate' in approx:
-            input_ids = tokenizer.encode(sent1, return_tensors='pt').to(device)
-            with torch.no_grad():
-                ling_pred = model.ling_disc(input_ids=input_ids).cpu().numpy()
-            ling_pred = scaler.inverse_transform(ling_pred)[0]
-        elif 'exact' in approx:
-            ling_pred = compute_lng(sent1)
-        else:
-            raise ValueError()
-
-        ling['Source'] = ling_pred
-        return ling
-
-    def rand_target(ling):
-        ling['Target'] = scaler.inverse_transform([np.random.randn(*ling['Target'].shape)])[0]
-        return ling
-
-    def rand_ex_target(ling):
-        idx = np.random.randint(0, len(examples))
-        ling_ex = examples[idx][1]
-        ling['Target'] = ling_ex['Target']
-        return ling
-
-    def copy(ling):
-        ling['Target'] = ling['Source']
-        return ling
-
-    def add_noise(ling):
-        x = scaler.transform([ling['Target']])
-        x += np.random.randn(*ling['Target'].shape)
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    def add(ling):
-        x = scaler.transform([ling['Target']])
-        x += m
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    def sub(ling):
-        x = scaler.transform([ling['Target']])
-        x -= m
-        x = scaler.inverse_transform(x)[0]
-        ling['Target'] = x
-        return ling
-
-    # title = ''
-    # for i, model in enumerate(models):
-    #     if i > 0:
-    #         title += '\n'
-    #     title += f"model ({i})\n\tUsing VAE = {model.args.ling_vae}\n\tUsing ICA = {model.args.use_ica}\n\tNumber of features = {model.args.lng_dim if not model.args.use_ica else model.args.n_ica}"
-    title = """
-    # LingConv: A System for Controlled Linguistic Conversion
-
-    ## Description
-
-    This system is an encoder-decoder model for complexity controlled text generation, guided by 241
-    linguistic complexity indices as key attributes. Given a sentence and a desired level of linguistic
-    complexity, the model can generate diverse paraphrases that maintain consistent meaning, adjusted for
-    different linguistic complexity levels. However, it's important to note that not all index combinations are
-    feasible (such as requesting a sentence of "length" 5 with 10 "unique words"). To ensure high quality
-    outputs, our approach interpolates the embedding of linguistic indices to locate the most closely matched,
-    achievable set of indices for the given target.
-    """
-
-    guide = """
-    You may use the system in on of the following ways:
-
-    **Randomized Paraphrase Generation**: Select this option to produce multiple paraphrases with a range
-    of linguistic complexity. You need to provide a source text, specify the number of paraphrases you want,
-    and click "Generate." The linguistic complexity of the paraphrases will be determined randomly.
-
-    **Complexity-Matched Paraphrasing**: Select this option to generate a paraphrase of the given source
-    sentence that closely mirrors the linguistic complexity of another given sentence. Input your source
-    sentence along with another sentence (which will serve only to extract linguistic indices for the
-    paraphrase generation). Then, click "Generate."
-
-    **Manual Linguistic Control**: Select this option to manually control the linguistic complexity of the
-    generated text. We provided a set of tools for manual adjustments of the desired linguistic complexity of
-    the target sentence. These tools enable the user to extract linguistic indices from a given sentence,
-    generate a random (yet coherent) set of linguistic indices, and add or remove noise from the indices.
-    These tools are designed for experimental use and require the user to possess linguistic expertise for
-    effective input of linguistic indices. To use these tools, select "Tools to assist in setting linguistic
-    indices." Once indices are entered, click "Generate."
-
-
-    Second, you may select to use exact or approximate computation of linguistic indices (used in mode (2) and
-    in quality control of the genration). Approximate computation is significantly faster.
-
-    Third, you may view the intermediate sentences of the quality control process by selecting the checkbox.
-
-    Fourth, you may try out some examples by clicking on "Examples...". Examples consist of a source sentences,
-    the indices of the source sentences, and a sample set of target linguistic indices.
-
-    Please make your choice below.
-
-    """
-
-    sent1 = gr.Textbox(label='Source text')
-    ling = gr.Dataframe(value = [[x, 0, 0] for x in lng_names],
-            headers=['Index', 'Source', 'Target'],
-            datatype=['str', 'number', 'number'], visible=False)
-    css = """
-    #guide span.svelte-s1r2yt {font-size: 22px !important;
-                               font-weight: 600 !important}
-    """
-    with gr.Blocks(css=css) as demo:
-        gr.Markdown(title)
-        with gr.Accordion("Quick Start Guide", open=False, elem_id='guide'):
-            gr.Markdown(guide)
-
-        mode = gr.Radio(value='Randomized Paraphrase Generation',
-                label='How would you like to use this system?',
-                type="index",
-                choices=['Randomized Paraphrase Generation',
-                    'Complexity-Matched Paraphrasing', 'Manual Linguistic Control'])
-        approx = gr.Radio(value='Use approximate computation of linguistic indices (faster)',
-                choices=['Use approximate computation of linguistic indices (faster)',
-                    'Use exact computation of linguistic indices'], container=False, show_label=False)
-        control_interpolation = gr.Checkbox(label='View the intermediate sentences in the interpolation of linguistic indices')
-
-        with gr.Accordion("Examples...", open=False):
-            gr.Examples(examples, [sent1, ling], examples_per_page=4, label=None)
-
-        with gr.Row():
-            sent1.render()
-            with gr.Column():
-                sent2 = gr.Textbox(label='Generated text')
-                interpolation = gr.Textbox(label='Quality control interpolation', visible=False, lines=5)
-        #####################
-        with gr.Row():
-            generate_random_btn = gr.Button("Generate",
-                    variant='primary', scale=1, visible=True)
-            count = gr.Number(label='Number of generated sentences', value=3, precision=0, scale=1, visible=True)
-            # generate_fb_btn = gr.Button("Generate with auto-adjust (towards pred)")
-            # generate_fb_s_btn = gr.Button("Generate with auto-adjust (moving s)")
-            # add_noise_btn = gr.Button('Add noise to target linguistic indices')
-        #####################
-        with gr.Row():
-            estimate_gen_btn = gr.Button("Generate",
-                    variant='primary',
-                    scale=1, visible=False)
-            sent_ling_gen = gr.Textbox(label='Text to estimate linguistic indices', scale=1, visible=False)
-        #####################
-        generate_btn = gr.Button("Generate", variant='primary', visible=False)
-        with gr.Accordion("Tools to assist in the setting of linguistic indices...", open=False, visible=False) as ling_tools:
-            with gr.Row():
-                estimate_tgt_btn = gr.Button("Estimate linguistic indices of this sentence", visible=False)
-                sent_ling_est = gr.Textbox(label='Text to estimate linguistic indices', scale=2, visible=False)
-                estimate_src_btn = gr.Button("Estimate linguistic indices of source sentence", visible=False)
-            # rand_btn = gr.Button("Random target")
-            rand_ex_btn = gr.Button("Random target", size='lg', visible=False)
-            copy_btn = gr.Button("Copy linguistic indices of source to target", size='sm', visible=False)
-            with gr.Row():
-                add_btn = gr.Button('Add \u03B5 to target linguistic indices', visible=False)
-                sub_btn = gr.Button('Subtract \u03B5 from target linguistic indices', visible=False)
-            ling.render()
-        #####################
-
-        estimate_src_btn.click(estimate_src, inputs=[sent1, ling, approx], outputs=[ling])
-        estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling_est, ling, approx], outputs=[ling])
-        # estimate_tgt_btn.click(estimate_tgt, inputs=[sent_ling, ling], outputs=[ling])
-        estimate_gen_btn.click(estimate_gen, inputs=[sent1, sent_ling_gen, ling, approx], outputs=[sent2, interpolation, ling])
-        # rand_btn.click(rand_target, inputs=[ling], outputs=[ling])
-        rand_ex_btn.click(rand_ex_target, inputs=[ling], outputs=[ling])
-        copy_btn.click(copy, inputs=[ling], outputs=[ling])
-        generate_btn.click(generate_with_feedback, inputs=[sent1, ling, approx], outputs=[sent2, interpolation])
-        generate_random_btn.click(generate_random, inputs=[sent1, ling, count, approx],
-                outputs=[sent2, interpolation, ling])
-        # generate_fb_btn.click(generate_with_feedback, inputs=[sent1, ling], outputs=sent2s)
-        # generate_fb_s_btn.click(generate_with_feedbacks, inputs=[sent1, ling], outputs=sent2s)
-        add_btn.click(add, inputs=[ling], outputs=[ling])
-        sub_btn.click(sub, inputs=[ling], outputs=[ling])
-        # add_noise_btn.click(add_noise, inputs=[ling], outputs=[ling])
-
-        group1 = [generate_random_btn, count]
-        group2 = [estimate_gen_btn, sent_ling_gen]
-        group3 = [generate_btn, estimate_src_btn, estimate_tgt_btn, sent_ling_est, rand_ex_btn, copy_btn, add_btn, sub_btn, ling, ling_tools]
-        components = group1 + group2 + group3
-        mode.change(visibility, inputs=[mode], outputs=[sent2, interpolation] + components)
-        control_interpolation.change(lambda v: gr.update(visible=v), inputs=[control_interpolation],
-                outputs=[interpolation])
-
-    demo.launch(share=True)
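Distilled, the quality-control loop in the deleted `generate_with_feedback` is: generate, re-estimate the output's linguistic embedding with the discriminator, and, if it is still far from the request, pull the requested embedding toward what was actually realized. A minimal sketch of just that loop (`model.infer`, `model.ling_disc`, and `model.ling_embed` are this repo's methods; the tolerances mirror the deleted code):

```python
import torch

def feedback_loop(model, tokenizer, inputs, tgt_embed, eta=0.3, tol=10, max_iter=50):
    for _ in range(max_iter):
        with torch.no_grad():
            pred = model.infer(inputs)                        # generate candidate ids
        ling_pred = model.ling_disc(input_ids=pred,
                                    attention_mask=torch.ones_like(pred))
        pred_embed = model.ling_embed(ling_pred)              # indices actually realized
        if torch.mean((tgt_embed - pred_embed) ** 2) < tol:   # close enough: stop
            break
        tgt_embed = tgt_embed + eta * (pred_embed - tgt_embed)  # relax the target
        inputs['sent2_ling_embed'] = tgt_embed
    return tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]
```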
lftk_ids.csv
ADDED
@@ -0,0 +1,221 @@
@@ -0,0 +1,221 @@
+key,name,formulation,domain,family,language
+t_word,total_number_of_words,foundation,surface,wordsent,general
+t_stopword,total_number_of_stop_words,foundation,surface,wordsent,general
+t_punct,total_number_of_puntuations,foundation,syntax,wordsent,general
+t_syll,total_number_of_syllables,foundation,surface,wordsent,en
+t_syll2,total_number_of_words_more_than_two_syllables,foundation,surface,wordsent,en
+t_syll3,total_number_of_words_more_than_three_syllables,foundation,surface,wordsent,en
+t_uword,total_number_of_unique_words,foundation,surface,wordsent,general
+t_sent,total_number_of_sentences,foundation,surface,wordsent,general
+t_char,total_number_of_characters,foundation,surface,wordsent,general
+a_word_ps,average_number_of_words_per_sentence,derivation,surface,avgwordsent,general
+a_char_ps,average_number_of_characters_per_sentence,derivation,surface,avgwordsent,general
+a_char_pw,average_number_of_characters_per_word,derivation,surface,avgwordsent,general
+a_syll_ps,average_number_of_syllables_per_sentence,derivation,surface,avgwordsent,en
+a_syll_pw,average_number_of_syllables_per_word,derivation,surface,avgwordsent,en
+a_stopword_ps,average_number_of_stop_words_per_sentence,derivation,surface,avgwordsent,en
+a_stopword_pw,average_number_of_stop_words_per_word,derivation,surface,avgwordsent,en
+t_kup,total_kuperman_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+t_bry,total_brysbaert_age_of_acquistion_of_words,foundation,lexico-semantics,worddiff,en
+t_subtlex_us_zipf,total_subtlex_us_zipf_of_words,foundation,lexico-semantics,worddiff,en
+a_kup_pw,average_kuperman_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_bry_pw,average_brysbaert_age_of_acquistion_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_kup_ps,average_kuperman_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+a_bry_ps,average_brysbaert_age_of_acquistion_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+a_subtlex_us_zipf_pw,average_subtlex_us_zipf_of_words_per_word,derivation,lexico-semantics,avgworddiff,en
+a_subtlex_us_zipf_ps,average_subtlex_us_zipf_of_words_per_sentence,derivation,lexico-semantics,avgworddiff,en
+t_n_ent,total_number_of_named_entities,foundation,discourse,entity,general
+t_n_ent_person,total_number_of_named_entities_person,foundation,discourse,entity,en
+t_n_ent_norp,total_number_of_named_entities_norp,foundation,discourse,entity,en
+t_n_ent_fac,total_number_of_named_entities_fac,foundation,discourse,entity,en
+t_n_ent_org,total_number_of_named_entities_org,foundation,discourse,entity,en
+t_n_ent_gpe,total_number_of_named_entities_gpe,foundation,discourse,entity,en
+t_n_ent_loc,total_number_of_named_entities_loc,foundation,discourse,entity,en
+t_n_ent_product,total_number_of_named_entities_product,foundation,discourse,entity,en
+t_n_ent_event,total_number_of_named_entities_event,foundation,discourse,entity,en
+t_n_ent_art,total_number_of_named_entities_art,foundation,discourse,entity,en
+t_n_ent_law,total_number_of_named_entities_law,foundation,discourse,entity,en
+t_n_ent_language,total_number_of_named_entities_language,foundation,discourse,entity,en
+t_n_ent_date,total_number_of_named_entities_date,foundation,discourse,entity,en
+t_n_ent_time,total_number_of_named_entities_time,foundation,discourse,entity,en
+t_n_ent_percent,total_number_of_named_entities_percent,foundation,discourse,entity,en
+t_n_ent_money,total_number_of_named_entities_money,foundation,discourse,entity,en
+t_n_ent_quantity,total_number_of_named_entities_quantity,foundation,discourse,entity,en
+t_n_ent_ordinal,total_number_of_named_entities_ordinal,foundation,discourse,entity,en
+t_n_ent_cardinal,total_number_of_named_entities_cardinal,foundation,discourse,entity,en
+a_n_ent_pw,average_number_of_named_entities_per_word,derivation,discourse,avgentity,general
+a_n_ent_person_pw,average_number_of_named_entities_person_per_word,derivation,discourse,avgentity,en
+a_n_ent_norp_pw,average_number_of_named_entities_norp_per_word,derivation,discourse,avgentity,en
+a_n_ent_fac_pw,average_number_of_named_entities_fac_per_word,derivation,discourse,avgentity,en
+a_n_ent_org_pw,average_number_of_named_entities_org_per_word,derivation,discourse,avgentity,en
+a_n_ent_gpe_pw,average_number_of_named_entities_gpe_per_word,derivation,discourse,avgentity,en
+a_n_ent_loc_pw,average_number_of_named_entities_loc_per_word,derivation,discourse,avgentity,en
+a_n_ent_product_pw,average_number_of_named_entities_product_per_word,derivation,discourse,avgentity,en
+a_n_ent_event_pw,average_number_of_named_entities_event_per_word,derivation,discourse,avgentity,en
+a_n_ent_art_pw,average_number_of_named_entities_art_per_word,derivation,discourse,avgentity,en
+a_n_ent_law_pw,average_number_of_named_entities_law_per_word,derivation,discourse,avgentity,en
+a_n_ent_language_pw,average_number_of_named_entities_language_per_word,derivation,discourse,avgentity,en
+a_n_ent_date_pw,average_number_of_named_entities_date_per_word,derivation,discourse,avgentity,en
+a_n_ent_time_pw,average_number_of_named_entities_time_per_word,derivation,discourse,avgentity,en
+a_n_ent_percent_pw,average_number_of_named_entities_percent_per_word,derivation,discourse,avgentity,en
+a_n_ent_money_pw,average_number_of_named_entities_money_per_word,derivation,discourse,avgentity,en
+a_n_ent_quantity_pw,average_number_of_named_entities_quantity_per_word,derivation,discourse,avgentity,en
+a_n_ent_ordinal_pw,average_number_of_named_entities_ordinal_per_word,derivation,discourse,avgentity,en
+a_n_ent_cardinal_pw,average_number_of_named_entities_cardinal_per_word,derivation,discourse,avgentity,en
+a_n_ent_ps,average_number_of_named_entities_per_sentence,derivation,discourse,avgentity,general
+a_n_ent_person_ps,average_number_of_named_entities_person_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_norp_ps,average_number_of_named_entities_norp_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_fac_ps,average_number_of_named_entities_fac_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_org_ps,average_number_of_named_entities_org_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_gpe_ps,average_number_of_named_entities_gpe_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_loc_ps,average_number_of_named_entities_loc_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_product_ps,average_number_of_named_entities_product_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_event_ps,average_number_of_named_entities_event_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_art_ps,average_number_of_named_entities_art_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_law_ps,average_number_of_named_entities_law_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_language_ps,average_number_of_named_entities_language_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_date_ps,average_number_of_named_entities_date_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_time_ps,average_number_of_named_entities_time_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_percent_ps,average_number_of_named_entities_percent_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_money_ps,average_number_of_named_entities_money_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_quantity_ps,average_number_of_named_entities_quantity_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_ordinal_ps,average_number_of_named_entities_ordinal_per_sentence,derivation,discourse,avgentity,en
+a_n_ent_cardinal_ps,average_number_of_named_entities_cardinal_per_sentence,derivation,discourse,avgentity,en
+simp_adj_var,simple_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_adp_var,simple_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_adv_var,simple_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_aux_var,simple_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_cconj_var,simple_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_det_var,simple_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_intj_var,simple_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_noun_var,simple_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_num_var,simple_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_part_var,simple_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_pron_var,simple_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_propn_var,simple_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_punct_var,simple_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_sconj_var,simple_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_sym_var,simple_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_verb_var,simple_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_space_var,simple_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adj_var,root_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adp_var,root_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_adv_var,root_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+root_aux_var,root_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+root_cconj_var,root_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_det_var,root_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+root_intj_var,root_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+root_noun_var,root_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_num_var,root_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+root_part_var,root_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+root_pron_var,root_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_propn_var,root_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+root_punct_var,root_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+root_sconj_var,root_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+root_sym_var,root_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+root_verb_var,root_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+root_space_var,root_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adj_var,corrected_adjectives_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adp_var,corrected_adpositions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_adv_var,corrected_adverbs_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_aux_var,corrected_auxiliaries_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_cconj_var,corrected_coordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_det_var,corrected_determiners_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_intj_var,corrected_interjections_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_noun_var,corrected_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_num_var,corrected_numerals_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_part_var,corrected_particles_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_pron_var,corrected_pronouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_propn_var,corrected_proper_nouns_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_punct_var,corrected_punctuations_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_sconj_var,corrected_subordinating_conjunctions_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_sym_var,corrected_symbols_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_verb_var,corrected_verbs_variation,derivation,lexico-semantics,lexicalvariation,general
+corr_space_var,corrected_spaces_variation,derivation,lexico-semantics,lexicalvariation,general
+simp_ttr,simple_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+root_ttr,root_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+corr_ttr,corrected_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+bilog_ttr,bilogarithmic_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+uber_ttr,uber_type_token_ratio,derivation,lexico-semantics,typetokenratio,general
+simp_ttr_no_lem,simple_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+root_ttr_no_lem,root_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+corr_ttr_no_lem,corrected_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+bilog_ttr_no_lem,bilogarithmic_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+uber_ttr_no_lem,uber_type_token_ratio_no_lemma,derivation,lexico-semantics,typetokenratio,general
+n_adj,total_number_of_adjectives,foundation,syntax,partofspeech,general
+n_adp,total_number_of_adpositions,foundation,syntax,partofspeech,general
+n_adv,total_number_of_adverbs,foundation,syntax,partofspeech,general
+n_aux,total_number_of_auxiliaries,foundation,syntax,partofspeech,general
+n_cconj,total_number_of_coordinating_conjunctions,foundation,syntax,partofspeech,general
+n_det,total_number_of_determiners,foundation,syntax,partofspeech,general
+n_intj,total_number_of_interjections,foundation,syntax,partofspeech,general
+n_noun,total_number_of_nouns,foundation,syntax,partofspeech,general
+n_num,total_number_of_numerals,foundation,syntax,partofspeech,general
+n_part,total_number_of_particles,foundation,syntax,partofspeech,general
+n_pron,total_number_of_pronouns,foundation,syntax,partofspeech,general
+n_propn,total_number_of_proper_nouns,foundation,syntax,partofspeech,general
+n_punct,total_number_of_punctuations,foundation,syntax,partofspeech,general
+n_sconj,total_number_of_subordinating_conjunctions,foundation,syntax,partofspeech,general
+n_sym,total_number_of_symbols,foundation,syntax,partofspeech,general
+n_verb,total_number_of_verbs,foundation,syntax,partofspeech,general
+n_space,total_number_of_spaces,foundation,syntax,partofspeech,general
+n_uadj,total_number_of_unique_adjectives,foundation,syntax,partofspeech,general
+n_uadp,total_number_of_unique_adpositions,foundation,syntax,partofspeech,general
+n_uadv,total_number_of_unique_adverbs,foundation,syntax,partofspeech,general
+n_uaux,total_number_of_unique_auxiliaries,foundation,syntax,partofspeech,general
+n_ucconj,total_number_of_unique_coordinating_conjunctions,foundation,syntax,partofspeech,general
+n_udet,total_number_of_unique_determiners,foundation,syntax,partofspeech,general
+n_uintj,total_number_of_unique_interjections,foundation,syntax,partofspeech,general
+n_unoun,total_number_of_unique_nouns,foundation,syntax,partofspeech,general
+n_unum,total_number_of_unique_numerals,foundation,syntax,partofspeech,general
+n_upart,total_number_of_unique_particles,foundation,syntax,partofspeech,general
+n_upron,total_number_of_unique_pronouns,foundation,syntax,partofspeech,general
+n_upropn,total_number_of_unique_proper_nouns,foundation,syntax,partofspeech,general
+n_upunct,total_number_of_unique_punctuations,foundation,syntax,partofspeech,general
+n_usconj,total_number_of_unique_subordinating_conjunctions,foundation,syntax,partofspeech,general
+n_usym,total_number_of_unique_symbols,foundation,syntax,partofspeech,general
+n_uverb,total_number_of_unique_verbs,foundation,syntax,partofspeech,general
+n_uspace,total_number_of_unique_spaces,foundation,syntax,partofspeech,general
+a_adj_pw,average_number_of_adjectives_per_word,derivation,syntax,avgpartofspeech,general
+a_adp_pw,average_number_of_adpositions_per_word,derivation,syntax,avgpartofspeech,general
+a_adv_pw,average_number_of_adverbs_per_word,derivation,syntax,avgpartofspeech,general
+a_aux_pw,average_number_of_auxiliaries_per_word,derivation,syntax,avgpartofspeech,general
+a_cconj_pw,average_number_of_coordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+a_det_pw,average_number_of_determiners_per_word,derivation,syntax,avgpartofspeech,general
+a_intj_pw,average_number_of_interjections_per_word,derivation,syntax,avgpartofspeech,general
+a_noun_pw,average_number_of_nouns_per_word,derivation,syntax,avgpartofspeech,general
+a_num_pw,average_number_of_numerals_per_word,derivation,syntax,avgpartofspeech,general
+a_part_pw,average_number_of_particles_per_word,derivation,syntax,avgpartofspeech,general
+a_pron_pw,average_number_of_pronouns_per_word,derivation,syntax,avgpartofspeech,general
+a_propn_pw,average_number_of_proper_nouns_per_word,derivation,syntax,avgpartofspeech,general
+a_punct_pw,average_number_of_punctuations_per_word,derivation,syntax,avgpartofspeech,general
+a_sconj_pw,average_number_of_subordinating_conjunctions_per_word,derivation,syntax,avgpartofspeech,general
+a_sym_pw,average_number_of_symbols_per_word,derivation,syntax,avgpartofspeech,general
+a_verb_pw,average_number_of_verbs_per_word,derivation,syntax,avgpartofspeech,general
+a_space_pw,average_number_of_spaces_per_word,derivation,syntax,avgpartofspeech,general
+a_adj_ps,average_number_of_adjectives_per_sentence,derivation,syntax,avgpartofspeech,general
+a_adp_ps,average_number_of_adpositions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_adv_ps,average_number_of_adverbs_per_sentence,derivation,syntax,avgpartofspeech,general
+a_aux_ps,average_number_of_auxiliaries_per_sentence,derivation,syntax,avgpartofspeech,general
+a_cconj_ps,average_number_of_coordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_det_ps,average_number_of_determiners_per_sentence,derivation,syntax,avgpartofspeech,general
+a_intj_ps,average_number_of_interjections_per_sentence,derivation,syntax,avgpartofspeech,general
+a_noun_ps,average_number_of_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_num_ps,average_number_of_numerals_per_sentence,derivation,syntax,avgpartofspeech,general
+a_part_ps,average_number_of_particles_per_sentence,derivation,syntax,avgpartofspeech,general
+a_pron_ps,average_number_of_pronouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_propn_ps,average_number_of_proper_nouns_per_sentence,derivation,syntax,avgpartofspeech,general
+a_punct_ps,average_number_of_punctuations_per_sentence,derivation,syntax,avgpartofspeech,general
+a_sconj_ps,average_number_of_subordinating_conjunctions_per_sentence,derivation,syntax,avgpartofspeech,general
+a_sym_ps,average_number_of_symbols_per_sentence,derivation,syntax,avgpartofspeech,general
+a_verb_ps,average_number_of_verbs_per_sentence,derivation,syntax,avgpartofspeech,general
+a_space_ps,average_number_of_spaces_per_sentence,derivation,syntax,avgpartofspeech,general
+fkre,flesch_kincaid_reading_ease,derivation,surface,readformula,en
+fkgl,flesch_kincaid_grade_level,derivation,surface,readformula,en
+fogi,gunning_fog_index,derivation,surface,readformula,en
+smog,smog_index,derivation,surface,readformula,en
+cole,coleman_liau_index,derivation,surface,readformula,en
+auto,automated_readability_index,derivation,surface,readformula,en
+rt_fast,reading_time_for_fast_readers,derivation,surface,readtimeformula,en
+rt_average,reading_time_for_average_readers,derivation,surface,readtimeformula,en
+rt_slow,reading_time_for_slow_readers,derivation,surface,readtimeformula,en
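Note: the 221 lines above are the new lftk_ids.csv feature index, a header row plus 220 handcrafted linguistic features, each tagged with its formulation (foundation vs. derivation), domain, family, and language scope. As a minimal sketch of how such an index file could be consumed downstream (the pandas code below is illustrative and not part of this commit; only the file name and column names come from the diff):

import pandas as pd

# Load the feature index added by this commit (header + 220 feature rows).
ids = pd.read_csv('lftk_ids.csv')

# Keys in file order, e.g. to label 220-dimensional feature vectors.
feature_keys = ids['key'].tolist()

# The taxonomy columns support simple filtering, e.g. language-agnostic
# surface features only.
subset = ids[(ids['language'] == 'general') & (ids['domain'] == 'surface')]
print(len(ids), len(subset), feature_keys[:3])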
model.py
CHANGED
@@ -504,7 +504,7 @@ class EncoderDecoderVAE(nn.Module):
         dec_output, _ = self.infer_with_cache(batch)
         return dec_output
 
-    def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer
+    def infer_with_feedback_BP(self, ling_disc, sem_emb, batch, tokenizer):
         from torch.autograd import grad
         interpolations = []
         def line_search():
@@ -519,8 +519,6 @@
             new_loss, pred = get_loss(param_)
             max_len = pred.shape[1]
             lens = torch.where(pred == self.eos_token_id, 1, 0).argmax(-1) + 1
-            # if lens.item() == 1:
-            #     patience -= 1
             batch.update({
                 'sentence2_input_ids': pred,
                 'sentence2_attention_mask': sequence_mask(lens, max_len = max_len)
@@ -528,8 +526,6 @@
             sem_prob = torch.sigmoid(sem_emb(**batch)).item()
             # if sem_prob <= 0.1:
             #     patience -= 1
-            # f.write(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
-            # print(f'[{eta}], [{new_loss.item():.2f}], [{sem_prob:.2f}], {tokenizer.decode(pred[0])}\n')
             if new_loss < loss and sem_prob >= 0.90 and lens.item() > 1:
                 return param_
             eta *= 2.25
@@ -565,18 +561,11 @@
         elif self.args.feedback_param == 'logits':
             logits = self.infer_with_cache(batch)[1]['scores']
             param = torch.nn.Parameter(logits, requires_grad = True)
-        f = open(self.args.fb_log, 'a') if self.args.fb_log else None
         target_np = batch['sentence2_ling'][0].cpu().numpy()
         while True:
             loss, pred = get_loss(param)
             pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
                     skip_special_tokens=True)[0]
-            if f:
-                # from compute_lng import compute_lng
-                # lng_pred = scaler.transform(np.array([compute_lng(pred_text)])[:,used_indices])[0]
-                # real_loss = np.mean((lng_pred - target_np)**2)
-                # f.write(f'Loss: {loss.item():.2f}\tReal loss:{real_loss:.2f}\t{pred_text}\n')
-                f.write(f'*** [{loss.item():.2f}], {pred_text}\n')
             interpolations.append(pred_text)
             if loss < 1:
                 break
@@ -585,65 +574,8 @@
             param = line_search()
             if param is False:
                 break
-        if f:
-            f.write(f'[return] {pred_text}\n\n')
-            f.close()
         return pred, [pred_text, interpolations]
 
-    def infer_with_feedback(self, ling_disc, batch, tokenizer, scaler, approx=False):
-        interpolations = []
-        converged = False
-        c = 0
-        eta = 0.3
-        use_embed = True
-        if use_embed:
-            ling1_embed = self.ling_embed(batch['sentence1_ling'])
-            ling2_embed = self.ling_embed(batch['sentence2_ling'])
-            batch.update({
-                'sent1_ling_embed': ling1_embed,
-                'sent2_ling_embed': ling2_embed,
-            })
-        else:
-            ling2 = batch['sentence2_ling']
-        ling2_orig = batch['sentence2_ling'].clone()
-        while not converged:
-            with torch.no_grad():
-                pred = self.infer(batch)
-            inputs_pred = batch.copy()
-            inputs_pred.update({'input_ids': pred,
-                'attention_mask': torch.ones_like(pred)})
-            pred_text = tokenizer.batch_decode(pred.cpu().numpy(),
-                    skip_special_tokens=True)[0]
-            if approx:
-                ling_pred = ling_disc(**inputs_pred)
-            else:
-                ling_pred = compute_lng(pred_text)
-                ling_pred = scaler.transform([ling_pred])[0]
-                ling_pred = torch.tensor(ling_pred).to(pred.device).float()
-            if use_embed:
-                ling_pred_embed = self.ling_embed(ling_pred)
-            # diff = torch.mean((ling2_embed - ling_pred_embed)**2)
-            # else:
-            diff = torch.mean((ling2_orig - ling_pred)**2)
-
-
-            # print(f'Diff {diff.item():.3f}>> {tokenizer.batch_decode(pred.cpu().numpy(), skip_special_tokens=True)[0]}')
-            if diff < 1e-1 or c == 6:
-                converged = True
-            elif use_embed:
-                ling2_embed = ling2_embed + eta * (ling_pred_embed - ling2_embed)
-                batch.update({'sent2_ling_embed': ling2_embed})
-            else:
-                ling2 = ling2 + eta * (ling_pred - ling2)
-                batch.update({'sentence2_ling': ling2})
-
-            c += 1
-
-            if len(interpolations) == 0 or pred_text != interpolations[-1]:
-                interpolations.append(pred_text)
-
-            return [pred_text, interpolations]
-
 def set_grad(module, state):
     if module is not None:
         for p in module.parameters():
@@ -694,3 +626,42 @@ class LingDiscPipeline():
         with torch.no_grad():
             ling_pred = self.model(input_ids=inputs.input_ids.cuda())
         return ling_pred
+
+def get_model(args, tokenizer, device):
+    if args.pretrain_disc or args.disc_loss or args.disc_ckpt:
+        ling_disc = LingDisc(args.model_name, args.disc_type, args.disc_ckpt).to(device)
+    else:
+        ling_disc = None
+    if args.linggen_type != 'none':
+        ling_gen = LingGenerator(args).to(device)
+    if args.sem_loss or args.sem_ckpt:
+        if args.sem_loss_type == 'shared':
+            sem_emb = seld.backbone.encoder
+        elif args.sem_loss_type == 'dedicated':
+            sem_emb = SemEmb(T5EncoderModel.from_pretrained('google/flan-t5-base'), tokenizer.eos_token_id).to(device)
+        else:
+            raise NotImplementedError('Semantic loss type')
+    else:
+        sem_emb = None
+
+    if not args.pretrain_disc:
+        model = EncoderDecoderVAE(args, tokenizer.pad_token_id, tokenizer.eos_token_id).to(device)
+        if args.use_lora:
+            target_modules = ["Attention.k", "Attention.q", "Attention.v", "Attention.o", "lm_head", "wi_0", "wi_1", "wo"]
+            target_modules = '|'.join(f'(.*{module})' for module in target_modules)
+            target_modules = f'backbone.({target_modules})'
+            config = LoraConfig(
+                r=args.lora_r,
+                lora_alpha=args.lora_r * 2,
+                target_modules=target_modules,
+                lora_dropout=0.1,
+                bias="lora_only",
+                modules_to_save=['ling_embed'],
+            )
+            model = get_peft_model(model, config)
+            model.print_trainable_parameters()
+    else:
+        model = ling_disc
+
+    return model, ling_disc, sem_emb
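Note: the surviving infer_with_feedback_BP path above treats the linguistic embedding (or the decoder logits) as a differentiable parameter. Its line search grows the step size eta by a factor of 2.25 until the loss drops while the semantic-probability gate stays at or above 0.90, and signals failure by returning False. A minimal self-contained sketch of that pattern (get_loss and accept are stand-ins for the model-specific pieces; nothing below is verbatim from model.py):

import torch

def line_search_step(param, get_loss, accept, eta0=1e-3, growth=2.25, max_tries=8):
    # Gradient of the current loss with respect to the feedback parameter.
    loss = get_loss(param)
    (grad,) = torch.autograd.grad(loss, param)
    eta = eta0
    for _ in range(max_tries):
        candidate = (param - eta * grad).detach().requires_grad_(True)
        # Accept only if the loss drops AND the auxiliary gate passes,
        # mirroring `new_loss < loss and sem_prob >= 0.90` above.
        if get_loss(candidate) < loss and accept(candidate):
            return candidate
        eta *= growth  # same geometric growth as `eta *= 2.25`
    return False  # mirrors the `param is False` failure check

# Toy usage: pull a 2-d point toward the origin.
p = torch.nn.Parameter(torch.tensor([3.0, -2.0]))
new_p = line_search_step(p, lambda q: (q ** 2).sum(), lambda q: bool(torch.isfinite(q).all()))
print(new_p)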
options.py
CHANGED
@@ -1,9 +1,10 @@
+import os, json
 import argparse
+import numpy as np
 from datetime import datetime
-from const import
-import os, json
+from const import lftkplus_names
 from copy import deepcopy
-
+
 
 def parse_args(ckpt=None):
     parser = argparse.ArgumentParser()
@@ -97,8 +98,6 @@ def parse_args(ckpt=None):
 
     major_arg = args.major_arg
     to_restore = [
-        'total_steps','major_arg','gpu','demo', 'eval_only', 'save_predict', 'predict_fn', 'fudge', 'predict_with_feedback',
-        'feedback_param', 'fb_log', 'data_dir', 'data', 'disc_ckpt', 'disc_type', 'sem_ckpt', 'fudge_lambda', 'test_batch_size', 'src_lng'
     ] + args.to_restore
     to_restore = {k: args.__dict__[k] for k in to_restore}
 
@@ -130,7 +129,7 @@
     args.__dict__.update(to_restore)
     args.ckpt = ckpt
 
-    lng_names =
+    lng_names = lftkplus_names
     for i in range(len(args_list)):
        if args_list[i].lng_ids or args_list[i].lng_ids_idx:
            if args_list[i].lng_ids_idx:
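Note: the to_restore machinery that this commit trims works by snapshotting runtime-only CLI options, overlaying the checkpoint's saved arguments, and then writing the snapshot back so live flags win; with the hard-coded whitelist deleted, only args.to_restore controls what survives. A minimal sketch of that overlay pattern (hypothetical names; only the snapshot/update idiom mirrors parse_args above):

import argparse

def merge_ckpt_args(args, ckpt_args, to_restore):
    # Snapshot runtime-only options before they are overwritten.
    preserved = {k: args.__dict__[k] for k in to_restore}
    args.__dict__.update(ckpt_args)   # adopt training-time settings
    args.__dict__.update(preserved)   # runtime flags win
    return args

cli = argparse.Namespace(gpu=1, lr=None)
saved = {'lr': 3e-4, 'gpu': 0}
print(merge_ckpt_args(cli, saved, to_restore=['gpu']))  # Namespace(gpu=1, lr=0.0003)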
|