import contextlib
import json
import logging
import os
import random
import sys
import tempfile
import unittest
from io import StringIO
from typing import Dict, List

import torch

from fairseq import options
from fairseq_cli import eval_lm, train
from tests.utils import (
    create_dummy_data,
    create_laser_data_and_config_json,
    generate_main,
    preprocess_lm_data,
    preprocess_summarization_data,
    preprocess_translation_data,
    train_language_model,
    train_translation_model,
)
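

# `transformers` is an optional dependency: when it is not installed, the
# Transformer-XL test below is skipped via the `has_hf_transformers` flag.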
try:
    import transformers

    has_hf_transformers = True
except ImportError:
    has_hf_transformers = False


class TestTranslation(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_fconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(data_dir, "fconv_iwslt_de_en")
                generate_main(data_dir)

    def test_raw(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_raw") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--dataset-impl", "raw"])
                train_translation_model(
                    data_dir, "fconv_iwslt_de_en", ["--dataset-impl", "raw"]
                )
                generate_main(data_dir, ["--dataset-impl", "raw"])

    def test_update_freq(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_update_freq") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir, "fconv_iwslt_de_en", ["--update-freq", "3"]
                )
                generate_main(data_dir)
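
    # The first training run should fail because the dummy data contains
    # examples longer than --max-target-positions; the second run (and the
    # second generate call) must succeed once over-long inputs are skipped.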
    def test_max_positions(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_max_positions") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                with self.assertRaises(Exception) as context:
                    train_translation_model(
                        data_dir,
                        "fconv_iwslt_de_en",
                        ["--max-target-positions", "5"],
                    )
                self.assertTrue(
                    "skip this example with --skip-invalid-size-inputs-valid-test"
                    in str(context.exception)
                )
                train_translation_model(
                    data_dir,
                    "fconv_iwslt_de_en",
                    [
                        "--max-target-positions",
                        "5",
                        "--skip-invalid-size-inputs-valid-test",
                    ],
                )
                with self.assertRaises(Exception) as context:
                    generate_main(data_dir)
                generate_main(data_dir, ["--skip-invalid-size-inputs-valid-test"])

    def test_generation(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_sampling") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(data_dir, "fconv_iwslt_de_en")
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--temperature",
                        "2",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--sampling-topk",
                        "3",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--sampling-topp",
                        "0.2",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--diversity-rate",
                        "0.5",
                        "--beam",
                        "6",
                    ],
                )
                with self.assertRaises(ValueError):
                    generate_main(
                        data_dir,
                        [
                            "--diverse-beam-groups",
                            "4",
                            "--match-source-len",
                        ],
                    )
                generate_main(data_dir, ["--prefix-size", "2"])
                generate_main(data_dir, ["--retain-dropout"])

    def test_eval_bleu(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_eval_bleu") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "fconv_iwslt_de_en",
                    [
                        "--eval-bleu",
                        "--eval-bleu-print-samples",
                        "--eval-bleu-remove-bpe",
                        "--eval-bleu-detok",
                        "space",
                        "--eval-bleu-args",
                        '{"beam": 4, "min_len": 10}',
                    ],
                )

    def test_lstm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lstm_wiseman_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--decoder-out-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)

    def test_lstm_bidirectional(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_bidirectional") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lstm",
                    [
                        "--encoder-layers",
                        "2",
                        "--encoder-bidirectional",
                        "--encoder-hidden-size",
                        "16",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--decoder-out-embed-dim",
                        "8",
                        "--decoder-layers",
                        "2",
                    ],
                )
                generate_main(data_dir)

    def test_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)
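
    # The multilingual tests sweep every combination of --encoder-langtok
    # ({none, src, tgt}) and --decoder-langtok, training and decoding once
    # per combination on the in/out dummy language pairs.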
    def test_multilingual_transformer(self):
        encoder_langtok_flags = [
            [],
            ["--encoder-langtok", "src"],
            ["--encoder-langtok", "tgt"],
        ]
        decoder_langtok_flags = [[], ["--decoder-langtok"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_langtok_flags)):
                for j in range(len(decoder_langtok_flags)):
                    enc_ltok_flag = encoder_langtok_flags[i]
                    dec_ltok_flag = decoder_langtok_flags[j]
                    with tempfile.TemporaryDirectory(
                        f"test_multilingual_transformer_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(data_dir)
                        train_translation_model(
                            data_dir,
                            arch="multilingual_transformer",
                            task="multilingual_translation",
                            extra_flags=[
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--task",
                                "multilingual_translation",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                        )
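
    # Latent-depth models live under examples/ rather than in the core
    # library, so training, validation, and generation all pass --user-dir
    # to load them.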
    @unittest.skipIf(
        sys.platform.lower() == "darwin", "skip latent depth test on MacOS"
    )
    def test_multilingual_translation_latent_depth(self):
        encoder_latent_layer = [[], ["--encoder-latent-layer"]]
        decoder_latent_layer = [[], ["--decoder-latent-layer"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_latent_layer)):
                for j in range(len(decoder_latent_layer)):
                    if i == 0 and j == 0:
                        continue
                    enc_ll_flag = encoder_latent_layer[i]
                    dec_ll_flag = decoder_latent_layer[j]
                    with tempfile.TemporaryDirectory(
                        f"test_multilingual_translation_latent_depth_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(
                            data_dir, extra_flags=["--joined-dictionary"]
                        )
                        train_translation_model(
                            data_dir,
                            arch="latent_multilingual_transformer",
                            task="multilingual_translation_latent_depth",
                            extra_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                                "--share-encoders",
                                "--share-decoders",
                                "--sparsity-weight",
                                "0.1",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                                "--task",
                                "multilingual_translation_latent_depth",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                        )

    def test_translation_multi_simple_epoch(self):
        encoder_langtok_flags = [
            [],
            ["--encoder-langtok", "src"],
            ["--encoder-langtok", "tgt"],
        ]
        decoder_langtok_flags = [[], ["--decoder-langtok"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_langtok_flags)):
                for j in range(len(decoder_langtok_flags)):
                    enc_ltok_flag = encoder_langtok_flags[i]
                    dec_ltok_flag = decoder_langtok_flags[j]
                    with tempfile.TemporaryDirectory(
                        f"test_translation_multi_simple_epoch_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(
                            data_dir, extra_flags=["--joined-dictionary"]
                        )
                        train_translation_model(
                            data_dir,
                            arch="transformer",
                            task="translation_multi_simple_epoch",
                            extra_flags=[
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                                "--sampling-method",
                                "temperature",
                                "--sampling-temperature",
                                "1.5",
                                "--virtual-epoch-size",
                                "1000",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--task",
                                "translation_multi_simple_epoch",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                        )
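
    # Same task as above, but with a single language pair and without
    # --virtual-epoch-size, exercising the non-virtual-epoch code path.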
    def test_translation_multi_simple_epoch_no_vepoch(self):
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_dict"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_translation_multi_simple_epoch_dicts(self):
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_dict"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                        "--virtual-epoch-size",
                        "1000",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_translation_multi_simple_epoch_src_tgt_dict_spec(self):
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_dict"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--source-dict",
                        f"{data_dir}/dict.in.txt",
                        "--target-dict",
                        f"{data_dir}/dict.out.txt",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                        "--virtual-epoch-size",
                        "1000",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_transformer_cross_self_attention(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_cross_self_attention"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--no-cross-attention",
                        "--cross-self-attention",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir, extra_flags=[])

    def test_transformer_pointer_generator(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_pointer_generator"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_summarization_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_pointer_generator",
                    extra_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--alignment-layer",
                        "-1",
                        "--alignment-heads",
                        "1",
                        "--source-position-markers",
                        "0",
                    ],
                    run_validation=True,
                    extra_valid_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                    ],
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                    ],
                )

    def test_lightconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lightconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lightconv_iwslt_de_en",
                    [
                        "--encoder-conv-type",
                        "lightweight",
                        "--decoder-conv-type",
                        "lightweight",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)

    def test_dynamicconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_dynamicconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lightconv_iwslt_de_en",
                    [
                        "--encoder-conv-type",
                        "dynamic",
                        "--decoder-conv-type",
                        "dynamic",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)
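
    # The next several tests cover non-autoregressive variants. They all use
    # the translation_lev task, preprocess with a joined dictionary, and
    # decode through the iterative-refinement generator
    # (--iter-decode-max-iter).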
    def test_cmlm_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_cmlm_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "cmlm_transformer",
                    [
                        "--apply-bert-init",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--pred-length-offset",
                        "--length-loss-factor",
                        "0.1",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_nonautoregressive_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_nonautoregressive_transformer"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "nonautoregressive_transformer",
                    [
                        "--apply-bert-init",
                        "--src-embedding-copy",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--pred-length-offset",
                        "--length-loss-factor",
                        "0.1",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "0",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_iterative_nonautoregressive_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_iterative_nonautoregressive_transformer"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "iterative_nonautoregressive_transformer",
                    [
                        "--apply-bert-init",
                        "--src-embedding-copy",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--stochastic-approx",
                        "--dae-ratio",
                        "0.5",
                        "--train-step",
                        "3",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_insertion_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_insertion_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "insertion_transformer",
                    [
                        "--apply-bert-init",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "random_mask",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_mixture_of_experts(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_moe") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--task",
                        "translation_moe",
                        "--user-dir",
                        "examples/translation_moe/translation_moe_src",
                        "--method",
                        "hMoElp",
                        "--mean-pool-gating-network",
                        "--num-experts",
                        "3",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_moe",
                        "--user-dir",
                        "examples/translation_moe/translation_moe_src",
                        "--method",
                        "hMoElp",
                        "--mean-pool-gating-network",
                        "--num-experts",
                        "3",
                        "--gen-expert",
                        "0",
                    ],
                )

    def test_alignment(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_alignment") as data_dir:
                create_dummy_data(data_dir, alignment=True)
                preprocess_translation_data(data_dir, ["--align-suffix", "align"])
                train_translation_model(
                    data_dir,
                    "transformer_align",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--load-alignments",
                        "--alignment-layer",
                        "1",
                        "--criterion",
                        "label_smoothed_cross_entropy_with_alignment",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)
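
    # The LASER tests read their data through a JSON config file rather than
    # a plain data directory, hence the empty lang_flags and the config path
    # passed in place of data_dir.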
    def test_laser_lstm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_laser_lstm") as data_dir:
                laser_config_file = create_laser_data_and_config_json(data_dir)
                train_translation_model(
                    laser_config_file.name,
                    "laser_lstm",
                    [
                        "--user-dir",
                        "examples/laser/laser_src",
                        "--weighting-alpha",
                        "0.3",
                        "--encoder-bidirectional",
                        "--encoder-hidden-size",
                        "512",
                        "--encoder-layers",
                        "5",
                        "--decoder-layers",
                        "1",
                        "--encoder-embed-dim",
                        "320",
                        "--decoder-embed-dim",
                        "320",
                        "--decoder-lang-embed-dim",
                        "32",
                        "--save-dir",
                        data_dir,
                        "--disable-validation",
                    ],
                    task="laser",
                    lang_flags=[],
                )

    def test_laser_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_laser_transformer") as data_dir:
                laser_config_file = create_laser_data_and_config_json(data_dir)
                train_translation_model(
                    laser_config_file.name,
                    "laser_transformer",
                    [
                        "--user-dir",
                        "examples/laser/laser_src",
                        "--weighting-alpha",
                        "0.3",
                        "--encoder-embed-dim",
                        "320",
                        "--decoder-embed-dim",
                        "320",
                        "--decoder-lang-embed-dim",
                        "32",
                        "--save-dir",
                        data_dir,
                        "--disable-validation",
                    ],
                    task="laser",
                    lang_flags=[],
                )

    def test_alignment_full_context(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_alignment") as data_dir:
                create_dummy_data(data_dir, alignment=True)
                preprocess_translation_data(data_dir, ["--align-suffix", "align"])
                train_translation_model(
                    data_dir,
                    "transformer_align",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--load-alignments",
                        "--alignment-layer",
                        "1",
                        "--criterion",
                        "label_smoothed_cross_entropy_with_alignment",
                        "--full-context-alignment",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)

    def test_transformer_layerdrop(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_layerdrop") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "3",
                        "--decoder-layers",
                        "3",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--encoder-layerdrop",
                        "0.01",
                        "--decoder-layerdrop",
                        "0.01",
                    ],
                )
                generate_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--model-overrides",
                        "{'encoder_layers_to_keep':'0,2','decoder_layers_to_keep':'1'}",
                    ],
                )


class TestStories(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)
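
    # Two-stage test: train a self-attentive fconv model, rename its last
    # checkpoint to pretrained.pt, then train a fusion model on top of it.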
    def test_fconv_self_att_wp(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_self_att_wp") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                config = [
                    "--encoder-layers",
                    "[(128, 3)] * 2",
                    "--decoder-layers",
                    "[(128, 3)] * 2",
                    "--decoder-attention",
                    "True",
                    "--encoder-attention",
                    "False",
                    "--gated-attention",
                    "True",
                    "--self-attention",
                    "True",
                    "--project-input",
                    "True",
                    "--encoder-embed-dim",
                    "8",
                    "--decoder-embed-dim",
                    "8",
                    "--decoder-out-embed-dim",
                    "8",
                    "--multihead-self-attention-nheads",
                    "2",
                ]
                train_translation_model(data_dir, "fconv_self_att_wp", config)
                generate_main(data_dir)

                # reuse the trained checkpoint as the pretrained model and
                # train a fusion model on top of it
                os.rename(
                    os.path.join(data_dir, "checkpoint_last.pt"),
                    os.path.join(data_dir, "pretrained.pt"),
                )
                config.extend(
                    [
                        "--pretrained",
                        "True",
                        "--pretrained-checkpoint",
                        os.path.join(data_dir, "pretrained.pt"),
                        "--save-dir",
                        os.path.join(data_dir, "fusion_model"),
                    ]
                )
                train_translation_model(data_dir, "fconv_self_att_wp", config)


class TestLanguageModeling(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_fconv_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "fconv_lm",
                    [
                        "--decoder-layers",
                        "[(850, 3)] * 2 + [(1024,4)]",
                        "--decoder-embed-dim",
                        "280",
                        "--optimizer",
                        "nag",
                        "--lr",
                        "0.1",
                    ],
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_transformer_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "transformer_lm",
                    ["--add-bos-token", "--nval", "1"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                eval_lm_main(data_dir, extra_flags=["--context-window", "25"])
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_transformer_lm_with_adaptive_softmax(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_lm_with_adaptive_softmax"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "transformer_lm",
                    [
                        "--add-bos-token",
                        "--criterion",
                        "adaptive_loss",
                        "--adaptive-softmax-cutoff",
                        "5,10,15",
                    ],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lightconv_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lightconv_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lightconv_lm",
                    ["--add-bos-token"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lstm_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lstm_lm",
                    ["--add-bos-token"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lstm_lm_residuals(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_lm_residuals") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lstm_lm",
                    ["--add-bos-token", "--residuals"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    @unittest.skipIf(not has_hf_transformers, "skip test if transformers is missing")
    def test_transformer_xl_bptt_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_xl_bptt_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                task_flags = [
                    "--user-dir",
                    "examples/truncated_bptt",
                    "--task",
                    "truncated_bptt_lm",
                    "--batch-size",
                    "2",
                    "--tokens-per-sample",
                    "50",
                ]
                train_language_model(
                    data_dir=data_dir,
                    arch="transformer_xl",
                    extra_flags=task_flags
                    + [
                        "--n-layer",
                        "2",
                    ],
                    task="truncated_bptt_lm",
                    run_validation=True,
                    extra_valid_flags=task_flags,
                )
                eval_lm_main(data_dir, extra_flags=task_flags)

                # train again, this time with activation offloading enabled
                train_language_model(
                    data_dir=data_dir,
                    arch="transformer_xl",
                    extra_flags=task_flags
                    + [
                        "--n-layer",
                        "2",
                        "--offload-activations",
                    ],
                    task="truncated_bptt_lm",
                    run_validation=True,
                    extra_valid_flags=task_flags,
                )


class TestMaskedLanguageModel(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_legacy_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_legacy_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_legacy_masked_language_model(data_dir, "masked_lm")

    def test_roberta_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_roberta_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_masked_lm(
                    data_dir, "roberta_base", extra_flags=["--encoder-layers", "2"]
                )

    def test_roberta_sentence_prediction(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(data_dir, "roberta_base", num_classes=num_classes)

    def test_roberta_regression_single(self):
        num_classes = 1
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_roberta_regression_single"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--regression-target"],
                )

    def test_roberta_regression_multiple(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_roberta_regression_multiple"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--regression-target"],
                )

    def test_linformer_roberta_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_linformer_roberta_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_masked_lm(
                    data_dir,
                    "linformer_roberta_base",
                    extra_flags=[
                        "--user-dir",
                        "examples/linformer/linformer_src",
                        "--encoder-layers",
                        "2",
                    ],
                )

    def test_linformer_roberta_sentence_prediction(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_linformer_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--user-dir", "examples/linformer/linformer_src"],
                )

    def test_linformer_roberta_regression_single(self):
        num_classes = 1
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_linformer_roberta_regression_single"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--regression-target",
                        "--user-dir",
                        "examples/linformer/linformer_src",
                    ],
                )

    def test_linformer_roberta_regression_multiple(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_linformer_roberta_regression_multiple"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--regression-target",
                        "--user-dir",
                        "examples/linformer/linformer_src",
                    ],
                )

    def _test_pretrained_masked_lm_for_translation(self, learned_pos_emb, encoder_only):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_legacy_masked_language_model(
                    data_dir,
                    arch="masked_lm",
                    extra_args=("--encoder-learned-pos",) if learned_pos_emb else (),
                )
                with tempfile.TemporaryDirectory(
                    "test_mlm_translation"
                ) as translation_dir:
                    create_dummy_data(translation_dir)
                    preprocess_translation_data(
                        translation_dir, extra_flags=["--joined-dictionary"]
                    )

                    # initialize a translation model from the masked LM
                    # checkpoint trained above
                    train_translation_model(
                        translation_dir,
                        arch="transformer_from_pretrained_xlm",
                        extra_flags=[
                            "--decoder-layers",
                            "1",
                            "--decoder-embed-dim",
                            "32",
                            "--decoder-attention-heads",
                            "1",
                            "--decoder-ffn-embed-dim",
                            "32",
                            "--encoder-layers",
                            "1",
                            "--encoder-embed-dim",
                            "32",
                            "--encoder-attention-heads",
                            "1",
                            "--encoder-ffn-embed-dim",
                            "32",
                            "--pretrained-xlm-checkpoint",
                            "{}/checkpoint_last.pt".format(data_dir),
                            "--activation-fn",
                            "gelu",
                            "--max-source-positions",
                            "500",
                            "--max-target-positions",
                            "500",
                        ]
                        + (
                            ["--encoder-learned-pos", "--decoder-learned-pos"]
                            if learned_pos_emb
                            else []
                        )
                        + (["--init-encoder-only"] if encoder_only else []),
                        task="translation_from_pretrained_xlm",
                    )

    def test_pretrained_masked_lm_for_translation_learned_pos_emb(self):
        self._test_pretrained_masked_lm_for_translation(True, False)

    def test_pretrained_masked_lm_for_translation_sinusoidal_pos_emb(self):
        self._test_pretrained_masked_lm_for_translation(False, False)

    def test_pretrained_masked_lm_for_translation_encoder_only(self):
        self._test_pretrained_masked_lm_for_translation(True, True)

    def test_r4f_roberta(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_r4f_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--user-dir",
                        "examples/rxf/rxf_src",
                        "--criterion",
                        "sentence_prediction_r3f",
                        "--spectral-norm-classification-head",
                    ],
                )
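

# Helper that builds an argv-style argument list for the legacy
# cross_lingual_lm task and invokes the train.main CLI entry point directly.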
def train_legacy_masked_language_model(data_dir, arch, extra_args=()):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "cross_lingual_lm",
            data_dir,
            "--arch",
            arch,
            # optimizer and learning-rate schedule
            "--optimizer",
            "adam",
            "--lr-scheduler",
            "reduce_lr_on_plateau",
            "--lr-shrink",
            "0.5",
            "--lr",
            "0.0001",
            "--stop-min-lr",
            "1e-09",
            # dropout
            "--dropout",
            "0.1",
            "--attention-dropout",
            "0.1",
            # masked LM task settings
            "--criterion",
            "legacy_masked_lm_loss",
            "--masked-lm-only",
            "--monolingual-langs",
            "in,out",
            "--num-segment",
            "5",
            # tiny model
            "--encoder-layers",
            "1",
            "--encoder-embed-dim",
            "32",
            "--encoder-attention-heads",
            "1",
            "--encoder-ffn-embed-dim",
            "32",
            # training settings
            "--max-tokens",
            "500",
            "--tokens-per-sample",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--dataset-impl",
            "raw",
            "--num-workers",
            "0",
        ]
        + list(extra_args),
    )
    train.main(train_args)


class TestOptimizers(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_optimizers(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_optimizers") as data_dir:
                # keep the data and model tiny so the optimizer sweep stays fast
                create_dummy_data(data_dir, num_examples=10, maxlen=5)
                preprocess_translation_data(data_dir)
                optimizers = ["adafactor", "adam", "nag", "adagrad", "sgd", "adadelta"]
                last_checkpoint = os.path.join(data_dir, "checkpoint_last.pt")
                for optimizer in optimizers:
                    # remove the previous checkpoint so each optimizer trains
                    # from scratch instead of resuming
                    if os.path.exists(last_checkpoint):
                        os.remove(last_checkpoint)
                    train_translation_model(
                        data_dir,
                        "lstm",
                        [
                            "--required-batch-size-multiple",
                            "1",
                            "--encoder-layers",
                            "1",
                            "--encoder-hidden-size",
                            "32",
                            "--decoder-layers",
                            "1",
                            "--optimizer",
                            optimizer,
                        ],
                    )
                generate_main(data_dir)


def read_last_log_entry(
    logs: List[logging.LogRecord], logger_name: str
) -> Dict[str, float]:
    for x in reversed(logs):
        if x.name == logger_name:
            return json.loads(x.message)
    raise ValueError(f"No entries from {logger_name} found in captured logs")
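

# Checks that --checkpoint-activations and --offload-activations leave the
# training and validation losses unchanged relative to a baseline run.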
class TestActivationCheckpointing(unittest.TestCase):
    base_flags = [
        "--encoder-layers",
        "2",
        "--decoder-layers",
        "2",
        "--encoder-embed-dim",
        "8",
        "--decoder-embed-dim",
        "8",
        # --restore-file points at a checkpoint that never exists, presumably
        # so that every _train() call starts from scratch instead of resuming
        # checkpoint_last.pt from a previous run
        "--restore-file",
        "x.pt",
        "--log-format",
        "json",
        "--log-interval",
        "1",
        "--max-update",
        "2",
    ]

    def _train(self, data_dir, extra_flags):
        with self.assertLogs() as logs:
            train_translation_model(
                data_dir,
                "transformer_iwslt_de_en",
                self.base_flags + extra_flags,
                run_validation=True,
                extra_valid_flags=["--log-format", "json"],
            )
        return logs.records

    def test_activation_offloading_does_not_change_metrics(self):
        """Neither --checkpoint-activations nor --offload-activations should change loss"""
        with tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir:
            with self.assertLogs():
                create_dummy_data(data_dir, num_examples=20)
                preprocess_translation_data(data_dir)
            offload_logs = self._train(data_dir, ["--offload-activations"])
            baseline_logs = self._train(data_dir, [])

            assert len(baseline_logs) == len(offload_logs)

            baseline_valid_stats = read_last_log_entry(baseline_logs, "valid")
            offload_valid_stats = read_last_log_entry(offload_logs, "valid")
            baseline_train_stats = read_last_log_entry(baseline_logs, "train")
            offload_train_stats = read_last_log_entry(offload_logs, "train")

            assert (
                baseline_train_stats["train_loss"] == offload_train_stats["train_loss"]
            )
            assert (
                baseline_valid_stats["valid_loss"] == offload_valid_stats["valid_loss"]
            )

    def test_activation_checkpointing_does_not_change_metrics(self):
        """--checkpoint-activations should not change loss"""
        with tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir:
            with self.assertLogs():
                create_dummy_data(data_dir, num_examples=20)
                preprocess_translation_data(data_dir)
            ckpt_logs = self._train(data_dir, ["--checkpoint-activations"])
            baseline_logs = self._train(data_dir, [])
            assert len(baseline_logs) == len(ckpt_logs)

            baseline_train_stats = read_last_log_entry(baseline_logs, "train")
            ckpt_train_stats = read_last_log_entry(ckpt_logs, "train")
            assert baseline_train_stats["train_loss"] == ckpt_train_stats["train_loss"]

            baseline_valid_stats = read_last_log_entry(baseline_logs, "valid")
            ckpt_valid_stats = read_last_log_entry(ckpt_logs, "valid")
            assert baseline_valid_stats["valid_loss"] == ckpt_valid_stats["valid_loss"]
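

# Builds a tiny sentence-prediction dataset: token sequences of lowercase
# letters under input0/, with per-example labels (or regression targets)
# under label/.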
def create_dummy_roberta_head_data(
    data_dir, num_examples=100, maxlen=10, num_classes=2, regression=False
):
    input_dir = "input0"

    def _create_dummy_data(filename):
        random_data = torch.rand(num_examples * maxlen)
        # map random floats to ASCII codes 97-122, i.e. lowercase a-z tokens
        input_data = 97 + torch.floor(26 * random_data).int()
        if regression:
            output_data = torch.rand((num_examples, num_classes))
        else:
            output_data = 1 + torch.floor(num_classes * torch.rand(num_examples)).int()
        with open(os.path.join(data_dir, input_dir, filename + ".out"), "w") as f_in:
            label_filename = filename + ".label" if regression else filename + ".out"
            with open(os.path.join(data_dir, "label", label_filename), "w") as f_out:
                offset = 0
                for i in range(num_examples):
                    # write one example of random length
                    ex_len = random.randint(1, maxlen)
                    ex_str = " ".join(map(chr, input_data[offset : offset + ex_len]))
                    print(ex_str, file=f_in)

                    if regression:
                        class_str = " ".join(map(str, output_data[i].numpy()))
                        print(class_str, file=f_out)
                    else:
                        class_str = "class{}".format(output_data[i])
                        print(class_str, file=f_out)
                    offset += ex_len

    os.mkdir(os.path.join(data_dir, input_dir))
    os.mkdir(os.path.join(data_dir, "label"))
    _create_dummy_data("train")
    _create_dummy_data("valid")
    _create_dummy_data("test")


def train_masked_lm(data_dir, arch, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "masked_lm",
            data_dir,
            "--arch",
            arch,
            "--optimizer",
            "adam",
            "--lr",
            "0.0001",
            "--criterion",
            "masked_lm",
            "--batch-size",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--ddp-backend",
            "no_c10d",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    train.main(train_args)


def train_roberta_head(data_dir, arch, num_classes=2, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "sentence_prediction",
            data_dir,
            "--arch",
            arch,
            "--encoder-layers",
            "2",
            "--num-classes",
            str(num_classes),
            "--optimizer",
            "adam",
            "--lr",
            "0.0001",
            "--criterion",
            "sentence_prediction",
            "--max-tokens",
            "500",
            "--max-positions",
            "500",
            "--batch-size",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--ddp-backend",
            "no_c10d",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    train.main(train_args)


def eval_lm_main(data_dir, extra_flags=None):
    eval_lm_parser = options.get_eval_lm_parser()
    eval_lm_args = options.parse_args_and_arch(
        eval_lm_parser,
        [
            data_dir,
            "--path",
            os.path.join(data_dir, "checkpoint_last.pt"),
            "--no-progress-bar",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    eval_lm.main(eval_lm_args)


if __name__ == "__main__":
    unittest.main()