import argparse
import unittest
from typing import Any, Dict, Optional, Sequence

import torch

from fairseq.models import transformer
from tests.test_roberta import FakeTask


def mk_sample(tok: Optional[Sequence[int]] = None, batch_size: int = 2) -> Dict[str, Any]:
    if not tok:
        tok = [10, 11, 12, 13, 14, 15, 2]
    # Build a dummy batch by repeating the same token sequence batch_size times.
    batch = torch.stack([torch.tensor(tok, dtype=torch.long)] * batch_size)
    sample = {
        "net_input": {
            "src_tokens": batch,
            "prev_output_tokens": batch,
            "src_lengths": torch.tensor(
                [len(tok)] * batch_size, dtype=torch.long, device=batch.device
            ),
        },
        "target": batch[:, 1:],
    }
    return sample


def mk_transformer(**extra_args: Any):
    overrides = {
        # Use small, characteristic dimensions so shape mismatches are easy to spot.
        "encoder_embed_dim": 12,
        "encoder_ffn_embed_dim": 14,
        "decoder_embed_dim": 12,
        "decoder_ffn_embed_dim": 14,
        # Disable dropout so we have comparable tests.
        "dropout": 0,
        "attention_dropout": 0,
        "activation_dropout": 0,
        "encoder_layerdrop": 0,
    }
    overrides.update(extra_args)
    # Override the defaults from the parser.
    args = argparse.Namespace(**overrides)
    transformer.tiny_architecture(args)

    torch.manual_seed(0)
    task = FakeTask(args)
    return transformer.TransformerModel.build_model(args, task)


class TransformerTestCase(unittest.TestCase):
    def test_forward_backward(self):
        model = mk_transformer(encoder_embed_dim=12, decoder_embed_dim=12)
        sample = mk_sample()
        o, _ = model.forward(**sample["net_input"])
        loss = o.sum()
        loss.backward()

    def test_different_encoder_decoder_embed_dim(self):
        model = mk_transformer(encoder_embed_dim=12, decoder_embed_dim=16)
        sample = mk_sample()
        o, _ = model.forward(**sample["net_input"])
        loss = o.sum()
        loss.backward()
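

# Standard unittest entry point so the module can be run directly
# (e.g. `python tests/test_transformer.py`) as well as through pytest.
if __name__ == "__main__":
    unittest.main()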