optimus:
  symbol: optimus
  find_unused_parameters: false
  args: {}

optimus_bert_encoder:
  super_cfg: optimus
  type: optimus_bert_connector
  # pth: pretrained/optimus_bert_encoder.pth
  args:
    config:
      architectures:
        - BertForMaskedLM
      attention_probs_dropout_prob: 0.1
      finetuning_task: null
      hidden_act: gelu
      hidden_dropout_prob: 0.1
      hidden_size: 768
      initializer_range: 0.02
      intermediate_size: 3072
      layer_norm_eps: 1.e-12
      max_position_embeddings: 512
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 2
      output_attentions: false
      output_hidden_states: false
      pruned_heads: {}
      torchscript: false
      type_vocab_size: 2
      vocab_size: 28996
    latent_size: 768

optimus_bert_tokenizer:
  super_cfg: optimus
  type: optimus_bert_tokenizer
  args:
    do_lower_case: false
    max_len: 512
    vocab_file: configs/vocab/bert-base-cased-vocab.txt

optimus_gpt2_decoder:
  super_cfg: optimus
  type: optimus_gpt2_connector
  # pth: pretrained/optimus_gpt2_decoder.pth
  args:
    config:
      architectures:
        - GPT2LMHeadModel
      attn_pdrop: 0.1
      embd_pdrop: 0.1
      finetuning_task: null
      hidden_size: 768
      initializer_range: 0.02
      latent_size: 768
      layer_norm_epsilon: 1.e-05
      max_position_embeddings: 1024
      n_ctx: 1024
      n_embd: 768
      n_head: 12
      n_layer: 12
      n_positions: 1024
      num_attention_heads: 12
      num_hidden_layers: 12
      num_labels: 1
      output_attentions: false
      output_hidden_states: false
      pretrained_config_archive_map:
        gpt2: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json
        gpt2-medium: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json
        gpt2-large: https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json
      pruned_heads: {}
      resid_pdrop: 0.1
      summary_activation: null
      summary_first_dropout: 0.1
      summary_proj_to_labels: true
      summary_type: cls_index
      summary_use_proj: true
      torchscript: false
      vocab_size: 50260

optimus_gpt2_tokenizer:
  super_cfg: optimus
  type: optimus_gpt2_tokenizer
  args:
    do_lower_case: false
    max_len: 1024
    vocab_file: configs/vocab/gpt2-vocab.json
    merges_file: configs/vocab/gpt2-merges.txt

optimus_vae:
  super_cfg: optimus
  type: optimus_vae
  pth: pretrained/optimus-vae.pth
  args:
    encoder: MODEL(optimus_bert_encoder)
    decoder: MODEL(optimus_gpt2_decoder)
    tokenizer_encoder: MODEL(optimus_bert_tokenizer)
    tokenizer_decoder: MODEL(optimus_gpt2_tokenizer)
    args:
      latent_size: 768
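
# NOTE: the resolution rules below are inferred from the structure of this
# file, not documented in it. Entries carrying `super_cfg: optimus` appear
# to inherit the fields of the top-level `optimus` block, and `MODEL(<name>)`
# appears to be a placeholder that the config loader expands by building the
# named sub-config, so `optimus_vae` composes the BERT encoder, GPT-2 decoder,
# and both tokenizers defined above. Under that assumption, a variant could
# be added by overriding only the fields that change, e.g. (hypothetical
# entry, names and path for illustration only):
#
# optimus_vae_finetuned:
#   super_cfg: optimus_vae
#   pth: pretrained/optimus-vae-finetuned.pth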