|
from transformers import PretrainedConfig, AutoConfig |
|
|
|
|
|
class CLIPEncoderDecoderConfig(PretrainedConfig):
    """Configuration class for a CLIP-encoder / GPT-2-decoder model.

    Stores the decoder sub-configuration as ``self.decoder`` (a
    `PretrainedConfig` instance built via `AutoConfig.for_model`) and marks
    the composite model as an encoder-decoder architecture.
    """

    model_type = "clip-encoder-decoder"

    def __init__(self, decoder=None, **kwargs):
        """Initialize the composite configuration.

        Args:
            decoder (dict, optional): Keyword arguments used to build the
                decoder configuration through ``AutoConfig.for_model``.
                When ``None``, a default GPT-2 decoder configuration with
                cross-attention enabled is used.
            **kwargs: Forwarded to ``PretrainedConfig.__init__`` (extra keys,
                e.g. an ``encoder`` dict, are stored as plain attributes by
                the base class).
        """
        super().__init__(**kwargs)

        if decoder is None:
            # Build the default fresh on every call. The original code used
            # this dict literal as a *default argument*, i.e. one shared
            # mutable object across all instantiations — a classic Python
            # pitfall. Constructing it here restores per-call isolation
            # while keeping behavior identical for all existing callers.
            decoder = {
                '_name_or_path': '',
                'activation_function': 'gelu_new',
                'add_cross_attention': True,
                'architectures': ['GPT2LMHeadModel'],
                'attn_pdrop': 0.1,
                'bad_words_ids': None,
                'begin_suppress_tokens': None,
                'bos_token_id': 50256,
                'chunk_size_feed_forward': 0,
                'cross_attention_hidden_size': None,
                'decoder_start_token_id': None,
                'diversity_penalty': 0.0,
                'do_sample': False,
                'early_stopping': False,
                'embd_pdrop': 0.1,
                'encoder_no_repeat_ngram_size': 0,
                'eos_token_id': 50256,
                'exponential_decay_length_penalty': None,
                'finetuning_task': None,
                'forced_bos_token_id': None,
                'forced_eos_token_id': None,
                'id2label': {'0': 'LABEL_0', '1': 'LABEL_1'},
                'initializer_range': 0.02,
                'is_decoder': True,
                'is_encoder_decoder': False,
                'label2id': {'LABEL_0': 0, 'LABEL_1': 1},
                'layer_norm_epsilon': 1e-05,
                'length_penalty': 1.0,
                'max_length': 20,
                'min_length': 0,
                'model_type': 'gpt2',
                'n_ctx': 1024,
                'n_embd': 768,
                'n_head': 12,
                'n_inner': None,
                'n_layer': 12,
                'n_positions': 1024,
                'no_repeat_ngram_size': 0,
                'num_beam_groups': 1,
                'num_beams': 1,
                'num_return_sequences': 1,
                'output_attentions': False,
                'output_hidden_states': False,
                'output_scores': False,
                'pad_token_id': None,
                'prefix': None,
                'problem_type': None,
                'pruned_heads': {},
                'remove_invalid_values': False,
                'reorder_and_upcast_attn': False,
                'repetition_penalty': 1.0,
                'resid_pdrop': 0.1,
                'return_dict': True,
                'return_dict_in_generate': False,
                'scale_attn_by_inverse_layer_idx': False,
                'scale_attn_weights': True,
                'sep_token_id': None,
                'summary_activation': None,
                'summary_first_dropout': 0.1,
                'summary_proj_to_labels': True,
                'summary_type': 'cls_index',
                'summary_use_proj': True,
                'suppress_tokens': None,
                'task_specific_params': {'text-generation': {'do_sample': True,
                                                             'max_length': 50}},
                'temperature': 1.0,
                'tf_legacy_loss': False,
                'tie_encoder_decoder': False,
                'tie_word_embeddings': True,
                'tokenizer_class': None,
                'top_k': 50,
                'top_p': 1.0,
                'torch_dtype': None,
                'torchscript': False,
                'typical_p': 1.0,
                'use_bfloat16': False,
                'use_cache': True,
                'vocab_size': 50257,
            }

        # 'model_type' inside the dict selects the concrete config class.
        self.decoder = AutoConfig.for_model(**decoder)
        self.is_encoder_decoder = True

    @classmethod
    def from_encoder_decoder_configs(
        cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs
    ) -> PretrainedConfig:
        r"""
        Instantiate a [`CLIPEncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model
        configuration and decoder model configuration.

        Returns:
            [`CLIPEncoderDecoderConfig`]: An instance of a configuration object
        """
        # The decoder must attend over encoder states, so force decoder mode
        # and cross-attention regardless of how the config was created.
        decoder_config.is_decoder = True
        decoder_config.add_cross_attention = True

        # NOTE(review): `__init__` has no `encoder` parameter, so the
        # `encoder=` dict falls through **kwargs into PretrainedConfig and is
        # stored as a plain dict attribute, not a config object — confirm
        # this is intended.
        return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict(), **kwargs)
|
|