# Remote-code module for this config:
# transformers_modules.google.madlad400-8b-lm.d1017b424b97ad0fd1bbeca987ff208ef17367e0.decoderonlyt5_config
from transformers import T5Config


class DecoderOnlyT5Config(T5Config):
    is_decoder_only = True
    # Whether to call attention and the MLP in parallel.
    # https://github.com/google/flaxformer/blob/ea17eb012a1d340ddff017b7a534c2162aaec34c/flaxformer/architectures/t5/t5_architecture.py#L384
    parallel_layers = True
    has_relative_attention_bias = False
    # Multi-query attention: https://arxiv.org/abs/1911.02150
    multi_query_attention = True
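
For context, a minimal sketch of what `parallel_layers = True` refers to (see the flaxformer link above): self-attention and the MLP both read the same pre-normalized hidden states and their outputs are summed into a single residual update, instead of running sequentially with separate norms and residuals. The `ParallelBlock`, `attn`, and `mlp` names below are illustrative stand-ins, not this repository's modeling code, and plain `LayerNorm` stands in for T5's RMSNorm.

import torch
from torch import nn


class ParallelBlock(nn.Module):
    """Toy decoder block with parallel attention and MLP sub-layers."""

    def __init__(self, d_model: int, attn: nn.Module, mlp: nn.Module):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)  # sketch only; T5 uses RMSNorm
        self.attn = attn
        self.mlp = mlp

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        h = self.norm(x)
        # Both sub-layers see the same normalized input and share one residual add,
        # mirroring flaxformer's parallel=True path.
        return x + self.attn(h) + self.mlp(h)


# Example with stand-in sub-layers (a real block would use causal self-attention):
if __name__ == "__main__":
    d = 8
    block = ParallelBlock(d, attn=nn.Linear(d, d), mlp=nn.Linear(d, d))
    print(block(torch.randn(2, 5, d)).shape)  # torch.Size([2, 5, 8])

Separately, `multi_query_attention = True` follows the linked paper (Shazeer, 2019): all query heads share a single key/value head, which shrinks the KV cache during decoding.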