gla-1B-100B / config.json
{
"_name_or_path": "/data/cl/scratch/bailinw/checkpoints/gla_1b/2024-01-19/04-48-33-180834/huggingface",
"architectures": [
"GLAForCausalLM"
],
"attn_pdrop": 0.0,
"bos_token_id": 0,
"context_length": 2048,
"d_model": 2048,
"embd_pdrop": 0.0,
"eos_token_id": 0,
"load_from_llama": false,
"model_type": "gla",
"n_head": 4,
"n_layer": 24,
"pad_token_id": 0,
"resid_pdrop": 0.0,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.36.2",
"use_cache": true,
"use_gk": true,
"use_gv": false,
"vocab_size": 32000
}
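
The config describes a 24-layer GLA (gated linear attention) causal language model with a 2048-dimensional hidden size, 4 heads, a 2048-token context window, a 32000-entry vocabulary, untied embeddings, and gating enabled on keys only (use_gk true, use_gv false). Below is a minimal sketch of loading this checkpoint with transformers; the repo id "bailin28/gla-1B-100B" and the reliance on either custom modeling code in the repo (trust_remote_code=True) or an installed flash-linear-attention package registering the "gla" model type are assumptions, not something the config states.

```python
# Minimal sketch, assuming the repo id below and that the "gla" model type is
# resolvable (via trust_remote_code or an installed flash-linear-attention).
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "bailin28/gla-1B-100B"  # assumed repository id

# Inspect the configuration fields shown above.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type, config.d_model, config.n_layer, config.n_head)

# Load the weights. The checkpoint is stored in float32 per the config;
# a lower-precision dtype can be requested at load time to save memory.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
model.eval()
```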