empty-michael's picture
Training in progress, step 500
d9113e7 verified
raw
history blame contribute delete
711 Bytes
{
"architectures": [
"GPTNeoCodebookModel"
],
"codebook_at": [
"attn",
"mlp"
],
"codebook_kwargs": {},
"codebook_type": [
"vanilla",
"vanilla"
],
"k_codebook": [
16,
16
],
"kmeans_init": false,
"kmeans_init_examples": 1000,
"kmeans_kwargs": {
"batch_size": 24576,
"n_init": "auto"
},
"kmeans_path": "/.cache/cb_volume/huggingface/kmeans_embeddings.pt",
"layers_to_snap": [
0
],
"loss": "aeloss",
"model_type": "codebook",
"num_codebooks": [
1,
1
],
"num_codes": [
10000,
10000
],
"replace_codes": false,
"similarity_metric": "inner_product",
"torch_dtype": "float32",
"transformers_version": "4.35.2"
}