Revert all changes
back to where we started. might be more complicated than hoped :/
config.json: CHANGED (+10 -7)
```diff
@@ -1,18 +1,21 @@
 {
-  …
-  "activation_function": "…
+  "_name_or_path": "bigcode/santacoder",
+  "activation_function": "gelu_fast",
   "architectures": [
-    "…
+    "GPT2LMHeadCustomModel"
   ],
-  "…
-  "multi_query": true,
+  "attention_head_type": "multiquery",
   "attn_pdrop": 0.1,
+  "auto_map": {
+    "AutoConfig": "configuration_gpt2_mq.GPT2CustomConfig",
+    "AutoModelForCausalLM": "modeling_gpt2_mq.GPT2LMHeadCustomModel"
+  },
   "bos_token_id": 49152,
   "embd_pdrop": 0.1,
   "eos_token_id": 49152,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
-  "model_type": "…
+  "model_type": "gpt2",
   "n_embd": 2048,
   "n_head": 16,
   "n_inner": 8192,
@@ -20,7 +23,7 @@
   "n_positions": 2048,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
-  "…
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
```