Kemsekov committed on
Commit
4aa73c7
1 Parent(s): 2bb638b

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/tokenizer/vocabulary.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-nlp
3
+ pipeline_tag: text-generation
4
+ ---
5
+ This is a [`Gemma` model](https://keras.io/api/keras_nlp/models/gemma) that was uploaded using the KerasNLP library; it can be used with the JAX, TensorFlow, and PyTorch backends.
6
+ This model is configured for the `CausalLM` task.
7
+
8
+ Model config:
9
+ * **name:** gemma_backbone
10
+ * **trainable:** True
11
+ * **dtype:** {'module': 'keras.dtype_policies', 'class_name': 'DTypePolicyMap', 'config': {'default_policy': None, 'policy_map': {'token_embedding': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_0/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/ffw_gating': {'module': 'keras.dtype_policies', 
'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_1/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_2/attention/query': {'module': 
'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_3/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/attention/attention_output': 
{'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_4/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_5/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'decoder_block_6/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_6/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': 
None}, 'decoder_block_7/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_7/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_8/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 
'registered_name': None}, 'decoder_block_9/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_9/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 
'source_name': None}, 'registered_name': None}, 'decoder_block_10/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_10/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_11/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': 
{'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_12/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/attention/key': {'module': 'keras.dtype_policies', 'class_name': 
'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_13/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_14/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/ffw_gating': {'module': 'keras.dtype_policies', 
'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_15/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_16/attention/query': 
{'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_17/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'decoder_block_18/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_18/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_19/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': 
None}, 'registered_name': None}, 'decoder_block_20/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_20/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 
'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_21/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_22/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 
'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_23/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/attention/value': {'module': 'keras.dtype_policies', 'class_name': 
'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_24/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/ffw_linear': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/ffw_gating_2': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/ffw_gating': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'decoder_block_25/attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}}}, 'registered_name': None}
12
+ * **vocabulary_size:** 256000
13
+ * **num_layers:** 26
14
+ * **num_query_heads:** 8
15
+ * **num_key_value_heads:** 4
16
+ * **hidden_dim:** 2304
17
+ * **intermediate_dim:** 18432
18
+ * **head_dim:** 256
19
+ * **layer_norm_epsilon:** 1e-06
20
+ * **dropout:** 0
21
+ * **query_head_dim_normalize:** True
22
+ * **use_post_ffw_norm:** True
23
+ * **use_post_attention_norm:** True
24
+ * **final_logit_soft_cap:** 30.0
25
+ * **attention_logit_soft_cap:** 50.0
26
+ * **sliding_window_size:** 4096
27
+ * **use_sliding_window_attention:** True
28
+
29
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/tokenizer/vocabulary.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003
config.json ADDED
@@ -0,0 +1,1682 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_backbone",
3
+ "class_name": "GemmaBackbone",
4
+ "config": {
5
+ "name": "gemma_backbone",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras.dtype_policies",
9
+ "class_name": "DTypePolicyMap",
10
+ "config": {
11
+ "default_policy": null,
12
+ "policy_map": {
13
+ "token_embedding": {
14
+ "module": "keras.dtype_policies",
15
+ "class_name": "QuantizedDTypePolicy",
16
+ "config": {
17
+ "mode": "int8",
18
+ "source_name": null
19
+ },
20
+ "registered_name": null
21
+ },
22
+ "decoder_block_0/ffw_linear": {
23
+ "module": "keras.dtype_policies",
24
+ "class_name": "QuantizedDTypePolicy",
25
+ "config": {
26
+ "mode": "int8",
27
+ "source_name": null
28
+ },
29
+ "registered_name": null
30
+ },
31
+ "decoder_block_0/ffw_gating_2": {
32
+ "module": "keras.dtype_policies",
33
+ "class_name": "QuantizedDTypePolicy",
34
+ "config": {
35
+ "mode": "int8",
36
+ "source_name": null
37
+ },
38
+ "registered_name": null
39
+ },
40
+ "decoder_block_0/ffw_gating": {
41
+ "module": "keras.dtype_policies",
42
+ "class_name": "QuantizedDTypePolicy",
43
+ "config": {
44
+ "mode": "int8",
45
+ "source_name": null
46
+ },
47
+ "registered_name": null
48
+ },
49
+ "decoder_block_0/attention/attention_output": {
50
+ "module": "keras.dtype_policies",
51
+ "class_name": "QuantizedDTypePolicy",
52
+ "config": {
53
+ "mode": "int8",
54
+ "source_name": null
55
+ },
56
+ "registered_name": null
57
+ },
58
+ "decoder_block_0/attention/value": {
59
+ "module": "keras.dtype_policies",
60
+ "class_name": "QuantizedDTypePolicy",
61
+ "config": {
62
+ "mode": "int8",
63
+ "source_name": null
64
+ },
65
+ "registered_name": null
66
+ },
67
+ "decoder_block_0/attention/key": {
68
+ "module": "keras.dtype_policies",
69
+ "class_name": "QuantizedDTypePolicy",
70
+ "config": {
71
+ "mode": "int8",
72
+ "source_name": null
73
+ },
74
+ "registered_name": null
75
+ },
76
+ "decoder_block_0/attention/query": {
77
+ "module": "keras.dtype_policies",
78
+ "class_name": "QuantizedDTypePolicy",
79
+ "config": {
80
+ "mode": "int8",
81
+ "source_name": null
82
+ },
83
+ "registered_name": null
84
+ },
85
+ "decoder_block_1/ffw_linear": {
86
+ "module": "keras.dtype_policies",
87
+ "class_name": "QuantizedDTypePolicy",
88
+ "config": {
89
+ "mode": "int8",
90
+ "source_name": null
91
+ },
92
+ "registered_name": null
93
+ },
94
+ "decoder_block_1/ffw_gating_2": {
95
+ "module": "keras.dtype_policies",
96
+ "class_name": "QuantizedDTypePolicy",
97
+ "config": {
98
+ "mode": "int8",
99
+ "source_name": null
100
+ },
101
+ "registered_name": null
102
+ },
103
+ "decoder_block_1/ffw_gating": {
104
+ "module": "keras.dtype_policies",
105
+ "class_name": "QuantizedDTypePolicy",
106
+ "config": {
107
+ "mode": "int8",
108
+ "source_name": null
109
+ },
110
+ "registered_name": null
111
+ },
112
+ "decoder_block_1/attention/attention_output": {
113
+ "module": "keras.dtype_policies",
114
+ "class_name": "QuantizedDTypePolicy",
115
+ "config": {
116
+ "mode": "int8",
117
+ "source_name": null
118
+ },
119
+ "registered_name": null
120
+ },
121
+ "decoder_block_1/attention/value": {
122
+ "module": "keras.dtype_policies",
123
+ "class_name": "QuantizedDTypePolicy",
124
+ "config": {
125
+ "mode": "int8",
126
+ "source_name": null
127
+ },
128
+ "registered_name": null
129
+ },
130
+ "decoder_block_1/attention/key": {
131
+ "module": "keras.dtype_policies",
132
+ "class_name": "QuantizedDTypePolicy",
133
+ "config": {
134
+ "mode": "int8",
135
+ "source_name": null
136
+ },
137
+ "registered_name": null
138
+ },
139
+ "decoder_block_1/attention/query": {
140
+ "module": "keras.dtype_policies",
141
+ "class_name": "QuantizedDTypePolicy",
142
+ "config": {
143
+ "mode": "int8",
144
+ "source_name": null
145
+ },
146
+ "registered_name": null
147
+ },
148
+ "decoder_block_2/ffw_linear": {
149
+ "module": "keras.dtype_policies",
150
+ "class_name": "QuantizedDTypePolicy",
151
+ "config": {
152
+ "mode": "int8",
153
+ "source_name": null
154
+ },
155
+ "registered_name": null
156
+ },
157
+ "decoder_block_2/ffw_gating_2": {
158
+ "module": "keras.dtype_policies",
159
+ "class_name": "QuantizedDTypePolicy",
160
+ "config": {
161
+ "mode": "int8",
162
+ "source_name": null
163
+ },
164
+ "registered_name": null
165
+ },
166
+ "decoder_block_2/ffw_gating": {
167
+ "module": "keras.dtype_policies",
168
+ "class_name": "QuantizedDTypePolicy",
169
+ "config": {
170
+ "mode": "int8",
171
+ "source_name": null
172
+ },
173
+ "registered_name": null
174
+ },
175
+ "decoder_block_2/attention/attention_output": {
176
+ "module": "keras.dtype_policies",
177
+ "class_name": "QuantizedDTypePolicy",
178
+ "config": {
179
+ "mode": "int8",
180
+ "source_name": null
181
+ },
182
+ "registered_name": null
183
+ },
184
+ "decoder_block_2/attention/value": {
185
+ "module": "keras.dtype_policies",
186
+ "class_name": "QuantizedDTypePolicy",
187
+ "config": {
188
+ "mode": "int8",
189
+ "source_name": null
190
+ },
191
+ "registered_name": null
192
+ },
193
+ "decoder_block_2/attention/key": {
194
+ "module": "keras.dtype_policies",
195
+ "class_name": "QuantizedDTypePolicy",
196
+ "config": {
197
+ "mode": "int8",
198
+ "source_name": null
199
+ },
200
+ "registered_name": null
201
+ },
202
+ "decoder_block_2/attention/query": {
203
+ "module": "keras.dtype_policies",
204
+ "class_name": "QuantizedDTypePolicy",
205
+ "config": {
206
+ "mode": "int8",
207
+ "source_name": null
208
+ },
209
+ "registered_name": null
210
+ },
211
+ "decoder_block_3/ffw_linear": {
212
+ "module": "keras.dtype_policies",
213
+ "class_name": "QuantizedDTypePolicy",
214
+ "config": {
215
+ "mode": "int8",
216
+ "source_name": null
217
+ },
218
+ "registered_name": null
219
+ },
220
+ "decoder_block_3/ffw_gating_2": {
221
+ "module": "keras.dtype_policies",
222
+ "class_name": "QuantizedDTypePolicy",
223
+ "config": {
224
+ "mode": "int8",
225
+ "source_name": null
226
+ },
227
+ "registered_name": null
228
+ },
229
+ "decoder_block_3/ffw_gating": {
230
+ "module": "keras.dtype_policies",
231
+ "class_name": "QuantizedDTypePolicy",
232
+ "config": {
233
+ "mode": "int8",
234
+ "source_name": null
235
+ },
236
+ "registered_name": null
237
+ },
238
+ "decoder_block_3/attention/attention_output": {
239
+ "module": "keras.dtype_policies",
240
+ "class_name": "QuantizedDTypePolicy",
241
+ "config": {
242
+ "mode": "int8",
243
+ "source_name": null
244
+ },
245
+ "registered_name": null
246
+ },
247
+ "decoder_block_3/attention/value": {
248
+ "module": "keras.dtype_policies",
249
+ "class_name": "QuantizedDTypePolicy",
250
+ "config": {
251
+ "mode": "int8",
252
+ "source_name": null
253
+ },
254
+ "registered_name": null
255
+ },
256
+ "decoder_block_3/attention/key": {
257
+ "module": "keras.dtype_policies",
258
+ "class_name": "QuantizedDTypePolicy",
259
+ "config": {
260
+ "mode": "int8",
261
+ "source_name": null
262
+ },
263
+ "registered_name": null
264
+ },
265
+ "decoder_block_3/attention/query": {
266
+ "module": "keras.dtype_policies",
267
+ "class_name": "QuantizedDTypePolicy",
268
+ "config": {
269
+ "mode": "int8",
270
+ "source_name": null
271
+ },
272
+ "registered_name": null
273
+ },
274
+ "decoder_block_4/ffw_linear": {
275
+ "module": "keras.dtype_policies",
276
+ "class_name": "QuantizedDTypePolicy",
277
+ "config": {
278
+ "mode": "int8",
279
+ "source_name": null
280
+ },
281
+ "registered_name": null
282
+ },
283
+ "decoder_block_4/ffw_gating_2": {
284
+ "module": "keras.dtype_policies",
285
+ "class_name": "QuantizedDTypePolicy",
286
+ "config": {
287
+ "mode": "int8",
288
+ "source_name": null
289
+ },
290
+ "registered_name": null
291
+ },
292
+ "decoder_block_4/ffw_gating": {
293
+ "module": "keras.dtype_policies",
294
+ "class_name": "QuantizedDTypePolicy",
295
+ "config": {
296
+ "mode": "int8",
297
+ "source_name": null
298
+ },
299
+ "registered_name": null
300
+ },
301
+ "decoder_block_4/attention/attention_output": {
302
+ "module": "keras.dtype_policies",
303
+ "class_name": "QuantizedDTypePolicy",
304
+ "config": {
305
+ "mode": "int8",
306
+ "source_name": null
307
+ },
308
+ "registered_name": null
309
+ },
310
+ "decoder_block_4/attention/value": {
311
+ "module": "keras.dtype_policies",
312
+ "class_name": "QuantizedDTypePolicy",
313
+ "config": {
314
+ "mode": "int8",
315
+ "source_name": null
316
+ },
317
+ "registered_name": null
318
+ },
319
+ "decoder_block_4/attention/key": {
320
+ "module": "keras.dtype_policies",
321
+ "class_name": "QuantizedDTypePolicy",
322
+ "config": {
323
+ "mode": "int8",
324
+ "source_name": null
325
+ },
326
+ "registered_name": null
327
+ },
328
+ "decoder_block_4/attention/query": {
329
+ "module": "keras.dtype_policies",
330
+ "class_name": "QuantizedDTypePolicy",
331
+ "config": {
332
+ "mode": "int8",
333
+ "source_name": null
334
+ },
335
+ "registered_name": null
336
+ },
337
+ "decoder_block_5/ffw_linear": {
338
+ "module": "keras.dtype_policies",
339
+ "class_name": "QuantizedDTypePolicy",
340
+ "config": {
341
+ "mode": "int8",
342
+ "source_name": null
343
+ },
344
+ "registered_name": null
345
+ },
346
+ "decoder_block_5/ffw_gating_2": {
347
+ "module": "keras.dtype_policies",
348
+ "class_name": "QuantizedDTypePolicy",
349
+ "config": {
350
+ "mode": "int8",
351
+ "source_name": null
352
+ },
353
+ "registered_name": null
354
+ },
355
+ "decoder_block_5/ffw_gating": {
356
+ "module": "keras.dtype_policies",
357
+ "class_name": "QuantizedDTypePolicy",
358
+ "config": {
359
+ "mode": "int8",
360
+ "source_name": null
361
+ },
362
+ "registered_name": null
363
+ },
364
+ "decoder_block_5/attention/attention_output": {
365
+ "module": "keras.dtype_policies",
366
+ "class_name": "QuantizedDTypePolicy",
367
+ "config": {
368
+ "mode": "int8",
369
+ "source_name": null
370
+ },
371
+ "registered_name": null
372
+ },
373
+ "decoder_block_5/attention/value": {
374
+ "module": "keras.dtype_policies",
375
+ "class_name": "QuantizedDTypePolicy",
376
+ "config": {
377
+ "mode": "int8",
378
+ "source_name": null
379
+ },
380
+ "registered_name": null
381
+ },
382
+ "decoder_block_5/attention/key": {
383
+ "module": "keras.dtype_policies",
384
+ "class_name": "QuantizedDTypePolicy",
385
+ "config": {
386
+ "mode": "int8",
387
+ "source_name": null
388
+ },
389
+ "registered_name": null
390
+ },
391
+ "decoder_block_5/attention/query": {
392
+ "module": "keras.dtype_policies",
393
+ "class_name": "QuantizedDTypePolicy",
394
+ "config": {
395
+ "mode": "int8",
396
+ "source_name": null
397
+ },
398
+ "registered_name": null
399
+ },
400
+ "decoder_block_6/ffw_linear": {
401
+ "module": "keras.dtype_policies",
402
+ "class_name": "QuantizedDTypePolicy",
403
+ "config": {
404
+ "mode": "int8",
405
+ "source_name": null
406
+ },
407
+ "registered_name": null
408
+ },
409
+ "decoder_block_6/ffw_gating_2": {
410
+ "module": "keras.dtype_policies",
411
+ "class_name": "QuantizedDTypePolicy",
412
+ "config": {
413
+ "mode": "int8",
414
+ "source_name": null
415
+ },
416
+ "registered_name": null
417
+ },
418
+ "decoder_block_6/ffw_gating": {
419
+ "module": "keras.dtype_policies",
420
+ "class_name": "QuantizedDTypePolicy",
421
+ "config": {
422
+ "mode": "int8",
423
+ "source_name": null
424
+ },
425
+ "registered_name": null
426
+ },
427
+ "decoder_block_6/attention/attention_output": {
428
+ "module": "keras.dtype_policies",
429
+ "class_name": "QuantizedDTypePolicy",
430
+ "config": {
431
+ "mode": "int8",
432
+ "source_name": null
433
+ },
434
+ "registered_name": null
435
+ },
436
+ "decoder_block_6/attention/value": {
437
+ "module": "keras.dtype_policies",
438
+ "class_name": "QuantizedDTypePolicy",
439
+ "config": {
440
+ "mode": "int8",
441
+ "source_name": null
442
+ },
443
+ "registered_name": null
444
+ },
445
+ "decoder_block_6/attention/key": {
446
+ "module": "keras.dtype_policies",
447
+ "class_name": "QuantizedDTypePolicy",
448
+ "config": {
449
+ "mode": "int8",
450
+ "source_name": null
451
+ },
452
+ "registered_name": null
453
+ },
454
+ "decoder_block_6/attention/query": {
455
+ "module": "keras.dtype_policies",
456
+ "class_name": "QuantizedDTypePolicy",
457
+ "config": {
458
+ "mode": "int8",
459
+ "source_name": null
460
+ },
461
+ "registered_name": null
462
+ },
463
+ "decoder_block_7/ffw_linear": {
464
+ "module": "keras.dtype_policies",
465
+ "class_name": "QuantizedDTypePolicy",
466
+ "config": {
467
+ "mode": "int8",
468
+ "source_name": null
469
+ },
470
+ "registered_name": null
471
+ },
472
+ "decoder_block_7/ffw_gating_2": {
473
+ "module": "keras.dtype_policies",
474
+ "class_name": "QuantizedDTypePolicy",
475
+ "config": {
476
+ "mode": "int8",
477
+ "source_name": null
478
+ },
479
+ "registered_name": null
480
+ },
481
+ "decoder_block_7/ffw_gating": {
482
+ "module": "keras.dtype_policies",
483
+ "class_name": "QuantizedDTypePolicy",
484
+ "config": {
485
+ "mode": "int8",
486
+ "source_name": null
487
+ },
488
+ "registered_name": null
489
+ },
490
+ "decoder_block_7/attention/attention_output": {
491
+ "module": "keras.dtype_policies",
492
+ "class_name": "QuantizedDTypePolicy",
493
+ "config": {
494
+ "mode": "int8",
495
+ "source_name": null
496
+ },
497
+ "registered_name": null
498
+ },
499
+ "decoder_block_7/attention/value": {
500
+ "module": "keras.dtype_policies",
501
+ "class_name": "QuantizedDTypePolicy",
502
+ "config": {
503
+ "mode": "int8",
504
+ "source_name": null
505
+ },
506
+ "registered_name": null
507
+ },
508
+ "decoder_block_7/attention/key": {
509
+ "module": "keras.dtype_policies",
510
+ "class_name": "QuantizedDTypePolicy",
511
+ "config": {
512
+ "mode": "int8",
513
+ "source_name": null
514
+ },
515
+ "registered_name": null
516
+ },
517
+ "decoder_block_7/attention/query": {
518
+ "module": "keras.dtype_policies",
519
+ "class_name": "QuantizedDTypePolicy",
520
+ "config": {
521
+ "mode": "int8",
522
+ "source_name": null
523
+ },
524
+ "registered_name": null
525
+ },
526
+ "decoder_block_8/ffw_linear": {
527
+ "module": "keras.dtype_policies",
528
+ "class_name": "QuantizedDTypePolicy",
529
+ "config": {
530
+ "mode": "int8",
531
+ "source_name": null
532
+ },
533
+ "registered_name": null
534
+ },
535
+ "decoder_block_8/ffw_gating_2": {
536
+ "module": "keras.dtype_policies",
537
+ "class_name": "QuantizedDTypePolicy",
538
+ "config": {
539
+ "mode": "int8",
540
+ "source_name": null
541
+ },
542
+ "registered_name": null
543
+ },
544
+ "decoder_block_8/ffw_gating": {
545
+ "module": "keras.dtype_policies",
546
+ "class_name": "QuantizedDTypePolicy",
547
+ "config": {
548
+ "mode": "int8",
549
+ "source_name": null
550
+ },
551
+ "registered_name": null
552
+ },
553
+ "decoder_block_8/attention/attention_output": {
554
+ "module": "keras.dtype_policies",
555
+ "class_name": "QuantizedDTypePolicy",
556
+ "config": {
557
+ "mode": "int8",
558
+ "source_name": null
559
+ },
560
+ "registered_name": null
561
+ },
562
+ "decoder_block_8/attention/value": {
563
+ "module": "keras.dtype_policies",
564
+ "class_name": "QuantizedDTypePolicy",
565
+ "config": {
566
+ "mode": "int8",
567
+ "source_name": null
568
+ },
569
+ "registered_name": null
570
+ },
571
+ "decoder_block_8/attention/key": {
572
+ "module": "keras.dtype_policies",
573
+ "class_name": "QuantizedDTypePolicy",
574
+ "config": {
575
+ "mode": "int8",
576
+ "source_name": null
577
+ },
578
+ "registered_name": null
579
+ },
580
+ "decoder_block_8/attention/query": {
581
+ "module": "keras.dtype_policies",
582
+ "class_name": "QuantizedDTypePolicy",
583
+ "config": {
584
+ "mode": "int8",
585
+ "source_name": null
586
+ },
587
+ "registered_name": null
588
+ },
589
+ "decoder_block_9/ffw_linear": {
590
+ "module": "keras.dtype_policies",
591
+ "class_name": "QuantizedDTypePolicy",
592
+ "config": {
593
+ "mode": "int8",
594
+ "source_name": null
595
+ },
596
+ "registered_name": null
597
+ },
598
+ "decoder_block_9/ffw_gating_2": {
599
+ "module": "keras.dtype_policies",
600
+ "class_name": "QuantizedDTypePolicy",
601
+ "config": {
602
+ "mode": "int8",
603
+ "source_name": null
604
+ },
605
+ "registered_name": null
606
+ },
607
+ "decoder_block_9/ffw_gating": {
608
+ "module": "keras.dtype_policies",
609
+ "class_name": "QuantizedDTypePolicy",
610
+ "config": {
611
+ "mode": "int8",
612
+ "source_name": null
613
+ },
614
+ "registered_name": null
615
+ },
616
+ "decoder_block_9/attention/attention_output": {
617
+ "module": "keras.dtype_policies",
618
+ "class_name": "QuantizedDTypePolicy",
619
+ "config": {
620
+ "mode": "int8",
621
+ "source_name": null
622
+ },
623
+ "registered_name": null
624
+ },
625
+ "decoder_block_9/attention/value": {
626
+ "module": "keras.dtype_policies",
627
+ "class_name": "QuantizedDTypePolicy",
628
+ "config": {
629
+ "mode": "int8",
630
+ "source_name": null
631
+ },
632
+ "registered_name": null
633
+ },
634
+ "decoder_block_9/attention/key": {
635
+ "module": "keras.dtype_policies",
636
+ "class_name": "QuantizedDTypePolicy",
637
+ "config": {
638
+ "mode": "int8",
639
+ "source_name": null
640
+ },
641
+ "registered_name": null
642
+ },
643
+ "decoder_block_9/attention/query": {
644
+ "module": "keras.dtype_policies",
645
+ "class_name": "QuantizedDTypePolicy",
646
+ "config": {
647
+ "mode": "int8",
648
+ "source_name": null
649
+ },
650
+ "registered_name": null
651
+ },
652
+ "decoder_block_10/ffw_linear": {
653
+ "module": "keras.dtype_policies",
654
+ "class_name": "QuantizedDTypePolicy",
655
+ "config": {
656
+ "mode": "int8",
657
+ "source_name": null
658
+ },
659
+ "registered_name": null
660
+ },
661
+ "decoder_block_10/ffw_gating_2": {
662
+ "module": "keras.dtype_policies",
663
+ "class_name": "QuantizedDTypePolicy",
664
+ "config": {
665
+ "mode": "int8",
666
+ "source_name": null
667
+ },
668
+ "registered_name": null
669
+ },
670
+ "decoder_block_10/ffw_gating": {
671
+ "module": "keras.dtype_policies",
672
+ "class_name": "QuantizedDTypePolicy",
673
+ "config": {
674
+ "mode": "int8",
675
+ "source_name": null
676
+ },
677
+ "registered_name": null
678
+ },
679
+ "decoder_block_10/attention/attention_output": {
680
+ "module": "keras.dtype_policies",
681
+ "class_name": "QuantizedDTypePolicy",
682
+ "config": {
683
+ "mode": "int8",
684
+ "source_name": null
685
+ },
686
+ "registered_name": null
687
+ },
688
+ "decoder_block_10/attention/value": {
689
+ "module": "keras.dtype_policies",
690
+ "class_name": "QuantizedDTypePolicy",
691
+ "config": {
692
+ "mode": "int8",
693
+ "source_name": null
694
+ },
695
+ "registered_name": null
696
+ },
697
+ "decoder_block_10/attention/key": {
698
+ "module": "keras.dtype_policies",
699
+ "class_name": "QuantizedDTypePolicy",
700
+ "config": {
701
+ "mode": "int8",
702
+ "source_name": null
703
+ },
704
+ "registered_name": null
705
+ },
706
+ "decoder_block_10/attention/query": {
707
+ "module": "keras.dtype_policies",
708
+ "class_name": "QuantizedDTypePolicy",
709
+ "config": {
710
+ "mode": "int8",
711
+ "source_name": null
712
+ },
713
+ "registered_name": null
714
+ },
715
+ "decoder_block_11/ffw_linear": {
716
+ "module": "keras.dtype_policies",
717
+ "class_name": "QuantizedDTypePolicy",
718
+ "config": {
719
+ "mode": "int8",
720
+ "source_name": null
721
+ },
722
+ "registered_name": null
723
+ },
724
+ "decoder_block_11/ffw_gating_2": {
725
+ "module": "keras.dtype_policies",
726
+ "class_name": "QuantizedDTypePolicy",
727
+ "config": {
728
+ "mode": "int8",
729
+ "source_name": null
730
+ },
731
+ "registered_name": null
732
+ },
733
+ "decoder_block_11/ffw_gating": {
734
+ "module": "keras.dtype_policies",
735
+ "class_name": "QuantizedDTypePolicy",
736
+ "config": {
737
+ "mode": "int8",
738
+ "source_name": null
739
+ },
740
+ "registered_name": null
741
+ },
742
+ "decoder_block_11/attention/attention_output": {
743
+ "module": "keras.dtype_policies",
744
+ "class_name": "QuantizedDTypePolicy",
745
+ "config": {
746
+ "mode": "int8",
747
+ "source_name": null
748
+ },
749
+ "registered_name": null
750
+ },
751
+ "decoder_block_11/attention/value": {
752
+ "module": "keras.dtype_policies",
753
+ "class_name": "QuantizedDTypePolicy",
754
+ "config": {
755
+ "mode": "int8",
756
+ "source_name": null
757
+ },
758
+ "registered_name": null
759
+ },
760
+ "decoder_block_11/attention/key": {
761
+ "module": "keras.dtype_policies",
762
+ "class_name": "QuantizedDTypePolicy",
763
+ "config": {
764
+ "mode": "int8",
765
+ "source_name": null
766
+ },
767
+ "registered_name": null
768
+ },
769
+ "decoder_block_11/attention/query": {
770
+ "module": "keras.dtype_policies",
771
+ "class_name": "QuantizedDTypePolicy",
772
+ "config": {
773
+ "mode": "int8",
774
+ "source_name": null
775
+ },
776
+ "registered_name": null
777
+ },
778
+ "decoder_block_12/ffw_linear": {
779
+ "module": "keras.dtype_policies",
780
+ "class_name": "QuantizedDTypePolicy",
781
+ "config": {
782
+ "mode": "int8",
783
+ "source_name": null
784
+ },
785
+ "registered_name": null
786
+ },
787
+ "decoder_block_12/ffw_gating_2": {
788
+ "module": "keras.dtype_policies",
789
+ "class_name": "QuantizedDTypePolicy",
790
+ "config": {
791
+ "mode": "int8",
792
+ "source_name": null
793
+ },
794
+ "registered_name": null
795
+ },
796
+ "decoder_block_12/ffw_gating": {
797
+ "module": "keras.dtype_policies",
798
+ "class_name": "QuantizedDTypePolicy",
799
+ "config": {
800
+ "mode": "int8",
801
+ "source_name": null
802
+ },
803
+ "registered_name": null
804
+ },
805
+ "decoder_block_12/attention/attention_output": {
806
+ "module": "keras.dtype_policies",
807
+ "class_name": "QuantizedDTypePolicy",
808
+ "config": {
809
+ "mode": "int8",
810
+ "source_name": null
811
+ },
812
+ "registered_name": null
813
+ },
814
+ "decoder_block_12/attention/value": {
815
+ "module": "keras.dtype_policies",
816
+ "class_name": "QuantizedDTypePolicy",
817
+ "config": {
818
+ "mode": "int8",
819
+ "source_name": null
820
+ },
821
+ "registered_name": null
822
+ },
823
+ "decoder_block_12/attention/key": {
824
+ "module": "keras.dtype_policies",
825
+ "class_name": "QuantizedDTypePolicy",
826
+ "config": {
827
+ "mode": "int8",
828
+ "source_name": null
829
+ },
830
+ "registered_name": null
831
+ },
832
+ "decoder_block_12/attention/query": {
833
+ "module": "keras.dtype_policies",
834
+ "class_name": "QuantizedDTypePolicy",
835
+ "config": {
836
+ "mode": "int8",
837
+ "source_name": null
838
+ },
839
+ "registered_name": null
840
+ },
841
+ "decoder_block_13/ffw_linear": {
842
+ "module": "keras.dtype_policies",
843
+ "class_name": "QuantizedDTypePolicy",
844
+ "config": {
845
+ "mode": "int8",
846
+ "source_name": null
847
+ },
848
+ "registered_name": null
849
+ },
850
+ "decoder_block_13/ffw_gating_2": {
851
+ "module": "keras.dtype_policies",
852
+ "class_name": "QuantizedDTypePolicy",
853
+ "config": {
854
+ "mode": "int8",
855
+ "source_name": null
856
+ },
857
+ "registered_name": null
858
+ },
859
+ "decoder_block_13/ffw_gating": {
860
+ "module": "keras.dtype_policies",
861
+ "class_name": "QuantizedDTypePolicy",
862
+ "config": {
863
+ "mode": "int8",
864
+ "source_name": null
865
+ },
866
+ "registered_name": null
867
+ },
868
+ "decoder_block_13/attention/attention_output": {
869
+ "module": "keras.dtype_policies",
870
+ "class_name": "QuantizedDTypePolicy",
871
+ "config": {
872
+ "mode": "int8",
873
+ "source_name": null
874
+ },
875
+ "registered_name": null
876
+ },
877
+ "decoder_block_13/attention/value": {
878
+ "module": "keras.dtype_policies",
879
+ "class_name": "QuantizedDTypePolicy",
880
+ "config": {
881
+ "mode": "int8",
882
+ "source_name": null
883
+ },
884
+ "registered_name": null
885
+ },
886
+ "decoder_block_13/attention/key": {
887
+ "module": "keras.dtype_policies",
888
+ "class_name": "QuantizedDTypePolicy",
889
+ "config": {
890
+ "mode": "int8",
891
+ "source_name": null
892
+ },
893
+ "registered_name": null
894
+ },
895
+ "decoder_block_13/attention/query": {
896
+ "module": "keras.dtype_policies",
897
+ "class_name": "QuantizedDTypePolicy",
898
+ "config": {
899
+ "mode": "int8",
900
+ "source_name": null
901
+ },
902
+ "registered_name": null
903
+ },
904
+ "decoder_block_14/ffw_linear": {
905
+ "module": "keras.dtype_policies",
906
+ "class_name": "QuantizedDTypePolicy",
907
+ "config": {
908
+ "mode": "int8",
909
+ "source_name": null
910
+ },
911
+ "registered_name": null
912
+ },
913
+ "decoder_block_14/ffw_gating_2": {
914
+ "module": "keras.dtype_policies",
915
+ "class_name": "QuantizedDTypePolicy",
916
+ "config": {
917
+ "mode": "int8",
918
+ "source_name": null
919
+ },
920
+ "registered_name": null
921
+ },
922
+ "decoder_block_14/ffw_gating": {
923
+ "module": "keras.dtype_policies",
924
+ "class_name": "QuantizedDTypePolicy",
925
+ "config": {
926
+ "mode": "int8",
927
+ "source_name": null
928
+ },
929
+ "registered_name": null
930
+ },
931
+ "decoder_block_14/attention/attention_output": {
932
+ "module": "keras.dtype_policies",
933
+ "class_name": "QuantizedDTypePolicy",
934
+ "config": {
935
+ "mode": "int8",
936
+ "source_name": null
937
+ },
938
+ "registered_name": null
939
+ },
940
+ "decoder_block_14/attention/value": {
941
+ "module": "keras.dtype_policies",
942
+ "class_name": "QuantizedDTypePolicy",
943
+ "config": {
944
+ "mode": "int8",
945
+ "source_name": null
946
+ },
947
+ "registered_name": null
948
+ },
949
+ "decoder_block_14/attention/key": {
950
+ "module": "keras.dtype_policies",
951
+ "class_name": "QuantizedDTypePolicy",
952
+ "config": {
953
+ "mode": "int8",
954
+ "source_name": null
955
+ },
956
+ "registered_name": null
957
+ },
958
+ "decoder_block_14/attention/query": {
959
+ "module": "keras.dtype_policies",
960
+ "class_name": "QuantizedDTypePolicy",
961
+ "config": {
962
+ "mode": "int8",
963
+ "source_name": null
964
+ },
965
+ "registered_name": null
966
+ },
967
+ "decoder_block_15/ffw_linear": {
968
+ "module": "keras.dtype_policies",
969
+ "class_name": "QuantizedDTypePolicy",
970
+ "config": {
971
+ "mode": "int8",
972
+ "source_name": null
973
+ },
974
+ "registered_name": null
975
+ },
976
+ "decoder_block_15/ffw_gating_2": {
977
+ "module": "keras.dtype_policies",
978
+ "class_name": "QuantizedDTypePolicy",
979
+ "config": {
980
+ "mode": "int8",
981
+ "source_name": null
982
+ },
983
+ "registered_name": null
984
+ },
985
+ "decoder_block_15/ffw_gating": {
986
+ "module": "keras.dtype_policies",
987
+ "class_name": "QuantizedDTypePolicy",
988
+ "config": {
989
+ "mode": "int8",
990
+ "source_name": null
991
+ },
992
+ "registered_name": null
993
+ },
994
+ "decoder_block_15/attention/attention_output": {
995
+ "module": "keras.dtype_policies",
996
+ "class_name": "QuantizedDTypePolicy",
997
+ "config": {
998
+ "mode": "int8",
999
+ "source_name": null
1000
+ },
1001
+ "registered_name": null
1002
+ },
1003
+ "decoder_block_15/attention/value": {
1004
+ "module": "keras.dtype_policies",
1005
+ "class_name": "QuantizedDTypePolicy",
1006
+ "config": {
1007
+ "mode": "int8",
1008
+ "source_name": null
1009
+ },
1010
+ "registered_name": null
1011
+ },
1012
+ "decoder_block_15/attention/key": {
1013
+ "module": "keras.dtype_policies",
1014
+ "class_name": "QuantizedDTypePolicy",
1015
+ "config": {
1016
+ "mode": "int8",
1017
+ "source_name": null
1018
+ },
1019
+ "registered_name": null
1020
+ },
1021
+ "decoder_block_15/attention/query": {
1022
+ "module": "keras.dtype_policies",
1023
+ "class_name": "QuantizedDTypePolicy",
1024
+ "config": {
1025
+ "mode": "int8",
1026
+ "source_name": null
1027
+ },
1028
+ "registered_name": null
1029
+ },
1030
+ "decoder_block_16/ffw_linear": {
1031
+ "module": "keras.dtype_policies",
1032
+ "class_name": "QuantizedDTypePolicy",
1033
+ "config": {
1034
+ "mode": "int8",
1035
+ "source_name": null
1036
+ },
1037
+ "registered_name": null
1038
+ },
1039
+ "decoder_block_16/ffw_gating_2": {
1040
+ "module": "keras.dtype_policies",
1041
+ "class_name": "QuantizedDTypePolicy",
1042
+ "config": {
1043
+ "mode": "int8",
1044
+ "source_name": null
1045
+ },
1046
+ "registered_name": null
1047
+ },
1048
+ "decoder_block_16/ffw_gating": {
1049
+ "module": "keras.dtype_policies",
1050
+ "class_name": "QuantizedDTypePolicy",
1051
+ "config": {
1052
+ "mode": "int8",
1053
+ "source_name": null
1054
+ },
1055
+ "registered_name": null
1056
+ },
1057
+ "decoder_block_16/attention/attention_output": {
1058
+ "module": "keras.dtype_policies",
1059
+ "class_name": "QuantizedDTypePolicy",
1060
+ "config": {
1061
+ "mode": "int8",
1062
+ "source_name": null
1063
+ },
1064
+ "registered_name": null
1065
+ },
1066
+ "decoder_block_16/attention/value": {
1067
+ "module": "keras.dtype_policies",
1068
+ "class_name": "QuantizedDTypePolicy",
1069
+ "config": {
1070
+ "mode": "int8",
1071
+ "source_name": null
1072
+ },
1073
+ "registered_name": null
1074
+ },
1075
+ "decoder_block_16/attention/key": {
1076
+ "module": "keras.dtype_policies",
1077
+ "class_name": "QuantizedDTypePolicy",
1078
+ "config": {
1079
+ "mode": "int8",
1080
+ "source_name": null
1081
+ },
1082
+ "registered_name": null
1083
+ },
1084
+ "decoder_block_16/attention/query": {
1085
+ "module": "keras.dtype_policies",
1086
+ "class_name": "QuantizedDTypePolicy",
1087
+ "config": {
1088
+ "mode": "int8",
1089
+ "source_name": null
1090
+ },
1091
+ "registered_name": null
1092
+ },
1093
+ "decoder_block_17/ffw_linear": {
1094
+ "module": "keras.dtype_policies",
1095
+ "class_name": "QuantizedDTypePolicy",
1096
+ "config": {
1097
+ "mode": "int8",
1098
+ "source_name": null
1099
+ },
1100
+ "registered_name": null
1101
+ },
1102
+ "decoder_block_17/ffw_gating_2": {
1103
+ "module": "keras.dtype_policies",
1104
+ "class_name": "QuantizedDTypePolicy",
1105
+ "config": {
1106
+ "mode": "int8",
1107
+ "source_name": null
1108
+ },
1109
+ "registered_name": null
1110
+ },
1111
+ "decoder_block_17/ffw_gating": {
1112
+ "module": "keras.dtype_policies",
1113
+ "class_name": "QuantizedDTypePolicy",
1114
+ "config": {
1115
+ "mode": "int8",
1116
+ "source_name": null
1117
+ },
1118
+ "registered_name": null
1119
+ },
1120
+ "decoder_block_17/attention/attention_output": {
1121
+ "module": "keras.dtype_policies",
1122
+ "class_name": "QuantizedDTypePolicy",
1123
+ "config": {
1124
+ "mode": "int8",
1125
+ "source_name": null
1126
+ },
1127
+ "registered_name": null
1128
+ },
1129
+ "decoder_block_17/attention/value": {
1130
+ "module": "keras.dtype_policies",
1131
+ "class_name": "QuantizedDTypePolicy",
1132
+ "config": {
1133
+ "mode": "int8",
1134
+ "source_name": null
1135
+ },
1136
+ "registered_name": null
1137
+ },
1138
+ "decoder_block_17/attention/key": {
1139
+ "module": "keras.dtype_policies",
1140
+ "class_name": "QuantizedDTypePolicy",
1141
+ "config": {
1142
+ "mode": "int8",
1143
+ "source_name": null
1144
+ },
1145
+ "registered_name": null
1146
+ },
1147
+ "decoder_block_17/attention/query": {
1148
+ "module": "keras.dtype_policies",
1149
+ "class_name": "QuantizedDTypePolicy",
1150
+ "config": {
1151
+ "mode": "int8",
1152
+ "source_name": null
1153
+ },
1154
+ "registered_name": null
1155
+ },
1156
+ "decoder_block_18/ffw_linear": {
1157
+ "module": "keras.dtype_policies",
1158
+ "class_name": "QuantizedDTypePolicy",
1159
+ "config": {
1160
+ "mode": "int8",
1161
+ "source_name": null
1162
+ },
1163
+ "registered_name": null
1164
+ },
1165
+ "decoder_block_18/ffw_gating_2": {
1166
+ "module": "keras.dtype_policies",
1167
+ "class_name": "QuantizedDTypePolicy",
1168
+ "config": {
1169
+ "mode": "int8",
1170
+ "source_name": null
1171
+ },
1172
+ "registered_name": null
1173
+ },
1174
+ "decoder_block_18/ffw_gating": {
1175
+ "module": "keras.dtype_policies",
1176
+ "class_name": "QuantizedDTypePolicy",
1177
+ "config": {
1178
+ "mode": "int8",
1179
+ "source_name": null
1180
+ },
1181
+ "registered_name": null
1182
+ },
1183
+ "decoder_block_18/attention/attention_output": {
1184
+ "module": "keras.dtype_policies",
1185
+ "class_name": "QuantizedDTypePolicy",
1186
+ "config": {
1187
+ "mode": "int8",
1188
+ "source_name": null
1189
+ },
1190
+ "registered_name": null
1191
+ },
1192
+ "decoder_block_18/attention/value": {
1193
+ "module": "keras.dtype_policies",
1194
+ "class_name": "QuantizedDTypePolicy",
1195
+ "config": {
1196
+ "mode": "int8",
1197
+ "source_name": null
1198
+ },
1199
+ "registered_name": null
1200
+ },
1201
+ "decoder_block_18/attention/key": {
1202
+ "module": "keras.dtype_policies",
1203
+ "class_name": "QuantizedDTypePolicy",
1204
+ "config": {
1205
+ "mode": "int8",
1206
+ "source_name": null
1207
+ },
1208
+ "registered_name": null
1209
+ },
1210
+ "decoder_block_18/attention/query": {
1211
+ "module": "keras.dtype_policies",
1212
+ "class_name": "QuantizedDTypePolicy",
1213
+ "config": {
1214
+ "mode": "int8",
1215
+ "source_name": null
1216
+ },
1217
+ "registered_name": null
1218
+ },
1219
+ "decoder_block_19/ffw_linear": {
1220
+ "module": "keras.dtype_policies",
1221
+ "class_name": "QuantizedDTypePolicy",
1222
+ "config": {
1223
+ "mode": "int8",
1224
+ "source_name": null
1225
+ },
1226
+ "registered_name": null
1227
+ },
1228
+ "decoder_block_19/ffw_gating_2": {
1229
+ "module": "keras.dtype_policies",
1230
+ "class_name": "QuantizedDTypePolicy",
1231
+ "config": {
1232
+ "mode": "int8",
1233
+ "source_name": null
1234
+ },
1235
+ "registered_name": null
1236
+ },
1237
+ "decoder_block_19/ffw_gating": {
1238
+ "module": "keras.dtype_policies",
1239
+ "class_name": "QuantizedDTypePolicy",
1240
+ "config": {
1241
+ "mode": "int8",
1242
+ "source_name": null
1243
+ },
1244
+ "registered_name": null
1245
+ },
1246
+ "decoder_block_19/attention/attention_output": {
1247
+ "module": "keras.dtype_policies",
1248
+ "class_name": "QuantizedDTypePolicy",
1249
+ "config": {
1250
+ "mode": "int8",
1251
+ "source_name": null
1252
+ },
1253
+ "registered_name": null
1254
+ },
1255
+ "decoder_block_19/attention/value": {
1256
+ "module": "keras.dtype_policies",
1257
+ "class_name": "QuantizedDTypePolicy",
1258
+ "config": {
1259
+ "mode": "int8",
1260
+ "source_name": null
1261
+ },
1262
+ "registered_name": null
1263
+ },
1264
+ "decoder_block_19/attention/key": {
1265
+ "module": "keras.dtype_policies",
1266
+ "class_name": "QuantizedDTypePolicy",
1267
+ "config": {
1268
+ "mode": "int8",
1269
+ "source_name": null
1270
+ },
1271
+ "registered_name": null
1272
+ },
1273
+ "decoder_block_19/attention/query": {
1274
+ "module": "keras.dtype_policies",
1275
+ "class_name": "QuantizedDTypePolicy",
1276
+ "config": {
1277
+ "mode": "int8",
1278
+ "source_name": null
1279
+ },
1280
+ "registered_name": null
1281
+ },
1282
+ "decoder_block_20/ffw_linear": {
1283
+ "module": "keras.dtype_policies",
1284
+ "class_name": "QuantizedDTypePolicy",
1285
+ "config": {
1286
+ "mode": "int8",
1287
+ "source_name": null
1288
+ },
1289
+ "registered_name": null
1290
+ },
1291
+ "decoder_block_20/ffw_gating_2": {
1292
+ "module": "keras.dtype_policies",
1293
+ "class_name": "QuantizedDTypePolicy",
1294
+ "config": {
1295
+ "mode": "int8",
1296
+ "source_name": null
1297
+ },
1298
+ "registered_name": null
1299
+ },
1300
+ "decoder_block_20/ffw_gating": {
1301
+ "module": "keras.dtype_policies",
1302
+ "class_name": "QuantizedDTypePolicy",
1303
+ "config": {
1304
+ "mode": "int8",
1305
+ "source_name": null
1306
+ },
1307
+ "registered_name": null
1308
+ },
1309
+ "decoder_block_20/attention/attention_output": {
1310
+ "module": "keras.dtype_policies",
1311
+ "class_name": "QuantizedDTypePolicy",
1312
+ "config": {
1313
+ "mode": "int8",
1314
+ "source_name": null
1315
+ },
1316
+ "registered_name": null
1317
+ },
1318
+ "decoder_block_20/attention/value": {
1319
+ "module": "keras.dtype_policies",
1320
+ "class_name": "QuantizedDTypePolicy",
1321
+ "config": {
1322
+ "mode": "int8",
1323
+ "source_name": null
1324
+ },
1325
+ "registered_name": null
1326
+ },
1327
+ "decoder_block_20/attention/key": {
1328
+ "module": "keras.dtype_policies",
1329
+ "class_name": "QuantizedDTypePolicy",
1330
+ "config": {
1331
+ "mode": "int8",
1332
+ "source_name": null
1333
+ },
1334
+ "registered_name": null
1335
+ },
1336
+ "decoder_block_20/attention/query": {
1337
+ "module": "keras.dtype_policies",
1338
+ "class_name": "QuantizedDTypePolicy",
1339
+ "config": {
1340
+ "mode": "int8",
1341
+ "source_name": null
1342
+ },
1343
+ "registered_name": null
1344
+ },
1345
+ "decoder_block_21/ffw_linear": {
1346
+ "module": "keras.dtype_policies",
1347
+ "class_name": "QuantizedDTypePolicy",
1348
+ "config": {
1349
+ "mode": "int8",
1350
+ "source_name": null
1351
+ },
1352
+ "registered_name": null
1353
+ },
1354
+ "decoder_block_21/ffw_gating_2": {
1355
+ "module": "keras.dtype_policies",
1356
+ "class_name": "QuantizedDTypePolicy",
1357
+ "config": {
1358
+ "mode": "int8",
1359
+ "source_name": null
1360
+ },
1361
+ "registered_name": null
1362
+ },
1363
+ "decoder_block_21/ffw_gating": {
1364
+ "module": "keras.dtype_policies",
1365
+ "class_name": "QuantizedDTypePolicy",
1366
+ "config": {
1367
+ "mode": "int8",
1368
+ "source_name": null
1369
+ },
1370
+ "registered_name": null
1371
+ },
1372
+ "decoder_block_21/attention/attention_output": {
1373
+ "module": "keras.dtype_policies",
1374
+ "class_name": "QuantizedDTypePolicy",
1375
+ "config": {
1376
+ "mode": "int8",
1377
+ "source_name": null
1378
+ },
1379
+ "registered_name": null
1380
+ },
1381
+ "decoder_block_21/attention/value": {
1382
+ "module": "keras.dtype_policies",
1383
+ "class_name": "QuantizedDTypePolicy",
1384
+ "config": {
1385
+ "mode": "int8",
1386
+ "source_name": null
1387
+ },
1388
+ "registered_name": null
1389
+ },
1390
+ "decoder_block_21/attention/key": {
1391
+ "module": "keras.dtype_policies",
1392
+ "class_name": "QuantizedDTypePolicy",
1393
+ "config": {
1394
+ "mode": "int8",
1395
+ "source_name": null
1396
+ },
1397
+ "registered_name": null
1398
+ },
1399
+ "decoder_block_21/attention/query": {
1400
+ "module": "keras.dtype_policies",
1401
+ "class_name": "QuantizedDTypePolicy",
1402
+ "config": {
1403
+ "mode": "int8",
1404
+ "source_name": null
1405
+ },
1406
+ "registered_name": null
1407
+ },
1408
+ "decoder_block_22/ffw_linear": {
1409
+ "module": "keras.dtype_policies",
1410
+ "class_name": "QuantizedDTypePolicy",
1411
+ "config": {
1412
+ "mode": "int8",
1413
+ "source_name": null
1414
+ },
1415
+ "registered_name": null
1416
+ },
1417
+ "decoder_block_22/ffw_gating_2": {
1418
+ "module": "keras.dtype_policies",
1419
+ "class_name": "QuantizedDTypePolicy",
1420
+ "config": {
1421
+ "mode": "int8",
1422
+ "source_name": null
1423
+ },
1424
+ "registered_name": null
1425
+ },
1426
+ "decoder_block_22/ffw_gating": {
1427
+ "module": "keras.dtype_policies",
1428
+ "class_name": "QuantizedDTypePolicy",
1429
+ "config": {
1430
+ "mode": "int8",
1431
+ "source_name": null
1432
+ },
1433
+ "registered_name": null
1434
+ },
1435
+ "decoder_block_22/attention/attention_output": {
1436
+ "module": "keras.dtype_policies",
1437
+ "class_name": "QuantizedDTypePolicy",
1438
+ "config": {
1439
+ "mode": "int8",
1440
+ "source_name": null
1441
+ },
1442
+ "registered_name": null
1443
+ },
1444
+ "decoder_block_22/attention/value": {
1445
+ "module": "keras.dtype_policies",
1446
+ "class_name": "QuantizedDTypePolicy",
1447
+ "config": {
1448
+ "mode": "int8",
1449
+ "source_name": null
1450
+ },
1451
+ "registered_name": null
1452
+ },
1453
+ "decoder_block_22/attention/key": {
1454
+ "module": "keras.dtype_policies",
1455
+ "class_name": "QuantizedDTypePolicy",
1456
+ "config": {
1457
+ "mode": "int8",
1458
+ "source_name": null
1459
+ },
1460
+ "registered_name": null
1461
+ },
1462
+ "decoder_block_22/attention/query": {
1463
+ "module": "keras.dtype_policies",
1464
+ "class_name": "QuantizedDTypePolicy",
1465
+ "config": {
1466
+ "mode": "int8",
1467
+ "source_name": null
1468
+ },
1469
+ "registered_name": null
1470
+ },
1471
+ "decoder_block_23/ffw_linear": {
1472
+ "module": "keras.dtype_policies",
1473
+ "class_name": "QuantizedDTypePolicy",
1474
+ "config": {
1475
+ "mode": "int8",
1476
+ "source_name": null
1477
+ },
1478
+ "registered_name": null
1479
+ },
1480
+ "decoder_block_23/ffw_gating_2": {
1481
+ "module": "keras.dtype_policies",
1482
+ "class_name": "QuantizedDTypePolicy",
1483
+ "config": {
1484
+ "mode": "int8",
1485
+ "source_name": null
1486
+ },
1487
+ "registered_name": null
1488
+ },
1489
+ "decoder_block_23/ffw_gating": {
1490
+ "module": "keras.dtype_policies",
1491
+ "class_name": "QuantizedDTypePolicy",
1492
+ "config": {
1493
+ "mode": "int8",
1494
+ "source_name": null
1495
+ },
1496
+ "registered_name": null
1497
+ },
1498
+ "decoder_block_23/attention/attention_output": {
1499
+ "module": "keras.dtype_policies",
1500
+ "class_name": "QuantizedDTypePolicy",
1501
+ "config": {
1502
+ "mode": "int8",
1503
+ "source_name": null
1504
+ },
1505
+ "registered_name": null
1506
+ },
1507
+ "decoder_block_23/attention/value": {
1508
+ "module": "keras.dtype_policies",
1509
+ "class_name": "QuantizedDTypePolicy",
1510
+ "config": {
1511
+ "mode": "int8",
1512
+ "source_name": null
1513
+ },
1514
+ "registered_name": null
1515
+ },
1516
+ "decoder_block_23/attention/key": {
1517
+ "module": "keras.dtype_policies",
1518
+ "class_name": "QuantizedDTypePolicy",
1519
+ "config": {
1520
+ "mode": "int8",
1521
+ "source_name": null
1522
+ },
1523
+ "registered_name": null
1524
+ },
1525
+ "decoder_block_23/attention/query": {
1526
+ "module": "keras.dtype_policies",
1527
+ "class_name": "QuantizedDTypePolicy",
1528
+ "config": {
1529
+ "mode": "int8",
1530
+ "source_name": null
1531
+ },
1532
+ "registered_name": null
1533
+ },
1534
+ "decoder_block_24/ffw_linear": {
1535
+ "module": "keras.dtype_policies",
1536
+ "class_name": "QuantizedDTypePolicy",
1537
+ "config": {
1538
+ "mode": "int8",
1539
+ "source_name": null
1540
+ },
1541
+ "registered_name": null
1542
+ },
1543
+ "decoder_block_24/ffw_gating_2": {
1544
+ "module": "keras.dtype_policies",
1545
+ "class_name": "QuantizedDTypePolicy",
1546
+ "config": {
1547
+ "mode": "int8",
1548
+ "source_name": null
1549
+ },
1550
+ "registered_name": null
1551
+ },
1552
+ "decoder_block_24/ffw_gating": {
1553
+ "module": "keras.dtype_policies",
1554
+ "class_name": "QuantizedDTypePolicy",
1555
+ "config": {
1556
+ "mode": "int8",
1557
+ "source_name": null
1558
+ },
1559
+ "registered_name": null
1560
+ },
1561
+ "decoder_block_24/attention/attention_output": {
1562
+ "module": "keras.dtype_policies",
1563
+ "class_name": "QuantizedDTypePolicy",
1564
+ "config": {
1565
+ "mode": "int8",
1566
+ "source_name": null
1567
+ },
1568
+ "registered_name": null
1569
+ },
1570
+ "decoder_block_24/attention/value": {
1571
+ "module": "keras.dtype_policies",
1572
+ "class_name": "QuantizedDTypePolicy",
1573
+ "config": {
1574
+ "mode": "int8",
1575
+ "source_name": null
1576
+ },
1577
+ "registered_name": null
1578
+ },
1579
+ "decoder_block_24/attention/key": {
1580
+ "module": "keras.dtype_policies",
1581
+ "class_name": "QuantizedDTypePolicy",
1582
+ "config": {
1583
+ "mode": "int8",
1584
+ "source_name": null
1585
+ },
1586
+ "registered_name": null
1587
+ },
1588
+ "decoder_block_24/attention/query": {
1589
+ "module": "keras.dtype_policies",
1590
+ "class_name": "QuantizedDTypePolicy",
1591
+ "config": {
1592
+ "mode": "int8",
1593
+ "source_name": null
1594
+ },
1595
+ "registered_name": null
1596
+ },
1597
+ "decoder_block_25/ffw_linear": {
1598
+ "module": "keras.dtype_policies",
1599
+ "class_name": "QuantizedDTypePolicy",
1600
+ "config": {
1601
+ "mode": "int8",
1602
+ "source_name": null
1603
+ },
1604
+ "registered_name": null
1605
+ },
1606
+ "decoder_block_25/ffw_gating_2": {
1607
+ "module": "keras.dtype_policies",
1608
+ "class_name": "QuantizedDTypePolicy",
1609
+ "config": {
1610
+ "mode": "int8",
1611
+ "source_name": null
1612
+ },
1613
+ "registered_name": null
1614
+ },
1615
+ "decoder_block_25/ffw_gating": {
1616
+ "module": "keras.dtype_policies",
1617
+ "class_name": "QuantizedDTypePolicy",
1618
+ "config": {
1619
+ "mode": "int8",
1620
+ "source_name": null
1621
+ },
1622
+ "registered_name": null
1623
+ },
1624
+ "decoder_block_25/attention/attention_output": {
1625
+ "module": "keras.dtype_policies",
1626
+ "class_name": "QuantizedDTypePolicy",
1627
+ "config": {
1628
+ "mode": "int8",
1629
+ "source_name": null
1630
+ },
1631
+ "registered_name": null
1632
+ },
1633
+ "decoder_block_25/attention/value": {
1634
+ "module": "keras.dtype_policies",
1635
+ "class_name": "QuantizedDTypePolicy",
1636
+ "config": {
1637
+ "mode": "int8",
1638
+ "source_name": null
1639
+ },
1640
+ "registered_name": null
1641
+ },
1642
+ "decoder_block_25/attention/key": {
1643
+ "module": "keras.dtype_policies",
1644
+ "class_name": "QuantizedDTypePolicy",
1645
+ "config": {
1646
+ "mode": "int8",
1647
+ "source_name": null
1648
+ },
1649
+ "registered_name": null
1650
+ },
1651
+ "decoder_block_25/attention/query": {
1652
+ "module": "keras.dtype_policies",
1653
+ "class_name": "QuantizedDTypePolicy",
1654
+ "config": {
1655
+ "mode": "int8",
1656
+ "source_name": null
1657
+ },
1658
+ "registered_name": null
1659
+ }
1660
+ }
1661
+ },
1662
+ "registered_name": null
1663
+ },
1664
+ "vocabulary_size": 256000,
1665
+ "num_layers": 26,
1666
+ "num_query_heads": 8,
1667
+ "num_key_value_heads": 4,
1668
+ "hidden_dim": 2304,
1669
+ "intermediate_dim": 18432,
1670
+ "head_dim": 256,
1671
+ "layer_norm_epsilon": 1e-06,
1672
+ "dropout": 0,
1673
+ "query_head_dim_normalize": true,
1674
+ "use_post_ffw_norm": true,
1675
+ "use_post_attention_norm": true,
1676
+ "final_logit_soft_cap": 30.0,
1677
+ "attention_logit_soft_cap": 50.0,
1678
+ "sliding_window_size": 4096,
1679
+ "use_sliding_window_attention": true
1680
+ },
1681
+ "registered_name": "keras_nlp>GemmaBackbone"
1682
+ }
metadata.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.4.1",
3
+ "keras_nlp_version": "0.15.0",
4
+ "parameter_count": 2615303424,
5
+ "date_saved": "2024-08-09@08:07:39"
6
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382f602810e5efe686ecd1d43010563fdb357e7ce4129d9255c2924712b3202f
3
+ size 2619889880
preprocessor.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor",
3
+ "class_name": "GemmaCausalLMPreprocessor",
4
+ "config": {
5
+ "name": "gemma_causal_lm_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "mixed_bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
17
+ "class_name": "GemmaTokenizer",
18
+ "config": {
19
+ "name": "gemma_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "proto": null,
30
+ "sequence_length": null
31
+ },
32
+ "registered_name": "keras_nlp>GemmaTokenizer"
33
+ },
34
+ "sequence_length": 2048,
35
+ "add_start_token": true,
36
+ "add_end_token": true
37
+ },
38
+ "registered_name": "keras_nlp>GemmaCausalLMPreprocessor"
39
+ }
task.json ADDED
@@ -0,0 +1,1729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm",
3
+ "class_name": "GemmaCausalLM",
4
+ "config": {
5
+ "backbone": {
6
+ "module": "keras_nlp.src.models.gemma.gemma_backbone",
7
+ "class_name": "GemmaBackbone",
8
+ "config": {
9
+ "name": "gemma_backbone",
10
+ "trainable": true,
11
+ "dtype": {
12
+ "module": "keras.dtype_policies",
13
+ "class_name": "DTypePolicyMap",
14
+ "config": {
15
+ "default_policy": null,
16
+ "policy_map": {
17
+ "token_embedding": {
18
+ "module": "keras.dtype_policies",
19
+ "class_name": "QuantizedDTypePolicy",
20
+ "config": {
21
+ "mode": "int8",
22
+ "source_name": null
23
+ },
24
+ "registered_name": null
25
+ },
26
+ "decoder_block_0/ffw_linear": {
27
+ "module": "keras.dtype_policies",
28
+ "class_name": "QuantizedDTypePolicy",
29
+ "config": {
30
+ "mode": "int8",
31
+ "source_name": null
32
+ },
33
+ "registered_name": null
34
+ },
35
+ "decoder_block_0/ffw_gating_2": {
36
+ "module": "keras.dtype_policies",
37
+ "class_name": "QuantizedDTypePolicy",
38
+ "config": {
39
+ "mode": "int8",
40
+ "source_name": null
41
+ },
42
+ "registered_name": null
43
+ },
44
+ "decoder_block_0/ffw_gating": {
45
+ "module": "keras.dtype_policies",
46
+ "class_name": "QuantizedDTypePolicy",
47
+ "config": {
48
+ "mode": "int8",
49
+ "source_name": null
50
+ },
51
+ "registered_name": null
52
+ },
53
+ "decoder_block_0/attention/attention_output": {
54
+ "module": "keras.dtype_policies",
55
+ "class_name": "QuantizedDTypePolicy",
56
+ "config": {
57
+ "mode": "int8",
58
+ "source_name": null
59
+ },
60
+ "registered_name": null
61
+ },
62
+ "decoder_block_0/attention/value": {
63
+ "module": "keras.dtype_policies",
64
+ "class_name": "QuantizedDTypePolicy",
65
+ "config": {
66
+ "mode": "int8",
67
+ "source_name": null
68
+ },
69
+ "registered_name": null
70
+ },
71
+ "decoder_block_0/attention/key": {
72
+ "module": "keras.dtype_policies",
73
+ "class_name": "QuantizedDTypePolicy",
74
+ "config": {
75
+ "mode": "int8",
76
+ "source_name": null
77
+ },
78
+ "registered_name": null
79
+ },
80
+ "decoder_block_0/attention/query": {
81
+ "module": "keras.dtype_policies",
82
+ "class_name": "QuantizedDTypePolicy",
83
+ "config": {
84
+ "mode": "int8",
85
+ "source_name": null
86
+ },
87
+ "registered_name": null
88
+ },
89
+ "decoder_block_1/ffw_linear": {
90
+ "module": "keras.dtype_policies",
91
+ "class_name": "QuantizedDTypePolicy",
92
+ "config": {
93
+ "mode": "int8",
94
+ "source_name": null
95
+ },
96
+ "registered_name": null
97
+ },
98
+ "decoder_block_1/ffw_gating_2": {
99
+ "module": "keras.dtype_policies",
100
+ "class_name": "QuantizedDTypePolicy",
101
+ "config": {
102
+ "mode": "int8",
103
+ "source_name": null
104
+ },
105
+ "registered_name": null
106
+ },
107
+ "decoder_block_1/ffw_gating": {
108
+ "module": "keras.dtype_policies",
109
+ "class_name": "QuantizedDTypePolicy",
110
+ "config": {
111
+ "mode": "int8",
112
+ "source_name": null
113
+ },
114
+ "registered_name": null
115
+ },
116
+ "decoder_block_1/attention/attention_output": {
117
+ "module": "keras.dtype_policies",
118
+ "class_name": "QuantizedDTypePolicy",
119
+ "config": {
120
+ "mode": "int8",
121
+ "source_name": null
122
+ },
123
+ "registered_name": null
124
+ },
125
+ "decoder_block_1/attention/value": {
126
+ "module": "keras.dtype_policies",
127
+ "class_name": "QuantizedDTypePolicy",
128
+ "config": {
129
+ "mode": "int8",
130
+ "source_name": null
131
+ },
132
+ "registered_name": null
133
+ },
134
+ "decoder_block_1/attention/key": {
135
+ "module": "keras.dtype_policies",
136
+ "class_name": "QuantizedDTypePolicy",
137
+ "config": {
138
+ "mode": "int8",
139
+ "source_name": null
140
+ },
141
+ "registered_name": null
142
+ },
143
+ "decoder_block_1/attention/query": {
144
+ "module": "keras.dtype_policies",
145
+ "class_name": "QuantizedDTypePolicy",
146
+ "config": {
147
+ "mode": "int8",
148
+ "source_name": null
149
+ },
150
+ "registered_name": null
151
+ },
152
+ "decoder_block_2/ffw_linear": {
153
+ "module": "keras.dtype_policies",
154
+ "class_name": "QuantizedDTypePolicy",
155
+ "config": {
156
+ "mode": "int8",
157
+ "source_name": null
158
+ },
159
+ "registered_name": null
160
+ },
161
+ "decoder_block_2/ffw_gating_2": {
162
+ "module": "keras.dtype_policies",
163
+ "class_name": "QuantizedDTypePolicy",
164
+ "config": {
165
+ "mode": "int8",
166
+ "source_name": null
167
+ },
168
+ "registered_name": null
169
+ },
170
+ "decoder_block_2/ffw_gating": {
171
+ "module": "keras.dtype_policies",
172
+ "class_name": "QuantizedDTypePolicy",
173
+ "config": {
174
+ "mode": "int8",
175
+ "source_name": null
176
+ },
177
+ "registered_name": null
178
+ },
179
+ "decoder_block_2/attention/attention_output": {
180
+ "module": "keras.dtype_policies",
181
+ "class_name": "QuantizedDTypePolicy",
182
+ "config": {
183
+ "mode": "int8",
184
+ "source_name": null
185
+ },
186
+ "registered_name": null
187
+ },
188
+ "decoder_block_2/attention/value": {
189
+ "module": "keras.dtype_policies",
190
+ "class_name": "QuantizedDTypePolicy",
191
+ "config": {
192
+ "mode": "int8",
193
+ "source_name": null
194
+ },
195
+ "registered_name": null
196
+ },
197
+ "decoder_block_2/attention/key": {
198
+ "module": "keras.dtype_policies",
199
+ "class_name": "QuantizedDTypePolicy",
200
+ "config": {
201
+ "mode": "int8",
202
+ "source_name": null
203
+ },
204
+ "registered_name": null
205
+ },
206
+ "decoder_block_2/attention/query": {
207
+ "module": "keras.dtype_policies",
208
+ "class_name": "QuantizedDTypePolicy",
209
+ "config": {
210
+ "mode": "int8",
211
+ "source_name": null
212
+ },
213
+ "registered_name": null
214
+ },
215
+ "decoder_block_3/ffw_linear": {
216
+ "module": "keras.dtype_policies",
217
+ "class_name": "QuantizedDTypePolicy",
218
+ "config": {
219
+ "mode": "int8",
220
+ "source_name": null
221
+ },
222
+ "registered_name": null
223
+ },
224
+ "decoder_block_3/ffw_gating_2": {
225
+ "module": "keras.dtype_policies",
226
+ "class_name": "QuantizedDTypePolicy",
227
+ "config": {
228
+ "mode": "int8",
229
+ "source_name": null
230
+ },
231
+ "registered_name": null
232
+ },
233
+ "decoder_block_3/ffw_gating": {
234
+ "module": "keras.dtype_policies",
235
+ "class_name": "QuantizedDTypePolicy",
236
+ "config": {
237
+ "mode": "int8",
238
+ "source_name": null
239
+ },
240
+ "registered_name": null
241
+ },
242
+ "decoder_block_3/attention/attention_output": {
243
+ "module": "keras.dtype_policies",
244
+ "class_name": "QuantizedDTypePolicy",
245
+ "config": {
246
+ "mode": "int8",
247
+ "source_name": null
248
+ },
249
+ "registered_name": null
250
+ },
251
+ "decoder_block_3/attention/value": {
252
+ "module": "keras.dtype_policies",
253
+ "class_name": "QuantizedDTypePolicy",
254
+ "config": {
255
+ "mode": "int8",
256
+ "source_name": null
257
+ },
258
+ "registered_name": null
259
+ },
260
+ "decoder_block_3/attention/key": {
261
+ "module": "keras.dtype_policies",
262
+ "class_name": "QuantizedDTypePolicy",
263
+ "config": {
264
+ "mode": "int8",
265
+ "source_name": null
266
+ },
267
+ "registered_name": null
268
+ },
269
+ "decoder_block_3/attention/query": {
270
+ "module": "keras.dtype_policies",
271
+ "class_name": "QuantizedDTypePolicy",
272
+ "config": {
273
+ "mode": "int8",
274
+ "source_name": null
275
+ },
276
+ "registered_name": null
277
+ },
278
+ "decoder_block_4/ffw_linear": {
279
+ "module": "keras.dtype_policies",
280
+ "class_name": "QuantizedDTypePolicy",
281
+ "config": {
282
+ "mode": "int8",
283
+ "source_name": null
284
+ },
285
+ "registered_name": null
286
+ },
287
+ "decoder_block_4/ffw_gating_2": {
288
+ "module": "keras.dtype_policies",
289
+ "class_name": "QuantizedDTypePolicy",
290
+ "config": {
291
+ "mode": "int8",
292
+ "source_name": null
293
+ },
294
+ "registered_name": null
295
+ },
296
+ "decoder_block_4/ffw_gating": {
297
+ "module": "keras.dtype_policies",
298
+ "class_name": "QuantizedDTypePolicy",
299
+ "config": {
300
+ "mode": "int8",
301
+ "source_name": null
302
+ },
303
+ "registered_name": null
304
+ },
305
+ "decoder_block_4/attention/attention_output": {
306
+ "module": "keras.dtype_policies",
307
+ "class_name": "QuantizedDTypePolicy",
308
+ "config": {
309
+ "mode": "int8",
310
+ "source_name": null
311
+ },
312
+ "registered_name": null
313
+ },
314
+ "decoder_block_4/attention/value": {
315
+ "module": "keras.dtype_policies",
316
+ "class_name": "QuantizedDTypePolicy",
317
+ "config": {
318
+ "mode": "int8",
319
+ "source_name": null
320
+ },
321
+ "registered_name": null
322
+ },
323
+ "decoder_block_4/attention/key": {
324
+ "module": "keras.dtype_policies",
325
+ "class_name": "QuantizedDTypePolicy",
326
+ "config": {
327
+ "mode": "int8",
328
+ "source_name": null
329
+ },
330
+ "registered_name": null
331
+ },
332
+ "decoder_block_4/attention/query": {
333
+ "module": "keras.dtype_policies",
334
+ "class_name": "QuantizedDTypePolicy",
335
+ "config": {
336
+ "mode": "int8",
337
+ "source_name": null
338
+ },
339
+ "registered_name": null
340
+ },
341
+ "decoder_block_5/ffw_linear": {
342
+ "module": "keras.dtype_policies",
343
+ "class_name": "QuantizedDTypePolicy",
344
+ "config": {
345
+ "mode": "int8",
346
+ "source_name": null
347
+ },
348
+ "registered_name": null
349
+ },
350
+ "decoder_block_5/ffw_gating_2": {
351
+ "module": "keras.dtype_policies",
352
+ "class_name": "QuantizedDTypePolicy",
353
+ "config": {
354
+ "mode": "int8",
355
+ "source_name": null
356
+ },
357
+ "registered_name": null
358
+ },
359
+ "decoder_block_5/ffw_gating": {
360
+ "module": "keras.dtype_policies",
361
+ "class_name": "QuantizedDTypePolicy",
362
+ "config": {
363
+ "mode": "int8",
364
+ "source_name": null
365
+ },
366
+ "registered_name": null
367
+ },
368
+ "decoder_block_5/attention/attention_output": {
369
+ "module": "keras.dtype_policies",
370
+ "class_name": "QuantizedDTypePolicy",
371
+ "config": {
372
+ "mode": "int8",
373
+ "source_name": null
374
+ },
375
+ "registered_name": null
376
+ },
377
+ "decoder_block_5/attention/value": {
378
+ "module": "keras.dtype_policies",
379
+ "class_name": "QuantizedDTypePolicy",
380
+ "config": {
381
+ "mode": "int8",
382
+ "source_name": null
383
+ },
384
+ "registered_name": null
385
+ },
386
+ "decoder_block_5/attention/key": {
387
+ "module": "keras.dtype_policies",
388
+ "class_name": "QuantizedDTypePolicy",
389
+ "config": {
390
+ "mode": "int8",
391
+ "source_name": null
392
+ },
393
+ "registered_name": null
394
+ },
395
+ "decoder_block_5/attention/query": {
396
+ "module": "keras.dtype_policies",
397
+ "class_name": "QuantizedDTypePolicy",
398
+ "config": {
399
+ "mode": "int8",
400
+ "source_name": null
401
+ },
402
+ "registered_name": null
403
+ },
404
+ "decoder_block_6/ffw_linear": {
405
+ "module": "keras.dtype_policies",
406
+ "class_name": "QuantizedDTypePolicy",
407
+ "config": {
408
+ "mode": "int8",
409
+ "source_name": null
410
+ },
411
+ "registered_name": null
412
+ },
413
+ "decoder_block_6/ffw_gating_2": {
414
+ "module": "keras.dtype_policies",
415
+ "class_name": "QuantizedDTypePolicy",
416
+ "config": {
417
+ "mode": "int8",
418
+ "source_name": null
419
+ },
420
+ "registered_name": null
421
+ },
422
+ "decoder_block_6/ffw_gating": {
423
+ "module": "keras.dtype_policies",
424
+ "class_name": "QuantizedDTypePolicy",
425
+ "config": {
426
+ "mode": "int8",
427
+ "source_name": null
428
+ },
429
+ "registered_name": null
430
+ },
431
+ "decoder_block_6/attention/attention_output": {
432
+ "module": "keras.dtype_policies",
433
+ "class_name": "QuantizedDTypePolicy",
434
+ "config": {
435
+ "mode": "int8",
436
+ "source_name": null
437
+ },
438
+ "registered_name": null
439
+ },
440
+ "decoder_block_6/attention/value": {
441
+ "module": "keras.dtype_policies",
442
+ "class_name": "QuantizedDTypePolicy",
443
+ "config": {
444
+ "mode": "int8",
445
+ "source_name": null
446
+ },
447
+ "registered_name": null
448
+ },
449
+ "decoder_block_6/attention/key": {
450
+ "module": "keras.dtype_policies",
451
+ "class_name": "QuantizedDTypePolicy",
452
+ "config": {
453
+ "mode": "int8",
454
+ "source_name": null
455
+ },
456
+ "registered_name": null
457
+ },
458
+ "decoder_block_6/attention/query": {
459
+ "module": "keras.dtype_policies",
460
+ "class_name": "QuantizedDTypePolicy",
461
+ "config": {
462
+ "mode": "int8",
463
+ "source_name": null
464
+ },
465
+ "registered_name": null
466
+ },
467
+ "decoder_block_7/ffw_linear": {
468
+ "module": "keras.dtype_policies",
469
+ "class_name": "QuantizedDTypePolicy",
470
+ "config": {
471
+ "mode": "int8",
472
+ "source_name": null
473
+ },
474
+ "registered_name": null
475
+ },
476
+ "decoder_block_7/ffw_gating_2": {
477
+ "module": "keras.dtype_policies",
478
+ "class_name": "QuantizedDTypePolicy",
479
+ "config": {
480
+ "mode": "int8",
481
+ "source_name": null
482
+ },
483
+ "registered_name": null
484
+ },
485
+ "decoder_block_7/ffw_gating": {
486
+ "module": "keras.dtype_policies",
487
+ "class_name": "QuantizedDTypePolicy",
488
+ "config": {
489
+ "mode": "int8",
490
+ "source_name": null
491
+ },
492
+ "registered_name": null
493
+ },
494
+ "decoder_block_7/attention/attention_output": {
495
+ "module": "keras.dtype_policies",
496
+ "class_name": "QuantizedDTypePolicy",
497
+ "config": {
498
+ "mode": "int8",
499
+ "source_name": null
500
+ },
501
+ "registered_name": null
502
+ },
503
+ "decoder_block_7/attention/value": {
504
+ "module": "keras.dtype_policies",
505
+ "class_name": "QuantizedDTypePolicy",
506
+ "config": {
507
+ "mode": "int8",
508
+ "source_name": null
509
+ },
510
+ "registered_name": null
511
+ },
512
+ "decoder_block_7/attention/key": {
513
+ "module": "keras.dtype_policies",
514
+ "class_name": "QuantizedDTypePolicy",
515
+ "config": {
516
+ "mode": "int8",
517
+ "source_name": null
518
+ },
519
+ "registered_name": null
520
+ },
521
+ "decoder_block_7/attention/query": {
522
+ "module": "keras.dtype_policies",
523
+ "class_name": "QuantizedDTypePolicy",
524
+ "config": {
525
+ "mode": "int8",
526
+ "source_name": null
527
+ },
528
+ "registered_name": null
529
+ },
530
+ "decoder_block_8/ffw_linear": {
531
+ "module": "keras.dtype_policies",
532
+ "class_name": "QuantizedDTypePolicy",
533
+ "config": {
534
+ "mode": "int8",
535
+ "source_name": null
536
+ },
537
+ "registered_name": null
538
+ },
539
+ "decoder_block_8/ffw_gating_2": {
540
+ "module": "keras.dtype_policies",
541
+ "class_name": "QuantizedDTypePolicy",
542
+ "config": {
543
+ "mode": "int8",
544
+ "source_name": null
545
+ },
546
+ "registered_name": null
547
+ },
548
+ "decoder_block_8/ffw_gating": {
549
+ "module": "keras.dtype_policies",
550
+ "class_name": "QuantizedDTypePolicy",
551
+ "config": {
552
+ "mode": "int8",
553
+ "source_name": null
554
+ },
555
+ "registered_name": null
556
+ },
557
+ "decoder_block_8/attention/attention_output": {
558
+ "module": "keras.dtype_policies",
559
+ "class_name": "QuantizedDTypePolicy",
560
+ "config": {
561
+ "mode": "int8",
562
+ "source_name": null
563
+ },
564
+ "registered_name": null
565
+ },
566
+ "decoder_block_8/attention/value": {
567
+ "module": "keras.dtype_policies",
568
+ "class_name": "QuantizedDTypePolicy",
569
+ "config": {
570
+ "mode": "int8",
571
+ "source_name": null
572
+ },
573
+ "registered_name": null
574
+ },
575
+ "decoder_block_8/attention/key": {
576
+ "module": "keras.dtype_policies",
577
+ "class_name": "QuantizedDTypePolicy",
578
+ "config": {
579
+ "mode": "int8",
580
+ "source_name": null
581
+ },
582
+ "registered_name": null
583
+ },
584
+ "decoder_block_8/attention/query": {
585
+ "module": "keras.dtype_policies",
586
+ "class_name": "QuantizedDTypePolicy",
587
+ "config": {
588
+ "mode": "int8",
589
+ "source_name": null
590
+ },
591
+ "registered_name": null
592
+ },
593
+ "decoder_block_9/ffw_linear": {
594
+ "module": "keras.dtype_policies",
595
+ "class_name": "QuantizedDTypePolicy",
596
+ "config": {
597
+ "mode": "int8",
598
+ "source_name": null
599
+ },
600
+ "registered_name": null
601
+ },
602
+ "decoder_block_9/ffw_gating_2": {
603
+ "module": "keras.dtype_policies",
604
+ "class_name": "QuantizedDTypePolicy",
605
+ "config": {
606
+ "mode": "int8",
607
+ "source_name": null
608
+ },
609
+ "registered_name": null
610
+ },
611
+ "decoder_block_9/ffw_gating": {
612
+ "module": "keras.dtype_policies",
613
+ "class_name": "QuantizedDTypePolicy",
614
+ "config": {
615
+ "mode": "int8",
616
+ "source_name": null
617
+ },
618
+ "registered_name": null
619
+ },
620
+ "decoder_block_9/attention/attention_output": {
621
+ "module": "keras.dtype_policies",
622
+ "class_name": "QuantizedDTypePolicy",
623
+ "config": {
624
+ "mode": "int8",
625
+ "source_name": null
626
+ },
627
+ "registered_name": null
628
+ },
629
+ "decoder_block_9/attention/value": {
630
+ "module": "keras.dtype_policies",
631
+ "class_name": "QuantizedDTypePolicy",
632
+ "config": {
633
+ "mode": "int8",
634
+ "source_name": null
635
+ },
636
+ "registered_name": null
637
+ },
638
+ "decoder_block_9/attention/key": {
639
+ "module": "keras.dtype_policies",
640
+ "class_name": "QuantizedDTypePolicy",
641
+ "config": {
642
+ "mode": "int8",
643
+ "source_name": null
644
+ },
645
+ "registered_name": null
646
+ },
647
+ "decoder_block_9/attention/query": {
648
+ "module": "keras.dtype_policies",
649
+ "class_name": "QuantizedDTypePolicy",
650
+ "config": {
651
+ "mode": "int8",
652
+ "source_name": null
653
+ },
654
+ "registered_name": null
655
+ },
656
+ "decoder_block_10/ffw_linear": {
657
+ "module": "keras.dtype_policies",
658
+ "class_name": "QuantizedDTypePolicy",
659
+ "config": {
660
+ "mode": "int8",
661
+ "source_name": null
662
+ },
663
+ "registered_name": null
664
+ },
665
+ "decoder_block_10/ffw_gating_2": {
666
+ "module": "keras.dtype_policies",
667
+ "class_name": "QuantizedDTypePolicy",
668
+ "config": {
669
+ "mode": "int8",
670
+ "source_name": null
671
+ },
672
+ "registered_name": null
673
+ },
674
+ "decoder_block_10/ffw_gating": {
675
+ "module": "keras.dtype_policies",
676
+ "class_name": "QuantizedDTypePolicy",
677
+ "config": {
678
+ "mode": "int8",
679
+ "source_name": null
680
+ },
681
+ "registered_name": null
682
+ },
683
+ "decoder_block_10/attention/attention_output": {
684
+ "module": "keras.dtype_policies",
685
+ "class_name": "QuantizedDTypePolicy",
686
+ "config": {
687
+ "mode": "int8",
688
+ "source_name": null
689
+ },
690
+ "registered_name": null
691
+ },
692
+ "decoder_block_10/attention/value": {
693
+ "module": "keras.dtype_policies",
694
+ "class_name": "QuantizedDTypePolicy",
695
+ "config": {
696
+ "mode": "int8",
697
+ "source_name": null
698
+ },
699
+ "registered_name": null
700
+ },
701
+ "decoder_block_10/attention/key": {
702
+ "module": "keras.dtype_policies",
703
+ "class_name": "QuantizedDTypePolicy",
704
+ "config": {
705
+ "mode": "int8",
706
+ "source_name": null
707
+ },
708
+ "registered_name": null
709
+ },
710
+ "decoder_block_10/attention/query": {
711
+ "module": "keras.dtype_policies",
712
+ "class_name": "QuantizedDTypePolicy",
713
+ "config": {
714
+ "mode": "int8",
715
+ "source_name": null
716
+ },
717
+ "registered_name": null
718
+ },
719
+ "decoder_block_11/ffw_linear": {
720
+ "module": "keras.dtype_policies",
721
+ "class_name": "QuantizedDTypePolicy",
722
+ "config": {
723
+ "mode": "int8",
724
+ "source_name": null
725
+ },
726
+ "registered_name": null
727
+ },
728
+ "decoder_block_11/ffw_gating_2": {
729
+ "module": "keras.dtype_policies",
730
+ "class_name": "QuantizedDTypePolicy",
731
+ "config": {
732
+ "mode": "int8",
733
+ "source_name": null
734
+ },
735
+ "registered_name": null
736
+ },
737
+ "decoder_block_11/ffw_gating": {
738
+ "module": "keras.dtype_policies",
739
+ "class_name": "QuantizedDTypePolicy",
740
+ "config": {
741
+ "mode": "int8",
742
+ "source_name": null
743
+ },
744
+ "registered_name": null
745
+ },
746
+ "decoder_block_11/attention/attention_output": {
747
+ "module": "keras.dtype_policies",
748
+ "class_name": "QuantizedDTypePolicy",
749
+ "config": {
750
+ "mode": "int8",
751
+ "source_name": null
752
+ },
753
+ "registered_name": null
754
+ },
755
+ "decoder_block_11/attention/value": {
756
+ "module": "keras.dtype_policies",
757
+ "class_name": "QuantizedDTypePolicy",
758
+ "config": {
759
+ "mode": "int8",
760
+ "source_name": null
761
+ },
762
+ "registered_name": null
763
+ },
764
+ "decoder_block_11/attention/key": {
765
+ "module": "keras.dtype_policies",
766
+ "class_name": "QuantizedDTypePolicy",
767
+ "config": {
768
+ "mode": "int8",
769
+ "source_name": null
770
+ },
771
+ "registered_name": null
772
+ },
773
+ "decoder_block_11/attention/query": {
774
+ "module": "keras.dtype_policies",
775
+ "class_name": "QuantizedDTypePolicy",
776
+ "config": {
777
+ "mode": "int8",
778
+ "source_name": null
779
+ },
780
+ "registered_name": null
781
+ },
782
+ "decoder_block_12/ffw_linear": {
783
+ "module": "keras.dtype_policies",
784
+ "class_name": "QuantizedDTypePolicy",
785
+ "config": {
786
+ "mode": "int8",
787
+ "source_name": null
788
+ },
789
+ "registered_name": null
790
+ },
791
+ "decoder_block_12/ffw_gating_2": {
792
+ "module": "keras.dtype_policies",
793
+ "class_name": "QuantizedDTypePolicy",
794
+ "config": {
795
+ "mode": "int8",
796
+ "source_name": null
797
+ },
798
+ "registered_name": null
799
+ },
800
+ "decoder_block_12/ffw_gating": {
801
+ "module": "keras.dtype_policies",
802
+ "class_name": "QuantizedDTypePolicy",
803
+ "config": {
804
+ "mode": "int8",
805
+ "source_name": null
806
+ },
807
+ "registered_name": null
808
+ },
809
+ "decoder_block_12/attention/attention_output": {
810
+ "module": "keras.dtype_policies",
811
+ "class_name": "QuantizedDTypePolicy",
812
+ "config": {
813
+ "mode": "int8",
814
+ "source_name": null
815
+ },
816
+ "registered_name": null
817
+ },
818
+ "decoder_block_12/attention/value": {
819
+ "module": "keras.dtype_policies",
820
+ "class_name": "QuantizedDTypePolicy",
821
+ "config": {
822
+ "mode": "int8",
823
+ "source_name": null
824
+ },
825
+ "registered_name": null
826
+ },
827
+ "decoder_block_12/attention/key": {
828
+ "module": "keras.dtype_policies",
829
+ "class_name": "QuantizedDTypePolicy",
830
+ "config": {
831
+ "mode": "int8",
832
+ "source_name": null
833
+ },
834
+ "registered_name": null
835
+ },
836
+ "decoder_block_12/attention/query": {
837
+ "module": "keras.dtype_policies",
838
+ "class_name": "QuantizedDTypePolicy",
839
+ "config": {
840
+ "mode": "int8",
841
+ "source_name": null
842
+ },
843
+ "registered_name": null
844
+ },
845
+ "decoder_block_13/ffw_linear": {
846
+ "module": "keras.dtype_policies",
847
+ "class_name": "QuantizedDTypePolicy",
848
+ "config": {
849
+ "mode": "int8",
850
+ "source_name": null
851
+ },
852
+ "registered_name": null
853
+ },
854
+ "decoder_block_13/ffw_gating_2": {
855
+ "module": "keras.dtype_policies",
856
+ "class_name": "QuantizedDTypePolicy",
857
+ "config": {
858
+ "mode": "int8",
859
+ "source_name": null
860
+ },
861
+ "registered_name": null
862
+ },
863
+ "decoder_block_13/ffw_gating": {
864
+ "module": "keras.dtype_policies",
865
+ "class_name": "QuantizedDTypePolicy",
866
+ "config": {
867
+ "mode": "int8",
868
+ "source_name": null
869
+ },
870
+ "registered_name": null
871
+ },
872
+ "decoder_block_13/attention/attention_output": {
873
+ "module": "keras.dtype_policies",
874
+ "class_name": "QuantizedDTypePolicy",
875
+ "config": {
876
+ "mode": "int8",
877
+ "source_name": null
878
+ },
879
+ "registered_name": null
880
+ },
881
+ "decoder_block_13/attention/value": {
882
+ "module": "keras.dtype_policies",
883
+ "class_name": "QuantizedDTypePolicy",
884
+ "config": {
885
+ "mode": "int8",
886
+ "source_name": null
887
+ },
888
+ "registered_name": null
889
+ },
890
+ "decoder_block_13/attention/key": {
891
+ "module": "keras.dtype_policies",
892
+ "class_name": "QuantizedDTypePolicy",
893
+ "config": {
894
+ "mode": "int8",
895
+ "source_name": null
896
+ },
897
+ "registered_name": null
898
+ },
899
+ "decoder_block_13/attention/query": {
900
+ "module": "keras.dtype_policies",
901
+ "class_name": "QuantizedDTypePolicy",
902
+ "config": {
903
+ "mode": "int8",
904
+ "source_name": null
905
+ },
906
+ "registered_name": null
907
+ },
908
+ "decoder_block_14/ffw_linear": {
909
+ "module": "keras.dtype_policies",
910
+ "class_name": "QuantizedDTypePolicy",
911
+ "config": {
912
+ "mode": "int8",
913
+ "source_name": null
914
+ },
915
+ "registered_name": null
916
+ },
917
+ "decoder_block_14/ffw_gating_2": {
918
+ "module": "keras.dtype_policies",
919
+ "class_name": "QuantizedDTypePolicy",
920
+ "config": {
921
+ "mode": "int8",
922
+ "source_name": null
923
+ },
924
+ "registered_name": null
925
+ },
926
+ "decoder_block_14/ffw_gating": {
927
+ "module": "keras.dtype_policies",
928
+ "class_name": "QuantizedDTypePolicy",
929
+ "config": {
930
+ "mode": "int8",
931
+ "source_name": null
932
+ },
933
+ "registered_name": null
934
+ },
935
+ "decoder_block_14/attention/attention_output": {
936
+ "module": "keras.dtype_policies",
937
+ "class_name": "QuantizedDTypePolicy",
938
+ "config": {
939
+ "mode": "int8",
940
+ "source_name": null
941
+ },
942
+ "registered_name": null
943
+ },
944
+ "decoder_block_14/attention/value": {
945
+ "module": "keras.dtype_policies",
946
+ "class_name": "QuantizedDTypePolicy",
947
+ "config": {
948
+ "mode": "int8",
949
+ "source_name": null
950
+ },
951
+ "registered_name": null
952
+ },
953
+ "decoder_block_14/attention/key": {
954
+ "module": "keras.dtype_policies",
955
+ "class_name": "QuantizedDTypePolicy",
956
+ "config": {
957
+ "mode": "int8",
958
+ "source_name": null
959
+ },
960
+ "registered_name": null
961
+ },
962
+ "decoder_block_14/attention/query": {
963
+ "module": "keras.dtype_policies",
964
+ "class_name": "QuantizedDTypePolicy",
965
+ "config": {
966
+ "mode": "int8",
967
+ "source_name": null
968
+ },
969
+ "registered_name": null
970
+ },
971
+ "decoder_block_15/ffw_linear": {
972
+ "module": "keras.dtype_policies",
973
+ "class_name": "QuantizedDTypePolicy",
974
+ "config": {
975
+ "mode": "int8",
976
+ "source_name": null
977
+ },
978
+ "registered_name": null
979
+ },
980
+ "decoder_block_15/ffw_gating_2": {
981
+ "module": "keras.dtype_policies",
982
+ "class_name": "QuantizedDTypePolicy",
983
+ "config": {
984
+ "mode": "int8",
985
+ "source_name": null
986
+ },
987
+ "registered_name": null
988
+ },
989
+ "decoder_block_15/ffw_gating": {
990
+ "module": "keras.dtype_policies",
991
+ "class_name": "QuantizedDTypePolicy",
992
+ "config": {
993
+ "mode": "int8",
994
+ "source_name": null
995
+ },
996
+ "registered_name": null
997
+ },
998
+ "decoder_block_15/attention/attention_output": {
999
+ "module": "keras.dtype_policies",
1000
+ "class_name": "QuantizedDTypePolicy",
1001
+ "config": {
1002
+ "mode": "int8",
1003
+ "source_name": null
1004
+ },
1005
+ "registered_name": null
1006
+ },
1007
+ "decoder_block_15/attention/value": {
1008
+ "module": "keras.dtype_policies",
1009
+ "class_name": "QuantizedDTypePolicy",
1010
+ "config": {
1011
+ "mode": "int8",
1012
+ "source_name": null
1013
+ },
1014
+ "registered_name": null
1015
+ },
1016
+ "decoder_block_15/attention/key": {
1017
+ "module": "keras.dtype_policies",
1018
+ "class_name": "QuantizedDTypePolicy",
1019
+ "config": {
1020
+ "mode": "int8",
1021
+ "source_name": null
1022
+ },
1023
+ "registered_name": null
1024
+ },
1025
+ "decoder_block_15/attention/query": {
1026
+ "module": "keras.dtype_policies",
1027
+ "class_name": "QuantizedDTypePolicy",
1028
+ "config": {
1029
+ "mode": "int8",
1030
+ "source_name": null
1031
+ },
1032
+ "registered_name": null
1033
+ },
1034
+ "decoder_block_16/ffw_linear": {
1035
+ "module": "keras.dtype_policies",
1036
+ "class_name": "QuantizedDTypePolicy",
1037
+ "config": {
1038
+ "mode": "int8",
1039
+ "source_name": null
1040
+ },
1041
+ "registered_name": null
1042
+ },
1043
+ "decoder_block_16/ffw_gating_2": {
1044
+ "module": "keras.dtype_policies",
1045
+ "class_name": "QuantizedDTypePolicy",
1046
+ "config": {
1047
+ "mode": "int8",
1048
+ "source_name": null
1049
+ },
1050
+ "registered_name": null
1051
+ },
1052
+ "decoder_block_16/ffw_gating": {
1053
+ "module": "keras.dtype_policies",
1054
+ "class_name": "QuantizedDTypePolicy",
1055
+ "config": {
1056
+ "mode": "int8",
1057
+ "source_name": null
1058
+ },
1059
+ "registered_name": null
1060
+ },
1061
+ "decoder_block_16/attention/attention_output": {
1062
+ "module": "keras.dtype_policies",
1063
+ "class_name": "QuantizedDTypePolicy",
1064
+ "config": {
1065
+ "mode": "int8",
1066
+ "source_name": null
1067
+ },
1068
+ "registered_name": null
1069
+ },
1070
+ "decoder_block_16/attention/value": {
1071
+ "module": "keras.dtype_policies",
1072
+ "class_name": "QuantizedDTypePolicy",
1073
+ "config": {
1074
+ "mode": "int8",
1075
+ "source_name": null
1076
+ },
1077
+ "registered_name": null
1078
+ },
1079
+ "decoder_block_16/attention/key": {
1080
+ "module": "keras.dtype_policies",
1081
+ "class_name": "QuantizedDTypePolicy",
1082
+ "config": {
1083
+ "mode": "int8",
1084
+ "source_name": null
1085
+ },
1086
+ "registered_name": null
1087
+ },
1088
+ "decoder_block_16/attention/query": {
1089
+ "module": "keras.dtype_policies",
1090
+ "class_name": "QuantizedDTypePolicy",
1091
+ "config": {
1092
+ "mode": "int8",
1093
+ "source_name": null
1094
+ },
1095
+ "registered_name": null
1096
+ },
1097
+ "decoder_block_17/ffw_linear": {
1098
+ "module": "keras.dtype_policies",
1099
+ "class_name": "QuantizedDTypePolicy",
1100
+ "config": {
1101
+ "mode": "int8",
1102
+ "source_name": null
1103
+ },
1104
+ "registered_name": null
1105
+ },
1106
+ "decoder_block_17/ffw_gating_2": {
1107
+ "module": "keras.dtype_policies",
1108
+ "class_name": "QuantizedDTypePolicy",
1109
+ "config": {
1110
+ "mode": "int8",
1111
+ "source_name": null
1112
+ },
1113
+ "registered_name": null
1114
+ },
1115
+ "decoder_block_17/ffw_gating": {
1116
+ "module": "keras.dtype_policies",
1117
+ "class_name": "QuantizedDTypePolicy",
1118
+ "config": {
1119
+ "mode": "int8",
1120
+ "source_name": null
1121
+ },
1122
+ "registered_name": null
1123
+ },
1124
+ "decoder_block_17/attention/attention_output": {
1125
+ "module": "keras.dtype_policies",
1126
+ "class_name": "QuantizedDTypePolicy",
1127
+ "config": {
1128
+ "mode": "int8",
1129
+ "source_name": null
1130
+ },
1131
+ "registered_name": null
1132
+ },
1133
+ "decoder_block_17/attention/value": {
1134
+ "module": "keras.dtype_policies",
1135
+ "class_name": "QuantizedDTypePolicy",
1136
+ "config": {
1137
+ "mode": "int8",
1138
+ "source_name": null
1139
+ },
1140
+ "registered_name": null
1141
+ },
1142
+ "decoder_block_17/attention/key": {
1143
+ "module": "keras.dtype_policies",
1144
+ "class_name": "QuantizedDTypePolicy",
1145
+ "config": {
1146
+ "mode": "int8",
1147
+ "source_name": null
1148
+ },
1149
+ "registered_name": null
1150
+ },
1151
+ "decoder_block_17/attention/query": {
1152
+ "module": "keras.dtype_policies",
1153
+ "class_name": "QuantizedDTypePolicy",
1154
+ "config": {
1155
+ "mode": "int8",
1156
+ "source_name": null
1157
+ },
1158
+ "registered_name": null
1159
+ },
1160
+ "decoder_block_18/ffw_linear": {
1161
+ "module": "keras.dtype_policies",
1162
+ "class_name": "QuantizedDTypePolicy",
1163
+ "config": {
1164
+ "mode": "int8",
1165
+ "source_name": null
1166
+ },
1167
+ "registered_name": null
1168
+ },
1169
+ "decoder_block_18/ffw_gating_2": {
1170
+ "module": "keras.dtype_policies",
1171
+ "class_name": "QuantizedDTypePolicy",
1172
+ "config": {
1173
+ "mode": "int8",
1174
+ "source_name": null
1175
+ },
1176
+ "registered_name": null
1177
+ },
1178
+ "decoder_block_18/ffw_gating": {
1179
+ "module": "keras.dtype_policies",
1180
+ "class_name": "QuantizedDTypePolicy",
1181
+ "config": {
1182
+ "mode": "int8",
1183
+ "source_name": null
1184
+ },
1185
+ "registered_name": null
1186
+ },
1187
+ "decoder_block_18/attention/attention_output": {
1188
+ "module": "keras.dtype_policies",
1189
+ "class_name": "QuantizedDTypePolicy",
1190
+ "config": {
1191
+ "mode": "int8",
1192
+ "source_name": null
1193
+ },
1194
+ "registered_name": null
1195
+ },
1196
+ "decoder_block_18/attention/value": {
1197
+ "module": "keras.dtype_policies",
1198
+ "class_name": "QuantizedDTypePolicy",
1199
+ "config": {
1200
+ "mode": "int8",
1201
+ "source_name": null
1202
+ },
1203
+ "registered_name": null
1204
+ },
1205
+ "decoder_block_18/attention/key": {
1206
+ "module": "keras.dtype_policies",
1207
+ "class_name": "QuantizedDTypePolicy",
1208
+ "config": {
1209
+ "mode": "int8",
1210
+ "source_name": null
1211
+ },
1212
+ "registered_name": null
1213
+ },
1214
+ "decoder_block_18/attention/query": {
1215
+ "module": "keras.dtype_policies",
1216
+ "class_name": "QuantizedDTypePolicy",
1217
+ "config": {
1218
+ "mode": "int8",
1219
+ "source_name": null
1220
+ },
1221
+ "registered_name": null
1222
+ },
1223
+ "decoder_block_19/ffw_linear": {
1224
+ "module": "keras.dtype_policies",
1225
+ "class_name": "QuantizedDTypePolicy",
1226
+ "config": {
1227
+ "mode": "int8",
1228
+ "source_name": null
1229
+ },
1230
+ "registered_name": null
1231
+ },
1232
+ "decoder_block_19/ffw_gating_2": {
1233
+ "module": "keras.dtype_policies",
1234
+ "class_name": "QuantizedDTypePolicy",
1235
+ "config": {
1236
+ "mode": "int8",
1237
+ "source_name": null
1238
+ },
1239
+ "registered_name": null
1240
+ },
1241
+ "decoder_block_19/ffw_gating": {
1242
+ "module": "keras.dtype_policies",
1243
+ "class_name": "QuantizedDTypePolicy",
1244
+ "config": {
1245
+ "mode": "int8",
1246
+ "source_name": null
1247
+ },
1248
+ "registered_name": null
1249
+ },
1250
+ "decoder_block_19/attention/attention_output": {
1251
+ "module": "keras.dtype_policies",
1252
+ "class_name": "QuantizedDTypePolicy",
1253
+ "config": {
1254
+ "mode": "int8",
1255
+ "source_name": null
1256
+ },
1257
+ "registered_name": null
1258
+ },
1259
+ "decoder_block_19/attention/value": {
1260
+ "module": "keras.dtype_policies",
1261
+ "class_name": "QuantizedDTypePolicy",
1262
+ "config": {
1263
+ "mode": "int8",
1264
+ "source_name": null
1265
+ },
1266
+ "registered_name": null
1267
+ },
1268
+ "decoder_block_19/attention/key": {
1269
+ "module": "keras.dtype_policies",
1270
+ "class_name": "QuantizedDTypePolicy",
1271
+ "config": {
1272
+ "mode": "int8",
1273
+ "source_name": null
1274
+ },
1275
+ "registered_name": null
1276
+ },
1277
+ "decoder_block_19/attention/query": {
1278
+ "module": "keras.dtype_policies",
1279
+ "class_name": "QuantizedDTypePolicy",
1280
+ "config": {
1281
+ "mode": "int8",
1282
+ "source_name": null
1283
+ },
1284
+ "registered_name": null
1285
+ },
1286
+ "decoder_block_20/ffw_linear": {
1287
+ "module": "keras.dtype_policies",
1288
+ "class_name": "QuantizedDTypePolicy",
1289
+ "config": {
1290
+ "mode": "int8",
1291
+ "source_name": null
1292
+ },
1293
+ "registered_name": null
1294
+ },
1295
+ "decoder_block_20/ffw_gating_2": {
1296
+ "module": "keras.dtype_policies",
1297
+ "class_name": "QuantizedDTypePolicy",
1298
+ "config": {
1299
+ "mode": "int8",
1300
+ "source_name": null
1301
+ },
1302
+ "registered_name": null
1303
+ },
1304
+ "decoder_block_20/ffw_gating": {
1305
+ "module": "keras.dtype_policies",
1306
+ "class_name": "QuantizedDTypePolicy",
1307
+ "config": {
1308
+ "mode": "int8",
1309
+ "source_name": null
1310
+ },
1311
+ "registered_name": null
1312
+ },
1313
+ "decoder_block_20/attention/attention_output": {
1314
+ "module": "keras.dtype_policies",
1315
+ "class_name": "QuantizedDTypePolicy",
1316
+ "config": {
1317
+ "mode": "int8",
1318
+ "source_name": null
1319
+ },
1320
+ "registered_name": null
1321
+ },
1322
+ "decoder_block_20/attention/value": {
1323
+ "module": "keras.dtype_policies",
1324
+ "class_name": "QuantizedDTypePolicy",
1325
+ "config": {
1326
+ "mode": "int8",
1327
+ "source_name": null
1328
+ },
1329
+ "registered_name": null
1330
+ },
1331
+ "decoder_block_20/attention/key": {
1332
+ "module": "keras.dtype_policies",
1333
+ "class_name": "QuantizedDTypePolicy",
1334
+ "config": {
1335
+ "mode": "int8",
1336
+ "source_name": null
1337
+ },
1338
+ "registered_name": null
1339
+ },
1340
+ "decoder_block_20/attention/query": {
1341
+ "module": "keras.dtype_policies",
1342
+ "class_name": "QuantizedDTypePolicy",
1343
+ "config": {
1344
+ "mode": "int8",
1345
+ "source_name": null
1346
+ },
1347
+ "registered_name": null
1348
+ },
1349
+ "decoder_block_21/ffw_linear": {
1350
+ "module": "keras.dtype_policies",
1351
+ "class_name": "QuantizedDTypePolicy",
1352
+ "config": {
1353
+ "mode": "int8",
1354
+ "source_name": null
1355
+ },
1356
+ "registered_name": null
1357
+ },
1358
+ "decoder_block_21/ffw_gating_2": {
1359
+ "module": "keras.dtype_policies",
1360
+ "class_name": "QuantizedDTypePolicy",
1361
+ "config": {
1362
+ "mode": "int8",
1363
+ "source_name": null
1364
+ },
1365
+ "registered_name": null
1366
+ },
1367
+ "decoder_block_21/ffw_gating": {
1368
+ "module": "keras.dtype_policies",
1369
+ "class_name": "QuantizedDTypePolicy",
1370
+ "config": {
1371
+ "mode": "int8",
1372
+ "source_name": null
1373
+ },
1374
+ "registered_name": null
1375
+ },
1376
+ "decoder_block_21/attention/attention_output": {
1377
+ "module": "keras.dtype_policies",
1378
+ "class_name": "QuantizedDTypePolicy",
1379
+ "config": {
1380
+ "mode": "int8",
1381
+ "source_name": null
1382
+ },
1383
+ "registered_name": null
1384
+ },
1385
+ "decoder_block_21/attention/value": {
1386
+ "module": "keras.dtype_policies",
1387
+ "class_name": "QuantizedDTypePolicy",
1388
+ "config": {
1389
+ "mode": "int8",
1390
+ "source_name": null
1391
+ },
1392
+ "registered_name": null
1393
+ },
1394
+ "decoder_block_21/attention/key": {
1395
+ "module": "keras.dtype_policies",
1396
+ "class_name": "QuantizedDTypePolicy",
1397
+ "config": {
1398
+ "mode": "int8",
1399
+ "source_name": null
1400
+ },
1401
+ "registered_name": null
1402
+ },
1403
+ "decoder_block_21/attention/query": {
1404
+ "module": "keras.dtype_policies",
1405
+ "class_name": "QuantizedDTypePolicy",
1406
+ "config": {
1407
+ "mode": "int8",
1408
+ "source_name": null
1409
+ },
1410
+ "registered_name": null
1411
+ },
1412
+ "decoder_block_22/ffw_linear": {
1413
+ "module": "keras.dtype_policies",
1414
+ "class_name": "QuantizedDTypePolicy",
1415
+ "config": {
1416
+ "mode": "int8",
1417
+ "source_name": null
1418
+ },
1419
+ "registered_name": null
1420
+ },
1421
+ "decoder_block_22/ffw_gating_2": {
1422
+ "module": "keras.dtype_policies",
1423
+ "class_name": "QuantizedDTypePolicy",
1424
+ "config": {
1425
+ "mode": "int8",
1426
+ "source_name": null
1427
+ },
1428
+ "registered_name": null
1429
+ },
1430
+ "decoder_block_22/ffw_gating": {
1431
+ "module": "keras.dtype_policies",
1432
+ "class_name": "QuantizedDTypePolicy",
1433
+ "config": {
1434
+ "mode": "int8",
1435
+ "source_name": null
1436
+ },
1437
+ "registered_name": null
1438
+ },
1439
+ "decoder_block_22/attention/attention_output": {
1440
+ "module": "keras.dtype_policies",
1441
+ "class_name": "QuantizedDTypePolicy",
1442
+ "config": {
1443
+ "mode": "int8",
1444
+ "source_name": null
1445
+ },
1446
+ "registered_name": null
1447
+ },
1448
+ "decoder_block_22/attention/value": {
1449
+ "module": "keras.dtype_policies",
1450
+ "class_name": "QuantizedDTypePolicy",
1451
+ "config": {
1452
+ "mode": "int8",
1453
+ "source_name": null
1454
+ },
1455
+ "registered_name": null
1456
+ },
1457
+ "decoder_block_22/attention/key": {
1458
+ "module": "keras.dtype_policies",
1459
+ "class_name": "QuantizedDTypePolicy",
1460
+ "config": {
1461
+ "mode": "int8",
1462
+ "source_name": null
1463
+ },
1464
+ "registered_name": null
1465
+ },
1466
+ "decoder_block_22/attention/query": {
1467
+ "module": "keras.dtype_policies",
1468
+ "class_name": "QuantizedDTypePolicy",
1469
+ "config": {
1470
+ "mode": "int8",
1471
+ "source_name": null
1472
+ },
1473
+ "registered_name": null
1474
+ },
1475
+ "decoder_block_23/ffw_linear": {
1476
+ "module": "keras.dtype_policies",
1477
+ "class_name": "QuantizedDTypePolicy",
1478
+ "config": {
1479
+ "mode": "int8",
1480
+ "source_name": null
1481
+ },
1482
+ "registered_name": null
1483
+ },
1484
+ "decoder_block_23/ffw_gating_2": {
1485
+ "module": "keras.dtype_policies",
1486
+ "class_name": "QuantizedDTypePolicy",
1487
+ "config": {
1488
+ "mode": "int8",
1489
+ "source_name": null
1490
+ },
1491
+ "registered_name": null
1492
+ },
1493
+ "decoder_block_23/ffw_gating": {
1494
+ "module": "keras.dtype_policies",
1495
+ "class_name": "QuantizedDTypePolicy",
1496
+ "config": {
1497
+ "mode": "int8",
1498
+ "source_name": null
1499
+ },
1500
+ "registered_name": null
1501
+ },
1502
+ "decoder_block_23/attention/attention_output": {
1503
+ "module": "keras.dtype_policies",
1504
+ "class_name": "QuantizedDTypePolicy",
1505
+ "config": {
1506
+ "mode": "int8",
1507
+ "source_name": null
1508
+ },
1509
+ "registered_name": null
1510
+ },
1511
+ "decoder_block_23/attention/value": {
1512
+ "module": "keras.dtype_policies",
1513
+ "class_name": "QuantizedDTypePolicy",
1514
+ "config": {
1515
+ "mode": "int8",
1516
+ "source_name": null
1517
+ },
1518
+ "registered_name": null
1519
+ },
1520
+ "decoder_block_23/attention/key": {
1521
+ "module": "keras.dtype_policies",
1522
+ "class_name": "QuantizedDTypePolicy",
1523
+ "config": {
1524
+ "mode": "int8",
1525
+ "source_name": null
1526
+ },
1527
+ "registered_name": null
1528
+ },
1529
+ "decoder_block_23/attention/query": {
1530
+ "module": "keras.dtype_policies",
1531
+ "class_name": "QuantizedDTypePolicy",
1532
+ "config": {
1533
+ "mode": "int8",
1534
+ "source_name": null
1535
+ },
1536
+ "registered_name": null
1537
+ },
1538
+ "decoder_block_24/ffw_linear": {
1539
+ "module": "keras.dtype_policies",
1540
+ "class_name": "QuantizedDTypePolicy",
1541
+ "config": {
1542
+ "mode": "int8",
1543
+ "source_name": null
1544
+ },
1545
+ "registered_name": null
1546
+ },
1547
+ "decoder_block_24/ffw_gating_2": {
1548
+ "module": "keras.dtype_policies",
1549
+ "class_name": "QuantizedDTypePolicy",
1550
+ "config": {
1551
+ "mode": "int8",
1552
+ "source_name": null
1553
+ },
1554
+ "registered_name": null
1555
+ },
1556
+ "decoder_block_24/ffw_gating": {
1557
+ "module": "keras.dtype_policies",
1558
+ "class_name": "QuantizedDTypePolicy",
1559
+ "config": {
1560
+ "mode": "int8",
1561
+ "source_name": null
1562
+ },
1563
+ "registered_name": null
1564
+ },
1565
+ "decoder_block_24/attention/attention_output": {
1566
+ "module": "keras.dtype_policies",
1567
+ "class_name": "QuantizedDTypePolicy",
1568
+ "config": {
1569
+ "mode": "int8",
1570
+ "source_name": null
1571
+ },
1572
+ "registered_name": null
1573
+ },
1574
+ "decoder_block_24/attention/value": {
1575
+ "module": "keras.dtype_policies",
1576
+ "class_name": "QuantizedDTypePolicy",
1577
+ "config": {
1578
+ "mode": "int8",
1579
+ "source_name": null
1580
+ },
1581
+ "registered_name": null
1582
+ },
1583
+ "decoder_block_24/attention/key": {
1584
+ "module": "keras.dtype_policies",
1585
+ "class_name": "QuantizedDTypePolicy",
1586
+ "config": {
1587
+ "mode": "int8",
1588
+ "source_name": null
1589
+ },
1590
+ "registered_name": null
1591
+ },
1592
+ "decoder_block_24/attention/query": {
1593
+ "module": "keras.dtype_policies",
1594
+ "class_name": "QuantizedDTypePolicy",
1595
+ "config": {
1596
+ "mode": "int8",
1597
+ "source_name": null
1598
+ },
1599
+ "registered_name": null
1600
+ },
1601
+ "decoder_block_25/ffw_linear": {
1602
+ "module": "keras.dtype_policies",
1603
+ "class_name": "QuantizedDTypePolicy",
1604
+ "config": {
1605
+ "mode": "int8",
1606
+ "source_name": null
1607
+ },
1608
+ "registered_name": null
1609
+ },
1610
+ "decoder_block_25/ffw_gating_2": {
1611
+ "module": "keras.dtype_policies",
1612
+ "class_name": "QuantizedDTypePolicy",
1613
+ "config": {
1614
+ "mode": "int8",
1615
+ "source_name": null
1616
+ },
1617
+ "registered_name": null
1618
+ },
1619
+ "decoder_block_25/ffw_gating": {
1620
+ "module": "keras.dtype_policies",
1621
+ "class_name": "QuantizedDTypePolicy",
1622
+ "config": {
1623
+ "mode": "int8",
1624
+ "source_name": null
1625
+ },
1626
+ "registered_name": null
1627
+ },
1628
+ "decoder_block_25/attention/attention_output": {
1629
+ "module": "keras.dtype_policies",
1630
+ "class_name": "QuantizedDTypePolicy",
1631
+ "config": {
1632
+ "mode": "int8",
1633
+ "source_name": null
1634
+ },
1635
+ "registered_name": null
1636
+ },
1637
+ "decoder_block_25/attention/value": {
1638
+ "module": "keras.dtype_policies",
1639
+ "class_name": "QuantizedDTypePolicy",
1640
+ "config": {
1641
+ "mode": "int8",
1642
+ "source_name": null
1643
+ },
1644
+ "registered_name": null
1645
+ },
1646
+ "decoder_block_25/attention/key": {
1647
+ "module": "keras.dtype_policies",
1648
+ "class_name": "QuantizedDTypePolicy",
1649
+ "config": {
1650
+ "mode": "int8",
1651
+ "source_name": null
1652
+ },
1653
+ "registered_name": null
1654
+ },
1655
+ "decoder_block_25/attention/query": {
1656
+ "module": "keras.dtype_policies",
1657
+ "class_name": "QuantizedDTypePolicy",
1658
+ "config": {
1659
+ "mode": "int8",
1660
+ "source_name": null
1661
+ },
1662
+ "registered_name": null
1663
+ }
1664
+ }
1665
+ },
1666
+ "registered_name": null
1667
+ },
1668
+ "vocabulary_size": 256000,
1669
+ "num_layers": 26,
1670
+ "num_query_heads": 8,
1671
+ "num_key_value_heads": 4,
1672
+ "hidden_dim": 2304,
1673
+ "intermediate_dim": 18432,
1674
+ "head_dim": 256,
1675
+ "layer_norm_epsilon": 1e-06,
1676
+ "dropout": 0,
1677
+ "query_head_dim_normalize": true,
1678
+ "use_post_ffw_norm": true,
1679
+ "use_post_attention_norm": true,
1680
+ "final_logit_soft_cap": 30.0,
1681
+ "attention_logit_soft_cap": 50.0,
1682
+ "sliding_window_size": 4096,
1683
+ "use_sliding_window_attention": true
1684
+ },
1685
+ "registered_name": "keras_nlp>GemmaBackbone"
1686
+ },
1687
+ "preprocessor": {
1688
+ "module": "keras_nlp.src.models.gemma.gemma_causal_lm_preprocessor",
1689
+ "class_name": "GemmaCausalLMPreprocessor",
1690
+ "config": {
1691
+ "name": "gemma_causal_lm_preprocessor",
1692
+ "trainable": true,
1693
+ "dtype": {
1694
+ "module": "keras",
1695
+ "class_name": "DTypePolicy",
1696
+ "config": {
1697
+ "name": "mixed_bfloat16"
1698
+ },
1699
+ "registered_name": null
1700
+ },
1701
+ "tokenizer": {
1702
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
1703
+ "class_name": "GemmaTokenizer",
1704
+ "config": {
1705
+ "name": "gemma_tokenizer",
1706
+ "trainable": true,
1707
+ "dtype": {
1708
+ "module": "keras",
1709
+ "class_name": "DTypePolicy",
1710
+ "config": {
1711
+ "name": "int32"
1712
+ },
1713
+ "registered_name": null
1714
+ },
1715
+ "proto": null,
1716
+ "sequence_length": null
1717
+ },
1718
+ "registered_name": "keras_nlp>GemmaTokenizer"
1719
+ },
1720
+ "sequence_length": 2048,
1721
+ "add_start_token": true,
1722
+ "add_end_token": true
1723
+ },
1724
+ "registered_name": "keras_nlp>GemmaCausalLMPreprocessor"
1725
+ },
1726
+ "name": "gemma_causal_lm"
1727
+ },
1728
+ "registered_name": "keras_nlp>GemmaCausalLM"
1729
+ }
tokenizer.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.gemma.gemma_tokenizer",
3
+ "class_name": "GemmaTokenizer",
4
+ "config": {
5
+ "name": "gemma_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "proto": null,
16
+ "sequence_length": null
17
+ },
18
+ "registered_name": "keras_nlp>GemmaTokenizer"
19
+ }