# mergekit configuration: spherical linear interpolation (SLERP) between two
# checkpoints. For slerp, `t` is the interpolation factor: t = 0 yields the
# base_model's weights, t = 1 yields the other model's weights.
merge_method: slerp
base_model: "SvalTek/Gemma-7B-ColdBrew-RP"
slices:
  - sources:
      # NOTE(review): both ranges claim 42 layers — confirm each checkpoint
      # really has 42 layers and that the two architectures match.
      - model: "SvalTek/Gemma-7B-ColdBrew-RP"
        layer_range: [0, 42]
      - model: "KishoreK/ActionGemma-9B"
        layer_range: [0, 42]
parameters:
  # Each list is a gradient of t values spread across the layer range; entries
  # with a `filter` apply only to tensors selected by that filter.
  # NOTE(review): presumably filters match on tensor names — verify that
  # `layer_norm` and `pos_embed` match tensors in these checkpoints; if they
  # match nothing, those tensors would use the unfiltered default below.
  t:
    - filter: self_attn  # coherence from ActionGemma
      value: [0.3, 0.5, 0.7, 0.9, 1]
    - filter: mlp  # creativity from ColdBrew
      value: [0.7, 0.5, 0.3, 0.1, 0]
    - filter: layer_norm  # consistency from ActionGemma
      value: [0.4, 0.6, 0.8, 1, 1]
    - filter: pos_embed  # sequence understanding from ActionGemma
      value: [0.5, 0.7, 0.9, 1, 1]
    # Default for every tensor no filter above selects: an even blend.
    - value: 0.5
dtype: bfloat16
tokenizer_source: base