v000000 committed
Commit 7ef75e6
Parent(s): 5cf01ba

Update README.md

Files changed (1): README.md (+94, -0)

README.md CHANGED
Both experts are used in tandem when generating a token.

{output}<|eot_id|>

```

# Recipe (I'm sorry...):
```yaml
slices:
  - sources:
      - model: Sao10K/L3.1-8B-Niitama-v1.1+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
        layer_range: [0, 32]
      - model: akjindal53244/Llama-3.1-Storm-8B
        layer_range: [0, 32]
merge_method: nearswap
base_model: Sao10K/L3.1-8B-Niitama-v1.1+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
parameters:
  t:
    - value: 0.0001
dtype: bfloat16
out_type: float16 # oops
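
---
# Presumably a separate pass: SLERP of Stheno-v3.4-abliterated with Storm-8B,
# with per-layer t schedules for self_attn and mlp (0.5 everywhere else).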
slices:
  - sources:
      - model: v000000/Llama-3.1-8B-Stheno-v3.4-abliterated
        layer_range: [0, 32]
      - model: akjindal53244/Llama-3.1-Storm-8B
        layer_range: [0, 32]
merge_method: slerp
base_model: v000000/Llama-3.1-8B-Stheno-v3.4-abliterated
parameters:
  t:
    - filter: self_attn
      value: [0.1, 0.6, 0.3, 0.8, 0.5]
    - filter: mlp
      value: [0.9, 0.4, 0.7, 0.2, 0.5]
    - value: 0.5
dtype: float32
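
---
# Presumably a separate pass: task arithmetic over SuperNova-Lite, adding
# 0.4x of the Niitorm-8B-t0.0001 task vector to the base.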
models:
  - model: arcee-ai/Llama-3.1-SuperNova-Lite
    parameters:
      weight: 1.0
  - model: v000000/L3.1-Niitorm-8B-t0.0001
    parameters:
      weight: 0.4
merge_method: task_arithmetic
base_model: arcee-ai/Llama-3.1-SuperNova-Lite
parameters:
  normalize: false
dtype: float16
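
---
# Presumably a separate pass: same setup, but with the base term zeroed out
# and the Niitorm task vector amplified to 1.25x.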
models:
  - model: arcee-ai/Llama-3.1-SuperNova-Lite
    parameters:
      weight: 0.0
  - model: v000000/L3.1-Niitorm-8B-t0.0001
    parameters:
      weight: 1.25
merge_method: task_arithmetic
base_model: arcee-ai/Llama-3.1-SuperNova-Lite
parameters:
  normalize: false
dtype: float16
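
---
# Presumably a separate pass: SLERP between the two RP-Test task-arithmetic
# intermediates; t ramps 0 -> 0.6 -> 0, so only the middle layers blend.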
models:
  - model: v000000/L3.1-8B-RP-Test-003-Task_Arithmetic
merge_method: slerp
base_model: v000000/L3.1-8B-RP-Test-002-Task_Arithmetic+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
# This model needed some abliteration^
parameters:
  t:
    - value: [0, 0, 0.3, 0.4, 0.5, 0.6, 0.5, 0.4, 0.3, 0, 0]
dtype: float16
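
---
# Presumably a separate pass: weighted task arithmetic on the abliterated
# Celeste base, mixing abliterated Celeste, Sthenorm, and plain Celeste
# at 0.7 / 0.2 / 0.2.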
base_model: nothingiisreal/L3.1-8B-Celeste-V1.5+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
dtype: bfloat16
merge_method: task_arithmetic
parameters:
  normalize: false
slices:
  - sources:
      - layer_range: [0, 32]
        model: nothingiisreal/L3.1-8B-Celeste-V1.5+grimjim/Llama-3-Instruct-abliteration-LoRA-8B
        parameters:
          weight: 0.7
      - layer_range: [0, 32]
        model: v000000/L3.1-Sthenorm-8B
        parameters:
          weight: 0.2
      - layer_range: [0, 32]
        model: nothingiisreal/L3.1-8B-Celeste-V1.5
        parameters:
          weight: 0.2
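
---
# Presumably the final, mergekit-moe config: a 2-expert MoE with random
# gating. With local_experts: 2 and experts_per_token: 2, both experts are
# used in tandem when generating a token.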
base_model: crestf411/L3.1-8B-sunfall-stheno-v0.6.1
experts_per_token: 2
local_experts: 2
gate_mode: random
dtype: bfloat16
experts:
  - source_model: v000000/L3.1-Storniitova-8B
  - source_model: x0000001/l3.1-part_aaa
```
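
Each YAML document above is a standalone mergekit config, so reproducing the recipe means running them one at a time (the last document targets `mergekit-moe` rather than `mergekit-yaml`). Below is a minimal sketch of driving a single step through mergekit's Python API, assuming a recent `pip install mergekit`; the config and output paths (`step1-nearswap.yml`, `./merged`) are placeholders:

```python
# Minimal sketch: run one merge step from the recipe above via mergekit's
# Python API (the mergekit-yaml CLI does the same thing).
import yaml
import torch

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Load one of the YAML documents above, saved as its own file (placeholder name).
with open("step1-nearswap.yml", "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    merge_config,
    out_path="./merged",  # intermediate output; feed it into the next step
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # merge on GPU when one is available
        copy_tokenizer=True,             # ship the base tokenizer with the merge
        lazy_unpickle=False,
        low_cpu_memory=False,
    ),
)
```

The CLI equivalents are `mergekit-yaml <config> <out_dir>` for the regular merges and `mergekit-moe <config> <out_dir>` for the final expert config.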