final_model
This is a merge of pre-trained language models created using mergekit.
Merge Details
Merge Method
This model was merged using the breadcrumbs_ties merge method, with ./Yosegi-0603 as the base model.
Models Merged
The following models were included in the merge:
- ./Ninja-2B_JP
- ./Yosegi-0601
Configuration
The following YAML configuration was used to produce this model:
# Top-level merge settings: the base model, output dtype, and merge method,
# followed by the global parameters and the per-layer-range slices.
base_model: ./Yosegi-0603
dtype: bfloat16
merge_method: breadcrumbs_ties
parameters:
  # NOTE(review): both flags are written as floats (1.0 / 0.0); presumably
  # read as on/off switches by the merge tool — confirm before retyping.
  int8_mask: 1.0
  normalize: 0.0
slices:
# Slice for layer_range [0, 2]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [0, 2]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.050387850856855765
      - filter: mlp
        value: -0.17075015661203768
      - value: -0.008041653902986862
      weight:
      - filter: self_attn
        value: 0.0999312941470471
      - filter: mlp
        value: 0.541727762184749
      - value: 0.6837012779994258
  - layer_range: [0, 2]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8218846237599902
      - filter: mlp
        value: 1.0
      - value: 0.9254078866667358
      gamma:
      - filter: self_attn
        value: -0.11213758231875963
      - filter: mlp
        value: 0.021586098873668948
      - value: -0.12827998218659437
      weight:
      - filter: self_attn
        value: 0.40391646444657003
      - filter: mlp
        value: 0.623121864641881
      - value: 0.5967833694632534
  - layer_range: [0, 2]
    model: ./Yosegi-0603
# Slice for layer_range [2, 4]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [2, 4]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8079479346300947
      - filter: mlp
        value: 1.0
      - value: 0.710146185559419
      gamma:
      - filter: self_attn
        value: 0.1383609589681566
      - filter: mlp
        value: 0.21188532059635062
      - value: 0.2994723556443468
      weight:
      - filter: self_attn
        value: 0.48107070906079974
      - filter: mlp
        value: 0.5848073552919492
      - value: 0.4583842493359253
  - layer_range: [2, 4]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.934378153535579
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.073192612278188
      - filter: mlp
        value: 0.07939126555063317
      - value: -0.06891845030175699
      weight:
      - filter: self_attn
        value: 0.32120386994101
      - filter: mlp
        value: 0.5001108459121922
      - value: 0.9138710221666694
  - layer_range: [2, 4]
    model: ./Yosegi-0603
# Slice for layer_range [4, 6]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [4, 6]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7237519222177541
      - value: 0.776951124863642
      gamma:
      - filter: self_attn
        value: -0.2265121048274062
      - filter: mlp
        value: -0.1757947421960496
      - value: -0.11401593728931929
      weight:
      - filter: self_attn
        value: 0.6448742737026658
      - filter: mlp
        value: 0.13809748641457986
      - value: 0.3950550285769662
  - layer_range: [4, 6]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9649359194114893
      - filter: mlp
        value: 0.916637032428399
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16291684846287688
      - filter: mlp
        value: -0.19013548712121703
      - value: 0.038409066391918795
      weight:
      - filter: self_attn
        value: 0.1977358472772336
      - filter: mlp
        value: 0.22661167907612348
      - value: 0.6426575016448257
  - layer_range: [4, 6]
    model: ./Yosegi-0603
# Slice for layer_range [6, 8]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [6, 8]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8727809666891416
      - filter: mlp
        value: 1.0
      - value: 0.5160677785559116
      gamma:
      - filter: self_attn
        value: 0.14245180617134273
      - filter: mlp
        value: 0.08189992601998919
      - value: -0.1038827997670827
      weight:
      - filter: self_attn
        value: 0.23575676914257698
      - filter: mlp
        value: 0.4047231670507743
      - value: 0.34207794631274374
  - layer_range: [6, 8]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.576775501046583
      - filter: mlp
        value: -0.046028636298718645
      - value: -0.024161321403060265
      weight:
      - filter: self_attn
        value: 0.833089842843994
      - filter: mlp
        value: 0.5434667434613458
      - value: 0.2946693008513797
  - layer_range: [6, 8]
    model: ./Yosegi-0603
# Slice for layer_range [8, 10]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [8, 10]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9930269337531187
      gamma:
      - filter: self_attn
        value: 0.4549980941970383
      - filter: mlp
        value: 0.10362988739411173
      - value: -0.43800391668559174
      weight:
      - filter: self_attn
        value: 0.19663450954683193
      - filter: mlp
        value: 0.16783989984505265
      - value: 0.7465091417598162
  - layer_range: [8, 10]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.797370597380894
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.0665958634205702
      - filter: mlp
        value: -0.058297473060129834
      - value: -0.38206760673090134
      weight:
      - filter: self_attn
        value: 0.7015967347604024
      - filter: mlp
        value: 0.7733694864324641
      - value: 0.7636921732342238
  - layer_range: [8, 10]
    model: ./Yosegi-0603
# Slice for layer_range [10, 12]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [10, 12]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8047576867589878
      - filter: mlp
        value: 0.8852533319203653
      - value: 0.7707342647603538
      gamma:
      - filter: self_attn
        value: -0.054343999574509694
      - filter: mlp
        value: -0.3465154355167133
      - value: 0.022315854655582765
      weight:
      - filter: self_attn
        value: 0.4396484757291151
      - filter: mlp
        value: 0.34318396468602314
      - value: 0.8236034746664869
  - layer_range: [10, 12]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9058471193805165
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1221058588826469
      - filter: mlp
        value: -0.4004985640890659
      - value: 0.3219195440395816
      weight:
      - filter: self_attn
        value: 0.3565443612269864
      - filter: mlp
        value: 0.2817057075232181
      - value: 0.5934890337808251
  - layer_range: [10, 12]
    model: ./Yosegi-0603
# Slice for layer_range [12, 14]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [12, 14]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.027897116191693133
      - filter: mlp
        value: -0.1765379388255607
      - value: 0.09108936063176161
      weight:
      - filter: self_attn
        value: 0.4499753137521779
      - filter: mlp
        value: 0.901296236087911
      - value: 0.3548680126954006
  - layer_range: [12, 14]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8973815150776497
      - filter: mlp
        value: 0.6029953465961999
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.10393082898402586
      - filter: mlp
        value: 0.15993577688878796
      - value: 0.011410411917833683
      weight:
      - filter: self_attn
        value: 0.2211644023056492
      - filter: mlp
        value: 0.5677387594231849
      - value: 0.1316535663010981
  - layer_range: [12, 14]
    model: ./Yosegi-0603
# Slice for layer_range [14, 16]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [14, 16]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9584597245055072
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.17789727632680347
      - filter: mlp
        value: 0.2182263440314275
      - value: 0.1449547656126498
      weight:
      - filter: self_attn
        value: 0.4551004762874224
      - filter: mlp
        value: 0.9182082826762857
      - value: 0.3736989395186422
  - layer_range: [14, 16]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7414465107848625
      - filter: mlp
        value: 1.0
      - value: 0.7894887419395906
      gamma:
      - filter: self_attn
        value: -0.07343933395880992
      - filter: mlp
        value: 0.250800731630588
      - value: -0.2948778134297542
      weight:
      - filter: self_attn
        value: 0.43125199001016495
      - filter: mlp
        value: 0.6182726353394477
      - value: 0.838902157446268
  - layer_range: [14, 16]
    model: ./Yosegi-0603
# Slice for layer_range [16, 18]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [16, 18]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9474287877268394
      - filter: mlp
        value: 1.0
      - value: 0.9613380133344519
      gamma:
      - filter: self_attn
        value: -0.08608895546593046
      - filter: mlp
        value: -0.07275416053291164
      - value: -0.5796137860399382
      weight:
      - filter: self_attn
        value: 0.5593420897751296
      - filter: mlp
        value: 0.7339447992880666
      - value: 0.5447558586689005
  - layer_range: [16, 18]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9321536960575384
      - filter: mlp
        value: 1.0
      - value: 0.9613033408813294
      gamma:
      - filter: self_attn
        value: 0.20610728738224296
      - filter: mlp
        value: 0.2002206706624053
      - value: -0.45349278793293785
      weight:
      - filter: self_attn
        value: 0.16162975594196963
      - filter: mlp
        value: 0.21262726992327483
      - value: 0.061213622827234075
  - layer_range: [16, 18]
    model: ./Yosegi-0603
# Slice for layer_range [18, 20]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [18, 20]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.03922456593148313
      - filter: mlp
        value: 0.3318035822806869
      - value: -0.10373990685028205
      weight:
      - filter: self_attn
        value: 0.8254441016674987
      - filter: mlp
        value: 0.4568039342431161
      - value: 0.3152648515747969
  - layer_range: [18, 20]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9807358937293073
      gamma:
      - filter: self_attn
        value: -0.22734036563128657
      - filter: mlp
        value: 0.26113222150270854
      - value: 0.17739039022957015
      weight:
      - filter: self_attn
        value: 0.33759130475641996
      - filter: mlp
        value: 0.616639215544168
      - value: 0.47560658618977714
  - layer_range: [18, 20]
    model: ./Yosegi-0603
# Slice for layer_range [20, 22]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [20, 22]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9394514442960196
      - filter: mlp
        value: 1.0
      - value: 0.9885037757465567
      gamma:
      - filter: self_attn
        value: -0.17365709450334324
      - filter: mlp
        value: 0.0712279381144505
      - value: 0.11809665485306464
      weight:
      - filter: self_attn
        value: 0.485610337254665
      - filter: mlp
        value: 0.8406593173801935
      - value: 0.5024102481819739
  - layer_range: [20, 22]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.09980202641768818
      - filter: mlp
        value: 0.051454493742856926
      - value: 0.14619126408666103
      weight:
      - filter: self_attn
        value: 0.54772456079406
      - filter: mlp
        value: 0.3440893571099615
      - value: 0.3747271233512448
  - layer_range: [20, 22]
    model: ./Yosegi-0603
# Slice for layer_range [22, 24]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [22, 24]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9474712362889293
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16020032978118146
      - filter: mlp
        value: -0.025085248873309034
      - value: 0.06046174910893976
      weight:
      - filter: self_attn
        value: 0.8654189362345427
      - filter: mlp
        value: 0.6344956382288498
      - value: 0.6383979001549549
  - layer_range: [22, 24]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8240762427167851
      - filter: mlp
        value: 1.0
      - value: 0.9004913821398048
      gamma:
      - filter: self_attn
        value: -0.12224186789525764
      - filter: mlp
        value: -0.25877585460700525
      - value: 0.35149388360871714
      weight:
      - filter: self_attn
        value: 0.4294356408713786
      - filter: mlp
        value: 0.3920647298630233
      - value: 0.795891295390721
  - layer_range: [22, 24]
    model: ./Yosegi-0603
# Slice for layer_range [24, 26]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [24, 26]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.16915580088030202
      - filter: mlp
        value: 0.2602652727555053
      - value: 0.16985672723305376
      weight:
      - filter: self_attn
        value: 0.420377024485687
      - filter: mlp
        value: 0.3401141209432324
      - value: 0.4953511256159331
  - layer_range: [24, 26]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7290652609253236
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.1039167464696765
      - filter: mlp
        value: -0.18476572570059685
      - value: 0.1221387313921081
      weight:
      - filter: self_attn
        value: 0.2925002157134928
      - filter: mlp
        value: 0.3854740639588027
      - value: 0.555448110317977
  - layer_range: [24, 26]
    model: ./Yosegi-0603
# Slice for layer_range [26, 28]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [26, 28]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9104496350690235
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.24831264214235005
      - filter: mlp
        value: -0.03903149241855605
      - value: 0.14189425093398259
      weight:
      - filter: self_attn
        value: 0.7685811138035815
      - filter: mlp
        value: 0.06535011571274918
      - value: 0.696502559577317
  - layer_range: [26, 28]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9236218028490522
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.2451400735890047
      - filter: mlp
        value: -0.21555851418482214
      - value: 0.020418471695148876
      weight:
      - filter: self_attn
        value: 0.451368534421561
      - filter: mlp
        value: 0.27412879847687055
      - value: 0.18339776770537336
  - layer_range: [26, 28]
    model: ./Yosegi-0603
# Slice for layer_range [28, 30]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [28, 30]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8590812961904566
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06934549536310654
      - filter: mlp
        value: -0.28464693250998063
      - value: -0.0588491947891552
      weight:
      - filter: self_attn
        value: 0.26716389671655294
      - filter: mlp
        value: 0.8228280162386532
      - value: 0.24197568479527135
  - layer_range: [28, 30]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7277181780542642
      - filter: mlp
        value: 0.74166025738732
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1772650150670655
      - filter: mlp
        value: 0.06545031487123437
      - value: -0.28681451125993446
      weight:
      - filter: self_attn
        value: 0.5781944040541174
      - filter: mlp
        value: 0.2288692970435767
      - value: 0.689751088930503
  - layer_range: [28, 30]
    model: ./Yosegi-0603
# Slice for layer_range [30, 32]. ./Yosegi-0601 and ./Ninja-2B_JP each carry
# density/gamma/weight values per filter (self_attn, mlp, plus one entry with
# no filter — presumably the fallback; confirm in the mergekit docs).
# The ./Yosegi-0603 entry (the base_model) has no parameters.
- sources:
  - layer_range: [30, 32]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8177341862620365
      - filter: mlp
        value: 0.8875629677599377
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06572527259889459
      - filter: mlp
        value: -0.18979543285938766
      - value: -0.24122036571646263
      weight:
      - filter: self_attn
        value: 0.5818433594657613
      - filter: mlp
        value: 0.36676821100234736
      - value: 0.3580688869263428
  - layer_range: [30, 32]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8306036003344672
      - filter: mlp
        value: 0.6993970248745297
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.20599853236581384
      - filter: mlp
        value: -0.2001187634455465
      - value: -0.07654635090020837
      weight:
      - filter: self_attn
        value: 0.37120677279712305
      - filter: mlp
        value: 0.13105486609905853
      - value: 0.7204857820148367
  - layer_range: [30, 32]
    model: ./Yosegi-0603
# Top-level tokenizer setting for the merged model.
# NOTE(review): `union` presumably combines the vocabularies of the merged
# models — confirm against the mergekit documentation.
tokenizer_source: union
Downloads last month: 16
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.