Adding `safetensors` variant of this model

#1
by TimeRobber - opened
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +71 -0
  2. model.safetensors.index.json +1 -0
  3. model_00001-of-00072.safetensors +3 -0
  4. model_00002-of-00072.safetensors +3 -0
  5. model_00003-of-00072.safetensors +3 -0
  6. model_00004-of-00072.safetensors +3 -0
  7. model_00005-of-00072.safetensors +3 -0
  8. model_00006-of-00072.safetensors +3 -0
  9. model_00007-of-00072.safetensors +3 -0
  10. model_00008-of-00072.safetensors +3 -0
  11. model_00009-of-00072.safetensors +3 -0
  12. model_00010-of-00072.safetensors +3 -0
  13. model_00011-of-00072.safetensors +3 -0
  14. model_00012-of-00072.safetensors +3 -0
  15. model_00013-of-00072.safetensors +3 -0
  16. model_00014-of-00072.safetensors +3 -0
  17. model_00015-of-00072.safetensors +3 -0
  18. model_00016-of-00072.safetensors +3 -0
  19. model_00017-of-00072.safetensors +3 -0
  20. model_00018-of-00072.safetensors +3 -0
  21. model_00019-of-00072.safetensors +3 -0
  22. model_00020-of-00072.safetensors +3 -0
  23. model_00021-of-00072.safetensors +3 -0
  24. model_00022-of-00072.safetensors +3 -0
  25. model_00023-of-00072.safetensors +3 -0
  26. model_00024-of-00072.safetensors +3 -0
  27. model_00025-of-00072.safetensors +3 -0
  28. model_00026-of-00072.safetensors +3 -0
  29. model_00027-of-00072.safetensors +3 -0
  30. model_00028-of-00072.safetensors +3 -0
  31. model_00029-of-00072.safetensors +3 -0
  32. model_00030-of-00072.safetensors +3 -0
  33. model_00031-of-00072.safetensors +3 -0
  34. model_00032-of-00072.safetensors +3 -0
  35. model_00033-of-00072.safetensors +3 -0
  36. model_00034-of-00072.safetensors +3 -0
  37. model_00035-of-00072.safetensors +3 -0
  38. model_00036-of-00072.safetensors +3 -0
  39. model_00037-of-00072.safetensors +3 -0
  40. model_00038-of-00072.safetensors +3 -0
  41. model_00039-of-00072.safetensors +3 -0
  42. model_00040-of-00072.safetensors +3 -0
  43. model_00041-of-00072.safetensors +3 -0
  44. model_00042-of-00072.safetensors +3 -0
  45. model_00043-of-00072.safetensors +3 -0
  46. model_00044-of-00072.safetensors +3 -0
  47. model_00045-of-00072.safetensors +3 -0
  48. model_00046-of-00072.safetensors +3 -0
  49. model_00047-of-00072.safetensors +3 -0
  50. model_00048-of-00072.safetensors +3 -0
.gitattributes CHANGED
@@ -36,3 +36,74 @@ tr13-176B-ml-t0-logs/logs/p31lossseq/main_log.txt filter=lfs diff=lfs merge=lfs
36
  logs/logs/xp3capmixnewcodelonglossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
37
  logs/logs/xp3zzlossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
38
  logs/logs/p31lossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  logs/logs/xp3capmixnewcodelonglossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
37
  logs/logs/xp3zzlossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
38
  logs/logs/p31lossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
39
+ model_00007-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
40
+ model_00035-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
41
+ model_00068-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
42
+ model_00014-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
43
+ model_00060-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
44
+ model_00026-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
45
+ model_00043-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
46
+ model_00041-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
47
+ model_00015-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
48
+ model_00019-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
49
+ model_00013-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
50
+ model_00029-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
51
+ model_00055-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
52
+ model_00056-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
53
+ model_00012-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
54
+ model_00049-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
55
+ model_00027-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
56
+ model_00052-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
57
+ model_00004-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
58
+ model_00037-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
59
+ model_00045-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
60
+ model_00011-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
61
+ model_00067-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
62
+ model_00064-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
63
+ model_00034-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
64
+ model_00005-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
65
+ model_00033-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
66
+ model_00039-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
67
+ model_00042-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
68
+ model_00044-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
69
+ model_00071-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
70
+ model_00053-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
71
+ model_00006-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
72
+ model_00021-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
73
+ model_00036-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
74
+ model_00051-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
75
+ model_00047-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
76
+ model_00024-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
77
+ model_00017-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
78
+ model_00003-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
79
+ model_00023-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
80
+ model_00018-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
81
+ model_00002-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
82
+ model_00016-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
83
+ model_00031-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
84
+ model_00065-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
85
+ model_00022-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
86
+ model_00008-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
87
+ model_00048-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
88
+ model_00063-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
89
+ model_00050-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
90
+ model_00046-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
91
+ model_00009-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
92
+ model_00061-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
93
+ model_00069-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
94
+ model_00070-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
95
+ model_00028-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
96
+ model_00020-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
97
+ model_00025-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
98
+ model_00030-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
99
+ model_00066-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
100
+ model_00032-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
101
+ model_00057-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
102
+ model_00010-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
103
+ model_00058-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
104
+ model_00001-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
105
+ model_00062-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
106
+ model_00038-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
107
+ model_00059-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
108
+ model_00054-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
109
+ model_00040-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"total_size": 352494542848}, "weight_map": {"h.0.input_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.input_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.weight": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.weight": "model_00002-of-00072.safetensors", "h.1.input_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.input_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.weight": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.weight": "model_00003-of-00072.safetensors", "h.10.input_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.input_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.weight": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.weight": "model_00012-of-00072.safetensors", "h.11.input_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.input_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.weight": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.weight": "model_00013-of-00072.safetensors", "h.12.input_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.input_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.weight": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.weight": "model_00014-of-00072.safetensors", "h.13.input_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.input_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.weight": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.weight": "model_00015-of-00072.safetensors", "h.14.input_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.input_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.weight": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.weight": "model_00016-of-00072.safetensors", "h.15.input_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.input_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.weight": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.weight": "model_00017-of-00072.safetensors", "h.16.input_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.input_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.weight": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.weight": "model_00018-of-00072.safetensors", "h.17.input_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.input_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.weight": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.weight": "model_00019-of-00072.safetensors", "h.18.input_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.input_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.weight": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.weight": "model_00020-of-00072.safetensors", "h.19.input_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.input_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.weight": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.weight": "model_00021-of-00072.safetensors", "h.2.input_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.input_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.weight": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.weight": "model_00004-of-00072.safetensors", "h.20.input_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.input_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.weight": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.weight": "model_00022-of-00072.safetensors", "h.21.input_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.input_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.weight": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.weight": "model_00023-of-00072.safetensors", "h.22.input_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.input_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.weight": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.weight": "model_00024-of-00072.safetensors", "h.23.input_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.input_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.weight": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.weight": "model_00025-of-00072.safetensors", "h.24.input_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.input_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.weight": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.weight": "model_00026-of-00072.safetensors", "h.25.input_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.input_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.weight": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.weight": "model_00027-of-00072.safetensors", "h.26.input_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.input_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.weight": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.weight": "model_00028-of-00072.safetensors", "h.27.input_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.input_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.weight": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.weight": "model_00029-of-00072.safetensors", "h.28.input_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.input_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.weight": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.weight": "model_00030-of-00072.safetensors", "h.29.input_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.input_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.weight": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.weight": "model_00031-of-00072.safetensors", "h.3.input_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.input_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.weight": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.weight": "model_00005-of-00072.safetensors", "h.30.input_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.input_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.weight": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.weight": "model_00032-of-00072.safetensors", "h.31.input_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.input_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.weight": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.weight": "model_00033-of-00072.safetensors", "h.32.input_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.input_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.weight": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.weight": "model_00034-of-00072.safetensors", "h.33.input_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.input_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.weight": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.weight": "model_00035-of-00072.safetensors", "h.34.input_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.input_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.weight": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.weight": "model_00036-of-00072.safetensors", "h.35.input_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.input_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.weight": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.weight": "model_00037-of-00072.safetensors", "h.36.input_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.input_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.weight": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.weight": "model_00038-of-00072.safetensors", "h.37.input_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.input_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.weight": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.weight": "model_00039-of-00072.safetensors", "h.38.input_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.input_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.weight": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.weight": "model_00040-of-00072.safetensors", "h.39.input_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.input_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.weight": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.weight": "model_00041-of-00072.safetensors", "h.4.input_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.input_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.weight": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.weight": "model_00006-of-00072.safetensors", "h.40.input_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.input_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.weight": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.weight": "model_00042-of-00072.safetensors", "h.41.input_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.input_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.weight": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.weight": "model_00043-of-00072.safetensors", "h.42.input_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.input_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.weight": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.weight": "model_00044-of-00072.safetensors", "h.43.input_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.input_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.weight": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.weight": "model_00045-of-00072.safetensors", "h.44.input_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.input_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.weight": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.weight": "model_00046-of-00072.safetensors", "h.45.input_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.input_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.weight": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.weight": "model_00047-of-00072.safetensors", "h.46.input_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.input_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.weight": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.weight": "model_00048-of-00072.safetensors", "h.47.input_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.input_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.weight": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.weight": "model_00049-of-00072.safetensors", "h.48.input_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.input_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.weight": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.weight": "model_00050-of-00072.safetensors", "h.49.input_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.input_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.weight": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.weight": "model_00051-of-00072.safetensors", "h.5.input_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.input_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.weight": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.weight": "model_00007-of-00072.safetensors", "h.50.input_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.input_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.weight": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.weight": "model_00052-of-00072.safetensors", "h.51.input_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.input_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.weight": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.weight": "model_00053-of-00072.safetensors", "h.52.input_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.input_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.weight": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.weight": "model_00054-of-00072.safetensors", "h.53.input_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.input_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.weight": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.weight": "model_00055-of-00072.safetensors", "h.54.input_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.input_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.weight": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.weight": "model_00056-of-00072.safetensors", "h.55.input_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.input_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.weight": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.weight": "model_00057-of-00072.safetensors", "h.56.input_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.input_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.weight": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.weight": "model_00058-of-00072.safetensors", "h.57.input_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.input_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.weight": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.weight": "model_00059-of-00072.safetensors", "h.58.input_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.input_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.weight": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.weight": "model_00060-of-00072.safetensors", "h.59.input_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.input_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.weight": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.weight": "model_00061-of-00072.safetensors", "h.6.input_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.input_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.weight": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.weight": "model_00008-of-00072.safetensors", "h.60.input_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.input_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.weight": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.weight": "model_00062-of-00072.safetensors", "h.61.input_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.input_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.weight": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.weight": "model_00063-of-00072.safetensors", "h.62.input_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.input_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.weight": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.weight": "model_00064-of-00072.safetensors", "h.63.input_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.input_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.weight": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.weight": "model_00065-of-00072.safetensors", "h.64.input_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.input_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.weight": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.weight": "model_00066-of-00072.safetensors", "h.65.input_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.input_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.weight": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.weight": "model_00067-of-00072.safetensors", "h.66.input_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.input_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.weight": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.weight": "model_00068-of-00072.safetensors", "h.67.input_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.input_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.weight": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.weight": "model_00069-of-00072.safetensors", "h.68.input_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.input_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.weight": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.weight": "model_00070-of-00072.safetensors", "h.69.input_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.input_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.weight": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.weight": "model_00071-of-00072.safetensors", "h.7.input_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.input_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.weight": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.weight": "model_00009-of-00072.safetensors", "h.8.input_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.input_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.weight": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.weight": "model_00010-of-00072.safetensors", "h.9.input_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.input_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.weight": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.weight": "model_00011-of-00072.safetensors", "ln_f.bias": "model_00072-of-00072.safetensors", "ln_f.weight": "model_00072-of-00072.safetensors", "word_embeddings.weight": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.bias": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.weight": "model_00001-of-00072.safetensors"}}
model_00001-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994b72408da2fd0cd99854528e5db5323fa7f98998e928478b7b9b2961fa7c19
3
+ size 7193289051
model_00002-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2b0689f9a6c1137f5ce2ddd6a7d3d2708e86334174175443265a288b5c42f9b
3
+ size 4932875531
model_00003-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b44a492e74c13fa0e7a5a64c288ed341824ec100a36a72401561442d6bcc6841
3
+ size 4932875531
model_00004-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b30c540c26f99c3c5d7e4ad02a58a422031534a00196f990b9ad24af79b7904
3
+ size 4932875531
model_00005-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98a117f92df94f4ee63f823ab0890b3cf31c9186625b368f267b65da5cfed559
3
+ size 4932875531
model_00006-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dca2e322c85eaf4afc067b1afa5c172f81ddcafc20d13adc9fc806846e62cca5
3
+ size 4932875531
model_00007-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a6f6504ff6354a2dbf3110a6a50daf24e7d9b7a29bb5891572559ce78b71780
3
+ size 4932875531
model_00008-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62f5a9b1cfdb6e14a678caa704186a4000cedeb6a047a434ddcffc323229806
3
+ size 4932875531
model_00009-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45d609a80c4162319f7652ea44f168221f82759c3a320cfac3660bf818f708c3
3
+ size 4932875531
model_00010-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc4965d0bbde6392185bc8ba792a5b599b6ea39726b57d6e8e752542f938a49
3
+ size 4932875531
model_00011-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb964b7ac826ec9fddc2eb61d5fdd32653bc855a14c16f4bbe56924385e117ef
3
+ size 4932875531
model_00012-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b535b3ec8c447c220aa612c0f8a8ca17928427e9fdd13915a853e98a54de633
3
+ size 4932875543
model_00013-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6498da058b0a9f36b12a13684047517f0fc863980847c0d3df8e3085df4f723e
3
+ size 4932875543
model_00014-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de752a5c66543d8adb7548f2cb6121cefb0edcfbcdcff61cdc39aa5c0a31ad6
3
+ size 4932875543
model_00015-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:423abb3cb74c83c1d9e6887b18adfa8d4349c7c62b8494addabc4bcd5eddbd23
3
+ size 4932875543
model_00016-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eeaf9ecd49277fb9551eea8dee683e278cfc8f18a497692199bb26e334f6eac
3
+ size 4932875543
model_00017-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49050db778b4fe213963108dc8d76b8718b35889a8a9f12fb01f6880c5def9c0
3
+ size 4932875543
model_00018-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456d316970ba92785b6f52f5ff998ceaad96cb9464633823fa8dca29aaa6ad09
3
+ size 4932875543
model_00019-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b21e46c69790640a99841d7b23d640275946b7c99a6f2b7985c7c816584a407
3
+ size 4932875543
model_00020-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8acd344ce56493e4901d406027bf9f88385d4c68594d48214af59cf78534a94
3
+ size 4932875543
model_00021-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b48bdd0b8a01e599a6e4e607692ddc0505818735ca4ab4b2371da733a9b35e1
3
+ size 4932875543
model_00022-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5897e2f0a9683b9d49811fe30deb461e4d30c5c37034953fe31c9515befd34a1
3
+ size 4932875543
model_00023-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0740d2798a12687cd10985a7ffa88d22111168ba59866aa53799b8a8e1c5fda1
3
+ size 4932875543
model_00024-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:697d91fb41a3015613a1b2bae5fc3224af7c81dcfa43796af5e1dc1cb9d872ad
3
+ size 4932875543
model_00025-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2146ea66a8d6d7fa762b9b8084f9f1b29a445e8b0bef46694af44e9894a04ecf
3
+ size 4932875543
model_00026-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e0948f76650b956f6976aa0eabd8194e3b80430e171df3d14eeecb2c5c4a70
3
+ size 4932875543
model_00027-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66ce06a60cd3b3b085dbebeff833cd5510047c299e98dc87b936ab2d5c9bc52
3
+ size 4932875543
model_00028-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e12fdebc4f3cb0e92754ed1b379f4edb1422632e031d7804f08f436d84e036
3
+ size 4932875543
model_00029-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ed59dc66836d6034ed5da884d1812a760966416e7294c987e9ffb2ce240c24
3
+ size 4932875543
model_00030-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec26a9dd14d23fedbb82edfe0879d5f1503e5a051eb308c831a11238fb4294fa
3
+ size 4932875543
model_00031-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e0c3318aabb013d4a48ff871c4550d718c2f68b7088051637303425e033f45
3
+ size 4932875543
model_00032-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0776f0a61f6df60732edefe23d6990c22eba306693857df077c083c234e79235
3
+ size 4932875543
model_00033-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ee6e7be1a916a67e6b3c707411ca9f2460b3c290fe9a59cab948e3ea1d78d6
3
+ size 4932875543
model_00034-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b5b413253cf5a9afc8c6e30962bad03ebddeb8e05ff79a38be11045d680ff09
3
+ size 4932875543
model_00035-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97faa992fbfcc4f8d3ad4ac8bbc274c8a19d969a4693d37a5705d087d02dcc97
3
+ size 4932875543
model_00036-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3fd31785ac9071d7373ea080bc0add35cb5e9da80b6032acf5b7a3da6740a7
3
+ size 4932875543
model_00037-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5204f0a325dd6957ac7fb50721ea673bc3aec77f0c0b321d296523c512ed9c38
3
+ size 4932875543
model_00038-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00cf8536d13000c50a74d2051ad99405e30cd4ca9f12105d8969e48bc29bd51
3
+ size 4932875543
model_00039-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69f087d035224b20881afe98e5db0a68b438a891b4d53bf4b598c853aecf25bf
3
+ size 4932875543
model_00040-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3dc468943c23e4552400e1ef1d703c5a48f56ed9b50180278f4af8b0169844f
3
+ size 4932875543
model_00041-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081720fa9373987be0555ab87aeeb0152c55154d520f5b2194fd61bcd424f5ea
3
+ size 4932875543
model_00042-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83778fe861ea1d3899fb16cb8b415cc7cef46834156b62930bcf6355554dc6d0
3
+ size 4932875543
model_00043-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b595b6aa4ea1650719dbd86c4c5691356bf23d5b74ee6a61898b3ea31b0fc6f
3
+ size 4932875543
model_00044-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c89aabdea4b993eb0f7f996005559bb3f52cf1997f4ded797aba36765c73aad
3
+ size 4932875543
model_00045-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54e034d46c4db8c91188860c40b64bf5c00e3b5064dc46a9152acabf8ac14ef9
3
+ size 4932875543
model_00046-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61559dec39143c7a1197c82612bf17a78c438e51dc821ddf6e49cbdd1e877f1a
3
+ size 4932875543
model_00047-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97f83d253b9afa0b2fbfcbca421d02c3ee3f51efebc1cd9d85d7b3d230b1ead
3
+ size 4932875543
model_00048-of-00072.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:513e2a76f92420713f3adcf1264ccdf9b9c26ba632222d5a8483372fc4516f74
3
+ size 4932875543