tclf90 committed on
Commit 40945b7
1 Parent(s): 732c58a

'Optimize model quantization loss'

README.md CHANGED
@@ -16,7 +16,7 @@ tags:
 
 
 ### 【Model Update Date】
-``` 2024-06-05 23:50 ```
+``` 2024-06-18 ```
 
 ### 【Model Size】
 `11.0GB`
@@ -29,6 +29,9 @@ tags:
 ### 【Changelog】
 
 ```
+2024-06-18
+Optimize model quantization loss
+
 2024-06-05 23:50
 Initial commit
 
config.json CHANGED
@@ -28,7 +28,7 @@
   "hidden_dropout": 0.0,
   "hidden_size": 4096,
   "kv_channels": 128,
-  "layernorm_epsilon": 1e-06,
+  "layernorm_epsilon": 1.5625e-07,
   "model_type": "chatglm",
   "multi_query_attention": true,
   "multi_query_group_num": 2,
generation_config.json CHANGED
@@ -5,6 +5,5 @@
     151336,
     151338
   ],
-  "pad_token_id": 151329,
   "transformers_version": "4.40.2"
 }
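With "pad_token_id" dropped from generation_config.json, generation no longer pins a padding id up front. If a caller needs one (for batched generation, for example), it can still be passed per call; otherwise transformers typically falls back to eos_token_id with a warning. A small usage sketch; the repository id below is a placeholder, not the actual model path:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tclf90/<this-repo>"  # placeholder: substitute the actual repo id

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, device_map="auto"
)

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
# pad_token_id is no longer fixed in generation_config.json, so set it at call
# time when padding matters; otherwise generate() falls back to eos_token_id.
output = model.generate(
    **inputs, max_new_tokens=32, pad_token_id=tokenizer.eos_token_id
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```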
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c79e4ae12da0ae7b9b233f2fe8b6b3a46269c58f38c596dd030ab0ed7845505
+oid sha256:1f9f259f678707d482f9e75b80f4a4e964fe4e3757cdcf360d923931e71456f5
 size 4970796416
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c82ba03bd76f42c804706423fc4be54486cf53b255b86798acce637e318d14b4
+oid sha256:939de54bfc2b87aa79fce72b92d4da8743b09d662b7e92f8466f613fb0aa61eb
 size 4819195016
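Both shard entries are Git LFS pointer files: only the sha256 oid changes while the sizes stay the same, i.e. the weights were re-exported in place. Under the LFS spec the oid is the SHA-256 of the shard's contents, so a downloaded file can be checked against it; a minimal sketch (the file path and helper name are illustrative):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so multi-gigabyte shards never have to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for shard 1 from the pointer file above.
expected = "1f9f259f678707d482f9e75b80f4a4e964fe4e3757cdcf360d923931e71456f5"
print(sha256_of("model-00001-of-00003.safetensors") == expected)
```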
modeling_chatglm.py CHANGED
@@ -324,7 +324,7 @@ class SelfAttention(torch.nn.Module):
         )
 
     def forward(
-            self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True
+            self, hidden_states, attention_mask, rotary_pos_emb=None, kv_cache=None, use_cache=True
     ):
         # hidden_states: [b, sq, h]
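The only code change gives `rotary_pos_emb` a default of `None` in `SelfAttention.forward`, so the module can now be called without positional embeddings (for example by tooling that invokes layer forwards directly). A minimal stand-in class to illustrate what the new default allows; it is not the repository's implementation:

```python
import torch
import torch.nn as nn

class SelfAttentionSketch(nn.Module):
    # Stand-in with the same signature as the patched forward(); the real class
    # projects q/k/v, applies rotary embeddings and computes attention. Here we
    # only show which call styles the new default makes legal.
    def forward(self, hidden_states, attention_mask, rotary_pos_emb=None,
                kv_cache=None, use_cache=True):
        if rotary_pos_emb is not None:
            pass  # the real model applies rotary position embeddings to q/k here
        return hidden_states, kv_cache

layer = SelfAttentionSketch()
x = torch.randn(1, 4, 8)
layer(x, None, rotary_pos_emb=None)  # valid before and after the change
layer(x, None)                       # valid only once rotary_pos_emb has a default
```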