Merge branch 'main' of https://huggingface.co/THUDM/glm-4-9b

Files changed:
- LICENSE (+1, -1)
- README.md (+7, -8)
- modeling_chatglm.py (+4, -7)
LICENSE CHANGED

```diff
@@ -45,7 +45,7 @@ The glm-4-9b License
 
 2. License
 
-
+Under the terms and conditions of this license, the Licensor hereby grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license.
 This license allows you to use all open source models in this repository for free for academic research. For users who wish to use the models for commercial purposes, please do so [here](https://open.bigmodel.cn/mla/form)
 Complete registration. Registered users are free to use this model for commercial activities, but must comply with all terms and conditions of this license.
 The copyright notice and this license notice shall be included in all copies or substantial portions of the Software.
```
README.md CHANGED

```diff
@@ -2,15 +2,15 @@
 license: other
 license_name: glm-4
 license_link: https://huggingface.co/THUDM/glm-4-9b/LICENSE
-
 language:
-
-
+- zh
+- en
 tags:
-
-
-
+- glm
+- chatglm
+- thudm
 inference: false
+pipeline_tag: text-generation
 ---
 
 # GLM-4-9B
@@ -62,5 +62,4 @@ Use of the GLM-4 model weights must comply with the [LICENSE](LICENSE).
   pages={320--335},
   year={2022}
 }
-```
-
+```
```
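The YAML front-matter edits are what the Hub indexes: `language` and `tags` feed the search filters, and `pipeline_tag: text-generation` tells the Hub which task to associate with the repo. A minimal sketch of reading the patched metadata back, assuming the `huggingface_hub` package and network access (illustrative only, not part of this commit):

```python
# Sketch: read the model card metadata that this commit's YAML edits
# introduce; assumes huggingface_hub is installed and the repo is reachable.
from huggingface_hub import ModelCard

card = ModelCard.load("THUDM/glm-4-9b")

# Fields touched by this commit's front-matter changes.
print(card.data.language)      # expected after the patch: ['zh', 'en']
print(card.data.tags)          # expected after the patch: ['glm', 'chatglm', 'thudm']
print(card.data.pipeline_tag)  # expected after the patch: 'text-generation'
```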
modeling_chatglm.py CHANGED

```diff
@@ -253,15 +253,12 @@ class CoreAttention(torch.nn.Module):
         # This is actually dropping out entire tokens to attend to, which might
         # seem a bit unusual, but is taken from the original Transformer paper.
         attention_probs = self.attention_dropout(attention_probs)
-        # =========================
-        # Context layer. [sq, b, hp]
-        # =========================
-
-        # value_layer -> context layer.
-        # [sk, b, np, hn] --> [b, np, sq, hn]
 
+        # query layer shape: [b * np, sq, hn]
+        # value layer shape: [b, np, sk, hn]
+        # attention shape: [b, np, sq, sk]
         # context layer shape: [b, np, sq, hn]
-        output_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3))
+        output_size = (value_layer.size(0), value_layer.size(1), query_layer.size(1), value_layer.size(3))
         # change view [b * np, sk, hn]
         value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
         # change view [b * np, sq, sk]
```