carsonhxsu committed
Commit 53f87ca · Parent: 1c9695e

Update code

Files changed:
- README.md (+1 -1)
- demo.py (+5 -5)
- lyraChatGLM/ftlib/libth_transformer_sm70_cu11.so (+0 -3)
- lyraChatGLM/ftlib/libth_transformer_sm70_cu12.so (+0 -3)
- lyraChatGLM/ftlib/libth_transformer_sm80_cu11.so (+0 -3)
- lyraChatGLM/ftlib/libth_transformer_sm80_cu12.so (+0 -3)
- lyraChatGLM/lyra_glm.py (+4 -1)
- lyraChatGLM/model.py (+1 -0)
README.md CHANGED
@@ -86,7 +86,7 @@ python demo.py
 ```python
 from lyraChatGLM import LyraChatGLM6B
 
-model_path = "./models/1-gpu-fp16.
+model_path = "./models/1-gpu-fp16.bin"
 tokenizer_path = "./models"
 data_type = "fp16"
 int8_mode = 0 # 1 for INT8 WEIGHT ONLY PTQ
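Context for the README snippet: `int8_mode = 0 # 1 for INT8 WEIGHT ONLY PTQ` toggles weight-only post-training quantization, where weights are stored as INT8 and dequantized on the fly while activations stay in fp16. A hedged sketch of enabling it, reusing the six-argument constructor that demo.py passes below (the positional signature is taken from demo.py in this commit, not from other documentation):

```python
# Sketch: README setup with weight-only INT8 enabled.
# Assumes the six-argument constructor shown in demo.py in this commit.
from lyraChatGLM import LyraChatGLM6B

model_path = "./models/1-gpu-fp16.bin"
tokenizer_path = "./models"
data_type = "fp16"
int8_mode = 1      # 1 = INT8 weight-only PTQ, 0 = plain fp16 weights
arch = "Volta"     # Ampere or Volta
cuda_version = 11  # 11 or 12

model = LyraChatGLM6B(model_path, tokenizer_path, data_type, int8_mode, arch, cuda_version)
```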
demo.py CHANGED
@@ -3,13 +3,13 @@ import numpy as np
 
 model_path = "./models/1-gpu-fp16.bin"
 tokenizer_path = "./models"
-
+inference_data_type = "fp16"
 int8_mode = 0
 max_output_length = 150
-arch = "
-cuda_version =
+arch = "Volta" # Ampere or Volta
+cuda_version = 11 # cuda version, we currently support 11 and 12
 
-model = LyraChatGLM6B(model_path, tokenizer_path,
+model = LyraChatGLM6B(model_path, tokenizer_path, inference_data_type, int8_mode, arch, cuda_version)
 
 prompt = "今天天气大概 25度,有点小雨,吹着风,我想去户外散步,应该穿什么样的衣服裤子鞋子搭配。"
 # test_batch_size = 256
@@ -19,4 +19,4 @@ prompts = [prompt, ]
 # # If you want to get different output in same batch, you can set do_sample to True
 output_texts = model.generate(prompts, output_length=max_output_length,top_k=30, top_p=0.85, temperature=0.35, repetition_penalty=1.2, do_sample=False)
 
-print(output_texts)
+print(output_texts)
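The retained comment in demo.py notes that `do_sample=True` produces different outputs within a batch. (The demo prompt asks, roughly: "It's about 25°C today with light rain and some wind; I want to take a walk outdoors — what clothes, trousers, and shoes should I wear?") A short variation on the demo's final call, assuming the same `model`, `prompts`, and `max_output_length` as above:

```python
# Greedy decode (do_sample=False): identical prompts give identical outputs.
# Sampling (do_sample=True): tokens are drawn from the top_k/top_p-filtered,
# temperature-scaled distribution, so repeated prompts can diverge.
output_texts = model.generate(
    prompts,
    output_length=max_output_length,
    top_k=30,
    top_p=0.85,
    temperature=0.35,
    repetition_penalty=1.2,
    do_sample=True,  # flipped to sample; demo.py ships with False
)
print(output_texts)
```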
lyraChatGLM/ftlib/libth_transformer_sm70_cu11.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4a778897f6c5f77b0ea1cb14bb63732da9c3cc4e16ff16d9f911dcc8b6f6be5
-size 114267536
lyraChatGLM/ftlib/libth_transformer_sm70_cu12.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:99ac80b2f4c161bbacbf64a7607f323c612c7c5f26b83eaec7f559425f3a818b
-size 114186112
lyraChatGLM/ftlib/libth_transformer_sm80_cu11.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a1d6cd03321b671275fcabb4136562845233875564047ccde20401fca4df45c2
-size 200834616
lyraChatGLM/ftlib/libth_transformer_sm80_cu12.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2da10aad8e92bcdf45b15884cee63e845f582cd28bcc0f7f1c2a4f6a101e9646
-size 200916960
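The four deleted Git-LFS pointers cover the sm70/sm80 × cu11/cu12 matrix that demo.py's new `arch` and `cuda_version` arguments appear to select over: Volta GPUs are compute capability 7.0 (sm70) and Ampere GPUs are 8.0 (sm80). A minimal sketch of that mapping; the helper name `ftlib_name` is hypothetical, not code from this repo:

```python
# Hypothetical helper: map (arch, cuda_version) to the ftlib filename pattern
# used by the four deleted libraries above.
_SM_BY_ARCH = {"Volta": 70, "Ampere": 80}  # compute capability 7.0 / 8.0

def ftlib_name(arch: str, cuda_version: int) -> str:
    if arch not in _SM_BY_ARCH:
        raise ValueError(f"unsupported arch: {arch!r} (expected Ampere or Volta)")
    if cuda_version not in (11, 12):
        raise ValueError("we currently support CUDA 11 and 12")
    return f"libth_transformer_sm{_SM_BY_ARCH[arch]}_cu{cuda_version}.so"

print(ftlib_name("Volta", 11))   # libth_transformer_sm70_cu11.so
print(ftlib_name("Ampere", 12))  # libth_transformer_sm80_cu12.so
```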
lyraChatGLM/lyra_glm.py CHANGED
@@ -134,7 +134,10 @@ class LyraChatGLM6B:
 ones_int = torch.ones(size=[batch_size], dtype=torch.int32)
 ones_float = torch.ones(size=[batch_size], dtype=torch.float32)
 
-input_token_ids = self.tokenizer(prompts, return_tensors="pt", padding=True).input_ids.int()
+# input_token_ids = self.tokenizer(prompts, return_tensors="pt", padding=True).input_ids.int()
+raw_input_token_ids = self.tokenizer(prompts, padding=True)
+input_token_ids = torch.tensor(raw_input_token_ids["input_ids"], dtype=torch.int32)
+
 input_lengths = torch.IntTensor([len(ids) for ids in input_token_ids])
 mask_positions = torch.IntTensor([seq.index(130001) for seq in input_token_ids.tolist()])
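The rewritten tokenization builds the int32 tensor from the tokenizer's plain Python lists instead of requesting PyTorch tensors and casting. The two paths should produce the same tensor; a standalone check, assuming a Hugging Face tokenizer with padding configured (the checkpoint name here is illustrative — lyra_glm.py loads its tokenizer from a local path):

```python
import torch
from transformers import AutoTokenizer

# Illustrative checkpoint; the repo loads from tokenizer_path instead.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
prompts = ["hello", "a somewhat longer prompt"]

# Old path: tokenize straight to PyTorch tensors, then cast to int32.
old_ids = tokenizer(prompts, return_tensors="pt", padding=True).input_ids.int()

# New path: tokenize to Python lists, then build the int32 tensor explicitly.
raw = tokenizer(prompts, padding=True)
new_ids = torch.tensor(raw["input_ids"], dtype=torch.int32)

assert torch.equal(old_ids, new_ids)
```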
lyraChatGLM/model.py CHANGED
@@ -123,6 +123,7 @@ class ChatGLM6BModel(nn.Module):
 self.adapter_inter_size,
 self.use_attention_linear_bias,
 self.model_path,
+self.weights_data_type,
 inference_data_type,
 self.shared_contexts_ratio)
 self.build_model = True