Spaces:
Runtime error
Runtime error
NewBreaker
committed on
add app.py int4 cpu model and stream show
Browse files
app.py
CHANGED
@@ -2,10 +2,35 @@ from transformers import AutoModel, AutoTokenizer
|
|
2 |
import gradio as gr
|
3 |
import mdtex2html
|
4 |
|
|
|
5 |
# tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
6 |
# model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
7 |
tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
|
8 |
model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
model = model.eval()
|
11 |
|
|
|
2 |
import gradio as gr
|
3 |
import mdtex2html
|
4 |
|
5 |
+
<<<<<<< HEAD
|
6 |
# tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
7 |
# model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
8 |
tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
|
9 |
model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
|
10 |
+
=======
|
11 |
+
# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
|
12 |
+
# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
|
13 |
+
# chatglm-6b-int4 on CUDA: runs successfully on a local machine
|
14 |
+
# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
|
15 |
+
# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
|
16 |
+
|
17 |
+
|
18 |
+
# chatglm-6b-int4 CPU,
|
19 |
+
tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
|
20 |
+
model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
|
21 |
+
|
22 |
+
|
23 |
+
|
24 |
+
# chatglm-6b
|
25 |
+
# kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
|
26 |
+
# tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
|
27 |
+
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
|
28 |
+
# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
# model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
|
33 |
+
>>>>>>> a5ffac38caa4a817f0c1633254d42160662806a8
|
34 |
|
35 |
model = model.eval()
|
36 |
|