Spaces:

NewBreaker
/

chatglm-6b-int4

Runtime error

App Files Files Community

NewBreaker committed on Apr 28, 2023

Commit

c549d70

•

2 Parent(s): ae25e3a a5ffac3

add app.py int4 cpu model and stream show

Browse files

Files changed (1) hide show

app.py +25 -0

app.py CHANGED Viewed

@@ -2,10 +2,35 @@ from transformers import AutoModel, AutoTokenizer
 import gradio as gr
 import mdtex2html
 # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
 # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
 tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
 model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
 model = model.eval()

 import gradio as gr
 import mdtex2html
+<<<<<<< HEAD
 # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
 # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
 tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
 model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
+=======
+# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+# chatglm-6b-int4 cuda, runs successfully on a local machine
+# tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+# model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+# chatglm-6b-int4 CPU，
+tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
+model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
+# chatglm-6b
+# kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
+# tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
+# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
+# model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
+# model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
+>>>>>>> a5ffac38caa4a817f0c1633254d42160662806a8
 model = model.eval()