NewBreaker committed on
Commit
c549d70
2 Parent(s): ae25e3a a5ffac3

add app.py int4 cpu model and stream show

Browse files
Files changed (1) hide show
  1. app.py +25 -0
app.py CHANGED
@@ -2,10 +2,35 @@ from transformers import AutoModel, AutoTokenizer
2
  import gradio as gr
3
  import mdtex2html
4
 
 
5
  # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
6
  # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
7
  tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
8
  model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  model = model.eval()
11
 
 
2
  import gradio as gr
3
  import mdtex2html
4
 
5
+ <<<<<<< HEAD
6
  # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
7
  # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
8
  tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
9
  model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
10
+ =======
11
+ # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
12
+ # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
13
+ # chatglm-6b-int4 cuda,本地可以运行成功
14
+ # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
15
+ # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
16
+
17
+
18
+ # chatglm-6b-int4 CPU,
19
+ tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
20
+ model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
21
+
22
+
23
+
24
+ # chatglm-6b
25
+ # kernel_file = "./models/chatglm-6b-int4/quantization_kernels.so"
26
+ # tokenizer = AutoTokenizer.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="")
27
+ # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
28
+ # model = AutoModel.from_pretrained("./models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
29
+
30
+
31
+
32
+ # model = model.quantize(bits=model_args.quantization_bit, kernel_file=kernel_file)
33
+ >>>>>>> a5ffac38caa4a817f0c1633254d42160662806a8
34
 
35
  model = model.eval()
36