Spaces:

NewBreaker
/

chatglm-6b-int4

Runtime error

NewBreaker committed on Apr 29, 2023

Commit

057dc4f

•

1 Parent(s): 324a277

添加了 API 的调用

Files changed (4) hide show

1.py CHANGED Viewed

@@ -1,14 +1,19 @@
-from transformers import AutoTokenizer, AutoModel
-tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
-model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
-kernel_file = ".\\models\\chatglm-6b-int4\\quantization_kernels.so"
-model = model.quantize(bits=4,kernel_file=kernel_file)
-model = model.eval()
-response, history = model.chat(tokenizer, "你好", history=[])
-print(response)

+import requests
+url = "http://127.0.0.1:8000/stream"    # 替换成实际的 API 地址
+data = {
+    "prompt": "你好",    # 下面是请求体中的数据内容
+    "history": []
+}
+response = requests.get(url, json=data,stream=True)
+if response.status_code == 200:
+    for line in response.iter_lines():
+        if line:
+            print(line.decode())
+else:
+    print("请求失败，状态码：", response.status_code)

api.py CHANGED Viewed

@@ -52,8 +52,8 @@ async def create_item(request: Request):
 if __name__ == '__main__':
     # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
     # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
-    tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
-    model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
     model.eval()

 if __name__ == '__main__':
     # tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
     # model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+    tokenizer = AutoTokenizer.from_pretrained("models\\chatglm-6b-int4", trust_remote_code=True, revision="")
+    model = AutoModel.from_pretrained("models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
     model.eval()

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import gradio as gr
 import mdtex2html
 tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
-model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float()
 # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
 # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
 # chatglm-6b-int4 cuda，本地可以运行成功

 import mdtex2html
 tokenizer = AutoTokenizer.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="")
+model = AutoModel.from_pretrained("models/chatglm-6b-int4", trust_remote_code=True, revision="").float().cuda()
 # tokenizer = AutoTokenizer.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="")
 # model = AutoModel.from_pretrained(".\\models\\chatglm-6b-int4", trust_remote_code=True, revision="").half().cuda()
 # chatglm-6b-int4 cuda，本地可以运行成功

cal_api.py ADDED Viewed

+import requests
+url = "http://127.0.0.1:8000"  # 替换成实际的 API 地址
+data = {
+    "prompt": "你好",  # 下面是请求体中的数据内容
+    "history": []
+}
+response = requests.post(url, json=data)
+if response.status_code == 200:
+    result = response.json()
+    print(result.get("response"))
+    print(result.get("history"))
+else:
+    print("请求失败，状态码：", response.status_code)