Spaces:
Runtime error
Runtime error
limafang
commited on
Commit
•
b40a4c8
1
Parent(s):
849ad01
上传utils
Browse files- utils/API.py +244 -0
- utils/__pycache__/API.cpython-310.pyc +0 -0
- utils/__pycache__/tools.cpython-310.pyc +0 -0
- utils/__pycache__/tools.cpython-37.pyc +0 -0
- utils/tools.py +119 -0
utils/API.py
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import base64
import hmac
import json
from datetime import datetime, timezone
from email.utils import formatdate
from urllib.parse import urlencode, urlparse

from websocket import create_connection, WebSocketConnectionClosedException

from utils.tools import get_prompt, process_response, init_script, create_script
|
9 |
+
|
10 |
+
|
11 |
+
class SparkAPI:
    """Minimal client for the iFlytek SparkDesk chat WebSocket API (v1.1).

    Builds the HMAC-SHA256 signed authorization URL, sends a chat request
    over a WebSocket and collects the streamed answer frames via
    ``utils.tools.process_response``.
    """

    __api_url = 'wss://spark-api.xf-yun.com/v1.1/chat'
    # Per-request token cap; 4096 is the documented limit of the v1.1 endpoint.
    __max_token = 4096

    def __init__(self, app_id, api_key, api_secret):
        # Credentials issued by the iFlytek open-platform console.
        self.__app_id = app_id
        self.__api_key = api_key
        self.__api_secret = api_secret

    def __set_max_tokens(self, token):
        """Override the per-request token cap.

        Rejects non-integer or non-positive values with a message instead of
        raising, matching the original best-effort behavior.
        """
        # Fixed: the old check (`token < 0`) accepted 0 despite the message
        # demanding a positive integer.
        if not isinstance(token, int) or token <= 0:
            print("set_max_tokens() error: tokens should be a positive integer!")
            return
        self.__max_token = token

    def __get_authorization_url(self):
        """Return the WebSocket URL carrying ``authorization``, ``date`` and
        ``host`` query parameters.

        The signed string is ``"host: ...\\ndate: ...\\nGET <path> HTTP/1.1"``,
        HMAC-SHA256 signed with the API secret and base64 encoded, as the
        SparkDesk Web API documentation specifies.
        """
        authorize_url = urlparse(self.__api_url)
        # Fixed: the gateway expects an RFC 1123 date ending in "GMT".
        # strftime('%Z') with timezone.utc rendered "UTC" (and %a/%b are
        # locale dependent); formatdate(usegmt=True) is correct and stable.
        date = formatdate(usegmt=True)
        signature_origin = "host: {}\ndate: {}\nGET {} HTTP/1.1".format(
            authorize_url.netloc, date, authorize_url.path
        )
        signature = base64.b64encode(
            hmac.new(
                self.__api_secret.encode(),
                signature_origin.encode(),
                digestmod='sha256'
            ).digest()
        ).decode()
        authorization_origin = \
            'api_key="{}",algorithm="{}",headers="{}",signature="{}"'.format(
                self.__api_key, "hmac-sha256", "host date request-line", signature
            )
        authorization = base64.b64encode(
            authorization_origin.encode()).decode()
        params = {
            "authorization": authorization,
            "date": date,
            "host": authorize_url.netloc,
        }
        return self.__api_url + "?" + urlencode(params)

    def __build_inputs(
            self,
            message: dict,
            user_id: str = "001",
            domain: str = "general",
            temperature: float = 0.5,
            max_tokens: int = 4096,
    ):
        """Serialize the request envelope (header/parameter/payload) to JSON."""
        input_dict = {
            "header": {
                "app_id": self.__app_id,
                "uid": user_id,
            },
            "parameter": {
                "chat": {
                    "domain": domain,
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                }
            },
            "payload": {
                "message": message
            }
        }
        return json.dumps(input_dict)

    def chat(
            self,
            query: str,
            history: list = None,  # conversation history, mutated in place
            user_id: str = "001",
            domain: str = "general",
            max_tokens: int = 4096,
            temperature: float = 0.5,
    ):
        """Send *query* and return the complete answer as one string.

        Returns None if the connection closes before the final frame arrives.
        """
        if history is None:
            history = []

        # Fixed: clamp to the configurable cap (see __set_max_tokens) instead
        # of a hard-coded 4096 that silently ignored it.
        max_tokens = min(max_tokens, self.__max_token)
        url = self.__get_authorization_url()
        ws = create_connection(url)
        message = get_prompt(query, history)
        input_str = self.__build_inputs(
            message=message,
            user_id=user_id,
            domain=domain,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        ws.send(input_str)
        response_str = ws.recv()
        try:
            while True:
                response, history, status = process_response(
                    response_str, history)
                # status == 2 marks the final frame of a complete answer.
                # doc: https://www.xfyun.cn/doc/spark/Web.html#_1-%E6%8E%A5%E5%8F%A3%E8%AF%B4%E6%98%8E
                if len(response) == 0 or status == 2:
                    break
                response_str = ws.recv()
            return response
        except WebSocketConnectionClosedException:
            print("Connection closed")
        finally:
            ws.close()

    # Stream output generator, used for terminal chat.
    def streaming_output(
            self,
            query: str,
            history: list = None,  # conversation history, mutated in place
            user_id: str = "001",
            domain: str = "general",
            max_tokens: int = 4096,
            temperature: float = 0.5,
    ):
        """Yield ``(partial_response, history)`` pairs as frames stream in."""
        if history is None:
            history = []
        # Fixed: clamp to the configurable cap, consistent with chat().
        max_tokens = min(max_tokens, self.__max_token)
        url = self.__get_authorization_url()
        ws = create_connection(url)

        message = get_prompt(query, history)
        input_str = self.__build_inputs(
            message=message,
            user_id=user_id,
            domain=domain,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        # Send the question/prompt, then read streamed answer frames.
        ws.send(input_str)
        response_str = ws.recv()

        try:
            while True:
                response, history, status = process_response(
                    response_str, history)
                yield response, history
                if len(response) == 0 or status == 2:
                    break
                response_str = ws.recv()
        except WebSocketConnectionClosedException:
            print("Connection closed")
        finally:
            ws.close()

    def chat_stream(self):
        """Interactive terminal chat loop with script-host commands.

        Commands: ``init`` loads a script JSON and seeds a game-host prompt,
        ``create`` writes a new script file, ``exit``/``stop`` quit.
        """
        history = []
        try:
            print("输入init来初始化剧本,输入create来创作剧本,输入exit或stop来终止对话\n")
            while True:
                query = input("Ask: ")
                if query == 'init':
                    jsonfile = input("请输入剧本文件路径:")
                    script_data = init_script(history, jsonfile)
                    print(
                        f"正在导入剧本{script_data['name']},角色信息:{script_data['characters']},剧情介绍:{script_data['summary']}")
                    query = f"我希望你能够扮演这个剧本杀游戏的主持人,我希望你能够逐步引导玩家到达最终结局,同时希望你在游戏中设定一些随机事件,需要玩家依靠自身的能力解决,当玩家做出偏离主线的行为或者与剧本无关的行为时,你需要委婉地将玩家引导至正常游玩路线中,对于玩家需要决策的事件,你需要提供一些行动推荐,下面是剧本介绍:{script_data}"
                if query == 'create':
                    name = input('请输入剧本名称:')
                    characters = input('请输入角色信息:')
                    summary = input('请输入剧情介绍:')
                    details = input('请输入剧本细节')
                    create_script(name, characters, summary, details)
                    print('剧本创建成功!')
                    continue
                if query == "exit" or query == "stop":
                    break
                # streaming_output mutates `history` in place, so context
                # carries across turns even though the yielded copy is discarded.
                for response, _ in self.streaming_output(query, history):
                    print("\r" + response, end="")
                print("\n")
        finally:
            print("\nThank you for using the SparkDesk AI. Welcome to use it again!")
|
204 |
+
|
205 |
+
|
206 |
+
from langchain.llms.base import LLM
|
207 |
+
from typing import Any, List, Mapping, Optional
|
208 |
+
class Spark_forlangchain(LLM):
    """LangChain LLM adapter that delegates generation to :class:`SparkAPI`."""

    # Pydantic-declared fields (LLM is a pydantic model).
    n: int
    app_id: str
    api_key: str
    api_secret: str

    @property
    def _llm_type(self) -> str:
        """Tag identifying this LLM subclass to LangChain."""
        return "Spark"

    def _call(
        self,
        query: str,
        history: list = None,  # conversation history, forwarded to SparkAPI
        user_id: str = "001",
        domain: str = "general",
        max_tokens: int = 4096,
        temperature: float = 0.7,
        stop: Optional[List[str]] = None,
    ) -> str:
        """Answer *query* via a fresh SparkAPI client and return the reply."""
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")
        client = SparkAPI(
            app_id=self.app_id,
            api_key=self.api_key,
            api_secret=self.api_secret,
        )
        return client.chat(query, history, user_id, domain, max_tokens, temperature)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"n": self.n}
|
utils/__pycache__/API.cpython-310.pyc
ADDED
Binary file (6.6 kB). View file
|
|
utils/__pycache__/tools.cpython-310.pyc
ADDED
Binary file (3.7 kB). View file
|
|
utils/__pycache__/tools.cpython-37.pyc
ADDED
Binary file (1.93 kB). View file
|
|
utils/tools.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
from glob import glob
|
5 |
+
|
6 |
+
def read_json_file(file_path):
    """Load a script JSON file from the ./script directory.

    Args:
        file_path: file name (or relative path) inside the ./script folder.

    Returns:
        The parsed JSON content (typically a dict describing a script).
    """
    # os.path.join handles separators portably instead of the previous
    # naive "./script/" + file_path string concatenation.
    full_path = os.path.join("./script", file_path)
    with open(full_path, 'r', encoding='utf-8') as file:
        return json.load(file)
|
11 |
+
|
12 |
+
|
13 |
+
def get_prompt(query: str, history: list):
    """Append the user's turn to *history* (mutated in place) and wrap it
    into the API message payload.

    Returns ``{"text": history}`` where the list is the same object the
    caller passed in, so downstream code sees the accumulated turns.
    """
    if history is None:
        history = []
    history.append({"role": "user", "content": query})
    return {"text": history}
|
20 |
+
|
21 |
+
|
22 |
+
def process_response(response_str: str, history: list):
    """Fold one streamed API frame into *history* and return the answer so far.

    Returns ``(response_text, history, status)`` where status 0 marks the
    first frame of an answer, 1 an intermediate frame and 2 the final frame.
    Error frames (non-zero code) and empty payloads yield ``""`` as the text.
    *history* is mutated in place: the first frame appends a new assistant
    turn, later frames extend that turn's content.
    """
    frame: dict = json.loads(response_str)
    code = frame.get("header", {}).get("code")
    status = frame.get("header", {}).get("status", 2)

    if code != 0:
        print("error code ", code)
        print("you can see this website to know code detail")
        print("https://www.xfyun.cn/doc/spark/%E6%8E%A5%E5%8F%A3%E8%AF%B4%E6%98%8E.html")
        return "", history, status

    choice = frame.get("payload", {}).get("choices", {}).get("text", [{}])[0]
    content = choice.get("content", "")
    if not choice or not content:
        # Nothing usable in this frame.
        return "", history, status

    # Drop the bookkeeping field before storing the turn in history.
    choice.pop("index", None)
    if status == 0:
        # First frame: start a new assistant turn.
        history.append(choice)
        return content, history, status
    # Follow-up frame: extend the current turn and report the accumulated text.
    history[-1]["content"] += content
    return history[-1]["content"], history, status
|
52 |
+
|
53 |
+
|
54 |
+
def init_script(history: list, jsonfile):
    """Load a script definition from ./script/<jsonfile> and return it.

    *history* is accepted for call-site compatibility but is not used here.
    """
    return read_json_file(jsonfile)
|
57 |
+
|
58 |
+
|
59 |
+
def create_script(name, characters, summary, details):
    """Write ./script/<name>.json describing a murder-mystery script.

    Args:
        name: script title, also used as the file name.
        characters: character descriptions.
        summary: plot summary.
        details: detailed plot notes.
    """
    # exist_ok avoids the check-then-create race; the redundant in-function
    # `import os` (os is already imported at module level) is gone.
    os.makedirs("script", exist_ok=True)
    data = {
        "name": name,
        "characters": characters,
        "summary": summary,
        "details": details,
    }
    # ensure_ascii=False keeps Chinese script text human-readable on disk.
    json_data = json.dumps(data, ensure_ascii=False)
    print(json_data)  # kept: echoes what was written, used as CLI feedback
    with open(f"./script/{name}.json", "w", encoding='utf-8') as file:
        file.write(json_data)
|
75 |
+
|
76 |
+
|
77 |
+
def txt2vec(name: str, file_path: str):
    """Chunk a text file, embed the chunks and persist a FAISS index
    under ``data/faiss/<name>/``.
    """
    # Heavy third-party imports stay local so importing this module is cheap.
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS
    import sentence_transformers

    documents = TextLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=256, chunk_overlap=128)
    chunks = splitter.split_documents(documents)

    embedding_model = "model/text2vec_ernie/"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    # NOTE(review): assumes a CUDA device is available — confirm before
    # running on CPU-only hosts.
    embeddings.client = sentence_transformers.SentenceTransformer(
        embeddings.model_name, device='cuda')

    index = FAISS.from_documents(chunks, embeddings)
    index.save_local(f"data/faiss/{name}/")
|
94 |
+
|
95 |
+
|
96 |
+
def pdf2vec(name: str, file_path: str):
    """Split a PDF into chunks, embed them and persist a FAISS index
    under ``data/faiss/<name>/``.
    """
    # Heavy third-party imports stay local so importing this module is cheap.
    from langchain.document_loaders import PyPDFLoader
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS
    import sentence_transformers

    chunks = PyPDFLoader(file_path).load_and_split()

    embedding_model = "model/text2vec_ernie/"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    # NOTE(review): assumes a CUDA device is available — confirm before
    # running on CPU-only hosts.
    embeddings.client = sentence_transformers.SentenceTransformer(
        embeddings.model_name, device='cuda')

    index = FAISS.from_documents(chunks, embeddings)
    index.save_local(f"data/faiss/{name}/")
|
109 |
+
def mycopyfile(srcfile, dstpath):  # copy helper
    """Copy *srcfile* into directory *dstpath*, creating the directory
    if needed.

    Prints progress; when the source file does not exist, prints a message
    and does nothing (kept as best-effort, matching the original contract).
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return
    fpath, fname = os.path.split(srcfile)  # split directory and file name
    print(fpath)
    print(fname)
    os.makedirs(dstpath, exist_ok=True)  # create the destination path
    # Fixed: os.path.join works whether or not dstpath ends with a
    # separator; the old "dstpath + fname" built a wrong path otherwise.
    dst = os.path.join(dstpath, fname)
    shutil.copy(srcfile, dst)
    print("copy %s -> %s" % (srcfile, dst))