xu song
committed on
Commit 459fbe3 • 1 Parent(s): 55b26f1
update
Browse files
- app.py +29 -26
- app_util.py +14 -7
- config.py +9 -0
- simulator.py → models/base_model.py +0 -0
- models/cpp_qwen2.py +13 -14
- models/hf_qwen2.py +1 -1
app.py
CHANGED
@@ -4,15 +4,13 @@
 
 # 难点
 
-
-
 ## TODO
 
-
-- 代码和表格的预览
-- markdown解析:mdtex2html
-- 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
-
+
+-[x] 代码和表格的预览
+-[x] markdown解析:mdtex2html
+-[ ] 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
+-[ ] 乱码问题
 
 
 ## Reference
@@ -20,6 +18,7 @@
 - https://github.com/GaiZhenbiao/ChuanhuChatGPT/
 """
 
+import config
 from app_util import *
 
 system_list = [
@@ -58,38 +57,42 @@ with gr.Blocks() as demo:
         gr.Dropdown(
             ["moss", "chatglm-2", "chatpdf"],
             value="moss",
-            label="
-
-        )
-        gr.Dropdown(
-            ["moss", "chatglm-2", "gpt3.5-turbo"],
-            value="gpt3.5-turbo",
-            label="回复生成器",
+            label="model",
+            interactive=True,
             # info="Will add more animals later!"
         )
 
-
-
+        slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
+                                      value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
+        slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
+                                       value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
                                        info="Larger temperature increase the randomness")
         slider_top_p = gr.Slider(
             minimum=0.1,
             maximum=1.0,
-            value=
+            value=config.DEFAULT_TOP_P,
             step=0.05,
             label="Top-p (nucleus sampling)",
         )
 
     ########
     history = gr.State([{"role": "system", "content": system_list[0]}])
-    system.change(reset_state, inputs=[system], outputs=[chatbot, history]
-
-
-    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
+    system.change(reset_state, inputs=[system], outputs=[chatbot, history])
+    clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history])
 
     generate_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
                        show_progress="full")
-
-
-
-
-
+    retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                    show_progress="full")
+    retry_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                    show_progress="full")
+    undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                   show_progress="full")
+
+    slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
+    slider_top_p.change(set_top_p, inputs=[slider_top_p])
+    slider_temperature.change(set_temperature, inputs=[slider_temperature])
+
+# demo.queue().launch(share=False, server_name="0.0.0.0")
+# demo.queue().launch(concurrency_count=1, max_size=5)
+demo.queue().launch()
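A note on the retry wiring above (not part of the commit, only a sketch): registering two separate `.click` handlers on `retry_btn` leaves their ordering to Gradio's event queue. If the undo step must finish before regeneration starts, the two steps can be chained explicitly with `.then()` on the event returned by `.click`, inside the same `gr.Blocks()` context:

# Hypothetical alternative wiring, assuming the same undo_generate / generate
# helpers from app_util.py: chain the two retry steps so they run in order.
retry_btn.click(
    undo_generate, [chatbot, history],
    outputs=[generated_text, chatbot, history],
).then(
    generate, [chatbot, history],
    outputs=[generated_text, chatbot, history],
    show_progress="full",
)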
app_util.py
CHANGED
@@ -1,11 +1,8 @@
-
 import gradio as gr
 from utils.logging_util import logger
 from models.cpp_qwen2 import bot
 
 
-
-
 #
 # def postprocess(self, y):
 # if y is None:
@@ -95,11 +92,21 @@ def set_max_tokens(max_tokens):
     bot.generation_kwargs["max_tokens"] = max_tokens
 
 
-def
-
+def set_top_p(top_p):
+    bot.generation_kwargs["top_p"] = top_p
 
 
-def
-
+def set_temperature(temperature):
+    bot.generation_kwargs["temperature"] = temperature
 
 
+def undo_generate(chatbot, history):
+    if history[-1]["role"] == "user":
+        history = history[:-1]
+        chatbot = chatbot[:-1]
+    elif history[-1]["role"] == "assistant":
+        history = history[:-1]
+        chatbot[-1] = (chatbot[-1][0], None)
+    else:
+        pass
+    return "", chatbot, history
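For illustration (not in the commit), the new undo_generate helper behaves as follows on an OpenAI-style history paired with Gradio's (user, bot) chatbot tuples; the data shapes are assumptions based on how app.py builds `history`:

# Hypothetical usage of undo_generate from app_util.py.
history = [{"role": "system", "content": "You are a helpful assistant."},
           {"role": "user", "content": "hi"},
           {"role": "assistant", "content": "hello"}]
chatbot = [("hi", "hello")]

_, chatbot, history = undo_generate(chatbot, history)
# The last turn was the assistant's, so only its reply is dropped:
# history -> [system, user]      chatbot -> [("hi", None)]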
config.py
ADDED
@@ -0,0 +1,9 @@
+
+
+MAX_SEQUENCE_LENGTH = 2048  # max_seq_len
+
+DEFAULT_MAX_TOKENS = 512
+# DEFAULT_MAX_NEW_TOKENS = None
+DEFAULT_TOP_K = 100
+DEFAULT_TOP_P = 0.95
+DEFAULT_TEMPERATURE = 5
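A quick reading of these constants (clarification, not from the commit): MAX_SEQUENCE_LENGTH is passed to llama.cpp as the context window (n_ctx in models/cpp_qwen2.py), while DEFAULT_MAX_TOKENS only caps a single completion, so the prompt and the completion together must fit in the window:

# Hypothetical sanity check; prompt_tokens is an assumed example value.
import config

prompt_tokens = 1200  # tokens already consumed by system prompt + chat history
assert prompt_tokens + config.DEFAULT_MAX_TOKENS <= config.MAX_SEQUENCE_LENGTH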
simulator.py → models/base_model.py
RENAMED
File without changes
models/cpp_qwen2.py
CHANGED
@@ -17,11 +17,12 @@ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-C
 
 import json
 import copy
-from simulator import Simulator
+from base_model import Simulator
 import llama_cpp
 # import llama_cpp.llama_tokenizer
 from transformers import AutoTokenizer
 from utils.logging_util import logger
+import config
 
 
 class Qwen2Simulator(Simulator):
@@ -30,9 +31,12 @@ class Qwen2Simulator(Simulator):
         if from_local:
             self.hf_tokenizer = AutoTokenizer.from_pretrained(
                 "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
-            self.llm = llama_cpp.Llama(
+            self.llm = llama_cpp.Llama(  # n_ctx, n_threads
+
                 model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
+                n_ctx=config.MAX_SEQUENCE_LENGTH,  #
+                # n_threads=None,  # 默认会根据cpu数来设置 n_threads
                 verbose=False,
             )
         else:
@@ -43,13 +47,13 @@ class Qwen2Simulator(Simulator):
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 verbose=False,
             )
-        logger.info(f"llm has been initialized: {self.llm}")
+        logger.info(f"llm has been initialized: {self.llm}, n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}")
 
         self.generation_kwargs = dict(
-            temperature=
-
-            top_k=
-            max_tokens=
+            temperature=config.DEFAULT_TEMPERATURE,
+            top_p=config.DEFAULT_TOP_P,
+            top_k=config.DEFAULT_TOP_K,
+            max_tokens=config.DEFAULT_MAX_TOKENS,
             repeat_penalty=1.1,
             # qwen2-0.5b-chat 有时内容生成结束没有<|im_end|>,直接跟 <|im_start|>
             stop=[
@@ -58,7 +62,6 @@ class Qwen2Simulator(Simulator):
                 "<|endoftext|>",
             ],
         )
-        ### local
 
     def generate_query(self, messages, stream=True):
         """
@@ -66,7 +69,7 @@ class Qwen2Simulator(Simulator):
         :return:
         """
         assert messages[-1]["role"] != "user"
-        logger.info(f"generating {json.dumps(messages)}")
+        logger.info(f"generating {json.dumps(messages, ensure_ascii=False)}")
         inputs = self.hf_tokenizer.apply_chat_template(
             messages,
             tokenize=False,
@@ -78,7 +81,6 @@ class Qwen2Simulator(Simulator):
         else:
             return self._generate(inputs)
 
-
     def generate_response(self, messages, stream=True):
         assert messages[-1]["role"] == "user"
         logger.info(f"generating {json.dumps(messages, ensure_ascii=False)}")
@@ -93,10 +95,6 @@ class Qwen2Simulator(Simulator):
         return self._generate(inputs)
 
     def _generate(self, inputs):
-        """
-        TODO: chat with cache.
-
-        """
         logger.info(f"generation_kwargs {self.generation_kwargs}")
         output = self.llm(
             inputs,
@@ -117,6 +115,7 @@ class Qwen2Simulator(Simulator):
             generated_text += stream["choices"][0]["text"]
             yield generated_text
 
+
 bot = Qwen2Simulator()
 
 if __name__ == "__main__":
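As a standalone reference for the llama-cpp-python calls used above (a sketch under assumptions, not the repo's exact code; the GGUF path, prompt, and sampling values are placeholders):

# Minimal sketch of the same pattern: a GGUF model driven by the HF tokenizer,
# a bounded context window, and a streamed completion consumed chunk by chunk.
import llama_cpp
from llama_cpp.llama_tokenizer import LlamaHFTokenizer
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
llm = llama_cpp.Llama(
    model_path="qwen2-0_5b-instruct-fp16.gguf",   # placeholder local GGUF path
    tokenizer=LlamaHFTokenizer(hf_tokenizer),
    n_ctx=2048,                                   # mirrors config.MAX_SEQUENCE_LENGTH
    verbose=False,
)

prompt = hf_tokenizer.apply_chat_template(
    [{"role": "user", "content": "你好"}],
    tokenize=False, add_generation_prompt=True,
)
for chunk in llm(prompt, max_tokens=64, temperature=0.8, top_p=0.95, top_k=100,
                 stream=True, stop=["<|im_end|>", "<|endoftext|>"]):
    print(chunk["choices"][0]["text"], end="", flush=True)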
models/hf_qwen2.py
CHANGED
@@ -1,7 +1,7 @@
 "Qwen/Qwen2-0.5B-Instruct"
 
 from threading import Thread
-from simulator import Simulator
+from base_model import Simulator
 
 from transformers import TextIteratorStreamer
 from transformers import AutoModelForCausalLM, AutoTokenizer