xu song committed on
Commit 459fbe3
1 Parent(s): 55b26f1
app.py CHANGED
@@ -4,15 +4,13 @@
 
 # 难点
 
-
-
 ## TODO
 
-- 第一句:
-- 代码和表格的预览
-- markdown解析:mdtex2html
-- 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
-- 一个button,
+
+-[x] 代码和表格的预览
+-[x] markdown解析:mdtex2html
+-[ ] 可编辑chatbot:https://github.com/gradio-app/gradio/issues/4444
+-[ ] 乱码问题
 
 
 ## Reference
@@ -20,6 +18,7 @@
 - https://github.com/GaiZhenbiao/ChuanhuChatGPT/
 """
 
+import config
 from app_util import *
 
 system_list = [
@@ -58,38 +57,42 @@ with gr.Blocks() as demo:
         gr.Dropdown(
             ["moss", "chatglm-2", "chatpdf"],
             value="moss",
-            label="问题生成器",
-            # info="Will add more animals later!"
-        )
-        gr.Dropdown(
-            ["moss", "chatglm-2", "gpt3.5-turbo"],
-            value="gpt3.5-turbo",
-            label="回复生成器",
+            label="model",
+            interactive=True,
             # info="Will add more animals later!"
         )
 
-        slider_max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-        slider_temperature = gr.Slider(minimum=0.1, maximum=10.0, value=5, step=0.1, label="Temperature",
+        slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
+                                      value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
+        slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
+                                       value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
                                        info="Larger temperature increase the randomness")
         slider_top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
-           value=0.95,
+           value=config.DEFAULT_TOP_P,
            step=0.05,
            label="Top-p (nucleus sampling)",
        )
 
    ########
    history = gr.State([{"role": "system", "content": system_list[0]}])
-   system.change(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
-   # submit_btn.click(reset_user_input, [], [user_input])
-
-   clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history], show_progress="full")
+   system.change(reset_state, inputs=[system], outputs=[chatbot, history])
+   clear_btn.click(reset_state, inputs=[system], outputs=[chatbot, history])
 
    generate_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
                       show_progress="full")
-
-   slider_max_new_tokens.change(set_max_tokens, inputs=[slider_max_new_tokens])
-
-   demo.queue().launch(share=False, server_name="0.0.0.0")
-   # demo.queue().launch(share=True)
+   retry_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                   show_progress="full")
+   retry_btn.click(generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                   show_progress="full")
+   undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history],
+                  show_progress="full")
+
+   slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
+   slider_top_p.change(set_top_p, inputs=[slider_top_p])
+   slider_temperature.change(set_temperature, inputs=[slider_temperature])
+
+# demo.queue().launch(share=False, server_name="0.0.0.0")
+# demo.queue().launch(concurrency_count=1, max_size=5)
+demo.queue().launch()
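Note on the retry wiring: the commit registers two independent click handlers on retry_btn (undo_generate, then generate). A hedged alternative sketch, assuming Gradio's event chaining with .then() and the same handler signatures, makes the ordering explicit; this is illustrative only and not part of the commit:

    # Hypothetical alternative (not in this commit): chain the two retry steps so
    # the rollback always finishes before a new completion is requested.
    retry_btn.click(
        undo_generate, [chatbot, history],
        outputs=[generated_text, chatbot, history],
    ).then(
        generate, [chatbot, history],
        outputs=[generated_text, chatbot, history],
        show_progress="full",
    )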
app_util.py CHANGED
@@ -1,11 +1,8 @@
-
 import gradio as gr
 from utils.logging_util import logger
 from models.cpp_qwen2 import bot
 
 
-
-
 #
 # def postprocess(self, y):
 #     if y is None:
@@ -95,11 +92,21 @@ def set_max_tokens(max_tokens):
     bot.generation_kwargs["max_tokens"] = max_tokens
 
 
-def clear_history():
-    pass
+def set_top_p(top_p):
+    bot.generation_kwargs["top_p"] = top_p
 
 
-def undo_generate():
-    pass
+def set_temperature(temperature):
+    bot.generation_kwargs["temperature"] = temperature
 
 
+def undo_generate(chatbot, history):
+    if history[-1]["role"] == "user":
+        history = history[:-1]
+        chatbot = chatbot[:-1]
+    elif history[-1]["role"] == "assistant":
+        history = history[:-1]
+        chatbot[-1] = (chatbot[-1][0], None)
+    else:
+        pass
+    return "", chatbot, history
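The new undo_generate assumes history follows the OpenAI-style message format while the chatbot component holds (user, assistant) tuples. A minimal usage sketch (the message contents are illustrative, not from the commit):

    history = [
        {"role": "system", "content": "You are a helpful assistant."},   # illustrative
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "Hi, how can I help?"},
    ]
    chatbot = [("hello", "Hi, how can I help?")]

    _, chatbot, history = undo_generate(chatbot, history)
    # history now ends with the user turn, and the last chatbot row becomes
    # ("hello", None), ready for retry_btn to regenerate the reply.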
config.py ADDED
@@ -0,0 +1,9 @@
+
+
+MAX_SEQUENCE_LENGTH = 2048  # max_seq_len
+
+DEFAULT_MAX_TOKENS = 512
+# DEFAULT_MAX_NEW_TOKENS = None
+DEFAULT_TOP_K = 100
+DEFAULT_TOP_P = 0.95
+DEFAULT_TEMPERATURE = 5
simulator.py → models/base_model.py RENAMED
File without changes
models/cpp_qwen2.py CHANGED
@@ -17,11 +17,12 @@ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-C
 
 import json
 import copy
-from simulator import Simulator
+from base_model import Simulator
 import llama_cpp
 # import llama_cpp.llama_tokenizer
 from transformers import AutoTokenizer
 from utils.logging_util import logger
+import config
 
 
 class Qwen2Simulator(Simulator):
@@ -30,9 +31,12 @@ class Qwen2Simulator(Simulator):
         if from_local:
             self.hf_tokenizer = AutoTokenizer.from_pretrained(
                 "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
-            self.llm = llama_cpp.Llama(
+            self.llm = llama_cpp.Llama(  # n_ctx, n_threads
+
                 model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
+                n_ctx=config.MAX_SEQUENCE_LENGTH,  #
+                # n_threads=None,  # 默认会根据cpu数来设置 n_threads
                 verbose=False,
             )
         else:
@@ -43,13 +47,13 @@ class Qwen2Simulator(Simulator):
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 verbose=False,
             )
-        logger.info(f"llm has been initialized: {self.llm}")
+        logger.info(f"llm has been initialized: {self.llm}, n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}")
 
         self.generation_kwargs = dict(
-            temperature=5,
-            # top_p=0.1,
-            top_k=40,
-            max_tokens=20,
+            temperature=config.DEFAULT_TEMPERATURE,
+            top_p=config.DEFAULT_TOP_P,
+            top_k=config.DEFAULT_TOP_K,
+            max_tokens=config.DEFAULT_MAX_TOKENS,
             repeat_penalty=1.1,
             # qwen2-0.5b-chat 有时内容生成结束没有<|im_end|>,直接跟 <|im_start|>
             stop=[
@@ -58,7 +62,6 @@ class Qwen2Simulator(Simulator):
                 "<|endoftext|>",
             ],
         )
-        ### local
 
     def generate_query(self, messages, stream=True):
         """
@@ -66,7 +69,7 @@
        :return:
        """
        assert messages[-1]["role"] != "user"
-        logger.info(f"generating {json.dumps(messages)}")
+        logger.info(f"generating {json.dumps(messages, ensure_ascii=False)}")
        inputs = self.hf_tokenizer.apply_chat_template(
            messages,
            tokenize=False,
@@ -78,7 +81,6 @@
        else:
            return self._generate(inputs)
 
-
    def generate_response(self, messages, stream=True):
        assert messages[-1]["role"] == "user"
        logger.info(f"generating {json.dumps(messages, ensure_ascii=False)}")
@@ -93,10 +95,6 @@
            return self._generate(inputs)
 
    def _generate(self, inputs):
-        """
-        TODO: chat with cache.
-
-        """
        logger.info(f"generation_kwargs {self.generation_kwargs}")
        output = self.llm(
            inputs,
@@ -117,6 +115,7 @@
            generated_text += stream["choices"][0]["text"]
            yield generated_text
 
+
bot = Qwen2Simulator()
 
if __name__ == "__main__":
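A minimal sketch of driving the streaming interface above (the prompt is illustrative; it relies only on the module-level bot instance and the accumulate-and-yield loop shown in the last hunk):

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},  # illustrative
        {"role": "user", "content": "Briefly introduce yourself."},
    ]
    for partial in bot.generate_response(messages, stream=True):
        print(partial)  # each yield is the full text generated so far, not a delta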
models/hf_qwen2.py CHANGED
@@ -1,7 +1,7 @@
 "Qwen/Qwen2-0.5B-Instruct"
 
 from threading import Thread
-from simulator import Simulator
+from base_model import Simulator
 
 from transformers import TextIteratorStreamer
 from transformers import AutoModelForCausalLM, AutoTokenizer