xu song committed
Commit 123d4a3
1 Parent(s): 1b2b08c
Files changed (2)
  1. app_util.py +2 -0
  2. models/cpp_qwen2.py +2 -2
app_util.py CHANGED

@@ -29,6 +29,7 @@ def generate_query(chatbot, history):
     for query, query_tokens in streamer:
         chatbot[-1] = (query, None)
         yield query, chatbot, history
+    query_tokens = bot.strip_stoptokens(query_tokens)
     history.append({"role": "user", "content": query, "tokens": query_tokens})
     yield query, chatbot, history
 
@@ -49,6 +50,7 @@ def generate_response(chatbot, history):
         chatbot[-1] = (query, response)
         yield response, chatbot, history
 
+    response_tokens = bot.strip_stoptokens(response_tokens)
     history.append({"role": "assistant", "content": response, "tokens": response_tokens})
     print(f"chatbot is {chatbot}")
     print(f"history is {history}")
models/cpp_qwen2.py CHANGED

@@ -86,7 +86,7 @@ class Qwen2Simulator(Simulator):
     def tokenize(self, text):
         return self.llm.tokenize(text.encode("utf-8"))
 
-    def _strip_stoptokens(self, tokens):
+    def strip_stoptokens(self, tokens):
         while tokens and tokens[0] in self.stop_tokens:
             tokens.pop(0)
         while tokens and tokens[-1] in self.stop_tokens:
@@ -104,7 +104,7 @@ class Qwen2Simulator(Simulator):
             if "tokens" not in message:  # tokens
                 message["tokens"] = self.tokenize(message["content"])
             input_ids += self.tokenize(f"<|im_start|>{message['role']}\n") \
-                + self._strip_stoptokens(message["tokens"]) \
+                + message["tokens"] \
                 + self.tokenize("<|im_end|>\n")
         input_ids += start_tokens
         if stream:
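The first hunk truncates strip_stoptokens mid-body. A plausible completion, assuming the trailing loop mirrors the leading one and the list is returned to the caller (the tokens.pop() body and the return statement are assumptions; the diff cuts off before them):

    def strip_stoptokens(self, tokens):
        # Trim stop tokens (e.g. the <|im_start|>/<|im_end|> ids) from
        # both ends of the token list, in place.
        while tokens and tokens[0] in self.stop_tokens:
            tokens.pop(0)
        while tokens and tokens[-1] in self.stop_tokens:
            tokens.pop()  # assumed: the hunk ends before this line
        return tokens     # assumed: callers use the returned list

With the stripping moved to the app layer, generate_prompt splices message["tokens"] directly between the tokenized <|im_start|>{role}\n and <|im_end|>\n markers, so history entries are expected to arrive already stripped.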