xu song committed on
Commit
d8e1d2b
1 Parent(s): 1e92d4c
Files changed (1) hide show
  1. models/cpp_qwen2.py +10 -2
models/cpp_qwen2.py CHANGED
@@ -128,6 +128,8 @@ class Qwen2Simulator(Simulator):
128
 
129
  self.user_start_tokens = self.tokenize("<|im_start|>user\n")
130
  self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
 
 
131
 
132
  def tokenize(self, text):
133
  return self.llm.tokenize(text.encode("utf-8"))
@@ -142,6 +144,13 @@ class Qwen2Simulator(Simulator):
142
  return tokens
143
 
144
  def generate(self, history, stream=True):
 
 
 
 
 
 
 
145
  if history[-1]['role'] in ["user"]:
146
  start_tokens = self.assistant_start_tokens
147
  elif history[-1]['role'] in ["assistant", "system"]:
@@ -162,8 +171,6 @@ class Qwen2Simulator(Simulator):
162
 
163
  def _stream_generate(self, input_ids):
164
  logger.info(f"generation_kwargs {self.generation_kwargs}")
165
-
166
- # self.llm.generate .set_cache .last_n_tokens_size .reset .ctx ._ctx
167
  output = self.llm.create_completion(
168
  input_ids,
169
  stream=True,
@@ -190,6 +197,7 @@ if __name__ == "__main__":
190
  print(generated_text, generated_tokens)
191
 
192
  for i in range(3):
 
193
  messages.append(
194
  {"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
195
  print("######## requesting", messages)
 
128
 
129
  self.user_start_tokens = self.tokenize("<|im_start|>user\n")
130
  self.assistant_start_tokens = self.tokenize("<|im_start|>assistant\n")
131
+ # self.llm.generate .set_cache .last_n_tokens_size .reset .ctx ._ctx
132
+ # self.llm.set_cache()
133
 
134
  def tokenize(self, text):
135
  return self.llm.tokenize(text.encode("utf-8"))
 
144
  return tokens
145
 
146
  def generate(self, history, stream=True):
147
+ """
148
+ 额外前向:remains 5 to forward "<|im_end|>\n<|im_start|>assistant\n"
149
+
150
+ :param history:
151
+ :param stream:
152
+ :return:
153
+ """
154
  if history[-1]['role'] in ["user"]:
155
  start_tokens = self.assistant_start_tokens
156
  elif history[-1]['role'] in ["assistant", "system"]:
 
171
 
172
  def _stream_generate(self, input_ids):
173
  logger.info(f"generation_kwargs {self.generation_kwargs}")
 
 
174
  output = self.llm.create_completion(
175
  input_ids,
176
  stream=True,
 
197
  print(generated_text, generated_tokens)
198
 
199
  for i in range(3):
200
+ generated_tokens = bot.strip_stoptokens(generated_tokens)
201
  messages.append(
202
  {"role": "user" if i % 2 == 0 else "assistant", "content": generated_text, "tokens": generated_tokens})
203
  print("######## requesting", messages)