NGUYEN, Xuan Phi committed on
Commit 4a01c79
1 Parent(s): 3b65eaa
multipurpose_chatbot/configs.py CHANGED
@@ -184,7 +184,7 @@ CHUNK_OVERLAP = int(os.environ.get("CHUNK_SIZE", "50"))
 
 
 DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, honest and safe AI assistant."""
-DEFAULT_SYSTEM_PROMPT = """You are SeaLLM, you are a helpful, respectful and honest AI assistant. Based on your internal clock, the current date time: {cur_datetime}.
+DEFAULT_SYSTEM_PROMPT = """You are SeaLLM, you are a helpful, respectful and honest AI assistant. Based on your internal clock, the current date time is {cur_datetime}.
 
 Your knowledge base was last updated on August 2023. Thus, you should answer questions about events prior to and after August 2023 the way a highly informed individual in August 2023 would if they were talking to someone from the above date, and can let the human know this when relevant.
 
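Note: the wording fix matters because {cur_datetime} is a str.format placeholder, and "the current date time is {cur_datetime}." reads as a full sentence once the value is substituted. A minimal sketch of how the template is presumably rendered at request time (the system_prompt variable and the direct .format(...) call are illustrative, not necessarily the repo's actual call site):

from datetime import datetime

DEFAULT_SYSTEM_PROMPT = """You are SeaLLM, you are a helpful, respectful and honest AI assistant. Based on your internal clock, the current date time is {cur_datetime}."""

# Fill the placeholder with the same date format used by get_datetime_string().
system_prompt = DEFAULT_SYSTEM_PROMPT.format(
    cur_datetime=datetime.now().strftime("%B %d, %Y")
)
print(system_prompt)
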
multipurpose_chatbot/demos/chat_interface.py CHANGED
@@ -110,7 +110,7 @@ def get_datetime_string():
     # tz_string = datetime.now().astimezone()
     # dt_string = now.strftime("%B %d, %Y, %H:%M:%S")
     # dt_string = datetime.now().astimezone().strftime("%B %d, %Y, %H:%M GMT%Z")
-    dt_string = datetime.now().astimezone().strftime("%B %d, %Y")
+    dt_string = datetime.now().strftime("%B %d, %Y")
     return dt_string
 
 
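Note: dropping .astimezone() is behavior-preserving here. It only attaches the local tzinfo to the datetime object, and the "%B %d, %Y" pattern prints neither an offset nor a zone name, so both expressions yield the same string. A quick illustrative check (not from the repo):

from datetime import datetime

# .astimezone() converts a naive local datetime into an aware one in the
# same local zone; the date fields are unchanged, and "%B %d, %Y" never
# prints tzinfo, so the two formatted strings are identical.
now = datetime.now()
assert now.astimezone().strftime("%B %d, %Y") == now.strftime("%B %d, %Y")
print(now.strftime("%B %d, %Y"))  # e.g. "March 05, 2024"
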
multipurpose_chatbot/engines/transformers_engine.py CHANGED
@@ -550,40 +550,47 @@ class TransformersEngine(BaseEngine):
         self._model.sample = types.MethodType(NewGenerationMixin.sample_stream, self._model)
 
         self.maybe_raise_safety(prompt)
-
-        with torch.no_grad():
-            inputs = self.tokenizer(prompt, return_tensors='pt')
-            num_tokens = inputs.input_ids.size(1)
-
-            inputs = inputs.to(self._model.device)
-
-            generator = self._model.generate(
-                **inputs,
-                do_sample=True,
-                temperature=temperature,
-                max_new_tokens=max_tokens,
-                pad_token_id=self.tokenizer.pad_token_id,
-            )
-
-            out_tokens = []
-            response = None
-            for index, token in enumerate(generator):
-                out_tokens.extend(token.tolist())
-                response = self.tokenizer.decode(out_tokens)
-                if "<|im_start|>assistant\n" in response:
-                    response = response.split("<|im_start|>assistant\n")[-1]
-                num_tokens += 1
-                # print(f"{response}", end='\r')
-                # sys.stdout.flush()
-                self.maybe_raise_safety(response, gen_index=index)
-                yield response, num_tokens
-
-            del generator
-            if response is not None:
-                if "<|im_start|>assistant\n" in response:
-                    response = response.split("<|im_start|>assistant\n")[-1]
-
-                self.maybe_raise_safety(response)
-                full_text = prompt + response
-                num_tokens = len(self.tokenizer.encode(full_text))
-                yield response, num_tokens
+
+        if temperature == 0:
+            temperature = 0.0001
+
+        try:
+
+            with torch.no_grad():
+                inputs = self.tokenizer(prompt, return_tensors='pt')
+                num_tokens = inputs.input_ids.size(1)
+
+                inputs = inputs.to(self._model.device)
+
+                generator = self._model.generate(
+                    **inputs,
+                    do_sample=True,
+                    temperature=temperature,
+                    max_new_tokens=max_tokens,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                )
+
+                out_tokens = []
+                response = None
+                for index, token in enumerate(generator):
+                    out_tokens.extend(token.tolist())
+                    response = self.tokenizer.decode(out_tokens)
+                    if "<|im_start|>assistant\n" in response:
+                        response = response.split("<|im_start|>assistant\n")[-1]
+                    num_tokens += 1
+                    # print(f"{response}", end='\r')
+                    # sys.stdout.flush()
+                    self.maybe_raise_safety(response, gen_index=index)
+                    yield response, num_tokens
+
+                del generator
+                if response is not None:
+                    if "<|im_start|>assistant\n" in response:
+                        response = response.split("<|im_start|>assistant\n")[-1]
+
+                    self.maybe_raise_safety(response)
+                    full_text = prompt + response
+                    num_tokens = len(self.tokenizer.encode(full_text))
+                    yield response, num_tokens
+        except RuntimeError as e:
+            raise gr.Error(str(e))
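
Two notes on this hunk. The temperature guard exists because Hugging Face transformers rejects a zero temperature when do_sample=True (logits are divided by the temperature), so a tiny positive value is substituted to get effectively greedy decoding. The try/except turns a RuntimeError (most commonly CUDA out-of-memory during generation) into gr.Error, which Gradio renders as an alert in the UI instead of leaving a server-side traceback. A minimal sketch of that error-surfacing pattern, assuming a Gradio app context (the generate handler below is a hypothetical stand-in for the engine's streaming call):

import gradio as gr

def generate(prompt: str) -> str:
    try:
        # Stand-in for the engine's generation step; a real CUDA OOM
        # would surface here as a RuntimeError.
        raise RuntimeError("CUDA out of memory")
    except RuntimeError as e:
        # Re-raise as gr.Error so Gradio shows the message to the user.
        raise gr.Error(str(e))

demo = gr.Interface(fn=generate, inputs="text", outputs="text")
# demo.launch()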