File size: 2,413 Bytes
14e4843 3020792 d6d7ec6 3020792 14e4843 9ceb74b 85e30d4 14e4843 d6d7ec6 14e4843 d6d7ec6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from typing import List, Literal, Optional, Tuple, Union
import torch
import transformers
from lm_eval.api.registry import register_model
from src.backend.hflm_with_measurement import HFLMWithMeasurement
@register_model("hf-chat")
class HFLMwithChatTemplate(HFLMWithMeasurement):
    """``HFLMWithMeasurement`` variant that can wrap prompts in a chat template.

    Registered with lm_eval's model registry under the name ``"hf-chat"``.
    When ``use_chat_template`` is true, every string handed to
    ``tok_batch_encode`` is first rendered as a single user message through
    the tokenizer's chat template and only then tokenized.
    """

    def __init__(self, use_chat_template=True, **kwargs):
        """Initialise the parent model and remember the chat-template switch.

        Args:
            use_chat_template: Whether ``tok_batch_encode`` should render the
                prompts through the tokenizer's chat template.
            **kwargs: Forwarded unchanged to ``HFLMWithMeasurement.__init__``.
        """
        super().__init__(**kwargs)
        self.use_chat_template = use_chat_template

    def _render_chat_prompts(self, strings: List[str]) -> List[str]:
        """Render each prompt as a one-message user turn via the chat template.

        This is best-effort: if rendering fails for any prompt, a message is
        printed and the original ``strings`` are returned untouched, so a
        batch is never partially templated.
        """
        try:
            # Only models whose name contains "dbrx" get the generation prompt
            # appended; every other model uses the template's default.
            template_kwargs = {}
            if "dbrx" in self.model.name_or_path:
                template_kwargs["add_generation_prompt"] = True

            return [
                self.tokenizer.apply_chat_template(
                    [{"role": "user", "content": f"{input_string}"}],
                    tokenize=False,
                    **template_kwargs,
                )
                for input_string in strings
            ]
        except Exception:
            # Deliberately broad (templates can fail in many ways), but not a
            # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
            print(f"failed to update input string with chat template: {self._model}")
            return strings

    def tok_batch_encode(
        self,
        strings: List[str],
        padding_side: str = "left",
        left_truncate_len: Optional[int] = None,
        truncation: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Tokenize a batch of strings into padded tensors.

        Unlike ``tok_encode``, this converts to tensors and pads automatically.

        Args:
            strings: Prompts to encode. They are passed through the chat
                template first when ``self.use_chat_template`` is true.
            padding_side: Padding side set on the tokenizer for the duration
                of this call; the previous value is restored afterwards.
            left_truncate_len: If truthy, keep only the last
                ``left_truncate_len`` positions of every row.
            truncation: Forwarded to the tokenizer's ``truncation`` argument.

        Returns:
            The ``(input_ids, attention_mask)`` pair from the tokenizer output.

        Raises:
            ValueError: If ``self.AUTO_MODEL_CLASS`` is neither
                ``AutoModelForCausalLM`` nor ``AutoModelForSeq2SeqLM``.
        """
        if self.use_chat_template:
            strings = self._render_chat_prompts(strings)

        # Decide this before touching tokenizer state so that an unsupported
        # model class cannot leave the tokenizer with a modified padding side.
        if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM:
            add_special_tokens = False
        elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
            add_special_tokens = True
        else:
            raise ValueError(
                f"unsupported AUTO_MODEL_CLASS for tok_batch_encode: {self.AUTO_MODEL_CLASS!r}"
            )

        old_padding_side = self.tokenizer.padding_side
        self.tokenizer.padding_side = padding_side
        try:
            encoding = self.tokenizer(
                strings,
                truncation=truncation,
                padding="longest",
                return_tensors="pt",
                add_special_tokens=add_special_tokens,
            )
        finally:
            # The tokenizer is shared state: restore it even if encoding fails.
            self.tokenizer.padding_side = old_padding_side

        if left_truncate_len:
            # Keep the right-most columns, i.e. drop tokens from the left.
            encoding["input_ids"] = encoding["input_ids"][:, -left_truncate_len:]
            encoding["attention_mask"] = encoding["attention_mask"][:, -left_truncate_len:]

        return encoding["input_ids"], encoding["attention_mask"]
|