import torch
import torch.nn.functional as F

def apply_temperature(scores, tempt):
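    """Scale logits by the sampling temperature.

    Temperatures in (0, 1) sharpen the distribution and values above 1
    flatten it; a non-positive temperature leaves the scores unchanged.
    """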
    if tempt > 0:
        scores = scores / tempt
    return scores


def apply_top_p(scores, top_p, filter_value=-float("Inf"), min_tokens_to_keep=1):
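    """Nucleus (top-p) filtering.

    Keep the smallest set of tokens whose cumulative probability exceeds
    ``top_p`` (at least ``min_tokens_to_keep`` of them) and set every other
    logit to ``filter_value``.
    """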
    if 0 < top_p < 1:
        sorted_logits, sorted_indices = torch.sort(scores, descending=False)
        cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)
        # In ascending order, drop tokens whose cumulative probability is at
        # most 1 - top_p; the surviving tail is the top-p nucleus.
        sorted_indices_to_remove = cumulative_probs <= (1 - top_p)
        if min_tokens_to_keep > 1:
            # Keep at least min_tokens_to_keep tokens (the highest-probability
            # entries sit at the end of the ascending sort).
            sorted_indices_to_remove[..., -min_tokens_to_keep:] = 0
        # Scatter the sorted mask back to the original vocabulary indexing.
        indices_to_remove = sorted_indices_to_remove.scatter(
            1, sorted_indices, sorted_indices_to_remove
        )
        scores = scores.masked_fill(indices_to_remove, filter_value)
    return scores


def apply_top_k(logits, top_k):
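    """Top-k filtering: set every logit below the k-th largest one to -inf,
    so that sampling only ever draws from the k most likely tokens.
    """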
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a logit below the k-th largest logit.
        indices_to_remove = logits < torch.topk(logits.float(), top_k)[0][..., -1, None]
        logits[indices_to_remove] = -float("Inf")
    return logits


def apply_advanced_repetition_penalty(
    input_ids, scores, penalty_range, penalty_slope, penalty
):
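    """Repetition penalty over a trailing context window, in the style of
    KoboldAI's advanced repetition penalty.

    Tokens appearing in the last ``penalty_range`` positions of ``input_ids``
    are made less likely; a non-zero ``penalty_slope`` ramps the penalty up
    from 1 at the oldest position in the window to ``penalty`` at the newest.
    """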
    penalty_range = int(penalty_range)
    clipped_penalty_range = min(input_ids.shape[-1], penalty_range)
    if penalty != 1.0:
        if penalty_range > 0:
            if clipped_penalty_range < input_ids.shape[1]:
                input_ids = input_ids[..., -clipped_penalty_range:]
            if penalty_slope != 0:
                # Map window positions to [-1, 1], oldest to newest.
                _penalty = (
                    torch.arange(
                        penalty_range, dtype=scores.dtype, device=scores.device
                    )
                    / (penalty_range - 1)
                ) * 2.0 - 1
                # Sigmoid-like ramp whose steepness is set by penalty_slope.
                _penalty = (penalty_slope * _penalty) / (
                    1 + torch.abs(_penalty) * (penalty_slope - 1)
                )
                # Rescale to [1, penalty]; the most recent tokens get the full penalty.
                _penalty = 1 + ((_penalty + 1) / 2).unsqueeze(0) * (penalty - 1)
                penalty = _penalty[..., -clipped_penalty_range:]
        score = torch.gather(scores, 1, input_ids)
        # Make repeated tokens less likely regardless of the logit's sign.
        score = torch.where(score <= 0, score * penalty, score / penalty)
        scores.scatter_(1, input_ids, score)
    return scores


class LmGeneration:
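    """Batched autoregressive sampler for a model whose forward pass takes
    (tokens, prev_pos, batch_indices), e.g. a LLaMA-style decoder with a
    key/value cache."""
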
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def generate(self, args, prompts, cut_off=None, cut_off_times=1):
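        """Sample continuations for ``prompts``.

        An example stops once it emits eos or, if ``cut_off`` is given, once
        its decoded text has ended with ``cut_off`` ``cut_off_times`` times;
        otherwise decoding runs until ``args.seq_length`` tokens.
        """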
        if cut_off is not None:
            cut_off_times = [cut_off_times for _ in range(len(prompts))]
        batch = len(prompts)
        assert batch <= args.batch_size
        prompt_tokens = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts]
        min_prompt_len = min([len(x) for x in prompt_tokens])
        # max_prompt_len = max([len(x) for x in prompt_tokens])
        total_len = args.seq_length
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokens = torch.full((batch, total_len), self.tokenizer.pad_id).to(device).long()
        for idx, t in enumerate(prompt_tokens):
            tokens[idx, : len(t)] = torch.tensor(t).long()
        # True where a prompt token sits; used to force prompt tokens while decoding.
        mask = tokens != self.tokenizer.pad_id
        start_pos = min_prompt_len
        prev_pos = 0
        # Indices of the examples that are still generating.
        continue_examples = [i for i in range(batch)]
        with torch.no_grad():
            for cur_pos in range(start_pos, total_len):
                logits = self.model.forward(
                    tokens[continue_examples, prev_pos:cur_pos], prev_pos, continue_examples
                ).float()
                next_token_scores = apply_top_k(logits, top_k=args.top_k)
                next_token_scores = apply_top_p(next_token_scores, args.top_p)
                next_token_scores = apply_temperature(next_token_scores, args.temperature)
                next_token_scores = apply_advanced_repetition_penalty(
                    tokens[continue_examples, :cur_pos],
                    next_token_scores,
                    args.repetition_penalty_range,
                    args.repetition_penalty_slope,
                    args.repetition_penalty,
                )
                scores = F.softmax(next_token_scores, dim=-1)
                next_token = torch.multinomial(scores, num_samples=1).squeeze(1)
                next_token = next_token.reshape(-1)
                # While an example's prompt extends past cur_pos, keep its prompt token.
                next_token = torch.where(
                    mask[continue_examples, cur_pos],
                    tokens[continue_examples, cur_pos],
                    next_token,
                )
                tokens[continue_examples, cur_pos] = next_token
                prev_pos = cur_pos
                # Drop examples that emitted eos or matched the cut-off string.
                continue_examples = []
                for i, t in enumerate(tokens.tolist()):
                    try:
                        t.index(self.tokenizer.eos_id)
                    except ValueError:
                        if cut_off is not None:
                            if self.tokenizer.decode(t[: cur_pos + 1]).endswith(cut_off):
                                if cut_off_times[i] == 1:
                                    continue
                                cut_off_times[i] -= 1
                        continue_examples.append(i)
                if len(continue_examples) == 0:
                    break
        decoder = []
        for i, t in enumerate(tokens.tolist()):
            t = t[: args.seq_length]
            # Truncate at the first pad / eos token; either may be absent,
            # so each lookup gets its own try block.
            try:
                t = t[: t.index(self.tokenizer.pad_id)]
            except ValueError:
                pass
            try:
                t = t[: t.index(self.tokenizer.eos_id)]
            except ValueError:
                pass
            decoder.append(self.tokenizer.decode(t))
        return decoder
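
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original file). The model and
# tokenizer are assumptions: any causal LM exposing
# forward(tokens, prev_pos, batch_indices) plus a tokenizer with
# encode(text, bos, eos), decode(ids), pad_id and eos_id fits this class.
# ---------------------------------------------------------------------------
# from argparse import Namespace
#
# args = Namespace(
#     batch_size=4,
#     seq_length=512,
#     top_k=40,
#     top_p=0.9,
#     temperature=0.8,
#     repetition_penalty=1.15,
#     repetition_penalty_range=512,
#     repetition_penalty_slope=3.33,
# )
# generator = LmGeneration(model, tokenizer)  # model/tokenizer loaded elsewhere
# print(generator.generate(args, ["Once upon a time"])[0])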