# Earlier sentiment-analysis demo, kept commented out for reference:
#
# import gradio as gr
# from transformers import pipeline
#
# sentiment = pipeline("sentiment-analysis")
#
# def get_sentiment(input_text):
#     return sentiment(input_text)
#
# iface = gr.Interface(fn=get_sentiment,
#                      inputs="text",
#                      outputs=["text"],
#                      title="Sentiment Analysis",
#                      description="Ciao!!!")
#
# iface.launch(inline=False)

import gradio as gr
from typing import Iterator, List, Tuple

import torch
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig

# Load the LLaMA-7B checkpoint on CPU; all generation below uses model.device.
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    device_map="cpu",
)

def evaluate(question):
    # Single-turn helper: wrap the question in the chat template and generate once.
    prompt = f"The conversation between human and AI assistant.\n[|Human|] {question}\n[|AI|] "
    inputs = tokenizer(prompt, return_tensors="pt")  # tokenize the full prompt, not just the question
    input_ids = inputs["input_ids"].to(model.device)
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(
            do_sample=True,
            temperature=1.0,
            top_p=0.95,
            num_beams=4,
        ),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=512,
    )
    # Keep only the assistant's part of the decoded sequence.
    output = tokenizer.decode(generation_output.sequences[0]).split("[|AI|]")[1]
    return output
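
# Hypothetical usage of the single-turn helper (slow on CPU for a 7B model):
# print(evaluate("What is nucleus sampling?"))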


def generate_prompt_with_history(text: str, history: List[Tuple[str, str]], tokenizer, max_length=2048):
    # Render each past (human, ai) turn, plus the new human turn awaiting a reply.
    turns = ["\n[|Human|]{}\n[|AI|]{}".format(x[0], x[1]) for x in history]
    turns.append("\n[|Human|]{}\n[|AI|]".format(text))
    history_text = ""
    flag = False  # becomes True once at least the newest turn fits

    # Walk backwards from the newest turn, prepending turns while they still fit.
    for x in turns[::-1]:
        if tokenizer(history_text + x, return_tensors="pt")["input_ids"].size(-1) <= max_length:
            history_text = x + history_text
            flag = True
        else:
            break  # older turns would overflow the context window; stop here
    if flag:
        return history_text, tokenizer(history_text, return_tensors="pt")
    else:
        return False
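
# Hypothetical example: two past turns plus a new question.
# history = [("Hi", "Hello!"), ("What is attention?", "Attention weighs token interactions.")]
# prompt, inputs = generate_prompt_with_history("Can you elaborate?", history, tokenizer)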


def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
    # True if s ends with a stop word, or with a proper prefix of one
    # (so streaming output can pause before a stop word is fully emitted).
    for stop_word in stop_words:
        if s.endswith(stop_word):
            return True
        for i in range(1, len(stop_word)):
            if s.endswith(stop_word[:i]):
                return True
    return False
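
# E.g. is_stop_word_or_prefix("...[|Hu", ["[|Human|]"]) is True, so the caller
# withholds the tail until it is clear whether a stop word is forming.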


def greedy_search(input_ids: torch.Tensor,
                  model: torch.nn.Module,
                  tokenizer: transformers.PreTrainedTokenizer,
                  stop_words: list,
                  max_length: int,
                  temperature: float = 1.0,
                  top_p: float = 1.0) -> Iterator[str]:
    # Note: despite the name, this streams top-p (nucleus) sampling one token at
    # a time, reusing the KV cache via past_key_values.
    generated_tokens = []
    past_key_values = None
    for i in range(max_length):
        with torch.no_grad():
            if past_key_values is None:
                outputs = model(input_ids)
            else:
                # Once the cache is populated, only the newest token is fed in.
                outputs = model(input_ids[:, -1:], past_key_values=past_key_values)
            logits = outputs.logits[:, -1, :]
            past_key_values = outputs.past_key_values

        logits /= temperature

        probs = torch.softmax(logits, dim=-1)

        # Nucleus (top-p) filtering: zero out everything past the cumulative-p cutoff.
        probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
        probs_sum = torch.cumsum(probs_sort, dim=-1)
        mask = probs_sum - probs_sort > top_p
        probs_sort[mask] = 0.0

        # Renormalize the surviving mass, sample, and map back to vocabulary ids.
        probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
        next_token = torch.multinomial(probs_sort, num_samples=1)
        next_token = torch.gather(probs_idx, -1, next_token)

        input_ids = torch.cat((input_ids, next_token), dim=-1)

        generated_tokens.append(next_token[0].item())
        text = tokenizer.decode(generated_tokens)

        yield text
        if any(x in text for x in stop_words):
            return
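
# A hypothetical streaming usage (assumes the model/tokenizer loaded above):
# ids = tokenizer("Hello", return_tensors="pt")["input_ids"].to(model.device)
# for partial in greedy_search(ids, model, tokenizer, stop_words=["[|Human|]"], max_length=64):
#     print(partial)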

@torch.no_grad()
def predict(text: str,
            history: List[Tuple[str, str]] = None,
            top_p: float = 0.95,
            temperature: float = 1.0,
            max_length_tokens: int = 512,
            max_context_length_tokens: int = 2048):
    if text == "":
        return ""
    if history is None:
        history = []

    inputs = generate_prompt_with_history(text, history, tokenizer, max_length=max_context_length_tokens)
    if inputs is False:
        return ""  # even the newest turn does not fit in the context window
    prompt, inputs = inputs

    input_ids = inputs["input_ids"].to(model.device)
    output = []

    # Stream partial decodes, trimming anything at or after a stop word before recording.
    for x in greedy_search(input_ids, model, tokenizer,
                           stop_words=["[|Human|]", "[|AI|]"],
                           max_length=max_length_tokens,
                           temperature=temperature, top_p=top_p):
        if is_stop_word_or_prefix(x, ["[|Human|]", "[|AI|]"]) is False:
            if "[|Human|]" in x:
                x = x[: x.index("[|Human|]")].strip()
            elif "[| Human |]" in x:
                x = x[: x.index("[| Human |]")].strip()
            if "[|AI|]" in x:
                x = x[: x.index("[|AI|]")].strip()
            x = x.strip(" ")
            output.append(x)
    return output[-1] if output else ""

# text = "Can you give a more formal definition?"
# print(predict(text))


iface = gr.Interface(fn=predict,
                     inputs="text",
                     outputs=["text"],
                     title="Learn with ChadGPT",
                     description="Ciao!!!")

iface.launch(inline=False)