# Earlier sentiment-analysis demo, kept for reference:
#import gradio as gr
#from transformers import pipeline
#
#sentiment = pipeline("sentiment-analysis")
#
#def get_sentiment(input_text):
#    return sentiment(input_text)
#
#iface = gr.Interface(fn=get_sentiment,
#                     inputs="text",
#                     outputs=["text"],
#                     title="Sentiment Analysis",
#                     description="Ciao!!!")
#
#iface.launch(inline=False)

import gradio as gr
import torch
import transformers
from typing import Iterator, List, Optional, Tuple
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    device_map="cpu",  # keep all weights on CPU; no GPU is assumed
)


def evaluate(question: str) -> str:
    """One-shot beam-search generation (not wired into the Gradio app below)."""
    prompt = f"The conversation between human and AI assistant.\n[|Human|] {question}.\n[|AI|] "
    # Tokenize the full prompt (not just the raw question) and keep the tensors
    # on the model's device rather than unconditionally calling .cuda().
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(
            temperature=1,
            top_p=0.95,
            num_beams=4,
        ),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=512,
    )
    output = tokenizer.decode(generation_output.sequences[0]).split("[|AI|]")[1]
    return output


def generate_prompt_with_history(text: str,
                                 history: List[Tuple[str, str]],
                                 tokenizer,
                                 max_length: int = 2048):
    """Build a prompt from the newest turns that fit in max_length tokens.

    Returns (prompt_text, tokenized_inputs), or None when even the current
    question does not fit.
    """
    turns = ["\n[|Human|]{}\n[|AI|]{}".format(x[0], x[1]) for x in history]
    turns.append("\n[|Human|]{}\n[|AI|]".format(text))
    history_text = ""
    flag = False  # initialized up front so an oversized first turn cannot raise NameError
    for x in turns[::-1]:
        if tokenizer(history_text + x, return_tensors="pt")["input_ids"].size(-1) <= max_length:
            history_text = x + history_text
            flag = True
        else:
            break  # drop the remaining, older turns once the budget is spent
    if flag:
        return history_text, tokenizer(history_text, return_tensors="pt")
    return None


def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
    """True if s ends with a stop word or with any prefix of one."""
    for stop_word in stop_words:
        if s.endswith(stop_word):
            return True
        for i in range(1, len(stop_word)):
            if s.endswith(stop_word[:i]):
                return True
    return False


def greedy_search(input_ids: torch.Tensor,
                  model: torch.nn.Module,
                  tokenizer: transformers.PreTrainedTokenizer,
                  stop_words: list,
                  max_length: int,
                  temperature: float = 1.0,
                  top_p: float = 1.0) -> Iterator[str]:
    """Stream tokens one at a time. Despite the name, this performs nucleus
    (top-p) sampling, not greedy decoding; it yields the decoded text after
    every new token and stops once a stop word appears."""
    generated_tokens = []
    past_key_values = None
    for _ in range(max_length):
        with torch.no_grad():
            if past_key_values is None:
                outputs = model(input_ids)
            else:
                # Reuse the KV cache: only the newest token needs a forward pass.
                outputs = model(input_ids[:, -1:], past_key_values=past_key_values)
            logits = outputs.logits[:, -1, :]
            past_key_values = outputs.past_key_values

        logits /= temperature
        probs = torch.softmax(logits, dim=-1)

        # Top-p filtering: keep the smallest set of tokens whose cumulative
        # probability reaches top_p, renormalize, then sample.
        probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True)
        probs_sum = torch.cumsum(probs_sort, dim=-1)
        mask = probs_sum - probs_sort > top_p
        probs_sort[mask] = 0.0
        probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
        next_token = torch.multinomial(probs_sort, num_samples=1)
        next_token = torch.gather(probs_idx, -1, next_token)

        input_ids = torch.cat((input_ids, next_token), dim=-1)
        generated_tokens.append(next_token[0].item())
        text = tokenizer.decode(generated_tokens)

        yield text
        if any(x in text for x in stop_words):
            return


@torch.no_grad()
def predict(text: str,
            history: Optional[List[Tuple[str, str]]] = None,
            top_p: float = 0.95,
            temperature: float = 1.0,
            max_length_tokens: int = 512,
            max_context_length_tokens: int = 2048) -> str:
    if text == "":
        return ""
    history = history or []

    result = generate_prompt_with_history(text, history, tokenizer,
                                          max_length=max_context_length_tokens)
    if result is None:
        return ""  # the question alone exceeds the context budget
    prompt, inputs = result
    input_ids = inputs["input_ids"].to(model.device)

    output = []
    for x in greedy_search(input_ids,
                           model,
                           tokenizer,
                           stop_words=["[|Human|]", "[|AI|]"],
                           max_length=max_length_tokens,
                           temperature=temperature,
                           top_p=top_p):
        if not is_stop_word_or_prefix(x, ["[|Human|]", "[|AI|]"]):
            # Trim anything generated past the next speaker tag.
            if "[|Human|]" in x:
                x = x[:x.index("[|Human|]")].strip()
            elif "[| Human |]" in x:
                x = x[:x.index("[| Human |]")].strip()
            if "[|AI|]" in x:
                x = x[:x.index("[|AI|]")].strip()
            output.append(x.strip(" "))
    return output[-1] if output else ""

#text = "Can you give a more formal definition?"
#print(predict(text))

iface = gr.Interface(fn=predict,
                     inputs="text",
                     outputs=["text"],
                     title="Learn with ChadGPT",
                     description="Ciao!!!")

iface.launch(inline=False)