import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM import lftk import spacy import time import os import openai # Load the Vicuna 7B model and tokenizer vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3") vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3") # Load the LLaMA 7b model and tokenizer llama_tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf") llama_model = AutoModelForCausalLM.from_pretrained("daryl149/llama-2-7b-chat-hf") template_single = '''Please output any <{}> in the following sentence one per line without any additional text: "{}"''' # def linguistic_features(message): # # Load a trained spaCy pipeline # nlp = spacy.load("en_core_web_sm") # # Create a spaCy doc object # doc = nlp(message) # # Initiate LFTK extractor by passing in the doc # LFTK_extractor = lftk.Extractor(docs=doc) # # Customize LFTK extractor (optional) # LFTK_extractor.customize(stop_words=True, punctuations=False, round_decimal=3) # # Use LFTK to dynamically extract handcrafted linguistic features # features_to_extract = lftk.search_features(family="wordsent", language="general", return_format="list_key") # extracted_features = LFTK_extractor.extract(features=features_to_extract) # print('Linguistic Features:', extracted_features) # return extracted_features def update_api_key(new_key): global api_key os.environ['OPENAI_API_TOKEN'] = new_key openai.api_key = os.environ['OPENAI_API_TOKEN'] def chat(system_prompt, user_prompt, model = 'gpt-3.5-turbo', temperature = 0, verbose = False): ''' Normal call of OpenAI API ''' response = openai.ChatCompletion.create( temperature = temperature, model=model, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ]) res = response['choices'][0]['message']['content'] if verbose: print('System prompt:', system_prompt) print('User prompt:', user_prompt) print('GPT response:', res) return res def format_chat_prompt(message, chat_history, max_convo_length): prompt = "" for turn in chat_history[-max_convo_length:]: user_message, bot_message = turn prompt = f"{prompt}\nUser: {user_message}\nAssistant: {bot_message}" prompt = f"{prompt}\nUser: {message}\nAssistant:" return prompt def gpt_respond(tab_name, message, chat_history, max_convo_length = 10): formatted_prompt = format_chat_prompt(message, chat_history, max_convo_length) print('Prompt + Context:') print(formatted_prompt) bot_message = chat(system_prompt = f'''Generate the output only for the assistant. Please output any <{tab_name}> in the following sentence one per line without any additional text.''', user_prompt = formatted_prompt) chat_history.append((message, bot_message)) return "", chat_history def vicuna_respond(tab_name, message, chat_history): formatted_prompt = f'''Generate the output only for the assistant. Please output any {tab_name} in the following sentence one per line without any additional text: {message}''' print('Vicuna Ling Ents Fn - Prompt + Context:') print(formatted_prompt) input_ids = vicuna_tokenizer.encode(formatted_prompt, return_tensors="pt") output_ids = vicuna_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2) bot_message = vicuna_tokenizer.decode(output_ids[0], skip_special_tokens=True) print(bot_message) # Remove formatted prompt from bot_message bot_message = bot_message.replace(formatted_prompt, '') print(bot_message) chat_history.append((formatted_prompt, bot_message)) time.sleep(2) return tab_name, "", chat_history def llama_respond(tab_name, message, chat_history): formatted_prompt = f'''Generate the output only for the assistant. Please output any {tab_name} in the following sentence one per line without any additional text: {message}''' # print('Llama - Prompt + Context:') # print(formatted_prompt) input_ids = llama_tokenizer.encode(formatted_prompt, return_tensors="pt") output_ids = llama_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2) bot_message = llama_tokenizer.decode(output_ids[0], skip_special_tokens=True) # Remove formatted prompt from bot_message bot_message = bot_message.replace(formatted_prompt, '') # print(bot_message) chat_history.append((formatted_prompt, bot_message)) time.sleep(2) return tab_name, "", chat_history def gpt_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history, max_convo_length = 10): formatted_system_prompt = "" if (task_name == "POS Tagging"): if (strategy == "S1"): formatted_system_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_system_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (strategy == "S3"): formatted_system_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (task_name == "Chunking"): if (strategy == "S1"): formatted_system_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_system_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' elif (strategy == "S3"): formatted_system_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' formatted_prompt = format_chat_prompt(message, chat_history, max_convo_length) print('Prompt + Context:') print(formatted_prompt) bot_message = chat(system_prompt = formatted_system_prompt, user_prompt = formatted_prompt) chat_history.append((message, bot_message)) return "", chat_history def vicuna_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history): formatted_prompt = "" if (task_name == "POS Tagging"): if (strategy == "S1"): formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (strategy == "S3"): formatted_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (task_name == "Chunking"): if (strategy == "S1"): formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' elif (strategy == "S3"): formatted_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' print('Vicuna Strategy Fn - Prompt + Context:') print(formatted_prompt) input_ids = vicuna_tokenizer.encode(formatted_prompt, return_tensors="pt") output_ids = vicuna_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2) bot_message = vicuna_tokenizer.decode(output_ids[0], skip_special_tokens=True) print(bot_message) # Remove formatted prompt from bot_message bot_message = bot_message.replace(formatted_prompt, '') print(bot_message) chat_history.append((formatted_prompt, bot_message)) time.sleep(2) return task_name, "", chat_history def llama_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history): formatted_prompt = "" if (task_name == "POS Tagging"): if (strategy == "S1"): formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (strategy == "S3"): formatted_prompt = f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''' elif (task_name == "Chunking"): if (strategy == "S1"): formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}''' elif (strategy == "S2"): formatted_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' elif (strategy == "S3"): formatted_prompt = f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''' # print('Llama Strategies - Prompt + Context:') # print(formatted_prompt) input_ids = llama_tokenizer.encode(formatted_prompt, return_tensors="pt") output_ids = llama_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2) bot_message = llama_tokenizer.decode(output_ids[0], skip_special_tokens=True) # print(bot_message) # Remove formatted prompt from bot_message bot_message = bot_message.replace(formatted_prompt, '') # print(bot_message) chat_history.append((formatted_prompt, bot_message)) time.sleep(2) return task_name, "", chat_history def interface(): # prompt = template_single.format(tab_name, textbox_prompt) with gr.Tab("Linguistic Entities"): gr.Markdown("