"""Gradio demo comparing GPT-3.5, LLaMA-2-7b and Vicuna-7b on linguistic tasks.

The app has two tabs:

* "Linguistic Entities": asks each model to list a chosen linguistic entity
  found in the user's sentence.
* "CoreNLP": asks each model to POS-tag or chunk the sentence using three
  prompting strategies (S1, S2, S3).
"""

import os
import time

import gradio as gr
import lftk
import openai
import spacy
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Vicuna 7B model and tokenizer
vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.3")
vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.3")

# Load the LLaMA 7b model and tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf")
llama_model = AutoModelForCausalLM.from_pretrained("daryl149/llama-2-7b-chat-hf")

# Task-specific instruction used by strategies S2 and S3.
_TASK_INSTRUCTIONS = {
    "POS Tagging": "POS tag the following sentence using Universal POS tag set",
    "Chunking": "Chunk the following sentence in CoNLL 2000 format with BIO tags",
}

# Prefix the GPT handlers put in front of the "Output any ..." instruction.
_GPT_ASSISTANT_ONLY_PREFIX = "Generate the output only for the assistant. "

# Choices shared by both "Linguistic Entity" dropdowns.
_LINGUISTIC_ENTITIES = [
    "Noun",
    "Determiner",
    "Noun phrase",
    "Verb phrase",
    "Dependent clause",
    "T-units",
]

# The help texts live at module level (flush left) so that the Markdown list
# and heading markers start at column 0 regardless of code indentation.
_HEADER_MD = """
# Assessing the Articulate
## A Comparative Analysis of the Core Linguistic Knowledge in Large Language Models
"""

_LING_ENTS_INSTRUCTIONS_MD = """
## 📜 Step-By-Step Instructions
- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
- If you own an OpenAI API key, select 'Yes' in the dropdown. If you don't own one, select 'No'.
- If you selected 'Yes', enter your OpenAI API Key [Link to your OpenAI keys](https://platform.openai.com/api-keys).
- If you selected 'No', leave the 'OpenAI Key' field blank and continue with the rest.
- Select a Linguistic Entity from the Dropdown.
- Click 'Submit' to send your inputs to the models.
- To enter a new prompt, scroll to the bottom and click 'Clear' to start again.

### ⏳ After you click 'Submit', the models will take a couple seconds to process your inputs.

### 🤖 Then, the models will output the linguistic entity found in your prompt based on your selection!

Note: If you get an 'Error' in the gpt-3.5 model, check the following:
- Check that you entered your key correctly without any extra characters.
- If you used a free key, it means you exceeded your quota from the free API Key.
"""

_TASK_INSTRUCTIONS_MD = """
## 📜 Step-By-Step Instructions
- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5).
- If you own an OpenAI API key, select 'Yes' in the dropdown. If you don't own one, select 'No'.
- If you selected 'Yes', enter your OpenAI API Key [Link to your OpenAI keys](https://platform.openai.com/api-keys).
- If you selected 'No', leave the 'OpenAI Key' field blank and continue with the rest.
- Select a Task from the Dropdown.
- Select a Linguistic Entity from the Dropdown.
- Click 'Submit' to send your inputs to the models.
- To enter a new prompt, scroll to the bottom and click 'Clear' to start again.

### ⏳ After you click 'Submit', the models will take a couple seconds to process your inputs.

### 🤖 Then, the models will output the POS Tagging or Chunking in your prompt with three different strategies based on your selections!

Note: If you get an 'Error' in the gpt-3.5 model, check the following:
- Check that you entered your key correctly without any extra characters.
- If you used a free key, it means you exceeded your quota from the free API Key.
"""

_STRATEGIES_MD = """
### 🛠️ How each Strategy works
- Strategy 1 - QA-Based Prompting - The model is prompted with a question-answer format. The input consists of a question, and the model generates a response based on the understanding of the question and its knowledge.
- Strategy 2 - Instruction-Based Prompting - Involves providing the model with explicit instructions on how to generate a response. Instead of relying solely on context or previous knowledge, the instructions guide the model in generating content that aligns with specific criteria.
- Strategy 3 - Structured Prompting - Involves presenting information to the model in a structured format, often with defined sections or categories. The model then generates responses following the given structure.
"""


def update_api_key(new_key):
    """Store the user's OpenAI key in the environment and the openai client.

    Args:
        new_key: Key typed into the "Open AI Key" textbox. ``None`` (e.g. a
            cleared textbox) is treated as an empty key, because assigning
            ``None`` to ``os.environ`` raises ``TypeError``.
    """
    print("update_api_key ran")
    os.environ['OPENAI_API_TOKEN'] = new_key or ""
    openai.api_key = os.environ['OPENAI_API_TOKEN']


def chat(system_prompt, user_prompt, model='gpt-3.5-turbo', temperature=0, verbose=False):
    """Send one system + user message pair to the OpenAI chat API.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        model: OpenAI chat model name.
        temperature: Sampling temperature forwarded to the API.
        verbose: When true, print both prompts and the response.

    Returns:
        The text content of the first returned choice.
    """
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 client
    # interface; confirm the pinned openai version still provides it.
    response = openai.ChatCompletion.create(
        temperature=temperature,
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    res = response['choices'][0]['message']['content']

    if verbose:
        print('System prompt:', system_prompt)
        print('User prompt:', user_prompt)
        print('GPT response:', res)

    return res


def format_chat_prompt(message, chat_history, max_convo_length):
    """Render recent chat turns plus the new message as a transcript.

    Args:
        message: The new user message.
        chat_history: Sequence of ``(user_message, bot_message)`` pairs.
        max_convo_length: Maximum number of most recent turns to include.
            Zero or a negative value includes no history.

    Returns:
        A string of ``"\\nUser: ...\\nAssistant: ..."`` turns that ends with an
        open ``"Assistant:"`` line for the model to complete.
    """
    # A plain `chat_history[-0:]` would be the *whole* history, so the
    # non-positive case has to be handled explicitly.
    recent_turns = chat_history[-max_convo_length:] if max_convo_length > 0 else []
    transcript = "".join(
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in recent_turns
    )
    return f"{transcript}\nUser: {message}\nAssistant:"


def _entity_prompt(entity, message):
    """Return the 'list this entity' prompt shared by the local-model handlers."""
    return f'''Output any {entity} in the following sentence one per line: "{message}"'''


def _strategy_prompt(strategy, task_name, task_ling_ent, message):
    """Return the prompt for one strategy/task pair, or "" if unrecognised.

    S1 asks for the chosen linguistic entity; S2 and S3 both use the
    task-specific instruction (the two strategies currently share a prompt).
    """
    if task_name not in _TASK_INSTRUCTIONS:
        return ""
    if strategy == "S1":
        return _entity_prompt(task_ling_ent, message)
    if strategy in ("S2", "S3"):
        return f'''{_TASK_INSTRUCTIONS[task_name]}: "{message}"'''
    return ""


def _generate_text(tokenizer, model, prompt):
    """Run `prompt` through a local causal LM and return the decoded output."""
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        do_sample=True,
        max_length=1024,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def gpt_respond(have_key, tab_name, message, chat_history, max_convo_length=10):
    """Ask GPT-3.5 to list the selected linguistic entity in `message`.

    Args:
        have_key: "Yes"/"No" dropdown value; "No" skips the API call.
        tab_name: Selected linguistic entity name.
        message: The user's sentence.
        chat_history: Chatbot history; the new turn is appended in place.
        max_convo_length: Number of previous turns sent as context.

    Returns:
        ``("", chat_history)``: an empty string to clear the prompt textbox
        and the (possibly updated) history.
    """
    if have_key == "No":
        return "", chat_history

    formatted_prompt = format_chat_prompt(message, chat_history, max_convo_length)
    print('Prompt + Context:')
    print(formatted_prompt)
    system_prompt = (
        f"{_GPT_ASSISTANT_ONLY_PREFIX}"
        f"Output any <{tab_name}> in the following sentence one per line."
    )
    bot_message = chat(system_prompt=system_prompt, user_prompt=formatted_prompt)
    chat_history.append((message, bot_message))
    return "", chat_history


def vicuna_respond(tab_name, message, chat_history):
    """Ask Vicuna to list the selected linguistic entity in `message`.

    Returns:
        ``(tab_name, "", chat_history)``: the unchanged dropdown value, an
        empty string for the prompt textbox, and the updated history.
    """
    formatted_prompt = _entity_prompt(tab_name, message)
    print('Vicuna Ling Ents Fn - Prompt + Context:')
    print(formatted_prompt)
    bot_message = _generate_text(vicuna_tokenizer, vicuna_model, formatted_prompt)
    print(bot_message)
    # Remove formatted prompt from bot_message
    bot_message = bot_message.replace(formatted_prompt, '')
    print(bot_message)

    chat_history.append((formatted_prompt, bot_message))
    time.sleep(2)
    return tab_name, "", chat_history


def llama_respond(tab_name, message, chat_history):
    """Ask LLaMA to list the selected linguistic entity in `message`.

    Returns:
        ``(tab_name, "", chat_history)``: the unchanged dropdown value, an
        empty string for the prompt textbox, and the updated history.
    """
    formatted_prompt = _entity_prompt(tab_name, message)
    bot_message = _generate_text(llama_tokenizer, llama_model, formatted_prompt)
    # Remove formatted prompt from bot_message
    bot_message = bot_message.replace(formatted_prompt, '')

    chat_history.append((formatted_prompt, bot_message))
    time.sleep(2)
    return tab_name, "", chat_history


def gpt_strategies_respond(have_key, strategy, task_name, task_ling_ent, message,
                           chat_history, max_convo_length=10):
    """Run one prompting strategy for POS tagging / chunking through GPT-3.5.

    Args:
        have_key: "Yes"/"No" dropdown value; "No" skips the API call.
        strategy: "S1", "S2" or "S3".
        task_name: "POS Tagging" or "Chunking".
        task_ling_ent: Linguistic entity used by strategy S1.
        message: The user's sentence.
        chat_history: Chatbot history; the new turn is appended in place.
        max_convo_length: Number of previous turns sent as context.

    Returns:
        ``("", chat_history)``: an empty string to clear the prompt textbox
        and the (possibly updated) history.
    """
    if have_key == "No":
        return "", chat_history

    formatted_system_prompt = _strategy_prompt(strategy, task_name, task_ling_ent, message)
    if strategy == "S1" and formatted_system_prompt:
        # Only the S1 system prompt carries the "assistant only" prefix.
        formatted_system_prompt = _GPT_ASSISTANT_ONLY_PREFIX + formatted_system_prompt

    formatted_prompt = format_chat_prompt(message, chat_history, max_convo_length)
    print('Prompt + Context:')
    print(formatted_prompt)
    bot_message = chat(system_prompt=formatted_system_prompt, user_prompt=formatted_prompt)
    chat_history.append((message, bot_message))
    return "", chat_history


def vicuna_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for POS tagging / chunking through Vicuna.

    Returns:
        ``(task_name, "", chat_history)``: the unchanged task dropdown value,
        an empty string for the prompt textbox, and the updated history.
    """
    formatted_prompt = _strategy_prompt(strategy, task_name, task_ling_ent, message)
    print('Vicuna Strategy Fn - Prompt + Context:')
    print(formatted_prompt)
    bot_message = _generate_text(vicuna_tokenizer, vicuna_model, formatted_prompt)
    print(bot_message)
    # Remove formatted prompt from bot_message
    bot_message = bot_message.replace(formatted_prompt, '')
    print(bot_message)

    chat_history.append((formatted_prompt, bot_message))
    time.sleep(2)
    return task_name, "", chat_history


def llama_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for POS tagging / chunking through LLaMA.

    Returns:
        ``(task_name, "", chat_history)``: the unchanged task dropdown value,
        an empty string for the prompt textbox, and the updated history.
    """
    formatted_prompt = _strategy_prompt(strategy, task_name, task_ling_ent, message)
    bot_message = _generate_text(llama_tokenizer, llama_model, formatted_prompt)
    # Remove formatted prompt from bot_message
    bot_message = bot_message.replace(formatted_prompt, '')

    chat_history.append((formatted_prompt, bot_message))
    time.sleep(2)
    return task_name, "", chat_history


def interface():
    """Build both tabs of the demo; must be called inside a `gr.Blocks` context."""

    def update_textbox(prompt):
        """Echo the submitted prompt into the "Original prompt" box."""
        return prompt

    # NOTE(review): each Submit button has several independent click handlers.
    # Nothing here forces update_api_key to finish before the GPT handlers
    # start; confirm the ordering is acceptable or chain them explicitly.

    with gr.Tab("Linguistic Entities"):
        gr.Markdown(_LING_ENTS_INSTRUCTIONS_MD)

        # Inputs
        ling_ents_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            have_key2 = gr.Dropdown(["Yes", "No"], label="Do you own an API Key?", scale=0.5)
            ling_ents_apikey_input = gr.Textbox(
                label="Open AI Key", placeholder="Enter your Openai key here", type="password"
            )
        linguistic_entities = gr.Dropdown(_LINGUISTIC_ENTITIES, label="Linguistic Entity")
        ling_ents_btn = gr.Button(value="Submit")

        # Outputs
        user_prompt_1 = gr.Textbox(label="Original prompt")
        with gr.Row():
            gpt_ling_ents_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_ling_ents_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_ling_ents_chatbot = gr.Chatbot(label="vicuna-7b")

        gr.ClearButton(components=[
            ling_ents_prompt, ling_ents_apikey_input, have_key2, linguistic_entities,
            vicuna_ling_ents_chatbot, llama_ling_ents_chatbot, gpt_ling_ents_chatbot,
        ])

        # Event Handler for API Key
        ling_ents_btn.click(update_api_key, inputs=ling_ents_apikey_input)

        # Show user's original prompt. This is wired to this tab's own button:
        # `task_btn` does not exist yet at this point, and the source must be
        # the prompt box rather than the output box itself.
        ling_ents_btn.click(fn=update_textbox, inputs=ling_ents_prompt,
                            outputs=user_prompt_1, api_name="ling_ents_btn")

        # Event Handler for GPT 3.5 Chatbot
        ling_ents_btn.click(
            gpt_respond,
            inputs=[have_key2, linguistic_entities, ling_ents_prompt, gpt_ling_ents_chatbot],
            outputs=[ling_ents_prompt, gpt_ling_ents_chatbot],
        )

        # Event Handler for LLaMA Chatbot
        ling_ents_btn.click(
            llama_respond,
            inputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot],
            outputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot],
        )

        # Event Handler for Vicuna Chatbot
        ling_ents_btn.click(
            vicuna_respond,
            inputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot],
            outputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot],
        )

    with gr.Tab("CoreNLP"):
        with gr.Row():
            gr.Markdown(_TASK_INSTRUCTIONS_MD)
            gr.Markdown(_STRATEGIES_MD)

        # Inputs
        task_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            have_key = gr.Dropdown(["Yes", "No"], label="Do you own an API Key?", scale=0.5)
            task_apikey_input = gr.Textbox(
                label="Open AI Key", placeholder="Enter your OpenAI key here",
                type="password", visible=True,
            )
        task = gr.Dropdown(["POS Tagging", "Chunking"], label="Task")
        task_linguistic_entities = gr.Dropdown(
            _LINGUISTIC_ENTITIES, label="Linguistic Entity For Strategy 1"
        )
        task_btn = gr.Button(value="Submit")

        # Outputs
        user_prompt_2 = gr.Textbox(label="Original prompt", )

        # The hidden Markdown components carry the strategy id ("S1".."S3")
        # into the handlers as an ordinary input value.
        gr.Markdown("### Strategy 1 - QA-Based Prompting")
        strategy1 = gr.Markdown("S1", visible=False)
        with gr.Row():
            gpt_S1_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S1_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S1_chatbot = gr.Chatbot(label="vicuna-7b")

        gr.Markdown("### Strategy 2 - Instruction-Based Prompting")
        strategy2 = gr.Markdown("S2", visible=False)
        with gr.Row():
            gpt_S2_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S2_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S2_chatbot = gr.Chatbot(label="vicuna-7b")

        gr.Markdown("### Strategy 3 - Structured Prompting")
        strategy3 = gr.Markdown("S3", visible=False)
        with gr.Row():
            gpt_S3_chatbot = gr.Chatbot(label="gpt-3.5")
            llama_S3_chatbot = gr.Chatbot(label="llama-7b")
            vicuna_S3_chatbot = gr.Chatbot(label="vicuna-7b")

        gr.ClearButton(components=[
            task_prompt, task_apikey_input, have_key, task, task_linguistic_entities,
            vicuna_S1_chatbot, llama_S1_chatbot, gpt_S1_chatbot,
            vicuna_S2_chatbot, llama_S2_chatbot, gpt_S2_chatbot,
            vicuna_S3_chatbot, llama_S3_chatbot, gpt_S3_chatbot,
        ])

        # Event Handler for API Key
        task_btn.click(update_api_key, inputs=task_apikey_input)

        # Show user's original prompt (read from the prompt box, not from the
        # output box itself, which would be a no-op).
        task_btn.click(fn=update_textbox, inputs=task_prompt,
                       outputs=user_prompt_2, api_name="task_btn")

        strategy_rows = [
            (strategy1, gpt_S1_chatbot, llama_S1_chatbot, vicuna_S1_chatbot),
            (strategy2, gpt_S2_chatbot, llama_S2_chatbot, vicuna_S2_chatbot),
            (strategy3, gpt_S3_chatbot, llama_S3_chatbot, vicuna_S3_chatbot),
        ]

        # Event Handlers for GPT 3.5 Chatbot POS/Chunk
        for strategy, gpt_chatbot, _, _ in strategy_rows:
            task_btn.click(
                gpt_strategies_respond,
                inputs=[have_key, strategy, task, task_linguistic_entities,
                        task_prompt, gpt_chatbot],
                outputs=[task_prompt, gpt_chatbot],
            )

        # Event Handlers for LLaMA Chatbot POS/Chunk
        for strategy, _, llama_chatbot, _ in strategy_rows:
            task_btn.click(
                llama_strategies_respond,
                inputs=[strategy, task, task_linguistic_entities, task_prompt, llama_chatbot],
                outputs=[task, task_prompt, llama_chatbot],
            )

        # Event Handlers for Vicuna Chatbot POS/Chunk
        for strategy, _, _, vicuna_chatbot in strategy_rows:
            task_btn.click(
                vicuna_strategies_respond,
                inputs=[strategy, task, task_linguistic_entities, task_prompt, vicuna_chatbot],
                outputs=[task, task_prompt, vicuna_chatbot],
            )


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MD)

    # load interface
    interface()

demo.launch()