Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import lftk | |
import spacy | |
import time | |
import os | |
import openai | |
# Hugging Face Hub identifiers of the two locally-run chat checkpoints.
_VICUNA_CHECKPOINT = "lmsys/vicuna-7b-v1.3"
_LLAMA_CHECKPOINT = "daryl149/llama-2-7b-chat-hf"

# Vicuna 7B tokenizer and causal-LM weights, loaded once when the module runs.
vicuna_tokenizer = AutoTokenizer.from_pretrained(_VICUNA_CHECKPOINT)
vicuna_model = AutoModelForCausalLM.from_pretrained(_VICUNA_CHECKPOINT)

# LLaMA-2 7B chat tokenizer and causal-LM weights, loaded the same way.
llama_tokenizer = AutoTokenizer.from_pretrained(_LLAMA_CHECKPOINT)
llama_model = AutoModelForCausalLM.from_pretrained(_LLAMA_CHECKPOINT)

# Two-slot prompt template: the first "{}" takes the linguistic entity name,
# the second the sentence to analyse (see the commented-out use in interface()).
template_single = (
    'Please output any <{}> in the following sentence one per line '
    'without any additional text: "{}"'
)
# def linguistic_features(message): | |
# # Load a trained spaCy pipeline | |
# nlp = spacy.load("en_core_web_sm") | |
# # Create a spaCy doc object | |
# doc = nlp(message) | |
# # Initiate LFTK extractor by passing in the doc | |
# LFTK_extractor = lftk.Extractor(docs=doc) | |
# # Customize LFTK extractor (optional) | |
# LFTK_extractor.customize(stop_words=True, punctuations=False, round_decimal=3) | |
# # Use LFTK to dynamically extract handcrafted linguistic features | |
# features_to_extract = lftk.search_features(family="wordsent", language="general", return_format="list_key") | |
# extracted_features = LFTK_extractor.extract(features=features_to_extract) | |
# print('Linguistic Features:', extracted_features) | |
# return extracted_features | |
def update_api_key(new_key):
    """Store the OpenAI API key used by subsequent ``chat`` calls.

    The key is written both to the ``OPENAI_API_TOKEN`` environment variable
    and to ``openai.api_key``.

    Args:
        new_key: Key text as entered in the UI. ``None`` is treated as an
            empty key, and surrounding whitespace (common when a key is
            pasted) is stripped before it is stored.
    """
    cleaned_key = (new_key or "").strip()
    os.environ['OPENAI_API_TOKEN'] = cleaned_key
    openai.api_key = cleaned_key
def chat(system_prompt, user_prompt, model='gpt-3.5-turbo', temperature=0, verbose=False):
    """Send one system/user message pair to the OpenAI chat completion API.

    Args:
        system_prompt: Text sent with the ``system`` role.
        user_prompt: Text sent with the ``user`` role.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        verbose: When true, echo both prompts and the reply to stdout.

    Returns:
        The message content of the first choice in the API response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(
        temperature=temperature,
        model=model,
        messages=conversation,
    )
    reply = completion['choices'][0]['message']['content']

    if not verbose:
        return reply

    print('System prompt:', system_prompt)
    print('User prompt:', user_prompt)
    print('GPT response:', reply)
    return reply
def format_chat_prompt(message, chat_history, max_convo_length):
    """Render recent chat turns plus the new message as one prompt string.

    Each retained turn becomes ``"\\nUser: <user>\\nAssistant: <bot>"`` and the
    new message is appended as ``"\\nUser: <message>\\nAssistant:"`` so the
    model continues from the assistant's position.

    Args:
        message: The new user message.
        chat_history: Sequence of ``(user_message, bot_message)`` pairs,
            oldest first. ``None`` is treated as an empty history.
        max_convo_length: Maximum number of most-recent turns to include.
            Zero or a negative value includes no earlier turns.

    Returns:
        The assembled prompt string.
    """
    # A bare ``chat_history[-n:]`` slice returns the *entire* history when
    # n == 0 (because -0 == 0), so non-positive limits are handled explicitly.
    if chat_history and max_convo_length > 0:
        recent_turns = chat_history[-max_convo_length:]
    else:
        recent_turns = []

    pieces = [
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in recent_turns
    ]
    pieces.append(f"\nUser: {message}\nAssistant:")
    return "".join(pieces)
def gpt_respond(tab_name, message, chat_history, max_convo_length = 10):
    """Ask GPT (via ``chat``) to list occurrences of a linguistic entity.

    The recent conversation and the new message are flattened with
    ``format_chat_prompt`` and sent as the user prompt; the system prompt
    names the entity to extract.

    Args:
        tab_name: Linguistic entity to extract (interpolated into the system
            prompt).
        message: The new user sentence.
        chat_history: List of ``(user, bot)`` pairs; the new exchange is
            appended to it in place.
        max_convo_length: Number of most-recent turns passed as context.

    Returns:
        ``("", chat_history)`` - an empty string followed by the updated
        history.

    NOTE(review): the commented-out wiring in ``interface()`` lists three
    output components for this handler while two values are returned here;
    reconcile the two before enabling it.
    """
    conversation = format_chat_prompt(message, chat_history, max_convo_length)
    print('Prompt + Context:', conversation, sep='\n')

    instructions = f'''Generate the output only for the assistant. Please output any <{tab_name}> in the following sentence one per line without any additional text.'''
    reply = chat(system_prompt=instructions, user_prompt=conversation)

    chat_history.append((message, reply))
    return "", chat_history
def vicuna_respond(tab_name, message, chat_history):
    """Ask the Vicuna model to list occurrences of a linguistic entity.

    Args:
        tab_name: Linguistic entity to extract (interpolated into the prompt).
        message: Sentence to analyse.
        chat_history: List of ``(prompt, reply)`` pairs shown in the chatbot;
            the new exchange is appended in place. ``None`` is treated as an
            empty history.

    Returns:
        ``(tab_name, "", chat_history)`` - the unchanged entity selection, an
        empty string for the prompt box, and the updated history.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = f'''Generate the output only for the assistant. Please output any {tab_name} in the following sentence one per line without any additional text: {message}'''
    print('Vicuna Ling Ents Fn - Prompt + Context:')
    print(formatted_prompt)

    input_ids = vicuna_tokenizer.encode(formatted_prompt, return_tensors="pt")
    output_ids = vicuna_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)

    # The generated sequence begins with the prompt tokens. Decode only the
    # tokens that follow them instead of string-replacing the prompt out of
    # the decoded text, which fails whenever decoding does not reproduce the
    # prompt character-for-character.
    new_token_ids = output_ids[0][input_ids.shape[-1]:]
    bot_message = vicuna_tokenizer.decode(new_token_ids, skip_special_tokens=True)
    print(bot_message)

    chat_history.append((formatted_prompt, bot_message))
    return tab_name, "", chat_history
def llama_respond(tab_name, message, chat_history):
    """Ask the LLaMA model to list occurrences of a linguistic entity.

    Args:
        tab_name: Linguistic entity to extract (interpolated into the prompt).
        message: Sentence to analyse.
        chat_history: List of ``(prompt, reply)`` pairs shown in the chatbot;
            the new exchange is appended in place. ``None`` is treated as an
            empty history.

    Returns:
        ``(tab_name, "", chat_history)`` - the unchanged entity selection, an
        empty string for the prompt box, and the updated history.
    """
    if chat_history is None:
        chat_history = []

    formatted_prompt = f'''Generate the output only for the assistant. Please output any {tab_name} in the following sentence one per line without any additional text: {message}'''

    input_ids = llama_tokenizer.encode(formatted_prompt, return_tensors="pt")
    output_ids = llama_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)

    # The generated sequence begins with the prompt tokens. Decode only the
    # tokens that follow them instead of string-replacing the prompt out of
    # the decoded text, which fails whenever decoding does not reproduce the
    # prompt character-for-character.
    new_token_ids = output_ids[0][input_ids.shape[-1]:]
    bot_message = llama_tokenizer.decode(new_token_ids, skip_special_tokens=True)

    chat_history.append((formatted_prompt, bot_message))
    return tab_name, "", chat_history
def gpt_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history, max_convo_length = 10):
    """Run one prompting strategy for POS tagging or chunking through GPT.

    Strategy ``"S1"`` asks for every occurrence of ``task_ling_ent``;
    ``"S2"`` and ``"S3"`` both use the task-specific instruction (the two
    currently share the same text). The chosen instruction is sent as the
    system prompt and the flattened conversation as the user prompt.

    Args:
        strategy: ``"S1"``, ``"S2"`` or ``"S3"``.
        task_name: ``"POS Tagging"`` or ``"Chunking"``.
        task_ling_ent: Linguistic entity interpolated into the S1 prompt.
        message: Sentence to analyse.
        chat_history: List of ``(user, bot)`` pairs; the new exchange is
            appended in place. ``None`` is treated as an empty history.
        max_convo_length: Number of most-recent turns passed as context.

    Returns:
        ``("", chat_history)`` - an empty string followed by the updated
        history.

    Raises:
        gr.Error: If ``task_name`` or ``strategy`` is not one of the values
            above (e.g. no task selected). Previously this silently sent an
            empty system prompt.

    NOTE(review): the commented-out wiring in ``interface()`` lists three
    output components for this handler while two values are returned here;
    reconcile the two before enabling it.
    """
    if chat_history is None:
        chat_history = []

    instruction_prompts = {
        "POS Tagging": f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''',
        "Chunking": f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''',
    }
    if task_name not in instruction_prompts or strategy not in ("S1", "S2", "S3"):
        raise gr.Error("Please select a task (POS Tagging or Chunking) before submitting.")

    if strategy == "S1":
        formatted_system_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}'''
    else:
        formatted_system_prompt = instruction_prompts[task_name]

    formatted_prompt = format_chat_prompt(message, chat_history, max_convo_length)
    print('Prompt + Context:')
    print(formatted_prompt)

    bot_message = chat(system_prompt=formatted_system_prompt,
                       user_prompt=formatted_prompt)
    chat_history.append((message, bot_message))
    return "", chat_history
def vicuna_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for POS tagging or chunking through Vicuna.

    Strategy ``"S1"`` asks for every occurrence of ``task_ling_ent``;
    ``"S2"`` and ``"S3"`` both use the task-specific instruction (the two
    currently share the same text).

    Args:
        strategy: ``"S1"``, ``"S2"`` or ``"S3"``.
        task_name: ``"POS Tagging"`` or ``"Chunking"``.
        task_ling_ent: Linguistic entity interpolated into the S1 prompt.
        message: Sentence to analyse.
        chat_history: List of ``(prompt, reply)`` pairs shown in the chatbot;
            the new exchange is appended in place. ``None`` is treated as an
            empty history.

    Returns:
        ``(task_name, "", chat_history)`` - the unchanged task selection, an
        empty string for the prompt box, and the updated history.

    Raises:
        gr.Error: If ``task_name`` or ``strategy`` is not one of the values
            above (e.g. no task selected). Previously the model was asked to
            generate from an empty prompt in that case.
    """
    if chat_history is None:
        chat_history = []

    instruction_prompts = {
        "POS Tagging": f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''',
        "Chunking": f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''',
    }
    if task_name not in instruction_prompts or strategy not in ("S1", "S2", "S3"):
        raise gr.Error("Please select a task (POS Tagging or Chunking) before submitting.")

    if strategy == "S1":
        formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}'''
    else:
        formatted_prompt = instruction_prompts[task_name]

    print('Vicuna Strategy Fn - Prompt + Context:')
    print(formatted_prompt)

    input_ids = vicuna_tokenizer.encode(formatted_prompt, return_tensors="pt")
    output_ids = vicuna_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)

    # The generated sequence begins with the prompt tokens. Decode only the
    # tokens that follow them instead of string-replacing the prompt out of
    # the decoded text, which fails whenever decoding does not reproduce the
    # prompt character-for-character.
    new_token_ids = output_ids[0][input_ids.shape[-1]:]
    bot_message = vicuna_tokenizer.decode(new_token_ids, skip_special_tokens=True)
    print(bot_message)

    chat_history.append((formatted_prompt, bot_message))
    return task_name, "", chat_history
def llama_strategies_respond(strategy, task_name, task_ling_ent, message, chat_history):
    """Run one prompting strategy for POS tagging or chunking through LLaMA.

    Strategy ``"S1"`` asks for every occurrence of ``task_ling_ent``;
    ``"S2"`` and ``"S3"`` both use the task-specific instruction (the two
    currently share the same text).

    Args:
        strategy: ``"S1"``, ``"S2"`` or ``"S3"``.
        task_name: ``"POS Tagging"`` or ``"Chunking"``.
        task_ling_ent: Linguistic entity interpolated into the S1 prompt.
        message: Sentence to analyse.
        chat_history: List of ``(prompt, reply)`` pairs shown in the chatbot;
            the new exchange is appended in place. ``None`` is treated as an
            empty history.

    Returns:
        ``(task_name, "", chat_history)`` - the unchanged task selection, an
        empty string for the prompt box, and the updated history.

    Raises:
        gr.Error: If ``task_name`` or ``strategy`` is not one of the values
            above (e.g. no task selected). Previously the model was asked to
            generate from an empty prompt in that case.
    """
    if chat_history is None:
        chat_history = []

    instruction_prompts = {
        "POS Tagging": f'''Please POS tag the following sentence using Universal POS tag set without generating any additional text: {message}''',
        "Chunking": f'''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {message}''',
    }
    if task_name not in instruction_prompts or strategy not in ("S1", "S2", "S3"):
        raise gr.Error("Please select a task (POS Tagging or Chunking) before submitting.")

    if strategy == "S1":
        formatted_prompt = f'''Generate the output only for the assistant. Please output any {task_ling_ent} in the following sentence one per line without any additional text: {message}'''
    else:
        formatted_prompt = instruction_prompts[task_name]

    input_ids = llama_tokenizer.encode(formatted_prompt, return_tensors="pt")
    output_ids = llama_model.generate(input_ids, do_sample=True, max_length=1024, num_beams=5, no_repeat_ngram_size=2)

    # The generated sequence begins with the prompt tokens. Decode only the
    # tokens that follow them instead of string-replacing the prompt out of
    # the decoded text, which fails whenever decoding does not reproduce the
    # prompt character-for-character.
    new_token_ids = output_ids[0][input_ids.shape[-1]:]
    bot_message = llama_tokenizer.decode(new_token_ids, skip_special_tokens=True)

    chat_history.append((formatted_prompt, bot_message))
    return task_name, "", chat_history
# Choices offered by the "Linguistic Entity" dropdown on both tabs.
_LINGUISTIC_ENTITY_CHOICES = ["Noun", "Determiner", "Noun phrase", "Verb phrase", "Dependent clause", "T-units"]


def _build_linguistic_entities_tab():
    """Lay out the "Linguistic Entities" tab and wire its Submit button."""
    with gr.Tab("Linguistic Entities"):
        gr.Markdown("<h2>📜 Rules</h2>")
        gr.Markdown("<h4>- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5). \n- Enter your OpenAI Api Key and click on 'Submit Key'. \n- Select a Linguistic Entity from the Dropdown. \n- Click 'Submit' to send your inputs to the models. \n- Scroll to the bottom and click 'Clear' to start again.</h4>")
        gr.Markdown("<h4>🤖 Now the models will output the linguistic entities found in your prompt based on your selections!</h4>")

        # Inputs
        ling_ents_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            # Key entry; its button is only wired by the commented-out GPT
            # handlers below.
            ling_ents_apikey_input = gr.Textbox(label="Open AI Key", placeholder="Enter your Openai key here", type="password")
            ling_ents_apikey_btn = gr.Button(value="Submit Key", scale=0)
        linguistic_entities = gr.Dropdown(_LINGUISTIC_ENTITY_CHOICES, label="Linguistic Entity")
        ling_ents_btn = gr.Button(value="Submit")

        # Outputs
        gr.Markdown("Strategy 1 QA-Based Prompting")
        # Read-only display; `interactive=False` is the Textbox parameter for
        # this (`disabled` is not a Textbox argument). No handler writes to
        # it yet.
        linguistic_features_textbox = gr.Textbox(label="Linguistic Features", interactive=False)
        with gr.Row():
            vicuna_ling_ents_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_ling_ents_chatbot = gr.Chatbot(label="llama-7b")
            gpt_ling_ents_chatbot = gr.Chatbot(label="gpt-3.5")
        gr.ClearButton(components=[ling_ents_prompt, ling_ents_apikey_input, vicuna_ling_ents_chatbot, llama_ling_ents_chatbot, gpt_ling_ents_chatbot])

        # Event Handler for Vicuna Chatbot
        ling_ents_btn.click(vicuna_respond, inputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, vicuna_ling_ents_chatbot])
        # Event Handler for LLaMA Chatbot
        ling_ents_btn.click(llama_respond, inputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot],
                            outputs=[linguistic_entities, ling_ents_prompt, llama_ling_ents_chatbot])

        # Event Handler for GPT 3.5 Chatbot, user must submit api key before submitting the prompt
        # Will activate after getting API key
        # NOTE(review): gpt_respond returns two values but three outputs are
        # listed below; fix before enabling.
        # ling_ents_apikey_btn.click(update_api_key, inputs=ling_ents_apikey_input)
        # ling_ents_btn.click(gpt_respond, inputs=[linguistic_entities, ling_ents_prompt, gpt_ling_ents_chatbot],
        #                     outputs=[linguistic_entities, ling_ents_prompt, gpt_ling_ents_chatbot])


def _build_pos_chunking_tab():
    """Lay out the "POS/Chunking" tab and wire its Submit button."""
    with gr.Tab("POS/Chunking"):
        gr.Markdown("<h2>📜 Rules</h2>")
        gr.Markdown("<h4>- Enter a sentence for three models to process (Vicuna-7b, LLaMA-7b and GPT-3.5). \n- Enter your OpenAI Api Key and click on 'Submit Key'. \n- Select a Task from the Dropdown \n- Select a Linguistic Entity from the Dropdown. \n- Click 'Submit' to send your inputs to the models. \n- Scroll to the bottom and click 'Clear' to start again.</h4>")
        gr.Markdown("<h4>🤖 Now the models will output the POS Tagging or Chunking in your prompt with three Strategies based on your selections!</h4>")

        # Inputs
        task_prompt = gr.Textbox(show_label=False, placeholder="Write a prompt and press enter")
        with gr.Row():
            task_apikey_input = gr.Textbox(label="Open AI Key", placeholder="Enter your Openai key here", type="password")
            task_apikey_btn = gr.Button(value="Submit Key", scale=0)
        task = gr.Dropdown(["POS Tagging", "Chunking"], label="Task")
        task_linguistic_entities = gr.Dropdown(_LINGUISTIC_ENTITY_CHOICES, label="Linguistic Entity For Strategy 1")
        task_btn = gr.Button(value="Submit")

        # Outputs. Each strategy section has a hidden Markdown component whose
        # value ("S1"/"S2"/"S3") is passed to the handlers as the strategy id.
        gr.Markdown("Strategy 1 QA-Based Prompting")
        strategy1 = gr.Markdown("S1", visible=False)
        with gr.Row():
            vicuna_S1_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_S1_chatbot = gr.Chatbot(label="llama-7b")
            gpt_S1_chatbot = gr.Chatbot(label="gpt-3.5")
        gr.Markdown("Strategy 2 Instruction-Based Prompting")
        strategy2 = gr.Markdown("S2", visible=False)
        with gr.Row():
            vicuna_S2_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_S2_chatbot = gr.Chatbot(label="llama-7b")
            gpt_S2_chatbot = gr.Chatbot(label="gpt-3.5")
        gr.Markdown("Strategy 3 Structured Prompting")
        strategy3 = gr.Markdown("S3", visible=False)
        with gr.Row():
            vicuna_S3_chatbot = gr.Chatbot(label="vicuna-7b")
            llama_S3_chatbot = gr.Chatbot(label="llama-7b")
            gpt_S3_chatbot = gr.Chatbot(label="gpt-3.5")
        gr.ClearButton(components=[task_prompt, task_apikey_input,
                                   vicuna_S1_chatbot, llama_S1_chatbot, gpt_S1_chatbot,
                                   vicuna_S2_chatbot, llama_S2_chatbot, gpt_S2_chatbot,
                                   vicuna_S3_chatbot, llama_S3_chatbot, gpt_S3_chatbot])

        # Event Handlers for Vicuna Chatbot POS/Chunk: one click handler per
        # strategy, each writing to its own chatbot.
        vicuna_targets = (
            (strategy1, vicuna_S1_chatbot),
            (strategy2, vicuna_S2_chatbot),
            (strategy3, vicuna_S3_chatbot),
        )
        for strategy_tag, chatbot in vicuna_targets:
            task_btn.click(vicuna_strategies_respond,
                           inputs=[strategy_tag, task, task_linguistic_entities, task_prompt, chatbot],
                           outputs=[task, task_prompt, chatbot])

        # Event Handlers for LLaMA Chatbot POS/Chunk
        llama_targets = (
            (strategy1, llama_S1_chatbot),
            (strategy2, llama_S2_chatbot),
            (strategy3, llama_S3_chatbot),
        )
        for strategy_tag, chatbot in llama_targets:
            task_btn.click(llama_strategies_respond,
                           inputs=[strategy_tag, task, task_linguistic_entities, task_prompt, chatbot],
                           outputs=[task, task_prompt, chatbot])

        # Event Handler for GPT 3.5 Chatbot POS/Chunk, user must submit api key before submitting the prompt
        # Will activate after getting API key
        # NOTE(review): before enabling, the lines below need fixing: the key
        # button reads the other tab's `ling_ents_apikey_input`, the inputs
        # omit `task_prompt`, every handler passes `strategy1`, and three
        # outputs are listed although gpt_strategies_respond returns two values.
        # task_apikey_btn.click(update_api_key, inputs=ling_ents_apikey_input)
        # task_btn.click(gpt_strategies_respond, inputs=[strategy1, task, task_linguistic_entities, gpt_S1_chatbot],
        #                outputs=[task, task_prompt, gpt_S1_chatbot])
        # task_btn.click(gpt_strategies_respond, inputs=[strategy1, task, task_linguistic_entities, gpt_S2_chatbot],
        #                outputs=[task, task_prompt, gpt_S2_chatbot])
        # task_btn.click(gpt_strategies_respond, inputs=[strategy1, task, task_linguistic_entities, gpt_S3_chatbot],
        #                outputs=[task, task_prompt, gpt_S3_chatbot])


def interface():
    """Build both tabs of the UI inside the currently open ``gr.Blocks``.

    Must be called within a ``with gr.Blocks(...)`` context. Creates the
    "Linguistic Entities" tab followed by the "POS/Chunking" tab and registers
    the Vicuna and LLaMA click handlers; the GPT handlers remain commented out.
    """
    # prompt = template_single.format(tab_name, textbox_prompt)
    _build_linguistic_entities_tab()
    _build_pos_chunking_tab()
# Assemble the app: a Soft-themed Blocks container holding the page title
# followed by the tabs created by interface().
demo = gr.Blocks(theme=gr.themes.Soft())
with demo:
    gr.Markdown("# LLM Evaluator With Linguistic Scrutiny")
    interface()

# Start the Gradio server.
demo.launch()