Spaces:
Sleeping
Sleeping
import os | |
import sys | |
import json | |
import time | |
import openai | |
import pickle | |
import argparse | |
import requests | |
from tqdm import tqdm | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer | |
from fastchat.model import load_model, get_conversation_template, add_model_args | |
from nltk.tag.mapping import _UNIVERSAL_TAGS | |
import gradio as gr | |
from transformers import pipeline | |
# Root Gradio container; the layout is attached to it further down the file.
demo = gr.Blocks()

# NLTK's universal tag set with its final entry renamed to 'PUNC'.
uni_tags = list(_UNIVERSAL_TAGS)[:-1] + ['PUNC']

bio_tags = ['B', 'I', 'O']
chunk_tags = [
    'ADJP', 'ADVP', 'CONJP', 'INTJ', 'LST', 'NP',
    'O', 'PP', 'PRT', 'SBAR', 'UCP', 'VP',
]
syntags = [
    'NP', 'S', 'VP', 'ADJP', 'ADVP', 'SBAR', 'TOP', 'PP', 'POS', 'NAC',
    "''", 'SINV', 'PRN', 'QP', 'WHNP', 'RB', 'FRAG', 'WHADVP', 'NX', 'PRT',
    'VBZ', 'VBP', 'MD', 'NN', 'WHPP', 'SQ', 'SBARQ', 'LST', 'INTJ', 'X',
    'UCP', 'CONJP', 'NNP', 'CD', 'JJ', 'VBD', 'WHADJP', 'PRP', 'RRC', 'NNS',
    'SYM', 'CC',
]

# Placeholder key (a single space).
openai.api_key = " "

# determinant vs. determiner
# https://wikidiff.com/determiner/determinant

# Human-readable names for the universal tags, in the same order as uni_tags.
ents_prompt_uni_tags = [
    'Verb',
    'Noun',
    'Pronoun',
    'Adjective',
    'Adverb',
    'Preposition and Postposition',
    'Coordinating Conjunction',
    'Determiner',
    'Cardinal Number',
    'Particles or other function words',
    'Words that cannot be assigned a POS tag',
    'Punctuation',
]

# Final label inventory: the universal tags followed by the phrase-, clause-
# and T-unit-level categories. The seven word-level categories (NN, VB, JJ,
# RB, IN, CC, DT) are intentionally left out because the universal tags take
# their place; 'Cardinal Number' and 'Fragment T-unit' are not part of the
# category list either.
ents = uni_tags + [
    'NP', 'VP', 'ADJP', 'ADVP', 'PP', 'CONJP', 'CP', 'QP', 'CN',
    'C', 'DC', 'FC', 'T', 'CT',
]
ents_prompt = ents_prompt_uni_tags + [
    'Noun Phrase',
    'Verb Phrase',
    'Adjective Phrase',
    'Adverb Phrase',
    'Preposition Phrase',
    'Conjunction Phrase',
    'Coordinate Phrase',
    'Quantitave Phrase',
    'Complex Nominal',
    'Clause',
    'Dependent Clause',
    'Fragment Clause',
    'T-unit',
    'Complex T-unit',
]

# Echo every tag next to its readable name at start-up.
for tag, description in zip(ents, ents_prompt):
    print(tag, description)
# Model key -> checkpoint name/path handed to the model loader.
model_mapping = {
    'gpt3.5': 'gpt2',
    #'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
    #'llama-7b': './llama/hf/7B',
}

# One integer index per line. Blank lines are skipped so that stray empty
# lines do not crash int(); int() itself tolerates surrounding whitespace.
with open('sample_uniform_1k_2.txt', 'r', encoding='utf-8') as f:
    selected_idx = [int(line) for line in f if line.strip()]

# One JSON document per line (JSON Lines); blank lines are skipped.
with open('ptb.jsonl', 'r', encoding='utf-8') as f:
    ptb = [json.loads(line) for line in f if line.strip()]
## Prompt 1
# Question-style prompts. `template_all` takes the sentence as its only
# placeholder; `template_single` takes a category name and then the sentence.
template_all = '''Please output the <Noun, Verb, Adjective, Adverb, Preposition/Subord, Coordinating Conjunction, Cardinal Number, Determiner, Noun Phrase, Verb Phrase, Adjective Phrase, Adverb Phrase, Preposition Phrase, Conjunction Phrase, Coordinate Phrase, Quantitave Phrase, Complex Nominal, Clause, Dependent Clause, Fragment Clause, T-unit, Complex T-unit, Fragment T-unit> in the following sentence without any additional text in json format: "{}"'''
template_single = '''Please output any <{}> in the following sentence one per line without any additional text: "{}"'''

## Prompt 2
# Instruction-style prompts; each takes the sentence as its only placeholder.
# The wording (including spelling) is kept exactly as it was, since this text
# is what gets sent to the model.
prompt2_pos = '''Please pos tag the following sentence using Universal POS tag set without generating any additional text: {}'''
prompt2_parse = '''Generate textual representation of the constituency parse tree of the following sentence using Penn TreeBank tag set without outputing any additional text: {}'''
# An earlier chunking prompt ("Please do sentence chunking ... as in CoNLL 2000
# shared task ...") used to be assigned to this same name and was immediately
# overwritten; only the effective definition is kept.
prompt2_chunk = '''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {}'''

## Prompt 3
# Demonstration texts, loaded verbatim from disk.
with open('demonstration_3_42_pos.txt', 'r', encoding='utf-8') as f:
    demon_pos = f.read()
with open('demonstration_3_42_chunk.txt', 'r', encoding='utf-8') as f:
    demon_chunk = f.read()
with open('demonstration_3_42_parse.txt', 'r', encoding='utf-8') as f:
    demon_parse = f.read()
# Soft theme object.
# NOTE(review): `demo` is created earlier without a theme argument and nothing
# visible in this file applies this object -- confirm whether it should be
# passed to gr.Blocks(theme=...).
theme = gr.themes.Soft()

# Local Hugging Face pipeline backing the 'gpt3.5' entry of model_mapping (no
# remote request is made here). GPT-2 is a decoder-only causal language model,
# so it is loaded under the "text-generation" task; "text2text-generation" is
# meant for encoder-decoder models. Both tasks return a list of dicts with a
# 'generated_text' key, so callers are unaffected.
# NOTE(review): no generation length is configured here; confirm the default
# limits are large enough for the longer prompts.
gpt_pipeline = pipeline(task="text-generation", model="gpt2")
#vicuna7b_pipeline = pipeline(task="text-generation", model="lmsys/vicuna-7b-v1.3")
#llama7b_pipeline = pipeline(task="text-generation", model="./llama/hf/7B")

# Dropdown options for model and task
model_options = list(model_mapping.keys())
task_options = ['POS', 'Chunking']  # remove parsing
# Run the three prompting strategies for the selected tab on the given text.
def process_text(tab, text):
    """Generate one output per prompting strategy for *text*.

    Args:
        tab: Identifier of the active tab, either ``'POS Tab'`` or
            ``'Chunk Tab'``.
        text: Sentence typed by the user.

    Returns:
        A 3-tuple of strings ``(strategy1, strategy2, strategy3)``, each being
        the ``'generated_text'`` of the first result returned by
        ``gpt_pipeline`` for the corresponding prompt.

    Raises:
        ValueError: If *tab* is not one of the supported identifiers. (The
            previous implementation fell through and returned ``None``, which
            cannot be unpacked into three outputs.)
    """
    # Validate the tab first so an unknown value fails fast, before any
    # prompt is built or the model is called.
    if tab == 'POS Tab':
        instruction_template, demonstration = prompt2_pos, demon_pos
    elif tab == 'Chunk Tab':
        instruction_template, demonstration = prompt2_chunk, demon_chunk
    else:
        raise ValueError(
            f"Unsupported tab {tab!r}; expected 'POS Tab' or 'Chunk Tab'"
        )

    prompts = (
        # Strategy 1: the same all-categories question is used for both tabs.
        template_all.format(text),
        # Strategy 2: task-specific instruction.
        instruction_template.format(text),
        # Strategy 3: demonstration text sent as-is.
        # NOTE(review): `text` is not inserted into this prompt -- confirm
        # whether the sentence should be appended to the demonstrations.
        demonstration,
    )
    return tuple(gpt_pipeline(p)[0]['generated_text'] for p in prompts)
# Gradio interface
def _build_strategy_tab(tab_label, tab_id):
    """Create one results tab and wire its Send button to ``process_text``.

    The tab contains a header row with the three model names, one row of three
    textboxes per prompting strategy, and a prompt box with a Send button.
    Must be called inside an active ``gr.Blocks`` / ``gr.Tabs`` context.

    Args:
        tab_label: Title shown on the tab.
        tab_id: Tab id; it is also the ``tab`` value passed to
            ``process_text`` when the button is clicked.

    Returns:
        The Send button created for this tab.
    """
    with gr.TabItem(tab_label, id=tab_id):
        with gr.Row():
            gr.Markdown("<center>Vicuna 7b</center>")
            gr.Markdown("<center> LLaMA-7b </center>")
            gr.Markdown("<center> GPT 3.5 </center>")

        # One row per strategy; only the first-column box of each row is kept
        # because it is the one that receives results.
        first_column_outputs = []
        for strategy_label in (
            "Strategy 1 QA",
            "Strategy 2 Instruction",
            "Strategy 3 Structured Prompting",
        ):
            with gr.Row():
                first_column_outputs.append(gr.Textbox(label=strategy_label))
                gr.Textbox(label=".")
                gr.Textbox(label=".")

        with gr.Row():
            prompt_box = gr.Textbox(show_label=False, placeholder="Enter prompt")
            send_button = gr.Button("Send", scale=0)

    # Event inputs/outputs must be Gradio components, so the constant tab id
    # is bound through a closure instead of being listed in `inputs`. Each tab
    # uses its own prompt box and its own three output boxes (strategy 1, 2
    # and 3, in the order process_text returns them).
    # NOTE(review): results go to the first column, as in the original wiring,
    # which sits under the "Vicuna 7b" header -- confirm this is the intended
    # column for the model actually being called.
    send_button.click(
        lambda text: process_text(tab_id, text),
        inputs=[prompt_box],
        outputs=first_column_outputs,
    )
    return send_button


with demo:
    gr.Markdown("# LLM Evaluator With Linguistic Scrutiny")
    with gr.Tabs():
        send_button_POS = _build_strategy_tab("POS", "POS Tab")
        send_button_Chunk = _build_strategy_tab("Chunking", "Chunk Tab")

demo.launch()