Spaces:

BigSalmon
/

GPT2

Runtime error

App Files Files Community

GPT2 / app.py

BigSalmon

Update app.py

2c47d16 over 1 year ago

raw

history blame contribute delete

15.8 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import os
	import torch
	import torch.nn as nn
	from transformers.activations import get_activation
	from transformers import AutoTokenizer, AutoModelForCausalLM


	# Page heading; it also carries the URL of the model card that lists the prompt outlines.
	st.title('GPT2: To see all prompt outlines: https://huggingface.co/BigSalmon/InformalToFormalLincoln64Paraphrase')

	# Use the GPU when PyTorch reports one is available, otherwise the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	@st.cache(allow_output_mutation=True)
	def get_model(model_name="BigSalmon/AbstractTest"):
	    """Load the causal language model and tokenizer for ``model_name``.

	    The function is wrapped in ``st.cache(allow_output_mutation=True)``.

	    Args:
	        model_name: Hugging Face Hub checkpoint id passed to both
	            ``AutoTokenizer.from_pretrained`` and
	            ``AutoModelForCausalLM.from_pretrained``. Defaults to the
	            checkpoint this app was last configured with.

	    Returns:
	        A ``(model, tokenizer)`` tuple, in that order.
	    """
	    # Checkpoints previously used with this app (all under "BigSalmon/");
	    # pass one as ``model_name`` instead of editing this function:
	    #   InstructGPT2Large, TruncatedLLamaGPT2Large, GPTNeo1.3BInformalToFormal,
	    #   MediumInformalToFormalLincoln, PointsOneSent, PointsToSentence,
	    #   InformalToFormalLincoln{99,95,93,91,90,88,86,82,79,74,72,64,60}Paraphrase,
	    #   InformalToFormalLincoln{55,51,49,45,43,41,38,37,36,35,31,21}
	    # Using a single name for both calls keeps tokenizer and model in sync.
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForCausalLM.from_pretrained(model_name)
	    return model, tokenizer

	# Load the model and tokenizer once at module level; the functions below use these globals.
	model, tokenizer = get_model()

	# Default contents of the prompt text area: five worked
	# "informal english" / "Translated into the Style of Abraham Lincoln" examples
	# followed by an open "informal english:" slot.
	g = """informal english: garage band has made people who know nothing about music good at creating music.
	Translated into the Style of Abraham Lincoln: garage band ( offers the uninitiated in music the ability to produce professional-quality compositions / catapults those for whom music is an uncharted art the ability the realize masterpieces / stimulates music novice's competency to yield sublime arrangements / begets individuals of rudimentary musical talent the proficiency to fashion elaborate suites ).

	informal english: chrome extensions can make doing regular tasks much easier to get done.
	Translated into the Style of Abraham Lincoln: chrome extensions ( yield the boon of time-saving convenience / ( expedite the ability to / unlock the means to more readily ) accomplish everyday tasks / turbocharges the velocity with which one can conduct their obligations ).

	informal english: broadband is finally expanding to rural areas, a great development that will thrust them into modern life.
	Translated into the Style of Abraham Lincoln: broadband is ( ( finally / at last / after years of delay ) arriving in remote locations / springing to life in far-flung outposts / inching into even the most backwater corners of the nation ) that will leap-frog them into the twenty-first century.

	informal english: google translate has made talking to people who do not share your language easier.
	Translated into the Style of Abraham Lincoln: google translate ( imparts communicability to individuals whose native tongue differs / mitigates the trials of communication across linguistic barriers / hastens the bridging of semantic boundaries / mollifies the complexity of multilingual communication / avails itself to the internationalization of discussion / flexes its muscles to abet intercultural conversation / calms the tides of linguistic divergence ).

	informal english: corn fields are all across illinois, visible once you leave chicago.
	Translated into the Style of Abraham Lincoln: corn fields ( permeate illinois / span the state of illinois / ( occupy / persist in ) all corners of illinois / line the horizon of illinois / envelop the landscape of illinois ), manifesting themselves visibly as one ventures beyond chicago.

	informal english: """

	# Sidebar slider (5 to 100): how many next-token candidates BestProbs5 expands.
	number_of_outputs = st.sidebar.slider("Number of Outputs", 5, 100)
	# Sidebar slider (50 to 1000): how many next-token candidates the "Submit" button lists.
	log_nums = st.sidebar.slider("How Many Log Outputs?", 50, 1000)

	def BestProbs(prompt):
	    """Show the 10 most likely next tokens and, for each, its likely follow-ups.

	    Each candidate is written to the page as ``$<token> $`` and then
	    ``BestProbs2`` is called on ``prompt + token`` to list what could come
	    after it.

	    Args:
	        prompt: Text to continue; surrounding whitespace is stripped first.

	    Returns:
	        The formatted string written for the last (10th) candidate.
	    """
	    prompt = prompt.strip()
	    # Build the input on the same device as the model so the call also works on GPU.
	    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits = model(input_ids, past_key_values=None, return_dict=False)[0]
	    # Scores for the position right after the prompt; topk ranks them directly.
	    _, best_indices = logits[0, -1].topk(10)
	    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
	    for word in best_words:
	        print("_______")
	        f = f"${word} $\n"
	        st.write(f)
	        BestProbs2(prompt + word)
	    return f

	def BestProbs2(prompt):
	    """Print and display the 20 most likely next tokens after ``prompt``.

	    Args:
	        prompt: Text to continue; surrounding whitespace is stripped first.

	    Returns:
	        None. Each candidate token is sent to stdout and to the Streamlit page.
	    """
	    prompt = prompt.strip()
	    # Build the input on the same device as the model so the call also works on GPU.
	    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits = model(input_ids, past_key_values=None, return_dict=False)[0]
	    # Scores for the position right after the prompt; topk ranks them directly.
	    _, best_indices = logits[0, -1].topk(20)
	    for idx in best_indices:
	        word = tokenizer.decode([idx.item()])
	        print(word)
	        st.write(word)

	def _top_next_words(prompt, k):
	    """Return the ``k`` most likely next tokens after ``prompt`` as decoded strings."""
	    input_ids = torch.tensor([tokenizer.encode(prompt.strip())]).to(model.device)
	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits = model(input_ids, past_key_values=None, return_dict=False)[0]
	    _, best_indices = logits[0, -1].topk(k)
	    return [tokenizer.decode([idx.item()]) for idx in best_indices]


	def LogProbs(prompt):
	    """Tabulate two-step continuations of ``prompt``.

	    The 10 most likely next tokens become the column headers; each column
	    holds the 20 most likely tokens that follow ``prompt`` + that header.

	    Args:
	        prompt: Text to continue; surrounding whitespace is stripped first.

	    Returns:
	        A ``pandas.DataFrame`` with one column per first-step token. The
	        table is also printed and written to the Streamlit page.
	    """
	    prompt = prompt.strip()
	    columns = {}
	    for word in _top_next_words(prompt, 10):
	        print("_______")
	        # Always extend the ORIGINAL prompt: the candidates are alternatives,
	        # not a sequence, so they must not accumulate from one column to the next.
	        columns[word] = _top_next_words(prompt + word, 20)
	    df = pd.DataFrame(data=columns)
	    print(df)
	    st.write(df)
	    return df

	def BestProbs5(prompt):
	    """Expand each of the top next tokens with sampled continuations.

	    Takes the ``number_of_outputs`` (sidebar setting) most likely next
	    tokens, and for each one writes ``prompt + token`` to the page followed
	    by the list returned by ``run_generate`` for that text.

	    Args:
	        prompt: Text to continue; surrounding whitespace is stripped first.

	    Returns:
	        None. All output goes to the Streamlit page (and blank lines to stdout).
	    """
	    prompt = prompt.strip()
	    # Build the input on the same device as the model so the call also works on GPU.
	    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        logits = model(input_ids, past_key_values=None, return_dict=False)[0]
	    _, best_indices = logits[0, -1].topk(number_of_outputs)
	    for idx in best_indices:
	        print("\n")
	        extended = prompt + tokenizer.decode([idx.item()])
	        st.write(extended)
	        # "hey" is the only extra banned word this caller passes.
	        continuations = run_generate(extended, "hey")
	        st.write(continuations)

	def run_generate(text, bad_words):
	    """Sample three 5-token continuations of ``text``.

	    Args:
	        text: Prompt to continue.
	        bad_words: Whitespace-separated words that must not be generated.
	            Each is tokenized with a leading space before being banned.

	    Returns:
	        A list with the decoded continuation (prompt excluded) of each
	        sampled sequence. The list is also printed.
	    """
	    input_ids = tokenizer.encode(text, return_tensors='pt').to(model.device)
	    # Tokenize once; the prompt length is the width of the encoded batch.
	    prompt_length = input_ids.shape[1]
	    # Token ids 7829 and 40940 are always banned.
	    # NOTE(review): which tokens these ids stand for is not visible here — confirm.
	    bad_word_ids = [[7829], [40940]]
	    bad_word_ids.extend(
	        tokenizer(" " + bad_word).input_ids for bad_word in bad_words.split()
	    )
	    sample_outputs = model.generate(
	        input_ids,
	        do_sample=True,
	        max_length=prompt_length + 5,
	        min_length=prompt_length + 5,
	        top_k=50,
	        temperature=1.0,
	        num_return_sequences=3,
	        bad_words_ids=bad_word_ids,
	    )
	    # Drop the prompt by token position rather than by string replacement:
	    # decoding does not always reproduce the prompt text exactly, and
	    # str.replace would also remove any repeated occurrence of it.
	    yo = [tokenizer.decode(sequence[prompt_length:]) for sequence in sample_outputs]
	    print(yo)
	    return yo

	# Main form: one prompt text area and four submit buttons, each running a
	# different analysis of the prompt's next-token distribution.
	with st.form(key='my_form'):
	    prompt = st.text_area(label='Enter sentence', value=g, height=500)
	    submit_button = st.form_submit_button(label='Submit')
	    submit_button2 = st.form_submit_button(label='Fast Forward')
	    submit_button3 = st.form_submit_button(label='Fast Forward 2.0')
	    submit_button4 = st.form_submit_button(label='Get Top')

	    if submit_button:
	        # "Submit": list the `log_nums` most likely next tokens.
	        with torch.no_grad():
	            # Place the input on the model's own device; the model is never
	            # moved explicitly, so using any other device would mismatch.
	            input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
	            logits = model(input_ids, past_key_values=None, return_dict=False)[0]
	            _, best_indices = logits[0, -1].topk(log_nums)
	            best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
	            st.write(best_words)
	    if submit_button2:
	        # "Fast Forward": two-step continuation table.
	        print("----")
	        st.write("___")
	        m = LogProbs(prompt)
	        st.write("___")
	        st.write(m)
	        st.write("___")
	    if submit_button3:
	        # "Fast Forward 2.0": call BestProbs on the prompt (the function
	        # object itself was being written to the page before).
	        print("----")
	        st.write("___")
	        st.write(BestProbs(prompt))
	    if submit_button4:
	        # "Get Top": sampled continuations for each top candidate.
	        BestProbs5(prompt)