import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
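# Streamlit demo: paraphrase informal English into an ornate "Style of
# Abraham Lincoln" register with a causal LM, and interactively inspect the
# model's next-token distribution at the end of the prompt.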
first = """informal english: corn fields are all across illinois, visible once you leave chicago.\nTranslated into the Style of Abraham Lincoln: corn fields ( permeate illinois / span the state of illinois / ( occupy / persist in ) all corners of illinois / line the horizon of illinois / envelop the landscape of illinois ), manifesting themselves visibly as one ventures beyond chicago.\n\ninformal english: """
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Cache the model so it loads only once per session. st.cache_resource is the
# modern replacement for the deprecated st.cache(allow_output_mutation=True).
@st.cache_resource
def get_model():
    # Active checkpoint. Alternatives tried previously, kept for reference:
    #   M4-ai/tau-1.8B, HuggingFaceTB/SmolLM-360M, facebook/opt-1.3b,
    #   TinyLlama/TinyLlama-1.1B-Chat-v1.0, tiiuae/falcon-rw-1b (needs trust_remote_code=True),
    #   BigSalmon/GPTNeo1.3BInformalToFormal, BigSalmon/MediumInformalToFormalLincoln,
    #   BigSalmon/InformalToFormalLincolnMedium, BigSalmon/PointsOneSent,
    #   BigSalmon/PointsToSentence, BigSalmon/AbstractTest, BigSalmon/AbstractGen,
    #   BigSalmon/DefinitionsSynonyms1, BigSalmon/DefinitionsSynonyms2,
    #   BigSalmon/InformalToFormalLincoln{21,31,35,36,37,38,41,43,45,49,51,55}, and
    #   BigSalmon/InformalToFormalLincoln{60,64,72,74,79,82,86,89,95,99,101,103,105,
    #   106,109,110,113,115,116,118,119,120,121}Paraphrase.
    tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b")
    model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b")
    model.to(device)
    model.eval()
    return model, tokenizer
model, tokenizer = get_model()
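# Few-shot prompt pre-loaded into the text area: five informal-English
# sentences, each paired with its "Style of Abraham Lincoln" rewrite, ending
# with an open "informal english:" slot for the user to complete.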
g = """informal english: garage band has made people who know nothing about music good at creating music.
Translated into the Style of Abraham Lincoln: garage band ( offers the uninitiated in music the ability to produce professional-quality compositions / catapults those for whom music is an uncharted art the ability to realize masterpieces / stimulates music novice's competency to yield sublime arrangements / begets individuals of rudimentary musical talent the proficiency to fashion elaborate suites ).
informal english: chrome extensions can make doing regular tasks much easier to get done.
Translated into the Style of Abraham Lincoln: chrome extensions ( yield the boon of time-saving convenience / ( expedite the ability to / unlock the means to more readily ) accomplish everyday tasks / turbocharges the velocity with which one can conduct their obligations ).
informal english: broadband is finally expanding to rural areas, a great development that will thrust them into modern life.
Translated into the Style of Abraham Lincoln: broadband is ( ( finally / at last / after years of delay ) arriving in remote locations / springing to life in far-flung outposts / inching into even the most backwater corners of the nation ) that will leap-frog them into the twenty-first century.
informal english: google translate has made talking to people who do not share your language easier.
Translated into the Style of Abraham Lincoln: google translate ( imparts communicability to individuals whose native tongue differs / mitigates the trials of communication across linguistic barriers / hastens the bridging of semantic boundaries / mollifies the complexity of multilingual communication / avails itself to the internationalization of discussion / flexes its muscles to abet intercultural conversation / calms the tides of linguistic divergence ).
informal english: corn fields are all across illinois, visible once you leave chicago.
Translated into the Style of Abraham Lincoln: corn fields ( permeate illinois / span the state of illinois / ( occupy / persist in ) all corners of illinois / line the horizon of illinois / envelop the landscape of illinois ), manifesting themselves visibly as one ventures beyond chicago.
informal english: """
number_of_outputs = st.sidebar.slider("Number of Outputs", 5, 100)
log_nums = st.sidebar.slider("How Many Log Outputs?", 50, 1000)
def BestProbs(prompt):
    # Show the model's top-10 next-token candidates for the prompt; for each
    # candidate, BestProbs2 shows the top-20 tokens that would follow it.
    prompt = prompt.strip()
    text = tokenizer.encode(prompt)
    myinput = torch.tensor([text]).to(device)
    with torch.no_grad():
        logits, past_key_values = model(myinput, past_key_values=None, return_dict=False)
    logits = logits[0, -1]
    best_logits, best_indices = logits.topk(10)
    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
    for i in best_words:
        print("_______")
        st.write(f"${i} $\n")
        f = f"${i} $\n"
        BestProbs2(prompt + i)
    # Return after the loop; returning inside it would stop after one word.
    return f
def BestProbs2(prompt):
    # Print and display the top-20 next-token candidates for the prompt.
    prompt = prompt.strip()
    text = tokenizer.encode(prompt)
    myinput = torch.tensor([text]).to(device)
    with torch.no_grad():
        logits, past_key_values = model(myinput, past_key_values=None, return_dict=False)
    logits = logits[0, -1]
    best_logits, best_indices = logits.topk(20)
    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
    for i in best_words:
        print(i)
        st.write(i)
def LogProbs(prompt):
    # Build a 10x20 table: each column is headed by one of the top-10 next
    # tokens, and lists the top-20 tokens that would follow prompt + header.
    col1 = []
    col2 = []
    prompt = prompt.strip()
    text = tokenizer.encode(prompt)
    myinput = torch.tensor([text]).to(device)
    with torch.no_grad():
        logits, past_key_values = model(myinput, past_key_values=None, return_dict=False)
    logits = logits[0, -1]
    best_logits, best_indices = logits.topk(10)
    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
    for i in best_words:
        print("_______")
        col1.append(i)
        # Score the prompt extended by this candidate. (The original rebound
        # `prompt` here, so each iteration silently accumulated all previous
        # candidates; a local variable keeps every column relative to the
        # same base prompt.)
        extended = (prompt + i).strip()
        myinput2 = torch.tensor([tokenizer.encode(extended)]).to(device)
        with torch.no_grad():
            logits2, past_key_values2 = model(myinput2, past_key_values=None, return_dict=False)
        logits2 = logits2[0, -1]
        best_logits2, best_indices2 = logits2.topk(20)
        col2.extend(tokenizer.decode([idx.item()]) for idx in best_indices2)
    # One column of 20 continuations per candidate; note that duplicate
    # tokens in col1 would collide as dict keys.
    d = {col1[i]: col2[i * 20:(i + 1) * 20] for i in range(10)}
    df = pd.DataFrame(data=d)
    print(df)
    st.write(df)
    return df
def BestProbs5(prompt):
    # For each of the top-N next tokens (N set by the sidebar slider), append
    # it to the prompt and sample a few short continuations.
    prompt = prompt.strip()
    text = tokenizer.encode(prompt)
    myinput = torch.tensor([text]).to(device)
    with torch.no_grad():
        logits, past_key_values = model(myinput, past_key_values=None, return_dict=False)
    logits = logits[0, -1]
    best_logits, best_indices = logits.topk(number_of_outputs)
    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
    for i in best_words:
        print("\n")
        candidate = prompt + i  # renamed from `g` to avoid shadowing the global prompt
        st.write(candidate)
        l = run_generate(candidate, "hey")
        st.write(l)
def run_generate(text, bad_words):
    # Sample three 5-token continuations of `text`, banning the given words
    # (plus two hard-coded token ids) from appearing in the output.
    yo = []
    input_ids = tokenizer.encode(text, return_tensors='pt').to(device)
    res = input_ids.shape[1]
    bad_word_ids = [[7829], [40940]]  # hard-coded banned token ids
    for bad_word in bad_words.split():
        ids = tokenizer(" " + bad_word).input_ids  # leading space matches the mid-sentence form
        bad_word_ids.append(ids)
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        max_length=res + 5,
        min_length=res + 5,
        top_k=50,
        temperature=1.0,
        num_return_sequences=3,
        bad_words_ids=bad_word_ids,
    )
    for i in range(3):
        e = tokenizer.decode(sample_outputs[i])
        e = e.replace(text, "")  # keep only the newly generated tokens
        yo.append(e)
    print(yo)
    return yo
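# UI: a single form with four actions. Submit lists the top next tokens,
# Fast Forward renders the LogProbs table, Fast Forward 2.0 runs BestProbs,
# and Get Top samples continuations via BestProbs5.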
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence', value=g, height=500)
    submit_button = st.form_submit_button(label='Submit')
    submit_button2 = st.form_submit_button(label='Fast Forward')
    submit_button3 = st.form_submit_button(label='Fast Forward 2.0')
    submit_button4 = st.form_submit_button(label='Get Top')
    if submit_button:
        # List the top `log_nums` next-token candidates for the prompt.
        with torch.no_grad():
            text = tokenizer.encode(prompt)
            myinput = torch.tensor([text]).to(device)
            logits, past_key_values = model(myinput, past_key_values=None, return_dict=False)
            logits = logits[0, -1]
            probabilities = torch.nn.functional.softmax(logits, dim=-1)
            best_logits, best_indices = logits.topk(log_nums)
            best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
            best_probabilities = probabilities[best_indices].tolist()  # computed for reference; not displayed
            st.write(best_words)
    if submit_button2:
        print("----")
        st.write("___")
        m = LogProbs(prompt)
        st.write("___")
        st.write(m)
        st.write("___")
    if submit_button3:
        print("----")
        st.write("___")
        BestProbs(prompt)  # was st.write(BestProbs), which displayed the function object
    if submit_button4:
        BestProbs5(prompt)