import streamlit as st
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GPTQConfig,
    pipeline,
)

MODEL_NAME_OR_PATH = "TheBloke/Unholy-v1-12L-13B-GPTQ"
MODEL_REVISION = "main"
DEFAULT_PROMPT = "Tell me about AI"

# Sampling settings shared by the direct generate() call and the pipeline, so
# the two code paths cannot drift apart.
SAMPLING_KWARGS = {
    "max_new_tokens": 512,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 40,
}

# Only the pipeline path applies a repetition penalty.
PIPELINE_REPETITION_PENALTY = 1.1


def build_prompt(instruction):
    """Wrap *instruction* in the instruction/response prompt template.

    Args:
        instruction: The user request to place under "### Instruction:".

    Returns:
        The full prompt text, ending with an open "### Response:" section
        for the model to complete.
    """
    return (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n\n"
        "### Response:\n\n"
    )


def load_model_and_tokenizer(
    model_name_or_path=MODEL_NAME_OR_PATH,
    revision=MODEL_REVISION,
):
    """Load the GPTQ-quantized causal LM and its tokenizer.

    Args:
        model_name_or_path: Hub repository id or local path of the model.
        revision: Repository revision (branch, tag or commit) of the model.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # The exllama kernels are switched off through the quantization config;
    # `disable_exllama` is not a model or tokenizer constructor argument.
    # NOTE(review): bits=4 is assumed to match the checkpoint on the chosen
    # revision; the checkpoint's own quantization settings are expected to
    # take precedence for a pre-quantized model -- confirm.
    quantization_config = GPTQConfig(bits=4, disable_exllama=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        trust_remote_code=False,
        revision=revision,
        quantization_config=quantization_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    return model, tokenizer


def main():
    """Run the demo: generate directly, then via a pipeline, and show JSON.

    Prints the output of a direct ``model.generate`` call and of a
    ``text-generation`` pipeline to stdout, then renders the pipeline result
    in the Streamlit page with ``st.json``.
    """
    # Cache the loaded weights across Streamlit reruns instead of reloading
    # the model every time the script is re-executed.
    cached_loader = st.cache_resource(load_model_and_tokenizer)
    model, tokenizer = cached_loader(MODEL_NAME_OR_PATH)

    prompt_template = build_prompt(DEFAULT_PROMPT)

    print("\n\n*** Generate:")
    encoded = tokenizer(prompt_template, return_tensors="pt")
    # Follow the device chosen by device_map="auto" rather than assuming CUDA.
    input_ids = encoded.input_ids.to(model.device)
    output = model.generate(inputs=input_ids, **SAMPLING_KWARGS)
    print(tokenizer.decode(output[0]))

    print("*** Pipeline:")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        repetition_penalty=PIPELINE_REPETITION_PENALTY,
        **SAMPLING_KWARGS,
    )

    # Sample once and reuse the result: a second call would cost another full
    # generation and, with sampling enabled, print and display different text.
    results = pipe(prompt_template)
    print(results[0]["generated_text"])
    st.json(results)


# Streamlit executes the script as "__main__", so the guard keeps both
# `streamlit run` and `python` working while making the helpers importable.
if __name__ == "__main__":
    main()
|