from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the tokenizer and model from the current directory
tokenizer = AutoTokenizer.from_pretrained(".", use_auth_token=None)
model = AutoModelForCausalLM.from_pretrained(".", use_auth_token=None)

# Use the GPU if one is available, otherwise fall back to the CPU,
# and move the model to that device so it matches the input tensors
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Example user query
text_input = "How is QoS applied on routers?"

# Build the prompt using the model's chat-template markers
p = """
<|system|>
You are a helpful assistant.<|end|>
<|user|>""" + text_input + """<|end|>
<|assistant|>
"""

# Tokenize the prompt and move the input tensors to the same device as the model
inputs = tokenizer(p, return_tensors="pt")
inputs = inputs.to(device)

print("User Query: " + text_input)

# Generate a response (max_length counts the prompt tokens as well as the new ones)
outputs = model.generate(**inputs, max_length=2000, num_return_sequences=1)

print("Model Response: ")

# Decode each returned sequence and print it
for output in outputs:
    generated_text = tokenizer.decode(output, skip_special_tokens=True)
    print(generated_text)
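
# Optional sketch: generate() returns the prompt tokens followed by the newly
# generated tokens, so the decoded text above repeats the prompt. One common
# way to show only the model's reply is to skip the prompt tokens before
# decoding. The names prompt_length and reply below are illustrative, not part
# of the original script.
prompt_length = inputs["input_ids"].shape[-1]
reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print("Reply only: " + reply)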