# Install the library first:
#   pip install transformers

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("OpenAssistant/falcon-7b-sft-mix-2000")
model = AutoModelForCausalLM.from_pretrained("OpenAssistant/falcon-7b-sft-mix-2000")

# Use the GPU if one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Simple interactive chat loop; type "exit" to quit
while True:
    user_input = input("User: ")
    if user_input.lower() == "exit":
        break

    input_text = f"User: {user_input}\nAssistant: "
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=100,                    # cap the length of the reply, not the whole sequence
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,   # avoid the missing-pad-token warning
        )

    # Decode only the newly generated tokens so the prompt is not echoed back
    response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print("Assistant:", response)
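A practical caveat: at full float32 precision the 7B checkpoint needs roughly 28 GB of memory, which exceeds most consumer GPUs. A minimal sketch of a lower-precision load is shown below; it assumes the accelerate package is installed so that device_map="auto" can place layers automatically, and whether additional flags are needed may depend on your transformers version.

# Optional alternative load step: half precision plus automatic device placement
# (assumes `pip install accelerate` in addition to transformers)
from transformers import AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained(
    "OpenAssistant/falcon-7b-sft-mix-2000",
    torch_dtype=torch.bfloat16,   # roughly halves memory versus float32
    device_map="auto",            # lets accelerate spread layers across available devices
)

If you load the model this way, drop the explicit model.to(device) call from the loop above, since accelerate already handles device placement.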