import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def generate_prompt(instruction, input=""):
    # Normalize line endings and collapse double newlines, since "\n\n" is the turn separator
    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    input = input.strip().replace('\r\n', '\n').replace('\n\n', '\n')
    if input:
        return f"""Instruction: {instruction}

Input: {input}

Response:"""
    else:
        return f"""User: hi

Lover: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.

User: {instruction}

Lover:"""
model_path = "models/rwkv-6-world-1b6/"  # Path to your local model directory

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    use_flash_attention_2=False  # Explicitly disable Flash Attention
).to(torch.float32)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    bos_token="</s>",
    eos_token="</s>",
    unk_token="<unk>",
    pad_token="<pad>",
    trust_remote_code=True,
    padding_side='left',
    clean_up_tokenization_spaces=False  # Or set to True if you prefer
)

print(tokenizer.special_tokens_map)
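Assuming the tokenizer accepts these overrides, the printed special_tokens_map should look roughly like this (illustrative, not captured from a run):

{'bos_token': '</s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>'}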
text = "Hi" | |
prompt = generate_prompt(text) | |
input_ids = tokenizer(prompt, return_tensors="pt").input_ids | |
# Generate text word by word with stop sequence | |
generated_text = "" | |
for i in range(333): # Generate up to 333 tokens | |
output = model.generate(input_ids, max_new_tokens=1, do_sample=True, temperature=1.0, top_p=0.3, top_k=0) | |
new_word = tokenizer.decode(output[0][-1:], skip_special_tokens=True) | |
print(new_word, end="", flush=True) # Print word-by-word | |
generated_text += new_word | |
input_ids = output # Update input_ids for next iteration | |
print() # Add a newline at the end | |
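Calling model.generate once per token re-processes the whole sequence on every iteration, so the loop above gets slower as the output grows. transformers ships a TextStreamer that produces the same token-by-token console output from a single generate call. A minimal sketch, assuming the same model, tokenizer, and prompt as above (a custom stop sequence would need a StoppingCriteria, which this sketch omits):

from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True)  # Print only the newly generated tokens
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output = model.generate(
    input_ids,
    max_new_tokens=333,
    do_sample=True,
    temperature=1.0,
    top_p=0.3,
    top_k=0,
    streamer=streamer,  # Decoded tokens are printed to stdout as they are produced
)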