|
import torch |
|
from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM |
|
|
|
model_path = "Crystalcareai/GemMoE-Medium-v0.4" |
|
|
|
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_path, |
|
device_map="auto", |
|
low_cpu_mem_usage=True, |
|
torch_dtype=torch.float16, |
|
attn_implementation="flash_attention_2" |
|
trust_remote_code=True, |
|
) |
|
|
|
tokenizer = AutoTokenizer.from_pretrained(model_path) |
|
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) |
|
|
|
|
|
prompt_template = "[INST] {prompt} [/INST]" |
|
|
|
prompt = "You're standing on the surface of the Earth. "\ |
|
"You walk one mile south, one mile west and one mile north. "\ |
|
"You end up exactly where you started. Where are you?" |
|
|
|
tokens = tokenizer( |
|
prompt_template.format(prompt=prompt), |
|
return_tensors='pt' |
|
).input_ids.cuda() |
|
|
|
|
|
generation_output = model.generate( |
|
tokens, |
|
streamer=streamer, |
|
max_new_tokens=512 |
|
) |