Crystalcareai
/

Quiet-Star-Custom

Text Generation

Model card Files Files and versions Community

Crystalcareai commited on Mar 26

Commit

705c81f

•

1 Parent(s): dc09b15

Upload inference.py

Files changed (1) hide show

inference.py +36 -0

inference.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import torch
+from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
+model_path = "Crystalcareai/GemMoE-Medium-v0.4"
+# Load model
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    device_map="auto",
+    low_cpu_mem_usage=True,
+    torch_dtype=torch.float16,
+    attn_implementation="flash_attention_2"
+    trust_remote_code=True,
+)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+# Convert prompt to tokens
+prompt_template = "[INST] {prompt} [/INST]"
+prompt = "You're standing on the surface of the Earth. "\
+        "You walk one mile south, one mile west and one mile north. "\
+        "You end up exactly where you started. Where are you?"
+tokens = tokenizer(
+    prompt_template.format(prompt=prompt),
+    return_tensors='pt'
+).input_ids.cuda()
+# Generate output
+generation_output = model.generate(
+    tokens,
+    streamer=streamer,
+    max_new_tokens=512
+)