Spaces:

Somunia
/

cpu-casuallm

Running

Somunia committed on Sep 3

Commit

8bc6b74

•

1 Parent(s): e0128dd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,20 +16,23 @@ Response:"""
     else:
         return f"""User: hi
-Lover: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
 User: {instruction}
-Lover:"""
 model_path = "models/rwkv-6-world-1b6/" # Path to your local model directory
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     trust_remote_code=True,
-    use_flash_attention_2=False
 ).to(torch.float32)
 # Create a custom tokenizer (make sure to download vocab.json)
 tokenizer = AutoTokenizer.from_pretrained(
     model_path,
@@ -72,7 +75,7 @@ iface = gr.Interface(
 )
 # For local testing:
-iface.launch(share=True)
 # deploy()

     else:
         return f"""User: hi
+Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
 User: {instruction}
+Assistant:"""
 model_path = "models/rwkv-6-world-1b6/" # Path to your local model directory
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     trust_remote_code=True,
+    # use_flash_attention_2=False
 ).to(torch.float32)
+model = model.quantize(8)  # Quantize to int8 (experiment with different values)
+model = model.to("cpu")
 # Create a custom tokenizer (make sure to download vocab.json)
 tokenizer = AutoTokenizer.from_pretrained(
     model_path,
 )
 # For local testing:
+iface.launch(share=False)
 # deploy()