Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ if not torch.cuda.is_available():
19 |       raise ValueError("Running on CPU 🥶 This demo does not work on CPU.")
20 |
21 |   model_id = "neuralmagic/OpenHermes-2.5-Mistral-7B-pruned50"
22 | - model = LLM(model_id, max_model_len=MAX_INPUT_TOKEN_LENGTH)
23 |   tokenizer = AutoTokenizer.from_pretrained(model_id)
24 |   tokenizer.use_default_system_prompt = False
25 |
19 |       raise ValueError("Running on CPU 🥶 This demo does not work on CPU.")
20 |
21 |   model_id = "neuralmagic/OpenHermes-2.5-Mistral-7B-pruned50"
22 | + model = LLM(model_id, sparsity="sparse_w16a16", max_model_len=MAX_INPUT_TOKEN_LENGTH)
23 |   tokenizer = AutoTokenizer.from_pretrained(model_id)
24 |   tokenizer.use_default_system_prompt = False
25 |