Spaces:
Sleeping
Sleeping
Ventsislav Muchinov
committed on
Commit
•
cebcc81
1
Parent(s):
0c4b58a
Upload 2 files
Browse files
app.py
CHANGED
@@ -13,8 +13,16 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
13 |
ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
|
14 |
|
15 |
model_id = "Qwen/Qwen2.5-14B-Instruct"
|
16 |
-
model = AutoModelForCausalLM.from_pretrained(
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
tokenizer.use_default_system_prompt = False
|
19 |
|
20 |
|
@@ -26,7 +34,7 @@ def generate(
|
|
26 |
temperature: float = 0.01,
|
27 |
top_p: float = 0.01,
|
28 |
top_k: int = 50,
|
29 |
-
repetition_penalty: float = 1.
|
30 |
) -> Iterator[str]:
|
31 |
conversation = []
|
32 |
if system_prompt:
|
@@ -101,7 +109,7 @@ chat_interface = gr.Interface(
|
|
101 |
minimum=1.0,
|
102 |
maximum=2.0,
|
103 |
step=0.05,
|
104 |
-
value=1.
|
105 |
),
|
106 |
],
|
107 |
title="Model testing",
|
|
|
13 |
ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
|
14 |
|
15 |
model_id = "Qwen/Qwen2.5-14B-Instruct"
|
16 |
+
model = AutoModelForCausalLM.from_pretrained(
|
17 |
+
model_id,
|
18 |
+
torch_dtype=torch.float16,
|
19 |
+
device_map="auto",
|
20 |
+
load_in_8bit=True, # Enable 8-bit quantization
|
21 |
+
use_xformers=True,
|
22 |
+
token=ACCESS_TOKEN)
|
23 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
24 |
+
model_id,
|
25 |
+
token=ACCESS_TOKEN)
|
26 |
tokenizer.use_default_system_prompt = False
|
27 |
|
28 |
|
|
|
34 |
temperature: float = 0.01,
|
35 |
top_p: float = 0.01,
|
36 |
top_k: int = 50,
|
37 |
+
repetition_penalty: float = 1.0,
|
38 |
) -> Iterator[str]:
|
39 |
conversation = []
|
40 |
if system_prompt:
|
|
|
109 |
minimum=1.0,
|
110 |
maximum=2.0,
|
111 |
step=0.05,
|
112 |
+
value=1.0,
|
113 |
),
|
114 |
],
|
115 |
title="Model testing",
|