Ventsislav Muchinov committed on
Commit
cebcc81
1 Parent(s): 0c4b58a

Upload 2 files

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -13,8 +13,16 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
  model_id = "Qwen/Qwen2.5-14B-Instruct"
16
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=ACCESS_TOKEN)
17
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=ACCESS_TOKEN)
 
 
 
 
 
 
 
 
18
  tokenizer.use_default_system_prompt = False
19
 
20
 
@@ -26,7 +34,7 @@ def generate(
26
  temperature: float = 0.01,
27
  top_p: float = 0.01,
28
  top_k: int = 50,
29
- repetition_penalty: float = 1.2,
30
  ) -> Iterator[str]:
31
  conversation = []
32
  if system_prompt:
@@ -101,7 +109,7 @@ chat_interface = gr.Interface(
101
  minimum=1.0,
102
  maximum=2.0,
103
  step=0.05,
104
- value=1.2,
105
  ),
106
  ],
107
  title="Model testing",
 
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
14
 
15
  model_id = "Qwen/Qwen2.5-14B-Instruct"
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ model_id,
18
+ torch_dtype=torch.float16,
19
+ device_map="auto",
20
+ load_in_8bit=True, # Enable 8-bit quantization
21
+ use_xformers=True,
22
+ token=ACCESS_TOKEN)
23
+ tokenizer = AutoTokenizer.from_pretrained(
24
+ model_id,
25
+ token=ACCESS_TOKEN)
26
  tokenizer.use_default_system_prompt = False
27
 
28
 
 
34
  temperature: float = 0.01,
35
  top_p: float = 0.01,
36
  top_k: int = 50,
37
+ repetition_penalty: float = 1.0,
38
  ) -> Iterator[str]:
39
  conversation = []
40
  if system_prompt:
 
109
  minimum=1.0,
110
  maximum=2.0,
111
  step=0.05,
112
+ value=1.0,
113
  ),
114
  ],
115
  title="Model testing",