jerukperas committed on
Commit
a9e9978
1 Parent(s): 5df3b0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,15 +1,17 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
3
 
4
- print("Downloading model")
5
  llm = Llama.from_pretrained(
6
- repo_id="bartowski/gemma-2-2b-it-abliterated-GGUF",
7
- filename="gemma-2-2b-it-abliterated-IQ4_XS.gguf",
8
  numa=True,
9
- n_ctx=4096,
 
 
 
10
  )
11
 
12
-
13
  def respond(prompt: str):
14
  stream = llm.create_chat_completion(stream=True, messages=[{"role": "user", "content": prompt}])
15
 
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
 
4
+
5
  llm = Llama.from_pretrained(
6
+ repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
7
+ filename="Phi-3.5-mini-instruct-Q4_K_M.gguf",
8
  numa=True,
9
+ # flash_attn=True,
10
+ # n_gpu_layers=-1,
11
+ n_batch=1024,
12
+ n_ctx=4095,
13
  )
14
 
 
15
  def respond(prompt: str):
16
  stream = llm.create_chat_completion(stream=True, messages=[{"role": "user", "content": prompt}])
17