Spaces:
Runtime error
fix(generate:app.py): completion creation
app.py CHANGED
@@ -9,7 +9,7 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 
 hf_hub_download(repo_id="TheBloke/dolphin-2_6-phi-2-GGUF", filename="dolphin-2_6-phi-2.Q5_K_S.gguf", local_dir=".")
-llm = Llama(model_path="./dolphin-2_6-phi-2.Q5_K_S.gguf")
+llm = Llama(model_path="./dolphin-2_6-phi-2.Q5_K_S.gguf", n_ctx=512, last_n_tokens_size=256, n_threads=4, n_gpu_layers=0)
 
 ins = '''<|im_start|>user
 {question}<|im_end|>
@@ -66,9 +66,12 @@ custom_theme = BlueTheme()
 
 def generate(instruction):
     prompt = ins.format(question=instruction)
-    response = llm(prompt, stop=['<|im_start|>user', '<|im_end|>'])
-
-
+    response = llm.create_completion(prompt, stream=True, stop=['<|im_start|>user', '<|im_end|>'], repeat_penalty=1.1)
+
+    result = ""
+    for output in response:
+        result += output['choices'][0]['text']
+        yield result
 
 examples = [
     "How do dogs bark?",
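
The substance of the fix is in the second hunk: generate() now calls llama-cpp-python's create_completion with stream=True and yields the accumulated text as chunks arrive, instead of invoking the model object directly. A minimal sketch of that streaming pattern follows, assuming the GGUF file from the first hunk has already been downloaded; the hard-coded ChatML prompt (including the assistant turn) is an illustrative guess, since the app's full ins template is truncated above.

# Sketch: consuming the streaming completion API that the fix switches to.
# Model path, stop tokens, and repeat_penalty mirror the diff; the prompt
# string is an assumption about the app's ChatML template.
from llama_cpp import Llama

llm = Llama(model_path="./dolphin-2_6-phi-2.Q5_K_S.gguf", n_ctx=512, n_threads=4, n_gpu_layers=0)

prompt = "<|im_start|>user\nHow do dogs bark?<|im_end|>\n<|im_start|>assistant\n"

# With stream=True, create_completion returns an iterator of partial chunks
# rather than one final dict, so text can be surfaced as it is generated.
for chunk in llm.create_completion(prompt, stream=True,
                                   stop=["<|im_start|>user", "<|im_end|>"],
                                   repeat_penalty=1.1):
    print(chunk["choices"][0]["text"], end="", flush=True)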
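
Because the rewritten generate() is a generator, a Gradio interface built on it streams the answer into the output box as tokens arrive. The Space's actual UI wiring sits outside this diff, so the lines below are only a plausible sketch, reusing the generate, examples, and custom_theme names that the hunk context shows are defined in app.py.

# Hypothetical wiring, placed after the definitions above in app.py.
# Gradio treats a generator function as a streaming endpoint and re-renders
# the output textbox on every yield from generate().
import gradio as gr

demo = gr.Interface(
    fn=generate,                      # the streaming generator from the diff
    inputs=gr.Textbox(label="Question"),
    outputs=gr.Textbox(label="Answer"),
    examples=examples,                # e.g. "How do dogs bark?"
    theme=custom_theme,               # the BlueTheme() instance from the hunk header
)
demo.queue().launch()                 # queuing is needed for streaming in Gradio 3.x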