# NOTE(review): the lines below were Hugging Face Space web-page residue
# (status header, file size, commit hashes, gutter line numbers) captured by
# the scrape; they are preserved here as comments so the module parses.
#   Spaces: Sleeping | File size: 1,721 Bytes
#   Commits: df5c858 c77c8b6 a8a13f3 a77a167 05ddaf4 b361fd5 3a434b6
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import gradio as gr
import re
import os
# --- llama.cpp model configuration -------------------------------------------
# Number of transformer layers offloaded to the GPU.
n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.
# Prompt-evaluation batch size.
n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
# Context window size (prompt + completion tokens).
n_ctx=2048
# Echo generated tokens to stdout as they are produced (server-side log).
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Quantized GGUF model file, resolved relative to the working directory.
path = "persian_llama_7b.Q8_K_M.gguf"
# Load the model once at import time; this single instance serves all requests.
llm = LlamaCpp(
model_path= path,
n_gpu_layers=n_gpu_layers, n_batch=n_batch,
callback_manager=callback_manager,
verbose=True,
n_ctx=n_ctx,
temperature=0.2,  # low temperature: mostly deterministic answers
max_tokens=200,  # hard cap on tokens generated per response
top_p=1,  # top_p=1 disables nucleus truncation (all tokens considered)
)
# Alpaca-style instruction template; {} is replaced with the user's question.
prompt = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.\n\n
### Instruction:\n\n{}\n\n\n### Response:\n\n\n"""
def generate_output(text):
    """Stream the model's answer for *text*, yielding the cumulative output.

    Each yielded value is the full text generated so far, so the Gradio
    output textbox is re-rendered with the growing answer on every token.
    """
    pieces = []
    for token in llm.stream(prompt.format(text)):
        pieces.append(token)
        yield "".join(pieces)
def clear():
    """Return empty values for the input and output textboxes."""
    blank = ""
    return blank, blank
# --- Gradio UI ----------------------------------------------------------------
# Fix: removed the stray trailing " |" gutter artifact after demo.launch(...),
# which was a syntax error left over from the web-page scrape.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # User question; rtl=True renders the Persian text right-to-left.
        inputs = gr.Textbox(label="ورودی", placeholder="سوال خود را وارد کنید", rtl=True)
    with gr.Row():
        submit_btn = gr.Button("ارسال", variant="primary")
        clear_btn = gr.ClearButton(value="پاک کردن", variant="secondary")
    with gr.Row():
        outputs = gr.Textbox(label="خروجی", rtl=True)
    # generate_output is a generator, so Gradio streams its cumulative yields
    # into the output textbox.
    submit_btn.click(fn=generate_output, inputs=[inputs], outputs=[outputs])
    clear_btn.click(fn=clear, inputs=[], outputs=[inputs, outputs])

# Bind to all interfaces so the app is reachable from outside the container.
demo.launch(server_name='0.0.0.0', share=True)