Update app.py
app.py CHANGED
@@ -42,20 +42,20 @@ def split_docs(documents,chunk_size=1000):
     sp_docs = text_splitter.split_documents(documents)
     return sp_docs
 
-@st.cache_resource
-def load_llama2_llamaCpp():
-    core_model_name = "llama-2-7b-chat.Q4_0.gguf"
-    #n_gpu_layers = 32
-    n_batch = 512
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    llm = LlamaCpp(
-        model_path=core_model_name,
-        #n_gpu_layers=n_gpu_layers,
-        n_batch=n_batch,
-        callback_manager=callback_manager,
-        verbose=True,n_ctx = 4096, temperature = 0.1, max_tokens = 128
-    )
-    return llm
+# @st.cache_resource
+# def load_llama2_llamaCpp():
+#     core_model_name = "llama-2-7b-chat.Q4_0.gguf"
+#     #n_gpu_layers = 32
+#     n_batch = 512
+#     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+#     llm = LlamaCpp(
+#         model_path=core_model_name,
+#         #n_gpu_layers=n_gpu_layers,
+#         n_batch=n_batch,
+#         callback_manager=callback_manager,
+#         verbose=True,n_ctx = 4096, temperature = 0.1, max_tokens = 128
+#     )
+#     return llm
 
 def set_custom_prompt():
     custom_prompt_template = """ Use the following pieces of information from context to answer the user's question.
@@ -91,7 +91,19 @@ def main():
     # llm = HuggingFaceHub(
     #     repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_length": 128})
 
-    llm = load_llama2_llamaCpp()
+
+    core_model_name = "llama-2-7b-chat.Q4_0.gguf"
+    #n_gpu_layers = 32
+    n_batch = 512
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    llm = LlamaCpp(
+        model_path=core_model_name,
+        #n_gpu_layers=n_gpu_layers,
+        n_batch=n_batch,
+        callback_manager=callback_manager,
+        verbose=True,n_ctx = 4096, temperature = 0.1, max_tokens = 128
+    )
+    # llm = load_llama2_llamaCpp()
     qa_prompt = set_custom_prompt()
     embeddings = load_embeddings()
 
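Net effect of the commit: the @st.cache_resource-wrapped loader is commented out and the LlamaCpp instance is now built directly inside main(), so the GGUF weights are re-created on every Streamlit rerun instead of being cached. For reference, a minimal sketch of the cached form with the same parameters as the diff; the import paths are assumptions, since the hunks do not show app.py's import block:

# Sketch of the cached loader this diff comments out (not the committed code).
# Import paths below are assumed; the diff does not show app.py's imports.
import streamlit as st
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

@st.cache_resource  # keep one model instance across Streamlit reruns
def load_llama2_llamaCpp():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return LlamaCpp(
        model_path="llama-2-7b-chat.Q4_0.gguf",  # quantized GGUF model in the working dir
        n_batch=512,        # prompt tokens processed per llama.cpp batch
        n_ctx=4096,         # context window size
        temperature=0.1,    # low temperature for near-deterministic answers
        max_tokens=128,     # cap on generated tokens per call
        callback_manager=callback_manager,  # streams tokens to stdout
        verbose=True,
    )

Because Streamlit re-executes main() top to bottom on every user interaction, the inlined version reloads the model from disk each rerun; restoring the st.cache_resource wrapper would avoid that cost.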