SiraH committed on
Commit
329434b
1 Parent(s): f934bd8

Update app.py

Files changed (1)
  1. app.py +27 -15
app.py CHANGED
@@ -42,20 +42,20 @@ def split_docs(documents,chunk_size=1000):
     sp_docs = text_splitter.split_documents(documents)
     return sp_docs
 
-@st.cache_resource
-def load_llama2_llamaCpp():
-    core_model_name = "phi-2.Q4_K_M.gguf"
-    #n_gpu_layers = 32
-    n_batch = 512
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    llm = LlamaCpp(
-        model_path=core_model_name,
-        #n_gpu_layers=n_gpu_layers,
-        n_batch=n_batch,
-        callback_manager=callback_manager,
-        verbose=True, n_ctx=4096, temperature=0.1, max_tokens=128
-    )
-    return llm
+# @st.cache_resource
+# def load_llama2_llamaCpp():
+#     core_model_name = "llama-2-7b-chat.Q4_0.gguf"
+#     #n_gpu_layers = 32
+#     n_batch = 512
+#     callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+#     llm = LlamaCpp(
+#         model_path=core_model_name,
+#         #n_gpu_layers=n_gpu_layers,
+#         n_batch=n_batch,
+#         callback_manager=callback_manager,
+#         verbose=True, n_ctx=4096, temperature=0.1, max_tokens=128
+#     )
+#     return llm
 
 def set_custom_prompt():
     custom_prompt_template = """ Use the following pieces of information from context to answer the user's question.
@@ -91,7 +91,19 @@ def main():
     # llm = HuggingFaceHub(
     #     repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_length": 128})
 
-    llm = load_llama2_llamaCpp()
+
+    core_model_name = "llama-2-7b-chat.Q4_0.gguf"
+    #n_gpu_layers = 32
+    n_batch = 512
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    llm = LlamaCpp(
+        model_path=core_model_name,
+        #n_gpu_layers=n_gpu_layers,
+        n_batch=n_batch,
+        callback_manager=callback_manager,
+        verbose=True, n_ctx=4096, temperature=0.1, max_tokens=128
+    )
+    # llm = load_llama2_llamaCpp()
     qa_prompt = set_custom_prompt()
     embeddings = load_embeddings()
 
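
For reference, a minimal standalone sketch of loading the same GGUF model with LangChain's LlamaCpp wrapper and the parameters this commit inlines into main(). It is not part of the commit: the import paths and the test prompt are illustrative assumptions, and the model file is assumed to sit in the working directory next to app.py.

# Minimal sketch (not part of this commit): load the GGUF model with the
# LlamaCpp parameters used above and stream one completion to stdout.
# Import paths and the test prompt are illustrative assumptions.
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q4_0.gguf",  # assumes the GGUF file is in the working directory
    n_batch=512,
    n_ctx=4096,
    temperature=0.1,
    max_tokens=128,
    callback_manager=callback_manager,
    verbose=True,
)

print(llm("What is retrieval-augmented generation?"))  # tokens stream to stdout via the callback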