herMaster committed on
Commit 9f7d3b3
Parent(s): c40ddd8

using ctransformers instead of llama.cpp

Files changed (1)
app.py +21 -7
app.py CHANGED
@@ -5,9 +5,13 @@ from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.llms import LlamaCpp
+# from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
+from langchain.llms import CTransformers
+# from ctransformers import AutoModelForCausalLM
+
+
 
 
 
@@ -24,13 +28,23 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 
 print("loading the LLM......................................")
 
-llm = LlamaCpp(
-    model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
-    n_ctx=2048,
-    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
-    callback_manager=callback_manager,
-    verbose=True,
+# llm = LlamaCpp(
+#     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
+#     n_ctx=2048,
+#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
+#     callback_manager=callback_manager,
+#     verbose=True,
+# )
+
+llm = CTransformers(
+    model="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
+    callbacks=[StreamingStdOutCallbackHandler()],
+    temperature=0.2,
+    max_new_tokens=1000,
+
 )
+
+
 print("LLM loaded........................................")
 print("################################################################")
 
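
Note: in the documented langchain CTransformers wrapper, sampling options such as temperature and max_new_tokens are passed through a config dict rather than as top-level keyword arguments, and the Hugging Face repo id and the GGUF filename are usually given separately as model and model_file. A minimal sketch of that documented form, reusing the commit's values, is below; the model_type and context_length entries are assumptions (context_length carried over from the old n_ctx=2048), not part of this commit.

# Sketch of the documented langchain CTransformers usage, assuming
# langchain and ctransformers are installed. Repo/file split and the
# config dict follow the ctransformers docs, not this commit.
from langchain.llms import CTransformers
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Sampling parameters go in a config dict in the documented API.
config = {
    "temperature": 0.2,      # same value the commit uses
    "max_new_tokens": 1000,  # same value the commit uses
    "context_length": 2048,  # assumption: carried over from the old n_ctx=2048
}

llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGUF",   # Hugging Face repo id
    model_file="llama-2-7b-chat.Q8_0.gguf",  # GGUF file within the repo
    model_type="llama",                      # assumption: architecture hint for ctransformers
    config=config,
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Tokens stream to stdout via the callback handler as they are generated.
print(llm("Briefly explain what a vector store is."))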