Spaces: Running on CPU Upgrade
Update auditqa/doc_process.py
Browse files
- auditqa/doc_process.py +2 -2
auditqa/doc_process.py
CHANGED
@@ -31,7 +31,7 @@ def process_pdf():
|
|
31 |
# langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
|
32 |
chunk_size = 256
|
33 |
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
|
34 |
-
AutoTokenizer.from_pretrained("BAAI/bge-en-
|
35 |
chunk_size=chunk_size,
|
36 |
chunk_overlap=10,
|
37 |
add_start_index=True,
|
@@ -78,7 +78,7 @@ def process_pdf():
|
|
78 |
embeddings = HuggingFaceEmbeddings(
|
79 |
model_kwargs = {'device': device},
|
80 |
encode_kwargs = {'normalize_embeddings': True},
|
81 |
-
model_name="BAAI/bge-en-
|
82 |
)
|
83 |
# placeholder for collection
|
84 |
qdrant_collections = {}
|
|
|
31 |
# langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
|
32 |
chunk_size = 256
|
33 |
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
|
34 |
+
AutoTokenizer.from_pretrained("BAAI/bge-large-en-v1.5"),
|
35 |
chunk_size=chunk_size,
|
36 |
chunk_overlap=10,
|
37 |
add_start_index=True,
|
|
|
78 |
embeddings = HuggingFaceEmbeddings(
|
79 |
model_kwargs = {'device': device},
|
80 |
encode_kwargs = {'normalize_embeddings': True},
|
81 |
+
model_name="BAAI/bge-large-en-v1.5"
|
82 |
)
|
83 |
# placeholder for collection
|
84 |
qdrant_collections = {}
|