Update emb.py
Browse files
emb.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
|
3 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain.vectorstores import Chroma
|
6 |
import configparser
|
@@ -131,25 +131,25 @@ class EmbeddingsManager:
|
|
131 |
|
132 |
|
133 |
#This function is used to add documents to an existing vector store
|
134 |
-
def generate_vector_store(self, index):
|
135 |
-
"""Adds a document to the vector store on Pinecone."""
|
136 |
-
|
137 |
-
documents = []
|
138 |
-
for root, dirs, files in os.walk("docs"):
|
139 |
-
for file in files:
|
140 |
-
if file.endswith(".pdf"):
|
141 |
-
print("Uploading "+file.replace(".pdf",""))
|
142 |
-
documents.clear()
|
143 |
-
loader = PDFMinerLoader(os.path.join(root, file))
|
144 |
-
documents.extend(loader.load())
|
145 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.text_split_size, chunk_overlap=self.text_overlap)
|
146 |
-
texts = text_splitter.split_documents(documents)
|
147 |
-
docsearch = Pinecone.from_documents(texts, embedding=self.embeddings_model, index_name=index)
|
148 |
-
os.remove(os.path.join(root, file))
|
149 |
-
|
150 |
-
return "Ok"
|
151 |
-
|
152 |
-
|
153 |
# Example Usage:
|
154 |
if __name__ == "__main__":
|
155 |
|
|
|
1 |
import os
|
2 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
|
3 |
+
#from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
5 |
from langchain.vectorstores import Chroma
|
6 |
import configparser
|
|
|
131 |
|
132 |
|
133 |
#This function is used to add documents to an existing vector store
|
134 |
+
# def generate_vector_store(self, index):
|
135 |
+
# """Adds a document to the vector store on Pinecone."""
|
136 |
+
#
|
137 |
+
# documents = []
|
138 |
+
# for root, dirs, files in os.walk("docs"):
|
139 |
+
# for file in files:
|
140 |
+
# if file.endswith(".pdf"):
|
141 |
+
# print("Uploading "+file.replace(".pdf",""))
|
142 |
+
# documents.clear()
|
143 |
+
# loader = PDFMinerLoader(os.path.join(root, file))
|
144 |
+
# documents.extend(loader.load())
|
145 |
+
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.text_split_size, chunk_overlap=self.text_overlap)
|
146 |
+
# texts = text_splitter.split_documents(documents)
|
147 |
+
# docsearch = Pinecone.from_documents(texts, embedding=self.embeddings_model, index_name=index)
|
148 |
+
# os.remove(os.path.join(root, file))
|
149 |
+
#
|
150 |
+
# return "Ok"
|
151 |
+
#
|
152 |
+
#
|
153 |
# Example Usage:
|
154 |
if __name__ == "__main__":
|
155 |
|