DarForm committed on
Commit
65d160a
1 Parent(s): 8b9e5cd

Update emb.py

Browse files
Files changed (1) hide show
  1. emb.py +20 -20
emb.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain.embeddings import SentenceTransformerEmbeddings
5
  from langchain.vectorstores import Chroma
6
  import configparser
@@ -131,25 +131,25 @@ class EmbeddingsManager:
131
 
132
 
133
  #This function is used to add documents to an existing vector store
134
- def generate_vector_store(self, index):
135
- """Adds a document to the vector store on Pinecone."""
136
-
137
- documents = []
138
- for root, dirs, files in os.walk("docs"):
139
- for file in files:
140
- if file.endswith(".pdf"):
141
- print("Uploading "+file.replace(".pdf",""))
142
- documents.clear()
143
- loader = PDFMinerLoader(os.path.join(root, file))
144
- documents.extend(loader.load())
145
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.text_split_size, chunk_overlap=self.text_overlap)
146
- texts = text_splitter.split_documents(documents)
147
- docsearch = Pinecone.from_documents(texts, embedding=self.embeddings_model, index_name=index)
148
- os.remove(os.path.join(root, file))
149
-
150
- return "Ok"
151
-
152
-
153
  # Example Usage:
154
  if __name__ == "__main__":
155
 
 
1
  import os
2
  from langchain.document_loaders import PyPDFLoader, DirectoryLoader, PDFMinerLoader
3
+ #from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain.embeddings import SentenceTransformerEmbeddings
5
  from langchain.vectorstores import Chroma
6
  import configparser
 
131
 
132
 
133
  #This function is used to add documents to an existing vector store
134
+ # def generate_vector_store(self, index):
135
+ # """Adds a document to the vector store on Pinecone."""
136
+ #
137
+ # documents = []
138
+ # for root, dirs, files in os.walk("docs"):
139
+ # for file in files:
140
+ # if file.endswith(".pdf"):
141
+ # print("Uploading "+file.replace(".pdf",""))
142
+ # documents.clear()
143
+ # loader = PDFMinerLoader(os.path.join(root, file))
144
+ # documents.extend(loader.load())
145
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.text_split_size, chunk_overlap=self.text_overlap)
146
+ # texts = text_splitter.split_documents(documents)
147
+ # docsearch = Pinecone.from_documents(texts, embedding=self.embeddings_model, index_name=index)
148
+ # os.remove(os.path.join(root, file))
149
+ #
150
+ # return "Ok"
151
+ #
152
+ #
153
  # Example Usage:
154
  if __name__ == "__main__":
155