|
from haystack.document_stores import ElasticsearchDocumentStore |
|
from haystack.document_stores import InMemoryDocumentStore |
|
import os |
|
import pickle |
|
|
|
def get_document_store(document_index):
    """Build an ElasticsearchDocumentStore for the given index.

    The host is taken from the ``ELASTICSEARCH_HOST`` environment variable
    and falls back to ``"localhost"`` when the variable is not set. Empty
    strings are passed for both username and password.

    Args:
        document_index: Value forwarded as the store's ``index`` argument.

    Returns:
        The newly constructed ElasticsearchDocumentStore.
    """
    es_host = os.getenv("ELASTICSEARCH_HOST", "localhost")
    return ElasticsearchDocumentStore(
        host=es_host,
        username="",
        password="",
        index=document_index,
    )
|
|
|
def add_data(filenames, document_store, document_index, *, data_dir="./data/website_data"):
    """Load pickled document batches from disk and write them to a store.

    Each file is unpickled, its contents are appended to the returned list,
    and the same contents are written to ``document_store`` under
    ``document_index``.

    Args:
        filenames: Iterable of file names located inside ``data_dir``.
        document_store: Object exposing ``write_documents(docs, index=...)``.
        document_index: Index name forwarded to ``write_documents``.
        data_dir: Directory containing the pickle files. Defaults to the
            original hard-coded location ``./data/website_data``.

    Returns:
        A ``(document_store, data)`` tuple, where ``data`` holds the
        unpickled content of every file in the order it was read.
    """
    data = []
    for filename in filenames:
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only point this function at files from a trusted source.
        with open(f"{data_dir}/{filename}", "rb") as fp:
            documents = pickle.load(fp)
        # The file handle is closed before the store is touched, so it is
        # not held open for the duration of the write.
        data.append(documents)
        document_store.write_documents(documents, index=document_index)
    return document_store, data
|
|
|
def get_in_memory_document_store(document_index):
    """Build an InMemoryDocumentStore for the given index.

    Args:
        document_index: Value forwarded as the store's ``index`` argument.

    Returns:
        The newly constructed InMemoryDocumentStore.
    """
    return InMemoryDocumentStore(index=document_index)