import os

import nest_asyncio
nest_asyncio.apply()

# bring in our LLAMA_CLOUD_API_KEY (and other keys) from a .env file
from dotenv import load_dotenv
load_dotenv()
##### LLAMAPARSE #####
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

##### Qdrant #####
from qdrant_client import QdrantClient, models
llamaparse_api_key = os.getenv("LLAMA_CLOUD_API_KEY")

# set up the LlamaParse parser to return plain text
parser = LlamaParse(api_key=llamaparse_api_key, result_type="text")

# use SimpleDirectoryReader with LlamaParse as the extractor for PDF files
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(
    input_dir="./documents", file_extractor=file_extractor
).load_data()
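# As a side note, LlamaParse can also be called directly on a single file instead of
# going through SimpleDirectoryReader; a minimal sketch (the PDF path below is only a
# hypothetical example, not a file shipped with this project):
# single_file_docs = parser.load_data("./documents/example.pdf")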
qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

# embedding model used for both indexing and querying
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.embed_model = embed_model

from llama_index.llms.openai import OpenAI

openai_api_key = os.getenv("OPENAI_API_KEY")
llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)
Settings.llm = llm
client = QdrantClient(
    api_key=qdrant_api_key,
    url=qdrant_url,
)

### Creating a new collection on Qdrant manually is not needed; QdrantVectorStore creates it on first insert ###
# If you do create it yourself, the vector size must match the embedding model
# (text-embedding-3-large produces 3072-dimensional vectors by default):
# client.create_collection(
#     collection_name="RAG_Test",
#     vectors_config=models.VectorParams(size=3072, distance=models.Distance.COSINE),
# )

vector_store = QdrantVectorStore(client=client, collection_name="RAG_Test")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# build the index: documents are chunked, embedded, and upserted into Qdrant
index = VectorStoreIndex.from_documents(
    documents=documents, storage_context=storage_context, show_progress=True
)
index.storage_context.persist()
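
# With the index built, a query engine can answer questions over the parsed PDFs.
# The question string below is only a placeholder to illustrate the call.
query_engine = index.as_query_engine()
response = query_engine.query("What is this document about?")
print(response)

# In a later session the same Qdrant collection can be reused without re-parsing the PDFs;
# a minimal sketch, assuming the same Settings (embed_model/llm) are configured first:
# vector_store = QdrantVectorStore(client=client, collection_name="RAG_Test")
# index = VectorStoreIndex.from_vector_store(vector_store=vector_store)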