File size: 1,810 Bytes
befaea8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os

import nest_asyncio
from dotenv import load_dotenv

# Apply the nest_asyncio patch up front, before any of the libraries below run.
nest_asyncio.apply()

# Load the .env file so LLAMA_CLOUD_API_KEY (and the other settings read with
# os.getenv later in this script) are available in the environment.
load_dotenv()

##### LLAMAPARSE #####
from llama_parse import LlamaParse
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings


##### Qdrant #######
import qdrant_client
from qdrant_client import QdrantClient, models


# LlamaParse credential, taken from the environment (None if the variable is unset).
llamaparse_api_key = os.getenv("LLAMA_CLOUD_API_KEY")

# Parser configured with result_type="text".
parser = LlamaParse(
    result_type="text",
    api_key=llamaparse_api_key,
)

# Route .pdf files through the LlamaParse parser; the reader is pointed at the
# ./documents directory (relative to the current working directory).
file_extractor = {".pdf": parser}
_directory_reader = SimpleDirectoryReader(
    input_dir="./documents",
    file_extractor=file_extractor,
)
documents = _directory_reader.load_data()


from llama_index.llms.openai import OpenAI

# Connection settings and credentials, all read from the environment.
# Each value is None when the corresponding variable is not set.
qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Embedding model used for indexing. No api_key is passed here, unlike the LLM.
# NOTE(review): presumably the key is picked up from OPENAI_API_KEY in the
# environment — confirm.
embed_model = OpenAIEmbedding(model="text-embedding-3-large")

# Chat/completion model, given the OpenAI key explicitly.
llm = OpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)

# Register both models on the global LlamaIndex Settings object.
Settings.embed_model = embed_model
Settings.llm = llm

# Qdrant client built from the URL and API key read above.
client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

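### Creating a new Qdrant collection by hand (kept for reference; not needed for this script) ###
# NOTE: if you do create it manually, use the same collection name that is passed to
# QdrantVectorStore below ("RAG_Test"), and a vector size that matches the embedding
# model (text-embedding-3-large returns 3072-dimensional vectors by default).
# client.create_collection(
#     collection_name="RAG_Test",
#     vectors_config=models.VectorParams(size=3072, distance=models.Distance.COSINE),
# )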

# Vector store backed by the "RAG_Test" collection on the Qdrant client above.
vector_store = QdrantVectorStore(collection_name="RAG_Test", client=client)

# Storage context that uses the Qdrant vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the parsed documents, showing a progress bar while it runs.
index = VectorStoreIndex.from_documents(
    documents=documents,
    show_progress=True,
    storage_context=storage_context,
)

# persist() is called without arguments, so the library's default location is used.
index.storage_context.persist()