File size: 2,574 Bytes
9fb0f7d
170741d
 
 
9fb0f7d
 
 
 
 
 
66bfc6b
170741d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fb0f7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66bfc6b
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from typing import List
from pinecone import Pinecone, ServerlessSpec
from llama_index.vector_stores.pinecone import PineconeVectorStore
from dotenv import load_dotenv
from llama_index.core import (
    SimpleDirectoryReader,
    Document,
    VectorStoreIndex,
    StorageContext,
)
from huggingface_hub import HfFileSystem, HfApi

import os

load_dotenv()

# Pinecone Vector Database
# The client is constructed at import time. The API key is read from the
# PINECONE_API_KEY environment variable (os.environ.get yields None if unset).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
# Name of the Pinecone index this module creates (if missing) and connects to.
pc_index_name = "llama-integration-pinecone"
# pc_index_name = "openai-embeddings"
# Listing of existing indexes, fetched once here; index_exists() reads it.
pc_indexes = pc.list_indexes()

# Check if the index already exists
def index_exists(index_name):
    """Return True if an index named ``index_name`` is in ``pc_indexes``.

    The check runs against the module-level ``pc_indexes`` listing, which is
    fetched once when the module is loaded; it is not refreshed here.
    """
    return any(entry["name"] == index_name for entry in pc_indexes)

# Create the index if it doesn't exist
# (a serverless index on AWS us-east-1 holding 1536-dimensional vectors).
# NOTE(review): the dimension presumably has to match the embedding model
# used when documents are indexed — confirm before changing either.
if not index_exists(pc_index_name):
    pc.create_index(
        name=pc_index_name,
        dimension=1536,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Initialize your index
# Module-level handle used by _namespace_exists() and get_pinecone_index().
pinecone_index = pc.Index(pc_index_name)

# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))

# Relative directory that get_pinecone_index() reads files from and that the
# Hugging Face upload at the bottom of this module uses for its paths.
SAVE_DIR = "uploaded_files"


def _namespace_exists(namespace: str):
    """Report whether ``namespace`` is listed in the Pinecone index stats.

    Fetches ``describe_index_stats()`` from the module-level
    ``pinecone_index`` on every call and tests membership in its
    ``"namespaces"`` entry.
    """
    index_stats = pinecone_index.describe_index_stats()
    return namespace in index_stats["namespaces"]


def get_pinecone_index(filename: str) -> VectorStoreIndex:
    """Return a ``VectorStoreIndex`` for ``filename`` backed by Pinecone.

    The Pinecone namespace is ``filename`` with every "." and " " replaced by
    "_". When that namespace is already present in the index stats, the
    existing vector store is wrapped as-is. Otherwise the file is read from
    ``SAVE_DIR`` and a new index is built from its documents, using a storage
    context bound to that namespace's vector store.

    Args:
        filename: Name of the file inside ``SAVE_DIR``.

    Returns:
        The index loaded from, or newly built on, the namespace's vector store.
    """
    # One pass over the name; equivalent to chaining the two replacements.
    namespace = filename.translate(str.maketrans(". ", "__"))
    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace=namespace,
    )

    # Existing namespace: reuse what is already stored instead of re-reading.
    if _namespace_exists(namespace=namespace):
        print(f"Namespace {namespace} exists.")
        return VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # New namespace: load the file and index its documents.
    source_path = f"{SAVE_DIR}/{filename}"
    documents = SimpleDirectoryReader(input_files=[source_path]).load_data(
        show_progress=True
    )
    context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        documents=documents, show_progress=True, storage_context=context
    )

# Hugging Face Hub client; the token is read from the HF_TOKEN environment
# variable (os.environ.get yields None if unset).
api = HfApi(
    token=os.environ.get("HF_TOKEN")
)

# Upload the local calregs.pdf from SAVE_DIR to the "hbui/RegBot4.0" repo,
# keeping the same relative path inside the repo.
# NOTE(review): this call sits at module top level, so it runs every time the
# module is imported or executed — confirm that is intended.
api.upload_file(
    repo_id="hbui/RegBot4.0", 
    path_or_fileobj=f"{SAVE_DIR}/calregs.pdf",
    path_in_repo=f"{SAVE_DIR}/calregs.pdf",
)