import os
from typing import List

from pinecone import Pinecone, ServerlessSpec
from llama_index.vector_stores.pinecone import PineconeVectorStore
from dotenv import load_dotenv
from llama_index.core import (
    SimpleDirectoryReader,
    Document,
    VectorStoreIndex,
    StorageContext,
)
from huggingface_hub import HfFileSystem, HfApi

# Load variables from a .env file before any credentials are read below.
load_dotenv()

# Pinecone vector database client, authenticated via PINECONE_API_KEY.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
pc_index_name = "llama-integration-pinecone"
# Alternative index name kept for reference:
# pc_index_name = "openai-embeddings"

# Listing of indexes taken once, at import time; `index_exists` reads it.
pc_indexes = pc.list_indexes()


def index_exists(index_name: str) -> bool:
    """Return True if `index_name` appears in the `pc_indexes` listing.

    The listing is the module-level snapshot captured at import time, so
    indexes created afterwards are not reflected here.
    """
    return any(entry["name"] == index_name for entry in pc_indexes)


# Create the index on first run only.
if not index_exists(pc_index_name):
    pc.create_index(
        name=pc_index_name,
        # NOTE(review): presumably matches the embedding model's vector
        # size -- confirm against the embedding model in use.
        dimension=1536,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle to the index used by every function below.
pinecone_index = pc.Index(pc_index_name)

# Maintenance snippets, intentionally left disabled:
# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))

# Directory (relative path) from which source documents are read.
SAVE_DIR = "uploaded_files"


def _namespace_exists(namespace: str) -> bool:
    """Return True if `namespace` is listed in the index statistics."""
    return namespace in pinecone_index.describe_index_stats()["namespaces"]


def get_pinecone_index(filename: str) -> VectorStoreIndex:
    """Return a `VectorStoreIndex` backed by the namespace for `filename`.

    The namespace is `filename` with every "." and " " replaced by "_".
    If that namespace is already present in the Pinecone index, the index is
    built directly on top of the existing vector store. Otherwise the file
    `SAVE_DIR/filename` is read and a new index is built from its documents
    using that vector store.

    Args:
        filename: Name of the file inside `SAVE_DIR`.

    Returns:
        The loaded or newly built `VectorStoreIndex`.
    """
    # Single-pass equivalent of replacing "." and " " with "_".
    namespace = filename.translate(str.maketrans(". ", "__"))

    vector_store = PineconeVectorStore(
        pinecone_index=pinecone_index,
        namespace=namespace,
    )

    if _namespace_exists(namespace=namespace):
        print(f"Namespace {namespace} exists.")
        return VectorStoreIndex.from_vector_store(vector_store=vector_store)

    # Namespace not found: read the file and build the index from it.
    source_path = f"{SAVE_DIR}/{filename}"
    documents = SimpleDirectoryReader(input_files=[source_path]).load_data(
        show_progress=True
    )
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex.from_documents(
        documents=documents,
        show_progress=True,
        storage_context=storage_context,
    )


# Hugging Face Hub client, authenticated via HF_TOKEN.
api = HfApi(token=os.environ.get("HF_TOKEN"))

# Runs at import time: uploads the PDF to the Hub repo under the same
# relative path it has locally.
api.upload_file(
    repo_id="hbui/RegBot4.0",
    path_or_fileobj=f"{SAVE_DIR}/calregs.pdf",
    path_in_repo=f"{SAVE_DIR}/calregs.pdf",
)