Zulelee's picture
Upload 254 files
5e9cd1d verified
raw
history blame
2.52 kB
from typing import List
from configs import (
EMBEDDING_MODEL,
KB_ROOT_PATH)
from abc import ABC, abstractmethod
from server.knowledge_base.kb_cache.faiss_cache import kb_faiss_pool, ThreadSafeFaiss
import os
import shutil
from server.db.repository.knowledge_metadata_repository import add_summary_to_db, delete_summary_from_db
from langchain.docstore.document import Document
class KBSummaryService(ABC):
    """Manage the chunk-summary vector store of a single knowledge base.

    The summary vectors live in a ``summary_vector_store`` directory under the
    knowledge base's own folder (``KB_ROOT_PATH/<kb_name>``); the matching
    summary records are written to / removed from the database through
    ``add_summary_to_db`` and ``delete_summary_from_db``.
    """

    kb_name: str      # name of the knowledge base
    embed_model: str  # embedding model name passed to the FAISS pool
    vs_path: str      # directory holding the summary vector store
    kb_path: str      # root directory of this knowledge base

    def __init__(self,
                 knowledge_base_name: str,
                 embed_model: str = EMBEDDING_MODEL
                 ):
        """Record the knowledge base settings and ensure the store directory exists.

        :param knowledge_base_name: name of the knowledge base.
        :param embed_model: embedding model name, defaults to ``EMBEDDING_MODEL``.
        """
        self.kb_name = knowledge_base_name
        self.embed_model = embed_model
        self.kb_path = self.get_kb_path()
        self.vs_path = self.get_vs_path()
        # exist_ok avoids the check-then-create race when several workers
        # initialise the same knowledge base at once.
        os.makedirs(self.vs_path, exist_ok=True)

    def get_vs_path(self):
        """Return the directory of the summary vector store."""
        return os.path.join(self.get_kb_path(), "summary_vector_store")

    def get_kb_path(self):
        """Return the root directory of this knowledge base."""
        return os.path.join(KB_ROOT_PATH, self.kb_name)

    def load_vector_store(self) -> ThreadSafeFaiss:
        """Fetch the summary vector store from the shared FAISS pool.

        ``create=True`` is passed through to the pool.
        """
        return kb_faiss_pool.load_vector_store(kb_name=self.kb_name,
                                               vector_name="summary_vector_store",
                                               embed_model=self.embed_model,
                                               create=True)

    def add_kb_summary(self, summary_combine_docs: List[Document]):
        """Index summary documents and register them in the database.

        :param summary_combine_docs: summary documents; each document's
            ``metadata`` may carry a ``doc_ids`` entry, which is stored with
            the summary (``None`` when absent).
        :return: whatever ``add_summary_to_db`` returns.
        """
        # The store is only held while the index is updated and saved.
        with self.load_vector_store().acquire() as vs:
            ids = vs.add_documents(documents=summary_combine_docs)
            vs.save_local(self.vs_path)

        # Pair every generated vector id with the document it was created for.
        summary_infos = [
            {
                "summary_context": doc.page_content,
                "summary_id": summary_id,
                "doc_ids": doc.metadata.get('doc_ids'),
                "metadata": doc.metadata,
            }
            for summary_id, doc in zip(ids, summary_combine_docs)
        ]
        status = add_summary_to_db(kb_name=self.kb_name, summary_infos=summary_infos)
        return status

    def create_kb_summary(self):
        """Create the directory for the chunk-summary vector store if missing.

        :return: None
        """
        os.makedirs(self.vs_path, exist_ok=True)

    def drop_kb_summary(self):
        """Delete the chunk-summary vector store and its database records.

        A store directory that is already gone is not treated as an error, so
        the database records are still removed in that case.

        :return: None
        """
        with kb_faiss_pool.atomic:
            # NOTE(review): the store is loaded with
            # vector_name="summary_vector_store"; if the pool keys its cache by
            # more than the knowledge base name, this pop may not evict the
            # summary store - confirm against the pool implementation.
            kb_faiss_pool.pop(self.kb_name)
            try:
                shutil.rmtree(self.vs_path)
            except FileNotFoundError:
                # Nothing on disk to remove; continue with the DB cleanup.
                pass
        delete_summary_from_db(kb_name=self.kb_name)