Spaces:
Running
Running
import gradio as gr | |
import openai | |
import os | |
from dotenv import load_dotenv | |
import phoenix as px | |
import llama_index | |
from llama_index import Prompt, ServiceContext, VectorStoreIndex, SimpleDirectoryReader | |
from llama_index.chat_engine.types import ChatMode | |
from llama_index.llms import ChatMessage, MessageRole | |
from llama_index.vector_stores.qdrant import QdrantVectorStore | |
from llama_index.text_splitter import SentenceSplitter | |
from llama_index.extractors import TitleExtractor | |
from llama_index.ingestion import IngestionPipeline | |
from chat_template import CHAT_TEXT_QA_PROMPT | |
from schemas import ChatbotVersion, ServiceProvider | |
from chatbot import Chatbot, IndexBuilder | |
from custom_io import UnstructuredReader, default_file_metadata_func | |
from qdrant import client as qdrantClient | |
from llama_index import set_global_service_context | |
from service_provider_config import get_service_provider_config | |
# --- Initial service setup -------------------------------------------------
# These statements run at import time and their order matters.

# Launch the Phoenix app, then point llama_index's global handler at it.
px.launch_app()
llama_index.set_global_handler("arize_phoenix")

# Load .env before reading the key so values defined there are visible.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Chunk size handed to the service context below.
CHUNK_SIZE = 1024

# LLM / embedding model pair for the OpenAI service provider.
LLM, EMBED_MODEL = get_service_provider_config(
    service_provider=ServiceProvider.OPENAI,
)

# Build the service context once and install it as the global default.
service_context = ServiceContext.from_defaults(
    llm=LLM,
    embed_model=EMBED_MODEL,
    chunk_size=CHUNK_SIZE,
)
set_global_service_context(service_context)
class AwesumIndexBuilder(IndexBuilder):
    """Index builder that reads ``./awesumcare_data`` and stores vectors in Qdrant."""

    def _load_doucments(self):
        """Load the data directory into ``self.documents`` and print the count.

        NOTE(review): the method name (spelling included) is kept unchanged;
        presumably it overrides a hook on ``IndexBuilder`` -- verify before
        renaming.
        """
        # One UnstructuredReader instance per handled file suffix.
        unstructured_suffixes = (".pdf", ".docx", ".pptx")
        extractors = {
            suffix: UnstructuredReader() for suffix in unstructured_suffixes
        }
        # NOTE(review): "*.pptx" is excluded below although an extractor is
        # registered for ".pptx" -- confirm which of the two is intended.
        reader = SimpleDirectoryReader(
            './awesumcare_data',
            file_extractor=extractors,
            recursive=True,
            exclude=["*.png", "*.pptx"],
            file_metadata=default_file_metadata_func,
        )
        self.documents = reader.load_data()
        print(f"Loaded {len(self.documents)} docs")

    def _setup_service_context(self):
        """Delegate to the parent implementation unchanged."""
        super()._setup_service_context()

    def _setup_vector_store(self):
        """Create the Qdrant vector store for this collection, then run the parent step."""
        store = QdrantVectorStore(
            client=qdrantClient,
            collection_name=self.vdb_collection_name,
        )
        self.vector_store = store
        super()._setup_vector_store()

    def _setup_index(self):
        """Set ``self.index`` from the vector store, ingesting documents first if needed.

        When ``self.is_load_from_vector_store`` is truthy the index is created
        directly from the existing vector store. Otherwise ``self.documents``
        are first run through an ingestion pipeline (sentence splitting, then
        the embedding model) that writes into the vector store.
        """
        super()._setup_index()

        reuse_existing = self.is_load_from_vector_store
        if not reuse_existing:
            ingestion = IngestionPipeline(
                transformations=[
                    SentenceSplitter(),
                    EMBED_MODEL,
                ],
                vector_store=self.vector_store,
            )
            ingestion.run(documents=self.documents)

        # Both paths end by building the index from the vector store.
        self.index = VectorStoreIndex.from_vector_store(self.vector_store)
        if reuse_existing:
            print("set up index from vector store")
class AwesumCareChatbot(Chatbot):
    """Chatbot over the Awesum Care index that uses an ``OpenAIAgent`` as its chat engine."""

    DENIED_ANSWER_PROMPT = ""
    SYSTEM_PROMPT = ""
    # Sample questions passed to the Gradio chat interfaces as examples.
    CHAT_EXAMPLES = [
        "什麼是安心三寶?",
        "點樣立平安紙?"
    ]

    def _setup_observer(self):
        """Intentionally a no-op."""
        pass

    def _setup_index(self):
        """Delegate to the parent implementation unchanged."""
        # An earlier variant (disabled in the original) built the index here via
        # VectorStoreIndex.from_documents(self.documents,
        # service_context=self.service_context) before calling the parent.
        super()._setup_index()

    def _setup_query_engine(self):
        """Run the parent step, then set a query engine using the chat QA template."""
        super()._setup_query_engine()
        engine = self.index.as_query_engine(
            text_qa_template=CHAT_TEXT_QA_PROMPT,
        )
        self.query_engine = engine

    def _setup_tools(self):
        """Wrap the query engine in a ``QueryEngineTool`` and run the parent step.

        Note that ``self.tools`` holds a single tool object, not a list.
        """
        from llama_index.tools.query_engine import QueryEngineTool

        query_tool = QueryEngineTool.from_defaults(
            query_engine=self.query_engine,
        )
        self.tools = query_tool
        return super()._setup_tools()

    def _setup_chat_engine(self):
        """Set an ``OpenAIAgent`` built from the query tool as chat engine, then run the parent step."""
        # Marked as "testing" in the original.
        from llama_index.agent import OpenAIAgent

        # NOTE(review): similarity_top_k is forwarded to the agent factory as
        # in the original; confirm the agent actually honours it.
        agent = OpenAIAgent.from_tools(
            tools=[self.tools],  # self.tools is a single tool, so wrap it
            llm=LLM,
            similarity_top_k=1,
            verbose=True,
        )
        self.chat_engine = agent
        print("set up agent as chat engine")

        # Disabled alternative kept for reference:
        #   self.index.as_chat_engine(chat_mode=ChatMode.BEST,
        #                             similarity_top_k=5,
        #                             text_qa_template=CHAT_TEXT_QA_PROMPT)
        super()._setup_chat_engine()
# Model names noted by the author: gpt-3.5-turbo-1106, gpt-4-1106-preview
# The builder is configured to load from the vector store for collection "demo-v0".
awesum_index_builder = AwesumIndexBuilder(
    vdb_collection_name="demo-v0",
    is_load_from_vector_store=True,
)
awesum_chatbot = AwesumCareChatbot(
    model_name=ChatbotVersion.CHATGPT_35.value,
    index_builder=awesum_index_builder,
)
def vote(data: gr.LikeData):
    """Show an info toast reporting whether a response was up- or down-voted.

    Args:
        data: Like-event payload; ``data.liked`` selects the wording and
            ``data.value`` is echoed in the message.
    """
    verdict = "up-voted" if data.liked else "down-voted"
    # Format with an f-string rather than ``str + value``: concatenation
    # raises TypeError when the liked value is not a plain string.
    gr.Info(f"You {verdict} this response: {data.value}")
# --- Gradio UI ---------------------------------------------------------------
# The Chatbot component is created up front so the like handler can be
# attached to the same instance that the first chat interface uses.
chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# Awesum Care demo")

    # Tab 1: streaming chat with like/dislike feedback wired to vote().
    with gr.Tab("With awesum care data prepared"):
        gr.ChatInterface(
            awesum_chatbot.stream_chat,
            chatbot=chatbot,
            examples=awesum_chatbot.CHAT_EXAMPLES,
        )
        chatbot.like(vote, None, None)

    # Tabs 2 and 3: same layout, each backed by a different predict function.
    baseline_tabs = (
        ("With Initial System Prompt (a.k.a. prompt wrapper)",
         awesum_chatbot.predict_with_prompt_wrapper),
        ("Vanilla ChatGPT without modification",
         awesum_chatbot.predict_vanilla_chatgpt),
    )
    for tab_title, respond in baseline_tabs:
        with gr.Tab(tab_title):
            gr.ChatInterface(respond, examples=awesum_chatbot.CHAT_EXAMPLES)

# Enable queuing, then launch the app.
demo.queue()
demo.launch()