Commit 19f4fce • Zwea Htet committed
Parent(s): a6a7a07

added langchain to ui
Files changed:
- .gitattributes +1 -0
- app.py +6 -63
- assets/updated_calregs.txt +0 -0
- models/bloom.py +0 -107
- models/langOpen.py +46 -0
- models/llamaCustom.py +123 -0
- pages/langchain_demo.py +26 -0
- pages/llama_custom_demo.py +23 -0
- requirements.txt +2 -1
- utils/__init__.py +0 -0
- utils/chatbox.py +94 -0
- utils/customLLM.py +0 -38
- utils/util.py +0 -12
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+.*pdf filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,25 +1,17 @@
 # https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps

 import os
-import time

 import openai
 import requests
 import streamlit as st

-from models import bloom
 from utils.util import *

-
+st.set_page_config(page_title="RegBotBeta", page_icon="📜🤖")

-st.
-st.header("Powered by `LlamaIndex🦙` and `OpenAI API`")
-
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-index = None
+st.title("Welcome to RegBotBeta2.0")
+st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗 ` and `OpenAI API`")

 api_key = st.text_input("Enter your OpenAI API key here:", type="password")

@@ -31,57 +23,8 @@ if api_key:
         st.info("Success")
         os.environ["OPENAI_API_KEY"] = api_key
         openai.api_key = api_key
-        with st.spinner("Initializing vector index ..."):
-            index = create_index(bloom)
-
-st.write("---")
-if index:
-    # Display chat messages from history on app rerun
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    if prompt := st.chat_input("Say something"):
-        # Display user message in chat message container
-        st.chat_message("user").markdown(prompt)
-
-        # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": prompt})
-
-        with st.spinner("Processing your query..."):
-            bot_response = get_response(index, prompt)
-
-        print("bot: ", bot_response)
-
-        # Display assistant response in chat message container
-        with st.chat_message("assistant"):
-            message_placeholder = st.empty()
-            full_response = ""
-
-            # simulate the chatbot "thinking" before responding
-            # (or stream its response)
-            for chunk in bot_response.split():
-                full_response += chunk + " "
-                time.sleep(0.05)
-
-                # add a blinking cursor to simulate typing
-                message_placeholder.markdown(full_response + "▌")
-
-            message_placeholder.markdown(full_response)
-            # st.markdown(response)

-            # Add assistant response to chat history
-            st.session_state.messages.append(
-                {"role": "assistant", "content": full_response}
-            )
+        if "openai_api_key" not in st.session_state:
+            st.session_state.openai_api_key = ""

-
-# st.markdown(
-#     """
-#     <script>
-#         const chatContainer = document.getElementsByClassName("css-1n76uvr")[0];
-#         chatContainer.scrollTop = chatContainer.scrollHeight;
-#     </script>
-#     """,
-#     unsafe_allow_html=True,
-# )
+        st.session_state.openai_api_key = api_key
assets/updated_calregs.txt ADDED
The diff for this file is too large to render. See raw diff.
models/bloom.py DELETED
@@ -1,107 +0,0 @@
-import os
-import pickle
-from json import dumps, loads
-
-import numpy as np
-import openai
-import pandas as pd
-from dotenv import load_dotenv
-from huggingface_hub import HfFileSystem
-from llama_index import (
-    Document,
-    GPTVectorStoreIndex,
-    LLMPredictor,
-    PromptHelper,
-    ServiceContext,
-    StorageContext,
-    load_index_from_storage,
-)
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-from utils.customLLM import CustomLLM
-
-load_dotenv()
-openai.api_key = os.getenv("OPENAI_API_KEY")
-fs = HfFileSystem()
-
-# get model
-# model_name = "bigscience/bloom-560m"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-# model = AutoModelForCausalLM.from_pretrained(model_name, config='T5Config')
-
-# define prompt helper
-# set maximum input size
-context_window = 2048
-# set number of output tokens
-num_output = 525
-# set maximum chunk overlap
-chunk_overlap_ratio = 0.2
-prompt_helper = PromptHelper(context_window, num_output, chunk_overlap_ratio)
-
-# create a pipeline
-# pl = pipeline(
-#     model=model,
-#     tokenizer=tokenizer,
-#     task="text-generation",
-#     # device=0, # GPU device number
-#     # max_length=512,
-#     do_sample=True,
-#     top_p=0.95,
-#     top_k=50,
-#     temperature=0.7
-# )
-
-# define llm
-llm_predictor = LLMPredictor(llm=CustomLLM())
-service_context = ServiceContext.from_defaults(
-    llm_predictor=llm_predictor, prompt_helper=prompt_helper
-)
-
-
-def prepare_data(file_path: str):
-    df = pd.read_json(file_path)
-    df = df.replace(to_replace="", value=np.nan).dropna(axis=0)  # remove null values
-
-    parsed = loads(df.to_json(orient="records"))
-
-    documents = []
-    for item in parsed:
-        document = Document(
-            text=item["paragraphText"],
-            doc_id=item["_id"]["$oid"],
-            extra_info={
-                "chapter": item["chapter"],
-                "article": item["article"],
-                "title": item["title"],
-            },
-        )
-        documents.append(document)
-
-    return documents
-
-
-def initialize_index(index_name):
-    file_path = f"./vectorStores/{index_name}"
-    if os.path.exists(file_path):
-        # rebuild storage context
-        storage_context = StorageContext.from_defaults(persist_dir=file_path)
-
-        # local load index access
-        index = load_index_from_storage(storage_context)
-
-        # huggingface repo load access
-        # with fs.open(file_path, "r") as file:
-        #     index = pickle.loads(file.readlines())
-        return index
-    else:
-        documents = prepare_data(r"./assets/regItems.json")
-        index = GPTVectorStoreIndex.from_documents(
-            documents, service_context=service_context
-        )
-        # local write access
-        index.storage_context.persist(file_path)
-
-        # huggingface repo write access
-        # with fs.open(file_path, "w") as file:
-        #     file.write(pickle.dumps(index))
-        return index
models/langOpen.py ADDED
@@ -0,0 +1,46 @@
+import os
+
+import openai
+from dotenv import load_dotenv
+from langchain.chains import LLMChain
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain.vectorstores import FAISS
+
+load_dotenv()
+
+embeddings = OpenAIEmbeddings()
+
+prompt_template = """Answer the question using the given context to the best of your ability.
+If you don't know, answer I don't know.
+Context: {context}
+Topic: {topic}"""
+
+PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "topic"])
+
+
+class LangOpen:
+    def __init__(self, model_name: str) -> None:
+        self.index = self.initialize_index("langOpen")
+        self.llm = ChatOpenAI(temperature=0.3, model=model_name)
+        self.chain = LLMChain(llm=self.llm, prompt=PROMPT)
+
+    def initialize_index(self, index_name):
+        path = f"./vectorStores/{index_name}"
+        if os.path.exists(path=path):
+            return FAISS.load_local(folder_path=path, embeddings=embeddings)
+        else:
+            faiss = FAISS.from_texts(
+                "./assets/updated_calregs.txt", embedding=embeddings
+            )
+            faiss.save_local(path)
+            return faiss
+
+    def get_response(self, query_str):
+        print("query_str: ", query_str)
+        print("model_name: ", self.llm.model_name)
+        docs = self.index.similarity_search(query_str, k=4)
+        inputs = [{"context": doc.page_content, "topic": query_str} for doc in docs]
+        result = self.chain.apply(inputs)[0]["text"]
+        return result
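Note: langchain's FAISS.from_texts treats its first argument as a collection of texts, so passing the file path above indexes the path string itself rather than the contents of assets/updated_calregs.txt. A minimal sketch of building the index from the file contents first — the splitter choice and chunk sizes are assumptions, not part of this commit:

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

embeddings = OpenAIEmbeddings()


def build_faiss_index(doc_path: str, persist_path: str) -> FAISS:
    # Read the raw regulation text committed under assets/
    with open(doc_path, encoding="utf-8") as f:
        raw_text = f.read()

    # Split into overlapping chunks so each embedded passage stays small
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_text(raw_text)

    # Embed the chunks (not the path string) and persist the index locally
    index = FAISS.from_texts(chunks, embedding=embeddings)
    index.save_local(persist_path)
    return index


# e.g. build_faiss_index("./assets/updated_calregs.txt", "./vectorStores/langOpen")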
models/llamaCustom.py ADDED
@@ -0,0 +1,123 @@
+import os
+import pickle
+from json import dumps, loads
+from typing import Any, List, Mapping, Optional
+
+import numpy as np
+import openai
+import pandas as pd
+from dotenv import load_dotenv
+from huggingface_hub import HfFileSystem
+from langchain.llms.base import LLM
+from llama_index import (
+    Document,
+    GPTVectorStoreIndex,
+    LLMPredictor,
+    PromptHelper,
+    ServiceContext,
+    SimpleDirectoryReader,
+    StorageContext,
+    load_index_from_storage,
+)
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+# from utils.customLLM import CustomLLM
+
+load_dotenv()
+# openai.api_key = os.getenv("OPENAI_API_KEY")
+fs = HfFileSystem()
+
+# define prompt helper
+# set maximum input size
+CONTEXT_WINDOW = 2048
+# set number of output tokens
+NUM_OUTPUT = 525
+# set maximum chunk overlap
+CHUNK_OVERLAP_RATION = 0.2
+
+prompt_helper = PromptHelper(
+    context_window=CONTEXT_WINDOW,
+    num_output=NUM_OUTPUT,
+    chunk_overlap_ratio=CHUNK_OVERLAP_RATION,
+)
+
+llm_model_name = "bigscience/bloom-560m"
+tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
+model = AutoModelForCausalLM.from_pretrained(llm_model_name, config="T5Config")
+
+model_pipeline = pipeline(
+    model=model,
+    tokenizer=tokenizer,
+    task="text-generation",
+    # device=0, # GPU device number
+    # max_length=512,
+    do_sample=True,
+    top_p=0.95,
+    top_k=50,
+    temperature=0.7,
+)
+
+
+class CustomLLM(LLM):
+    pipeline = model_pipeline
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        prompt_length = len(prompt)
+        response = self.pipeline(prompt, max_new_tokens=525)[0]["generated_text"]
+
+        # only return newly generated tokens
+        return response[prompt_length:]
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {"name_of_model": self.model_name}
+
+    @property
+    def _llm_type(self) -> str:
+        return "custom"
+
+
+class LlamaCustom:
+    # define llm
+    llm_predictor = LLMPredictor(llm=CustomLLM())
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, prompt_helper=prompt_helper
+    )
+
+    def __init__(self, name: str) -> None:
+        self.vector_index = self.initialize_index(index_name=name)
+
+    def initialize_index(self, index_name):
+        file_path = f"./vectorStores/{index_name}"
+        if os.path.exists(path=file_path):
+            # rebuild storage context
+            storage_context = StorageContext.from_defaults(persist_dir=file_path)
+
+            # local load index access
+            index = load_index_from_storage(storage_context)
+
+            # huggingface repo load access
+            # with fs.open(file_path, "r") as file:
+            #     index = pickle.loads(file.readlines())
+            return index
+        else:
+            # documents = prepare_data(r"./assets/regItems.json")
+            documents = SimpleDirectoryReader(input_dir="./assets/pdf").load_data()
+
+            index = GPTVectorStoreIndex.from_documents(
+                documents, service_context=self.service_context
+            )
+
+            # local write access
+            index.storage_context.persist(file_path)
+
+            # huggingface repo write access
+            # with fs.open(file_path, "w") as file:
+            #     file.write(pickle.dumps(index))
+            return index
+
+    def get_response(self, query_str):
+        print("query_str: ", query_str)
+        query_engine = self.vector_index.as_query_engine()
+        response = query_engine.query(query_str)
+        return str(response)
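For reference, a minimal sketch of exercising the pipeline-backed CustomLLM on its own, outside of llama_index. Importing models.llamaCustom downloads and loads the bloom-560m weights, and the prompt below is purely illustrative:

from models.llamaCustom import CustomLLM

# langchain's LLM base class makes the wrapper callable; _call() slices the
# prompt off the front so only the newly generated tokens come back.
llm = CustomLLM()
print(llm("Summarize the scope of the California Code of Regulations."))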
pages/langchain_demo.py ADDED
@@ -0,0 +1,26 @@
+import os
+
+import openai
+import streamlit as st
+
+from models.langOpen import LangOpen
+from utils.chatbox import chatbox
+
+st.set_page_config(page_title="Langchain", page_icon="")
+
+st.subheader("Langchain with OpenAI Demo")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+if "openai_api_key" not in st.session_state:
+    st.info("Enter your openai key to access the chatbot.")
+else:
+    option = st.selectbox(
+        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4"), index=0
+    )
+
+    with st.spinner(f"Initializing {option} ..."):
+        langOpen = LangOpen(model_name=option)
+
+    chatbox("lang_open", langOpen)
pages/llama_custom_demo.py ADDED
@@ -0,0 +1,23 @@
+import os
+import time
+
+import openai
+import streamlit as st
+
+from models.llamaCustom import LlamaCustom
+from utils.chatbox import *
+
+st.set_page_config(page_title="Llama", page_icon="🦙")
+
+st.subheader("Llama Index with Custom LLM Demo")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+if "openai_api_key" not in st.session_state:
+    st.info("Enter your openai key to access the chatbot.")
+else:
+    with st.spinner("Initializing vector index"):
+        model = LlamaCustom(name="llamaCustom")
+
+    chatbox("llama_custom", model)
requirements.txt CHANGED
@@ -9,4 +9,5 @@ faiss-cpu
 python-dotenv
 streamlit>=1.24.0
 huggingface_hub
-xformers
+xformers
+pypdf
utils/__init__.py ADDED
File without changes
utils/chatbox.py ADDED
@@ -0,0 +1,94 @@
+import time
+
+import streamlit as st
+
+def display_chat_history(model_name: str):
+    for message in st.session_state[model_name]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+def chat_input(model_name: str):
+    if prompt := st.chat_input("Say something"):
+        # Display user message in chat message container
+        st.chat_message("user").markdown(prompt)
+
+        # Add user message to chat history
+        st.session_state[model_name].append({"role": "user", "content": prompt})
+
+        return prompt
+
+def display_bot_msg(model_name: str, bot_response: str):
+    # Display assistant response in chat message container
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+
+        # simulate the chatbot "thinking" before responding
+        # (or stream its response)
+        for chunk in bot_response.split():
+            full_response += chunk + " "
+            time.sleep(0.05)
+
+            # add a blinking cursor to simulate typing
+            message_placeholder.markdown(full_response + "▌")
+
+        message_placeholder.markdown(full_response)
+        # st.markdown(response)
+
+        # Add assistant response to chat history
+        st.session_state[model_name].append(
+            {"model_name": model_name, "role": "assistant", "content": full_response}
+        )
+
+# @st.cache_data
+def chatbox(model_name: str, model: None):
+    # Display chat messages from history on app rerun
+    for message in st.session_state.messages:
+        if (message["model_name"] == model_name):
+            with st.chat_message(message["role"]):
+                st.markdown(message["content"])
+
+    if prompt := st.chat_input("Say something"):
+        # Display user message in chat message container
+        st.chat_message("user").markdown(prompt)
+
+        # Add user message to chat history
+        st.session_state.messages.append({"model_name": model_name, "role": "user", "content": prompt})
+
+        with st.spinner("Processing your query..."):
+            bot_response = model.get_response(prompt)
+
+        print("bot: ", bot_response)
+
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"):
+            message_placeholder = st.empty()
+            full_response = ""
+
+            # simulate the chatbot "thinking" before responding
+            # (or stream its response)
+            for chunk in bot_response.split():
+                full_response += chunk + " "
+                time.sleep(0.05)
+
+                # add a blinking cursor to simulate typing
+                message_placeholder.markdown(full_response + "▌")
+
+            message_placeholder.markdown(full_response)
+            # st.markdown(response)
+
+            # Add assistant response to chat history
+            st.session_state.messages.append(
+                {"model_name": model_name, "role": "assistant", "content": full_response}
+            )
+
+            # Scroll to the bottom of the chat container
+            # st.markdown(
+            #     """
+            #     <script>
+            #         const chatContainer = document.getElementsByClassName("css-1n76uvr")[0];
+            #         chatContainer.scrollTop = chatContainer.scrollHeight;
+            #     </script>
+            #     """,
+            #     unsafe_allow_html=True,
+            # )
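Both new pages wire a model into chatbox() the same way; a condensed sketch of the pattern for a hypothetical additional page (the page title and chat key below are placeholders):

import streamlit as st

from models.langOpen import LangOpen
from utils.chatbox import chatbox

st.set_page_config(page_title="Another demo")

# chatbox() reads and appends to this shared history, filtering entries by model_name
if "messages" not in st.session_state:
    st.session_state.messages = []

# app.py stores the validated key under this session key
if "openai_api_key" not in st.session_state:
    st.info("Enter your openai key to access the chatbot.")
else:
    model = LangOpen(model_name="gpt-3.5-turbo")
    chatbox("another_demo", model)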
utils/customLLM.py DELETED
@@ -1,38 +0,0 @@
-from typing import Any, List, Mapping, Optional
-
-from langchain.llms.base import LLM
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-model_name = "bigscience/bloom-560m"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, config='T5Config')
-
-pl = pipeline(
-    model=model,
-    tokenizer=tokenizer,
-    task="text-generation",
-    # device=0, # GPU device number
-    # max_length=512,
-    do_sample=True,
-    top_p=0.95,
-    top_k=50,
-    temperature=0.7
-)
-
-class CustomLLM(LLM):
-    pipeline = pl
-
-    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-        prompt_length = len(prompt)
-        response = self.pipeline(prompt, max_new_tokens=525)[0]["generated_text"]
-
-        # only return newly generated tokens
-        return response[prompt_length:]
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        return {"name_of_model": self.model_name}
-
-    @property
-    def _llm_type(self) -> str:
-        return "custom"
utils/util.py CHANGED
@@ -13,15 +13,3 @@ def validate(token: str):

     response = requests.post(api_endpoint, json=data, headers=headers)
     return response
-
-
-def create_index(model):
-    index = model.initialize_index("bloomLlama")
-    return index
-
-
-def get_response(vector_index, query_str):
-    print("query_str: ", query_str)
-    query_engine = vector_index.as_query_engine()
-    response = query_engine.query(query_str)
-    return str(response)