Spaces:
Sleeping
Sleeping
chore: Update langchain_community.vectorstores imports and add Chroma vectorstore
Browse files
- .gitattributes +1 -0
- app.py +40 -11
- langchain_chroma/chroma.sqlite3 +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/data_level0.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/header.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/index_metadata.pickle +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/length.bin +3 -0
- langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/link_lists.bin +3 -0
- langchain_faiss/index.faiss +2 -2
- langchain_faiss/index.pkl +2 -2
.gitattributes
CHANGED
@@ -34,6 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
|
|
|
37 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
*.faiss filter=lfs diff=lfs merge=lfs -text
|
39 |
*.msg filter=lfs diff=lfs merge=lfs -text
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
|
37 |
+
langchain_chroma/* filter=lfs diff=lfs merge=lfs -text
|
38 |
*.pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
*.faiss filter=lfs diff=lfs merge=lfs -text
|
40 |
*.msg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -16,7 +16,7 @@ from langchain_community.document_loaders.parsers.language.language_parser impor
|
|
16 |
LanguageParser,
|
17 |
)
|
18 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
19 |
-
from langchain_community.vectorstores import FAISS
|
20 |
from langchain_core.callbacks.manager import CallbackManager
|
21 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
22 |
from langchain_core.output_parsers import StrOutputParser
|
@@ -129,18 +129,37 @@ cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
|
|
129 |
|
130 |
# Create and save FAISS index
|
131 |
FAISS_DB_INDEX = "./langchain_faiss"
|
132 |
-
#
|
133 |
-
#
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
|
136 |
)
|
|
|
|
|
|
|
|
|
137 |
|
138 |
# Create retrievers
|
139 |
-
faiss_retriever =
|
|
|
140 |
bm25_retriever = BM25Retriever.from_documents(combined_documents)
|
141 |
bm25_retriever.k = 10
|
142 |
ensemble_retriever = EnsembleRetriever(
|
143 |
-
retrievers=[bm25_retriever, faiss_retriever
|
|
|
144 |
)
|
145 |
|
146 |
# Create prompt template
|
@@ -172,11 +191,14 @@ class StreamCallback(BaseCallbackHandler):
|
|
172 |
print(token, end="", flush=True)
|
173 |
|
174 |
|
|
|
|
|
|
|
175 |
# Initialize LLMs with configuration
|
176 |
llm = ChatOpenAI(
|
177 |
model="gpt-4o",
|
178 |
temperature=0,
|
179 |
-
streaming=
|
180 |
callbacks=[StreamCallback()],
|
181 |
).configurable_alternatives(
|
182 |
ConfigurableField(id="llm"),
|
@@ -220,11 +242,11 @@ rag_chain = (
|
|
220 |
)
|
221 |
|
222 |
|
223 |
-
model_key = os.getenv("
|
224 |
-
print("
|
225 |
|
226 |
|
227 |
-
def respond(
|
228 |
message,
|
229 |
history: list[tuple[str, str]],
|
230 |
):
|
@@ -234,11 +256,18 @@ def respond(
|
|
234 |
yield response
|
235 |
|
236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
"""
|
238 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
239 |
"""
|
240 |
demo = gr.ChatInterface(
|
241 |
-
respond,
|
242 |
title="랭체인에 대해서 물어보세요!",
|
243 |
description="안녕하세요!\n저는 랭체인에 대한 인공지능 QA봇입니다. 랭체인에 대해 깊은 지식을 가지고 있어요. 랭체인 개발에 관한 도움이 필요하시면 언제든지 질문해주세요!",
|
244 |
)
|
|
|
16 |
LanguageParser,
|
17 |
)
|
18 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
19 |
+
from langchain_community.vectorstores import FAISS, Chroma
|
20 |
from langchain_core.callbacks.manager import CallbackManager
|
21 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
22 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
129 |
|
130 |
# Create and save FAISS index
|
131 |
FAISS_DB_INDEX = "./langchain_faiss"
|
132 |
+
# faiss_db = FAISS.from_documents(
|
133 |
+
# documents=combined_documents,
|
134 |
+
# embedding=cached_embeddings,
|
135 |
+
# )
|
136 |
+
# faiss_db.save_local(folder_path=FAISS_DB_INDEX)
|
137 |
+
|
138 |
+
# Create and save Chroma index
|
139 |
+
CHROMA_DB_INDEX = "./langchain_chroma"
|
140 |
+
# chroma_db = Chroma.from_documents(
|
141 |
+
# documents=combined_documents,
|
142 |
+
# embedding=cached_embeddings,
|
143 |
+
# persist_directory=CHROMA_DB_INDEX,
|
144 |
+
# )
|
145 |
+
|
146 |
+
# load vectorstore
|
147 |
+
faiss_db = FAISS.load_local(
|
148 |
FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
|
149 |
)
|
150 |
+
chroma_db = Chroma(
|
151 |
+
embedding_function=cached_embeddings,
|
152 |
+
persist_directory=CHROMA_DB_INDEX,
|
153 |
+
)
|
154 |
|
155 |
# Create retrievers
|
156 |
+
faiss_retriever = faiss_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
|
157 |
+
chroma_retriever = chroma_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
|
158 |
bm25_retriever = BM25Retriever.from_documents(combined_documents)
|
159 |
bm25_retriever.k = 10
|
160 |
ensemble_retriever = EnsembleRetriever(
|
161 |
+
retrievers=[bm25_retriever, faiss_retriever, chroma_retriever],
|
162 |
+
weights=[0.4, 0.3, 0.3],
|
163 |
)
|
164 |
|
165 |
# Create prompt template
|
|
|
191 |
print(token, end="", flush=True)
|
192 |
|
193 |
|
194 |
+
streaming = os.getenv("STREAMING", "true") == "true"
|
195 |
+
print("STREAMING", streaming)
|
196 |
+
|
197 |
# Initialize LLMs with configuration
|
198 |
llm = ChatOpenAI(
|
199 |
model="gpt-4o",
|
200 |
temperature=0,
|
201 |
+
streaming=streaming,
|
202 |
callbacks=[StreamCallback()],
|
203 |
).configurable_alternatives(
|
204 |
ConfigurableField(id="llm"),
|
|
|
242 |
)
|
243 |
|
244 |
|
245 |
+
model_key = os.getenv("MODEL_KEY", "gemini")
|
246 |
+
print("MODEL_KEY", model_key)
|
247 |
|
248 |
|
249 |
+
def respond_stream(
|
250 |
message,
|
251 |
history: list[tuple[str, str]],
|
252 |
):
|
|
|
256 |
yield response
|
257 |
|
258 |
|
259 |
+
def respond(
|
260 |
+
message,
|
261 |
+
history: list[tuple[str, str]],
|
262 |
+
):
|
263 |
+
return rag_chain.with_config(configurable={"llm": model_key}).invoke(message)
|
264 |
+
|
265 |
+
|
266 |
"""
|
267 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
268 |
"""
|
269 |
demo = gr.ChatInterface(
|
270 |
+
respond_stream if streaming else respond,
|
271 |
title="랭체인에 대해서 물어보세요!",
|
272 |
description="안녕하세요!\n저는 랭체인에 대한 인공지능 QA봇입니다. 랭체인에 대해 깊은 지식을 가지고 있어요. 랭체인 개발에 관한 도움이 필요하시면 언제든지 질문해주세요!",
|
273 |
)
|
langchain_chroma/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:825e12c771f36b9c3e1f7aa3410a662227600a7c2c361960a0369b29b633871f
|
3 |
+
size 189218816
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9f756c81ce3c4911b5c264c5332a0f1dcd2b3bc98dd84dbdb94d01bd9cf927b
|
3 |
+
size 63540000
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fcf6e0965f08173cd9d1d2b68e7d8c9a6971bd865faa9a8cfd8ba994cccc64b
|
3 |
+
size 100
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff4868df92f9bdadacd1abe27aef8cdb3704a0b16304fdd33ae539ce6c39d371
|
3 |
+
size 868175
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57bba9bb25e471e0266df7225b0e7b12ae290d7237cbf2cf16cb3f9503c4a75d
|
3 |
+
size 60000
|
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b31f9bfa91a0635928b7b7f2b64977111f7f8fb6207ed42b3d0443518680d5c
|
3 |
+
size 132080
|
langchain_faiss/index.faiss
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7794ae040ff5830ba667d78cd4a1648e52e8bd860175f68a74ab85ea56ab3bb2
|
3 |
+
size 61632557
|
langchain_faiss/index.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a9c82717db2899e9cdd3196f4806a0807167fe9e7892e3a2c00b5b5c6cb6c9b
|
3 |
+
size 19914288
|