anpigon committed on
Commit
d6ca2e6
•
1 Parent(s): 3fc26ed

chore: Update langchain_community.vectorstores imports and add Chroma vectorstore

Browse files
.gitattributes CHANGED
@@ -34,6 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
 
37
  *.pdf filter=lfs diff=lfs merge=lfs -text
38
  *.faiss filter=lfs diff=lfs merge=lfs -text
39
  *.msg filter=lfs diff=lfs merge=lfs -text
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  langchain_faiss/* filter=lfs diff=lfs merge=lfs -text
37
+ langchain_chroma/* filter=lfs diff=lfs merge=lfs -text
38
  *.pdf filter=lfs diff=lfs merge=lfs -text
39
  *.faiss filter=lfs diff=lfs merge=lfs -text
40
  *.msg filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -16,7 +16,7 @@ from langchain_community.document_loaders.parsers.language.language_parser impor
16
  LanguageParser,
17
  )
18
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
19
- from langchain_community.vectorstores import FAISS
20
  from langchain_core.callbacks.manager import CallbackManager
21
  from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
22
  from langchain_core.output_parsers import StrOutputParser
@@ -129,18 +129,37 @@ cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
129
 
130
  # Create and save FAISS index
131
  FAISS_DB_INDEX = "./langchain_faiss"
132
- # db = FAISS.from_documents(combined_documents, cached_embeddings)
133
- # db.save_local(folder_path=FAISS_DB_INDEX)
134
- db = FAISS.load_local(
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
136
  )
 
 
 
 
137
 
138
  # Create retrievers
139
- faiss_retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
 
140
  bm25_retriever = BM25Retriever.from_documents(combined_documents)
141
  bm25_retriever.k = 10
142
  ensemble_retriever = EnsembleRetriever(
143
- retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5], search_type="mmr"
 
144
  )
145
 
146
  # Create prompt template
@@ -172,11 +191,14 @@ class StreamCallback(BaseCallbackHandler):
172
  print(token, end="", flush=True)
173
 
174
 
 
 
 
175
  # Initialize LLMs with configuration
176
  llm = ChatOpenAI(
177
  model="gpt-4o",
178
  temperature=0,
179
- streaming=True,
180
  callbacks=[StreamCallback()],
181
  ).configurable_alternatives(
182
  ConfigurableField(id="llm"),
@@ -220,11 +242,11 @@ rag_chain = (
220
  )
221
 
222
 
223
- model_key = os.getenv("LLM_MODEL", "gpt4")
224
- print("model", model_key)
225
 
226
 
227
- def respond(
228
  message,
229
  history: list[tuple[str, str]],
230
  ):
@@ -234,11 +256,18 @@ def respond(
234
  yield response
235
 
236
 
 
 
 
 
 
 
 
237
  """
238
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
239
  """
240
  demo = gr.ChatInterface(
241
- respond,
242
  title="랭체인에 대해서 물어보세요!",
243
  description="안녕하세요!\n저는 랭체인에 대한 인공지능 QA봇입니다. 랭체인에 대해 깊은 지식을 가지고 있어요. 랭체인 개발에 관한 도움이 필요하시면 언제든지 질문해주세요!",
244
  )
 
16
  LanguageParser,
17
  )
18
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
19
+ from langchain_community.vectorstores import FAISS, Chroma
20
  from langchain_core.callbacks.manager import CallbackManager
21
  from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
22
  from langchain_core.output_parsers import StrOutputParser
 
129
 
130
  # Create and save FAISS index
131
  FAISS_DB_INDEX = "./langchain_faiss"
132
+ # faiss_db = FAISS.from_documents(
133
+ # documents=combined_documents,
134
+ # embedding=cached_embeddings,
135
+ # )
136
+ # faiss_db.save_local(folder_path=FAISS_DB_INDEX)
137
+
138
+ # Create and save Chroma index
139
+ CHROMA_DB_INDEX = "./langchain_chroma"
140
+ # chroma_db = Chroma.from_documents(
141
+ # documents=combined_documents,
142
+ # embedding=cached_embeddings,
143
+ # persist_directory=CHROMA_DB_INDEX,
144
+ # )
145
+
146
+ # load vectorstore
147
+ faiss_db = FAISS.load_local(
148
  FAISS_DB_INDEX, cached_embeddings, allow_dangerous_deserialization=True
149
  )
150
+ chroma_db = Chroma(
151
+ embedding_function=cached_embeddings,
152
+ persist_directory=CHROMA_DB_INDEX,
153
+ )
154
 
155
  # Create retrievers
156
+ faiss_retriever = faiss_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
157
+ chroma_retriever = chroma_db.as_retriever(search_type="mmr", search_kwargs={"k": 10})
158
  bm25_retriever = BM25Retriever.from_documents(combined_documents)
159
  bm25_retriever.k = 10
160
  ensemble_retriever = EnsembleRetriever(
161
+ retrievers=[bm25_retriever, faiss_retriever, chroma_retriever],
162
+ weights=[0.4, 0.3, 0.3],
163
  )
164
 
165
  # Create prompt template
 
191
  print(token, end="", flush=True)
192
 
193
 
194
+ streaming = os.getenv("STREAMING", "true") == "true"
195
+ print("STREAMING", streaming)
196
+
197
  # Initialize LLMs with configuration
198
  llm = ChatOpenAI(
199
  model="gpt-4o",
200
  temperature=0,
201
+ streaming=streaming,
202
  callbacks=[StreamCallback()],
203
  ).configurable_alternatives(
204
  ConfigurableField(id="llm"),
 
242
  )
243
 
244
 
245
+ model_key = os.getenv("MODEL_KEY", "gemini")
246
+ print("MODEL_KEY", model_key)
247
 
248
 
249
+ def respond_stream(
250
  message,
251
  history: list[tuple[str, str]],
252
  ):
 
256
  yield response
257
 
258
 
259
+ def respond(
260
+ message,
261
+ history: list[tuple[str, str]],
262
+ ):
263
+ return rag_chain.with_config(configurable={"llm": model_key}).invoke(message)
264
+
265
+
266
  """
267
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
268
  """
269
  demo = gr.ChatInterface(
270
+ respond_stream if streaming else respond,
271
  title="랭체인에 대해서 물어보세요!",
272
  description="안녕하세요!\n저는 랭체인에 대한 인공지능 QA봇입니다. 랭체인에 대해 깊은 지식을 가지고 있어요. 랭체인 개발에 관한 도움이 필요하시면 언제든지 질문해주세요!",
273
  )
langchain_chroma/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825e12c771f36b9c3e1f7aa3410a662227600a7c2c361960a0369b29b633871f
3
+ size 189218816
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f756c81ce3c4911b5c264c5332a0f1dcd2b3bc98dd84dbdb94d01bd9cf927b
3
+ size 63540000
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fcf6e0965f08173cd9d1d2b68e7d8c9a6971bd865faa9a8cfd8ba994cccc64b
3
+ size 100
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4868df92f9bdadacd1abe27aef8cdb3704a0b16304fdd33ae539ce6c39d371
3
+ size 868175
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57bba9bb25e471e0266df7225b0e7b12ae290d7237cbf2cf16cb3f9503c4a75d
3
+ size 60000
langchain_chroma/dcb5b5f5-3dac-4239-b657-276cb0bca164/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b31f9bfa91a0635928b7b7f2b64977111f7f8fb6207ed42b3d0443518680d5c
3
+ size 132080
langchain_faiss/index.faiss CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f50b9cdc2968dd1fe5875e7e1f8ed2689a3e938d505a0e2f06b5257083339bd2
3
- size 2621485
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7794ae040ff5830ba667d78cd4a1648e52e8bd860175f68a74ab85ea56ab3bb2
3
+ size 61632557
langchain_faiss/index.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba84841f2d61493243e47d654cff88c8864c5fa6119469b7569677e4f82f3c5f
3
- size 862597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a9c82717db2899e9cdd3196f4806a0807167fe9e7892e3a2c00b5b5c6cb6c9b
3
+ size 19914288