mintaeng committed on
Commit
d09dce6
•
1 Parent(s): fcf4068

Create retriever.py

Browse files
Files changed (1) hide show
  1. retriever.py +37 -0
retriever.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.runnables import RunnablePassthrough
2
+ from langchain_core.output_parsers import StrOutputParser
3
+ from langchain_community.chat_models import ChatOllama
4
+ from langchain_core.prompts import ChatPromptTemplate
5
+ from langchain_pinecone import PineconeVectorStore
6
+ from langchain_community.embeddings import SentenceTransformerEmbeddings
7
+
8
+ import os
9
+ from dotenv import load_dotenv
10
+ from langchain.retrievers import BM25Retriever, EnsembleRetriever
11
+ from kiwipiepy import Kiwi
12
+ load_dotenv()
13
+
14
+ kiwi = Kiwi()
15
+
16
def kiwi_tokenize(text):
    """Split ``text`` into tokens using the module-level ``kiwi`` analyzer.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``form`` attribute of every token that
        ``kiwi.tokenize(text)`` yields, in the order they are produced.
    """
    morphemes = kiwi.tokenize(text)
    return [morpheme.form for morpheme in morphemes]
18
+ # embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code":True})
19
+
20
def retriever(pc, bm25):
    """Build a hybrid retriever combining BM25 keyword search and vector search.

    Args:
        pc: Vector store exposing ``as_retriever`` (presumably a
            ``PineconeVectorStore`` -- confirm against the caller).
        bm25: Documents handed to ``BM25Retriever.from_documents`` to build
            the keyword index.

    Returns:
        An ``EnsembleRetriever`` that weights the BM25 retriever at 0.3 and
        the vector-store retriever at 0.7. Both underlying retrievers are
        configured with ``k=4``.
    """
    # Vector-store side of the ensemble.
    pcretriever = pc.as_retriever(search_kwargs={'k': 4})

    # Keyword side of the ensemble; documents and queries are tokenized with
    # kiwi_tokenize instead of BM25Retriever's default preprocessing.
    kiwi_bm25 = BM25Retriever.from_documents(bm25, preprocess_func=kiwi_tokenize)
    kiwi_bm25.k = 4

    # NOTE(review): the previous version passed search_type="mmr" to
    # EnsembleRetriever. EnsembleRetriever declares no such field, so the
    # argument appears to have been silently ignored and MMR was never
    # applied. It is dropped here to keep the code honest. If MMR
    # diversification is wanted, request it on the vector-store retriever,
    # e.g. pc.as_retriever(search_type="mmr", search_kwargs={'k': 4}).
    ensemble = EnsembleRetriever(
        retrievers=[kiwi_bm25, pcretriever],  # retrievers to combine
        weights=[0.3, 0.7],  # weight applied to each retriever's results
    )

    # Earlier standalone setup, kept for reference:
    # Initialize the Pinecone vector store
    # vectorstore = PineconeVectorStore(
    #     index_name=os.getenv("INDEX_NAME"), embedding=embedding_model
    # )
    #
    # retriever = vectorstore.as_retriever(search_kwargs={'k': 2})

    return ensemble