Spaces:
Sleeping
Sleeping
feat: Update app.py with DataFrameLoader and LongContextReorder
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import torch
|
3 |
|
4 |
import gradio as gr
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
from langchain.callbacks.base import BaseCallbackHandler
|
7 |
from langchain.embeddings import CacheBackedEmbeddings
|
@@ -10,7 +11,11 @@ from langchain.retrievers import EnsembleRetriever
|
|
10 |
from langchain.storage import LocalFileStore
|
11 |
from langchain_anthropic import ChatAnthropic
|
12 |
from langchain_community.chat_models import ChatOllama
|
13 |
-
from langchain_community.document_loaders import
|
|
|
|
|
|
|
|
|
14 |
from langchain_community.document_loaders.generic import GenericLoader
|
15 |
from langchain_community.document_loaders.parsers.language.language_parser import (
|
16 |
LanguageParser,
|
@@ -21,7 +26,11 @@ from langchain_core.callbacks.manager import CallbackManager
|
|
21 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
22 |
from langchain_core.output_parsers import StrOutputParser
|
23 |
from langchain_core.prompts import PromptTemplate
|
24 |
-
from langchain_core.runnables import
|
|
|
|
|
|
|
|
|
25 |
from langchain_google_genai import GoogleGenerativeAI
|
26 |
from langchain_groq import ChatGroq
|
27 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
@@ -29,6 +38,8 @@ from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
|
|
29 |
|
30 |
from langchain_cohere import CohereRerank
|
31 |
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
|
|
|
|
32 |
|
33 |
# Load environment variables
|
34 |
load_dotenv()
|
@@ -87,10 +98,9 @@ print(f".ipynb 파일의 개수: {len(ipynb_documents)}")
|
|
87 |
|
88 |
|
89 |
## wikidocs
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
wiki_documents = text_splitter.split_documents(loader.load())
|
94 |
|
95 |
|
96 |
# Split documents into chunks
|
@@ -257,7 +267,11 @@ llm = ChatOpenAI(
|
|
257 |
|
258 |
# Create retrieval-augmented generation chain
|
259 |
rag_chain = (
|
260 |
-
{
|
|
|
|
|
|
|
|
|
261 |
| prompt
|
262 |
| llm
|
263 |
| StrOutputParser()
|
|
|
2 |
import torch
|
3 |
|
4 |
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
from dotenv import load_dotenv
|
7 |
from langchain.callbacks.base import BaseCallbackHandler
|
8 |
from langchain.embeddings import CacheBackedEmbeddings
|
|
|
11 |
from langchain.storage import LocalFileStore
|
12 |
from langchain_anthropic import ChatAnthropic
|
13 |
from langchain_community.chat_models import ChatOllama
|
14 |
+
from langchain_community.document_loaders import (
|
15 |
+
NotebookLoader,
|
16 |
+
TextLoader,
|
17 |
+
DataFrameLoader,
|
18 |
+
)
|
19 |
from langchain_community.document_loaders.generic import GenericLoader
|
20 |
from langchain_community.document_loaders.parsers.language.language_parser import (
|
21 |
LanguageParser,
|
|
|
26 |
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
27 |
from langchain_core.output_parsers import StrOutputParser
|
28 |
from langchain_core.prompts import PromptTemplate
|
29 |
+
from langchain_core.runnables import (
|
30 |
+
ConfigurableField,
|
31 |
+
RunnablePassthrough,
|
32 |
+
RunnableLambda,
|
33 |
+
)
|
34 |
from langchain_google_genai import GoogleGenerativeAI
|
35 |
from langchain_groq import ChatGroq
|
36 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
|
|
38 |
|
39 |
from langchain_cohere import CohereRerank
|
40 |
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
41 |
+
from langchain_community.document_transformers import LongContextReorder
|
42 |
+
|
43 |
|
44 |
# Load environment variables
|
45 |
load_dotenv()
|
|
|
98 |
|
99 |
|
100 |
## wikidocs
|
101 |
+
df = pd.read_parquet("./docs/wikidocs_14314.parquet")
|
102 |
+
loader = DataFrameLoader(df, page_content_column="content")
|
103 |
+
wiki_documents = loader.load()
|
|
|
104 |
|
105 |
|
106 |
# Split documents into chunks
|
|
|
267 |
|
268 |
# Create retrieval-augmented generation chain
|
269 |
rag_chain = (
|
270 |
+
{
|
271 |
+
"context": compression_retriever
|
272 |
+
| RunnableLambda(LongContextReorder().transform_documents),
|
273 |
+
"question": RunnablePassthrough(),
|
274 |
+
}
|
275 |
| prompt
|
276 |
| llm
|
277 |
| StrOutputParser()
|