Spaces:
Running
Running
silvanocerza
committed on
Commit
•
cf437f7
1
Parent(s):
c5936cd
Add document sources
Browse files
main.py
CHANGED
@@ -5,8 +5,10 @@ import os
|
|
5 |
|
6 |
from dotenv import load_dotenv
|
7 |
from haystack.preview import Pipeline
|
|
|
8 |
from haystack.preview.components.retrievers import MemoryBM25Retriever
|
9 |
from haystack.preview.components.generators.openai.gpt import GPTGenerator
|
|
|
10 |
from haystack.preview.components.builders.prompt_builder import PromptBuilder
|
11 |
from haystack.preview.components.preprocessors import (
|
12 |
DocumentCleaner,
|
@@ -77,34 +79,39 @@ def index_files(files):
|
|
77 |
indexing_pipeline.run({"converter": {"paths": files}})
|
78 |
|
79 |
|
80 |
-
def search(question: str) ->
|
81 |
-
retriever = MemoryBM25Retriever(document_store=document_store(), top_k=
|
82 |
|
83 |
template = """Take a deep breath and think then answer given the context
|
84 |
-
Context: {{ documents|map(attribute='text')|
|
85 |
-
Question: {{
|
86 |
Answer:
|
87 |
"""
|
88 |
prompt_builder = PromptBuilder(template)
|
89 |
|
90 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
91 |
generator = GPTGenerator(api_key=OPENAI_API_KEY)
|
|
|
92 |
|
93 |
pipe = Pipeline()
|
94 |
|
95 |
pipe.add_component("docs_retriever", retriever)
|
96 |
-
pipe.add_component("
|
97 |
pipe.add_component("gpt35", generator)
|
|
|
98 |
|
99 |
-
pipe.connect("docs_retriever.documents", "
|
100 |
-
pipe.connect("
|
|
|
|
|
101 |
res = pipe.run(
|
102 |
{
|
103 |
"docs_retriever": {"query": question},
|
104 |
-
"
|
|
|
105 |
}
|
106 |
)
|
107 |
-
return res["
|
108 |
|
109 |
|
110 |
with st.status(
|
@@ -129,5 +136,18 @@ if question := st.text_input(
|
|
129 |
with st.spinner("Waiting"):
|
130 |
answer = search(question)
|
131 |
|
132 |
-
st.
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
from dotenv import load_dotenv
|
7 |
from haystack.preview import Pipeline
|
8 |
+
from haystack.preview.dataclasses import GeneratedAnswer
|
9 |
from haystack.preview.components.retrievers import MemoryBM25Retriever
|
10 |
from haystack.preview.components.generators.openai.gpt import GPTGenerator
|
11 |
+
from haystack.preview.components.builders.answer_builder import AnswerBuilder
|
12 |
from haystack.preview.components.builders.prompt_builder import PromptBuilder
|
13 |
from haystack.preview.components.preprocessors import (
|
14 |
DocumentCleaner,
|
|
|
79 |
indexing_pipeline.run({"converter": {"paths": files}})
|
80 |
|
81 |
|
82 |
+
def search(question: str) -> GeneratedAnswer:
|
83 |
+
retriever = MemoryBM25Retriever(document_store=document_store(), top_k=5)
|
84 |
|
85 |
template = """Take a deep breath and think then answer given the context
|
86 |
+
Context: {{ documents|map(attribute='text')|replace('\n', ' ')|join(';') }}
|
87 |
+
Question: {{ query }}
|
88 |
Answer:
|
89 |
"""
|
90 |
prompt_builder = PromptBuilder(template)
|
91 |
|
92 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
|
93 |
generator = GPTGenerator(api_key=OPENAI_API_KEY)
|
94 |
+
answer_builder = AnswerBuilder()
|
95 |
|
96 |
pipe = Pipeline()
|
97 |
|
98 |
pipe.add_component("docs_retriever", retriever)
|
99 |
+
pipe.add_component("prompt_builder", prompt_builder)
|
100 |
pipe.add_component("gpt35", generator)
|
101 |
+
pipe.add_component("answer_builder", answer_builder)
|
102 |
|
103 |
+
pipe.connect("docs_retriever.documents", "prompt_builder.documents")
|
104 |
+
pipe.connect("prompt_builder.prompt", "gpt35.prompt")
|
105 |
+
pipe.connect("docs_retriever.documents", "answer_builder.documents")
|
106 |
+
pipe.connect("gpt35.replies", "answer_builder.replies")
|
107 |
res = pipe.run(
|
108 |
{
|
109 |
"docs_retriever": {"query": question},
|
110 |
+
"prompt_builder": {"query": question},
|
111 |
+
"answer_builder": {"query": question},
|
112 |
}
|
113 |
)
|
114 |
+
return res["answer_builder"]["answers"][0]
|
115 |
|
116 |
|
117 |
with st.status(
|
|
|
136 |
with st.spinner("Waiting"):
|
137 |
answer = search(question)
|
138 |
|
139 |
+
if not st.session_state.get("run_once", False):
|
140 |
+
st.balloons()
|
141 |
+
st.session_state["run_once"] = True
|
142 |
+
|
143 |
+
print(answer.data)
|
144 |
+
st.markdown(answer.data)
|
145 |
+
with st.expander("See sources:"):
|
146 |
+
for document in answer.documents:
|
147 |
+
url_source = document.metadata.get("url_source", "")
|
148 |
+
content = f"{url_source}: {document.text}" if url_source else document.text
|
149 |
+
if document.metadata.get("type") == "md":
|
150 |
+
st.markdown(content)
|
151 |
+
else:
|
152 |
+
st.write(content)
|
153 |
+
st.divider()
|