File size: 3,739 Bytes
0486b97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack import Pipeline
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders import PromptBuilder
import haystack.logging


import streamlit as st

from dotenv import load_dotenv
from haystack import component
import logging

# Let Haystack's logging helper configure its output, requesting JSON
# (use_json=True).
haystack.logging.configure_logging(use_json=True)

# Root logger: WARNING and above, formatted as "LEVEL - logger name -  message".
# NOTE(review): logging.basicConfig is a no-op when the root logger already has
# handlers — confirm whether configure_logging above installs one.
logging.basicConfig(
    format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING
)
# Loggers under the "haystack" namespace are more verbose than the root: INFO
# and above.
logging.getLogger("haystack").setLevel(logging.INFO)

# Load variables from a .env file into the process environment.
# Presumably this supplies the OpenAI API key used by OpenAIGenerator — verify.
load_dotenv()


@component
class ListToString:
    """Haystack component that forwards the first string of a list as text.

    In this file it sits between the generator's ``replies`` output (a list)
    and the text embedder's ``text`` input (a single string).
    """

    @component.output_types(text=str)
    def run(self, input_list: list[str]):
        """Return the first element of ``input_list`` under the ``text`` key.

        Args:
            input_list: Strings to pick from; only the first one is used.

        Returns:
            A dict ``{"text": input_list[0]}``.

        Raises:
            IndexError: If ``input_list`` is empty.
        """
        if not input_list:
            # Same exception type plain indexing would raise, but with a
            # message that points at this component instead of a bare
            # "list index out of range".
            raise IndexError(
                "ListToString received an empty list; expected at least one string"
            )

        first = input_list[0]
        # Route the diagnostic through logging rather than print() so it obeys
        # the log levels configured for the application.
        logging.getLogger(__name__).debug("ListToString selected: %s", first)
        return {"text": first}


@st.cache_resource
def retrieval_pipeline(path):
    """Build the pipeline that turns a user query into retrieved documents.

    The query is first rewritten by an LLM into a short imaginary answer; that
    text is embedded and the embedding is used to query the Chroma store.

    Args:
        path: Passed to ``ChromaDocumentStore`` as ``persist_path``.

    Returns:
        A ``Pipeline`` whose entry point is ``prompt_builder`` (input
        ``query``) and whose final stage is ``retriever``.
    """
    store = ChromaDocumentStore(persist_path=path)
    chroma_retriever = ChromaEmbeddingRetriever(store, top_k=5)

    rewrite_template = """Transform this query into a imaginary response that the

    user could expect based on your knowledge. Use 1-3 sentences. Replace

    entities or names that you invent with <axz>. The result should be in

    German.

    Query: {{

    query}}"""

    # (name, component) pairs, listed in the order they are registered.
    stages = [
        ("prompt_builder", PromptBuilder(template=rewrite_template)),
        ("generator", OpenAIGenerator()),
        ("list_to_string", ListToString()),
        ("retriever", chroma_retriever),
        (
            "text_embedder",
            SentenceTransformersTextEmbedder(model="intfloat/multilingual-e5-small"),
        ),
    ]

    # (sender, receiver) pairs: prompt -> LLM -> first reply -> embedding -> retrieval.
    wiring = [
        ("prompt_builder", "generator"),
        ("generator.replies", "list_to_string.input_list"),
        ("list_to_string.text", "text_embedder.text"),
        ("text_embedder.embedding", "retriever.query_embedding"),
    ]

    pipeline = Pipeline()
    for stage_name, stage in stages:
        pipeline.add_component(stage_name, stage)
    for sender, receiver in wiring:
        pipeline.connect(sender, receiver)

    return pipeline


@st.cache_resource
def generation_pipeline(model="gpt-4"):
    """Build the pipeline that answers a question from supplied documents.

    Cached with ``st.cache_resource`` so the pipeline is built once rather
    than on every Streamlit rerun of the script.

    Args:
        model: Model name passed to ``OpenAIGenerator``. Defaults to
            ``"gpt-4"``, the value that was previously hard-coded.

    Returns:
        A ``Pipeline`` with a ``prompt_builder`` component (template variables
        ``documents`` and ``question``) connected to an ``llm`` component.
    """
    template = """

        Given the following information, answer the question.



        Context:

        {% for document in documents %}

            {{ document.content }}

        {% endfor %}



        Bleibe chronologisch. Erkläre Konzepte und Begriffe wenn nötig.



        Question: {{question}}

        Answer:

        """

    prompt_builder = PromptBuilder(template=template)

    generator = OpenAIGenerator(model=model)

    # Two-stage pipeline: rendered prompt -> LLM.
    pipeline = Pipeline()

    pipeline.add_component("prompt_builder", prompt_builder)
    pipeline.add_component("llm", generator)

    pipeline.connect("prompt_builder", "llm")

    return pipeline


# Build both pipelines up front, then wait for a chat message.
retrieval_pipe = retrieval_pipeline("chatbot/chromadb")
generation_pipe = generation_pipeline()

prompt = st.chat_input("Say something")

if prompt:
    # Step 1: retrieve documents for the user's message.
    retrieval_inputs = {"prompt_builder": {"query": prompt}}
    response = retrieval_pipe.run(retrieval_inputs)
    retrieved_documents = response["retriever"]["documents"]

    st.markdown("### Sources")
    st.write(retrieved_documents)

    # Step 2: answer the original message using the retrieved documents.
    generation_inputs = {
        "prompt_builder": {
            "question": prompt,
            "documents": retrieved_documents,
        }
    }
    answer = generation_pipe.run(generation_inputs)
    first_reply = answer["llm"]["replies"][0]

    st.markdown("### Answer")
    st.write(first_reply)