dpleus commited on
Commit
0486b97
1 Parent(s): 4e10394
Files changed (3) hide show
  1. .gitattributes +0 -1
  2. app.py +121 -3
  3. requirements.txt +7 -3
.gitattributes CHANGED
@@ -2,5 +2,4 @@
2
  *.gitignore filter=lfs diff=lfs merge=lfs -text
3
  *.yaml filter=lfs diff=lfs merge=lfs -text
4
  /Dockerfile filter=lfs diff=lfs merge=lfs -text
5
- *.py filter=lfs diff=lfs merge=lfs -text
6
  *.sqlite3 filter=lfs diff=lfs merge=lfs -text
 
2
  *.gitignore filter=lfs diff=lfs merge=lfs -text
3
  *.yaml filter=lfs diff=lfs merge=lfs -text
4
  /Dockerfile filter=lfs diff=lfs merge=lfs -text
 
5
  *.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,3 +1,121 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdbf6cc3d9716db1fa57e7c2337fe353e09749488ca12353aa99d9f999dabf3f
3
- size 3739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack import Pipeline
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders import PromptBuilder
import haystack.logging


import streamlit as st

from dotenv import load_dotenv
from haystack import component
import logging

# Emit Haystack's own log records as structured JSON.
haystack.logging.configure_logging(use_json=True)

# Root logger: warnings and above only; Haystack is allowed INFO so its
# pipeline events remain visible without flooding the log with other libs.
logging.basicConfig(
    format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING
)
logging.getLogger("haystack").setLevel(logging.INFO)

# Pull environment variables from a local .env file — presumably the
# OpenAI API key used by OpenAIGenerator; verify against deployment setup.
load_dotenv()
24
+
25
+
26
@component
class ListToString:
    """Haystack component that unwraps a one-element list of strings.

    Bridges ``OpenAIGenerator`` (which outputs ``replies: list[str]``) to
    components that expect a plain string input, e.g. a text embedder.
    """

    @component.output_types(text=str)
    def run(self, input_list: list[str]) -> dict:
        """Return the first element of *input_list* under the ``text`` key.

        Raises:
            ValueError: if *input_list* is empty (instead of a bare
                IndexError from the unguarded ``input_list[0]``).
        """
        if not input_list:
            raise ValueError("input_list must contain at least one element")
        # Debug print of the reply removed; Haystack's INFO logging
        # (configured at module import) already traces pipeline traffic.
        return {"text": input_list[0]}
32
+
33
+
34
@st.cache_resource
def retrieval_pipeline(path, top_k=5, embedding_model="intfloat/multilingual-e5-small"):
    """Build (and cache via Streamlit) the HyDE-style retrieval pipeline.

    The pipeline expands the user query into a hypothetical German answer
    with an LLM, embeds that answer, and retrieves the ``top_k`` most
    similar documents from the Chroma store persisted at ``path``.

    Args:
        path: Filesystem path of the persisted Chroma document store.
        top_k: Number of documents to retrieve (default 5, as before).
        embedding_model: SentenceTransformers model name for the query
            embedder (default unchanged).

    Returns:
        A connected haystack ``Pipeline`` run as
        ``pipe.run({"prompt_builder": {"query": ...}})``.
    """
    document_store = ChromaDocumentStore(persist_path=path)
    retriever = ChromaEmbeddingRetriever(document_store, top_k=top_k)

    # HyDE prompt: the imagined answer's embedding should land near the
    # real documents. Placeholder fixed from the broken "{{\nquery}}" form.
    template = """Transform this query into an imaginary response that the
user could expect based on your knowledge. Use 1-3 sentences. Replace
entities or names that you invent with <axz>. The result should be in
German.
Query: {{ query }}"""

    prompt_builder = PromptBuilder(template=template)

    generator = OpenAIGenerator()

    # Create a pipeline
    basic_rag_pipeline = Pipeline()

    # Add components to your pipeline
    basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
    basic_rag_pipeline.add_component("generator", generator)
    basic_rag_pipeline.add_component("list_to_string", ListToString())
    basic_rag_pipeline.add_component("retriever", retriever)
    basic_rag_pipeline.add_component(
        "text_embedder",
        SentenceTransformersTextEmbedder(model=embedding_model),
    )

    # Data flow: query -> prompt -> LLM reply -> plain text -> embedding
    # -> embedding-based retrieval.
    basic_rag_pipeline.connect("prompt_builder", "generator")
    basic_rag_pipeline.connect("generator.replies", "list_to_string.input_list")
    basic_rag_pipeline.connect("list_to_string.text", "text_embedder.text")
    basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

    return basic_rag_pipeline
69
+
70
+
71
def generation_pipeline(model="gpt-4"):
    """Build the answer-generation (RAG) pipeline.

    Renders retrieved documents plus the user question into a prompt and
    sends it to an OpenAI chat model.

    Args:
        model: OpenAI model name (default ``"gpt-4"``, as before).

    Returns:
        A haystack ``Pipeline`` run as
        ``pipe.run({"prompt_builder": {"question": ..., "documents": ...}})``;
        the answer is found under ``result["llm"]["replies"]``.
    """
    # German instructions: stay chronological, explain concepts/terms
    # where necessary.
    template = """
    Given the following information, answer the question.

    Context:
    {% for document in documents %}
        {{ document.content }}
    {% endfor %}

    Bleibe chronologisch. Erkläre Konzepte und Begriffe wenn nötig.

    Question: {{question}}
    Answer:
    """

    prompt_builder = PromptBuilder(template=template)

    generator = OpenAIGenerator(model=model)

    # Create a pipeline
    basic_rag_pipeline = Pipeline()

    basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
    basic_rag_pipeline.add_component("llm", generator)

    basic_rag_pipeline.connect("prompt_builder", "llm")

    return basic_rag_pipeline
99
+
100
+
101
# --- Streamlit chat UI (module-level script) ---
# Build both pipelines up front; retrieval_pipeline() is memoized by
# @st.cache_resource, so reruns of this script reuse the same objects.
retrieval_pipe = retrieval_pipeline("chatbot/chromadb")
generation_pipe = generation_pipeline()

user_query = st.chat_input("Say something")

if user_query:
    # Stage 1: HyDE retrieval — fetch documents relevant to the query.
    retrieval_result = retrieval_pipe.run({"prompt_builder": {"query": user_query}})
    retrieved_docs = retrieval_result["retriever"]["documents"]

    st.markdown("### Sources")
    st.write(retrieved_docs)

    # Stage 2: generate the answer grounded in the retrieved documents.
    generation_result = generation_pipe.run(
        {
            "prompt_builder": {
                "question": user_query,
                "documents": retrieved_docs,
            }
        }
    )
    st.markdown("### Answer")
    st.write(generation_result["llm"]["replies"][0])
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ef2686aa78fbb22db80cbec6bf2bccb66bc2c7bfdee6f32c8180d71ca27a2d9
3
- size 82
 
 
 
 
 
1
1
+ streamlit
2
+ haystack-ai
3
+ bitsandbytes
4
+ accelerate
5
+ pypdf
6
+ cryptography
7
+ pre-commit
8
+ chroma-haystack
9
+ sentence-transformers
10
+ python-dotenv