Spaces: GIZ

prashant committed
Commit 048a702 (parent: 4d2be28)

decorator test

app.py CHANGED
@@ -1,4 +1,4 @@
-import appStore.keyword_search as keyword_search
+# import appStore.keyword_search as keyword_search
 import appStore.sdg_analysis as sdg_analysis
 #import appStore.coherence as coherence
 import appStore.info as info
@@ -12,6 +12,6 @@ app = MultiApp()
 
 app.add_app("About","house", info.app)
 app.add_app("SDG Analysis","gear",sdg_analysis.app)
-app.add_app("Search","search", keyword_search.app)
+# app.add_app("Search","search", keyword_search.app)
 
 app.run()
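For context, app.py drives a small multi-page Streamlit launcher; the commit only comments the Search page out of its registry. A minimal sketch of the MultiApp pattern it assumes (the class lives elsewhere in the repo, so everything here is illustrative):

import streamlit as st

# Hypothetical sketch of the MultiApp helper used by app.py: each page
# registers a title, an icon name, and a render callback.
class MultiApp:
    def __init__(self):
        self.apps = []  # list of (title, icon, render function)

    def add_app(self, title, icon, func):
        self.apps.append((title, icon, func))

    def run(self):
        # choose a page in the sidebar, then call its render function
        titles = [title for title, _, _ in self.apps]
        choice = st.sidebar.radio("Go to", titles)
        for title, _, func in self.apps:
            if title == choice:
                func()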
appStore/keyword_search.py CHANGED
@@ -89,4 +89,10 @@ def app():
         with st.spinner("Performing Similar/Contextual search"):
             semantic_search(queryList,paraList)
 
+    else:
+        st.info("🤔 No document found, please try to upload it at the sidebar!")
+        logging.warning("Terminated as no document provided")
+
+
+
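The added else branch stops the search when no document was uploaded. A minimal sketch of the resulting control flow; the 'filepath' session key is an assumption borrowed from runSDGPreprocessingPipeline below:

import logging
import streamlit as st

def app():
    if 'filepath' in st.session_state:
        with st.spinner("Performing Similar/Contextual search"):
            pass  # placeholder for semantic_search(queryList, paraList)
    else:
        st.info("🤔 No document found, please try to upload it at the sidebar!")
        logging.warning("Terminated as no document provided")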
utils/preprocessing.py CHANGED
@@ -23,7 +23,7 @@ def useOCR(file_path: str)-> Text:
     file_path: file_path of uploade file, returned by add_upload function in
     uploadAndExample.py
 
-    Returns the text files as string.
+    Returns the text file as string.
     """
 
 
@@ -242,7 +242,8 @@ class UdfPreProcessor(BaseComponent):
 
 def processingpipeline():
     """
-    Returns the preprocessing pipeline
+    Returns the preprocessing pipeline. Will use FileConverter and UdfPreProcesor
+    from utils.
 
     """
 
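The updated docstring says the pipeline wires FileConverter and UdfPreProcessor together. In Haystack v1 terms that would look roughly like the sketch below; the node names and the exact import path are assumptions, not the repo's actual code:

from haystack.pipelines import Pipeline
# FileConverter and UdfPreProcessor are the custom components defined in
# utils/preprocessing.py; this sketch assumes they are importable as such.
from utils.preprocessing import FileConverter, UdfPreProcessor

def processingpipeline():
    # Indexing-style pipeline: raw file -> converted text -> preprocessed docs
    pipeline = Pipeline()
    pipeline.add_node(component=FileConverter(), name="FileConverter",
                      inputs=["File"])
    pipeline.add_node(component=UdfPreProcessor(), name="UdfPreProcessor",
                      inputs=["FileConverter"])
    return pipeline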
utils/sdg_classifier.py CHANGED
@@ -3,13 +3,14 @@ from haystack.schema import Document
 from typing import List, Tuple
 import configparser
 import streamlit as st
+from utils.streamlitcheck import check_streamlit
 from pandas import DataFrame, Series
 import logging
 from utils.preprocessing import processingpipeline
 config = configparser.ConfigParser()
 config.read_file(open('paramconfig.cfg'))
 
-@st.cache(allow_output_mutation=True)
+
 def load_sdgClassifier():
     """
     loads the document classifier using haystack, where the name/path of model
@@ -49,11 +50,14 @@ def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
     logging.info("running SDG classifiication")
     threshold = float(config.get('sdg','THRESHOLD'))
 
-
-    classifier = load_sdgClassifier()
+    if check_streamlit():
+        st.write("caching model")
+        classifier = st.cache(load_sdgClassifier(), allow_output_mutation=True)
+    else:
+        classifier = load_sdgClassifier()
     results = classifier.predict(haystackdoc)
 
-
+
     labels_= [(l.meta['classification']['label'],
                l.meta['classification']['score'],l.content,) for l in results]
 
@@ -68,10 +72,19 @@ def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
 
     return df, x
 
-def runSDGPreprocessingPipeline()->List[Document]:
+def runSDGPreprocessingPipeline(file_path = None, file_name = None)->List[Document]:
     """
     creates the pipeline and runs the preprocessing pipeline,
     the params for pipeline are fetched from paramconfig
+
+    Param
+    ------------
+
+    file_path: filepath, if not given will check for file_path in streamlit
+    session_state, else will return
+
+    file_name: filename, if not given will check for file_name in streamlit
+    session_state
 
     Return
     --------------
@@ -81,6 +94,7 @@ def runSDGPreprocessingPipeline()->List[Document]:
     key = 'documents' on output.
 
     """
+    # if file_path:
     file_path = st.session_state['filepath']
    file_name = st.session_state['filename']
     sdg_processing_pipeline = processingpipeline()
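The "decorator test" in the commit title is this hunk: @st.cache moves from a decorator on load_sdgClassifier to a runtime call. Note that st.cache memoizes a callable, so st.cache(load_sdgClassifier(), ...) hands it the already-constructed classifier rather than the loader and will not avoid reloading the model on reruns. The runtime equivalent of the removed decorator would be a sketch like this (the loader stub stands in for the real one defined in this module):

import streamlit as st
from utils.streamlitcheck import check_streamlit

def load_sdgClassifier():
    ...  # stand-in for the real loader defined earlier in this module

# Pass the function object to st.cache, then call the cached wrapper.
if check_streamlit():
    st.write("caching model")
    cached_loader = st.cache(load_sdgClassifier, allow_output_mutation=True)
    classifier = cached_loader()
else:
    classifier = load_sdgClassifier()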
utils/semantic_search.py CHANGED
@@ -107,19 +107,25 @@ def semanticSearchPipeline(documents:List[Document]):
 
     document_store = InMemoryDocumentStore()
     document_store.write_documents(documents)
-
+    if 'retriever' in st.session_state:
+        retriever = st.session_state['retriever']
+        document_store.update_embeddings(retriever)
+        # querycheck =
+
+
+        # embedding_model = config.get('semantic_search','RETRIEVER')
+        # embedding_model_format = config.get('semantic_search','RETRIEVER_FORMAT')
+        # embedding_layer = int(config.get('semantic_search','RETRIEVER_EMB_LAYER'))
+        # retriever_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
+        # retriever = EmbeddingRetriever(
+        #     document_store=document_store,
+        #     embedding_model=embedding_model,top_k = retriever_top_k,
+        #     emb_extraction_layer=embedding_layer, scale_score =True,
+        #     model_format=embedding_model_format, use_gpu = True)
+        # document_store.update_embeddings(retriever)
+    else:
     embedding_model = config.get('semantic_search','RETRIEVER')
     embedding_model_format = config.get('semantic_search','RETRIEVER_FORMAT')
-    embedding_layer = int(config.get('semantic_search','RETRIEVER_EMB_LAYER'))
-    retriever_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
-    retriever = EmbeddingRetriever(
-        document_store=document_store,
-        embedding_model=embedding_model,top_k = retriever_top_k,
-        emb_extraction_layer=embedding_layer, scale_score =True,
-        model_format=embedding_model_format, use_gpu = True)
-    document_store.update_embeddings(retriever)
-    else:
-
     retriever = EmbeddingRetriever(
         document_store=document_store,
         embedding_model=embedding_model,top_k = retriever_top_k,
@@ -134,13 +140,24 @@ def semanticSearchPipeline(documents:List[Document]):
     embedding_model_format = config.get('semantic_search','RETRIEVER_FORMAT')
     embedding_layer = int(config.get('semantic_search','RETRIEVER_EMB_LAYER'))
     retriever_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
+
+
     retriever = EmbeddingRetriever(
         document_store=document_store,
         embedding_model=embedding_model,top_k = retriever_top_k,
         emb_extraction_layer=embedding_layer, scale_score =True,
         model_format=embedding_model_format, use_gpu = True)
+    st.session_state['retriever'] = retriever
     document_store.update_embeddings(retriever)
     st.session_state['document_store'] = document_store
+    querycheck = QueryCheck()
+    st.session_state['querycheck'] = querycheck
+    reader_model = config.get('semantic_search','READER')
+    reader_top_k = retriever_top_k
+    reader = FARMReader(model_name_or_path=reader_model,
+                        top_k = reader_top_k, use_gpu=True)
+
+    st.session_state['reader'] = reader
 
     querycheck = QueryCheck()
 
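These hunks cache the heavy objects (retriever, reader, query checker) in st.session_state so they are built once per session and reused on Streamlit reruns instead of being re-embedded every time. The pattern, reduced to its generic form; get_or_build is a hypothetical helper, not code from this repo:

import streamlit as st

def get_or_build(key, builder):
    # Build an expensive object on the first run, then reuse it from
    # st.session_state on every subsequent Streamlit rerun.
    if key not in st.session_state:
        st.session_state[key] = builder()
    return st.session_state[key]

# e.g. retriever = get_or_build('retriever', make_retriever)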
utils/streamlitcheck.py ADDED
@@ -0,0 +1,19 @@
+def check_streamlit():
+    """
+    Function to check whether python code is run within streamlit
+
+    Returns
+    -------
+    use_streamlit : boolean
+        True if code is run within streamlit, else False
+    """
+    try:
+        from streamlit.script_run_context import get_script_run_ctx
+        if not get_script_run_ctx():
+            use_streamlit = False
+        else:
+            use_streamlit = True
+    except ModuleNotFoundError:
+        use_streamlit = False
+    return use_streamlit
+
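check_streamlit lets shared utility code branch on whether it is executing inside a Streamlit script run (note that the streamlit.script_run_context import path varies between Streamlit versions). A typical call site might look like:

from utils.streamlitcheck import check_streamlit

if check_streamlit():
    import streamlit as st
    st.write("running inside a Streamlit app")
else:
    print("running as a plain Python script")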