ppsingh committed on
Commit
927f30d
1 Parent(s): 1e46fba

Update appStore/target.py

Browse files
Files changed (1) hide show
  1. appStore/target.py +17 -144
appStore/target.py CHANGED
@@ -8,168 +8,41 @@ import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
  import streamlit as st
11
- from utils.target_classifier import load_targetClassifier, target_classification
 
12
  import logging
13
  logger = logging.getLogger(__name__)
14
  from utils.config import get_classifier_params
15
  from io import BytesIO
16
  import xlsxwriter
17
  import plotly.express as px
 
 
 
 
 
 
18
 
19
  # Declare all the necessary variables
20
- classifier_identifier = 'target'
21
- params = get_classifier_params(classifier_identifier)
22
-
23
- ## Labels dictionary ###
24
- _lab_dict = {
25
- 'NEGATIVE':'NO TARGET INFO',
26
- 'TARGET':'TARGET',
27
- }
28
-
29
- @st.cache_data
30
- def to_excel(df):
31
- df['Target Validation'] = 'No'
32
- df['Netzero Validation'] = 'No'
33
- df['GHG Validation'] = 'No'
34
- df['Adapt-Mitig Validation'] = 'No'
35
- df['Sector'] = 'No'
36
- len_df = len(df)
37
- output = BytesIO()
38
- writer = pd.ExcelWriter(output, engine='xlsxwriter')
39
- df.to_excel(writer, index=False, sheet_name='Sheet1')
40
- workbook = writer.book
41
- worksheet = writer.sheets['Sheet1']
42
- worksheet.data_validation('L2:L{}'.format(len_df),
43
- {'validate': 'list',
44
- 'source': ['No', 'Yes', 'Discard']})
45
- worksheet.data_validation('M2:L{}'.format(len_df),
46
- {'validate': 'list',
47
- 'source': ['No', 'Yes', 'Discard']})
48
- worksheet.data_validation('N2:L{}'.format(len_df),
49
- {'validate': 'list',
50
- 'source': ['No', 'Yes', 'Discard']})
51
- worksheet.data_validation('O2:L{}'.format(len_df),
52
- {'validate': 'list',
53
- 'source': ['No', 'Yes', 'Discard']})
54
- worksheet.data_validation('P2:L{}'.format(len_df),
55
- {'validate': 'list',
56
- 'source': ['No', 'Yes', 'Discard']})
57
- writer.save()
58
- processed_data = output.getvalue()
59
- return processed_data
60
 
61
  def app():
62
-
63
- #### APP INFO #####
64
- # st.write(
65
- # """
66
- # The **Target Extraction** app is an easy-to-use interface built \
67
- # in Streamlit for analyzing policy documents for \
68
- # Classification of the paragraphs/texts in the document *If it \
69
- # contains any Economy-Wide Targets related information* - \
70
- # developed by GIZ Data Service Center, GFA, IKI Tracs, \
71
- # SV Klima and SPA. \n
72
- # """)
73
-
74
-
75
  ### Main app code ###
76
  with st.container():
77
  if 'key0' in st.session_state:
78
  df = st.session_state.key0
79
 
80
- #load Classifier
81
- classifier = load_targetClassifier(classifier_name=params['model_name'])
82
- st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
 
83
  if len(df) > 100:
84
  warning_msg = ": This might take sometime, please sit back and relax."
85
  else:
86
  warning_msg = ""
87
 
88
- df = target_classification(haystack_doc=df,
89
- threshold= params['threshold'])
90
- st.session_state.key1 = df
91
-
92
- # # excel part
93
- # temp = df[df['Relevancy']>threshold]
94
-
95
- # df['Validation'] = 'No'
96
- # df_xlsx = to_excel(df)
97
- # st.download_button(label='📥 Download Current Result',
98
- # data=df_xlsx ,
99
- # file_name= 'file_target.xlsx')
100
-
101
- def target_display():
102
- if 'key1' in st.session_state:
103
- df = st.session_state.key1
104
-
105
-
106
- hits = df[df['Target Label'] == 'TARGET']
107
- # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
108
- range_val = min(5,len(hits))
109
- if range_val !=0:
110
- count_target = sum(hits['Target Label'] == 'TARGET')
111
- count_netzero = sum(hits['Netzero Label'] == 'NET-ZERO')
112
- count_ghg = sum(hits['GHG Label'] == 'GHG')
113
- count_economy = sum([True if 'Economy-wide' in x else False
114
- for x in hits['Sector Label']])
115
-
116
- # count_df = df['Target Label'].value_counts()
117
- # count_df = count_df.rename('count')
118
- # count_df = count_df.rename_axis('Target Label').reset_index()
119
- # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
120
-
121
- # fig = px.bar(count_df, y="Label_def", x="count", orientation='h', height=200)
122
- c1, c2 = st.columns([1,1])
123
- with c1:
124
- st.write('**Target Paragraphs**: `{}`'.format(count_target))
125
- st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
126
-
127
- # st.plotly_chart(fig,use_container_width= True)
128
-
129
- # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
130
- # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
131
- # count_economy = sum([True if 'Economy-wide' in x else False
132
- # for x in hits['Sector Label']])
133
- with c2:
134
- st.write('**GHG Related Paragraphs**: `{}`'.format(count_ghg))
135
- st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
136
- st.write('-------------------')
137
- hits = hits.sort_values(by=['Relevancy'], ascending=False)
138
- netzerohit = hits[hits['Netzero Label'] == 'NET-ZERO']
139
- if not netzerohit.empty:
140
- netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
141
- # st.write('-------------------')
142
- # st.markdown("###### Netzero paragraph ######")
143
- st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
144
- netzerohit.iloc[0]['text'].replace("\n", " ")))
145
- st.write("")
146
- else:
147
- st.info("🤔 No Netzero paragraph found")
148
-
149
- # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
150
- st.write('-------------------')
151
- st.markdown("###### Top few Target Classified paragraph/text results ######")
152
- range_val = min(5,len(hits))
153
- for i in range(range_val):
154
- # the page number reflects the page that contains the main paragraph
155
- # according to split limit, the overlapping part can be on a separate page
156
- st.write('**Result {}** (Relevancy Score: {:.2f}): `page {}`, `Sector: {}`,\
157
- `GHG: {}`, `Adapt-Mitig :{}`'\
158
- .format(i+1,hits.iloc[i]['Relevancy'],
159
- hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
160
- hits.iloc[i]['GHG Label'],hits.iloc[i]['Adapt-Mitig Label']))
161
- st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
162
- hits = hits.reset_index(drop =True)
163
- st.write('----------------')
164
- st.write('Explore the data')
165
- st.write(hits)
166
- df_xlsx = to_excel(df)
167
-
168
- with st.sidebar:
169
- st.write('-------------')
170
- st.download_button(label='📥 Download Result',
171
- data=df_xlsx ,
172
- file_name= 'cpu_analysis.xlsx')
173
 
174
- else:
175
- st.info("🤔 No Targets found")
 
8
  import numpy as np
9
  import pandas as pd
10
  import streamlit as st
11
+ from st_aggrid import AgGrid
12
+ from utils.tapp_classifier import load_tappClassifier, tapp_classification
13
  import logging
14
  logger = logging.getLogger(__name__)
15
  from utils.config import get_classifier_params
16
  from io import BytesIO
17
  import xlsxwriter
18
  import plotly.express as px
19
+ from pandas.api.types import (
20
+ is_categorical_dtype,
21
+ is_datetime64_any_dtype,
22
+ is_numeric_dtype,
23
+ is_object_dtype,
24
+ is_list_like)
25
 
26
  # Declare all the necessary variables
27
+ tapp_classifier_identifier = 'tapp'
28
+ param1 = get_classifier_params(tapp_classifier_identifier)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def app():
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  ### Main app code ###
32
  with st.container():
33
  if 'key0' in st.session_state:
34
  df = st.session_state.key0
35
 
36
+ #load Classifiers
37
+ classifier = load_tappClassifier(classifier_name=param1['model_name'])
38
+ st.session_state['{}_classifier'.format(tapp_classifier_identifier)] = classifier
39
+
40
  if len(df) > 100:
41
  warning_msg = ": This might take sometime, please sit back and relax."
42
  else:
43
  warning_msg = ""
44
 
45
+ tapp_classification(haystack_doc=df,
46
+ threshold= param1['threshold'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ #st.session_state.key1 = df