ppsingh committed on
Commit
29d6d65
1 Parent(s): 7fd91ba

Update appStore/adapmit.py

Browse files
Files changed (1) hide show
  1. appStore/adapmit.py +38 -174
appStore/adapmit.py CHANGED
@@ -1,174 +1,38 @@
1
- # set path
2
- import glob, os, sys
3
- sys.path.append('../utils')
4
-
5
- #import needed libraries
6
- import seaborn as sns
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import pandas as pd
10
- import streamlit as st
11
- from utils.adapmit_classifier import load_adapmitClassifier,adapmit_classification
12
- # from utils.keyword_extraction import textrank
13
- import logging
14
- logger = logging.getLogger(__name__)
15
- from utils.config import get_classifier_params
16
- from utils.preprocessing import paraLengthCheck
17
- from io import BytesIO
18
- import xlsxwriter
19
- import plotly.express as px
20
-
21
- # Declare all the necessary variables
22
- classifier_identifier = 'adapmit'
23
- params = get_classifier_params(classifier_identifier)
24
-
25
- @st.cache_data
26
- def to_excel(df):
27
- len_df = len(df)
28
- output = BytesIO()
29
- writer = pd.ExcelWriter(output, engine='xlsxwriter')
30
- df.to_excel(writer, index=False, sheet_name='Sheet1')
31
- workbook = writer.book
32
- worksheet = writer.sheets['Sheet1']
33
- worksheet.data_validation('E2:E{}'.format(len_df),
34
- {'validate': 'list',
35
- 'source': ['No', 'Yes', 'Discard']})
36
- worksheet.data_validation('F2:F{}'.format(len_df),
37
- {'validate': 'list',
38
- 'source': ['No', 'Yes', 'Discard']})
39
- worksheet.data_validation('G2:G{}'.format(len_df),
40
- {'validate': 'list',
41
- 'source': ['No', 'Yes', 'Discard']})
42
- writer.save()
43
- processed_data = output.getvalue()
44
- return processed_data
45
-
46
- def app():
47
-
48
- ### Main app code ###
49
- with st.container():
50
-
51
- if 'key1' in st.session_state:
52
- df = st.session_state.key1
53
-
54
- classifier = load_adapmitClassifier(classifier_name=params['model_name'])
55
- st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
56
- if sum(df['Target Label'] == 'TARGET') > 100:
57
- warning_msg = ": This might take sometime, please sit back and relax."
58
- else:
59
- warning_msg = ""
60
-
61
- df = adapmit_classification(haystack_doc=df,
62
- threshold= params['threshold'])
63
-
64
- st.session_state.key1 = df
65
-
66
-
67
-
68
-
69
-
70
- # threshold= params['threshold']
71
- # truth_df = df.drop(['text'],axis=1)
72
- # truth_df = truth_df.astype(float) >= threshold
73
- # truth_df = truth_df.astype(str)
74
- # categories = list(truth_df.columns)
75
-
76
- # placeholder = {}
77
- # for val in categories:
78
- # placeholder[val] = dict(truth_df[val].value_counts())
79
- # count_df = pd.DataFrame.from_dict(placeholder)
80
- # count_df = count_df.T
81
- # count_df = count_df.reset_index()
82
- # # st.write(count_df)
83
- # placeholder = []
84
- # for i in range(len(count_df)):
85
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'Yes'])
86
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'No'])
87
- # count_df = pd.DataFrame(placeholder, columns = ['category','count','truth_value'])
88
- # # st.write("Total Paragraphs: {}".format(len(df)))
89
- # fig = px.bar(count_df, y='category', x='count',
90
- # color='truth_value',orientation='h', height =200)
91
- # c1, c2 = st.columns([1,1])
92
- # with c1:
93
- # st.plotly_chart(fig,use_container_width= True)
94
-
95
- # truth_df['labels'] = truth_df.apply(lambda x: {i if x[i]=='True' else None for i in categories}, axis=1)
96
- # truth_df['labels'] = truth_df.apply(lambda x: list(x['labels'] -{None}),axis=1)
97
- # # st.write(truth_df)
98
- # df = pd.concat([df,truth_df['labels']],axis=1)
99
- # st.markdown("###### Top few 'Mitigation' related paragraph/text ######")
100
- # df = df.sort_values(by = ['Mitigation'], ascending=False)
101
- # for i in range(3):
102
- # if df.iloc[i]['Mitigation'] >= 0.50:
103
- # st.write('**Result {}** (Relevancy Score: {:.2f})'.format(i+1,df.iloc[i]['Mitigation']))
104
- # st.write("\t Text: \t{}".format(df.iloc[i]['text'].replace("\n", " ")))
105
-
106
- # st.markdown("###### Top few 'Adaptation' related paragraph/text ######")
107
- # df = df.sort_values(by = ['Adaptation'], ascending=False)
108
- # for i in range(3):
109
- # if df.iloc[i]['Adaptation'] > 0.5:
110
- # st.write('**Result {}** (Relevancy Score: {:.2f})'.format(i+1,df.iloc[i]['Adaptation']))
111
- # st.write("\t Text: \t{}".format(df.iloc[i]['text'].replace("\n", " ")))
112
- # # st.write(df[['text','labels']])
113
- # df['Validation'] = 'No'
114
- # df['Val-Mitigation'] = 'No'
115
- # df['Val-Adaptation'] = 'No'
116
- # df_xlsx = to_excel(df)
117
- # st.download_button(label='📥 Download Current Result',
118
- # data=df_xlsx ,
119
- # file_name= 'file_adaptation-mitigation.xlsx')
120
- # # st.session_state.key4 =
121
-
122
- # # category =set(df.columns)
123
- # # removecols = {'Validation','Val-Adaptation','Val-Mitigation','text'}
124
- # # category = list(category - removecols)
125
-
126
- # else:
127
- # st.info("🤔 No document found, please try to upload it at the sidebar!")
128
- # logging.warning("Terminated as no document provided")
129
-
130
- # # Creating truth value dataframe
131
- # if 'key4' in st.session_state:
132
- # if st.session_state.key4 is not None:
133
- # df = st.session_state.key4
134
- # st.markdown("###### Select the threshold for classifier ######")
135
- # c4, c5 = st.columns([1,1])
136
-
137
- # with c4:
138
- # threshold = st.slider("Threshold", min_value=0.00, max_value=1.0,
139
- # step=0.01, value=0.5,
140
- # help = "Keep High Value if want refined result, low if dont want to miss anything" )
141
- # category =set(df.columns)
142
- # removecols = {'Validation','Val-Adaptation','Val-Mitigation','text'}
143
- # category = list(category - removecols)
144
-
145
- # placeholder = {}
146
- # for val in category:
147
- # temp = df[val].astype(float) > threshold
148
- # temp = temp.astype(str)
149
- # placeholder[val] = dict(temp.value_counts())
150
-
151
- # count_df = pd.DataFrame.from_dict(placeholder)
152
- # count_df = count_df.T
153
- # count_df = count_df.reset_index()
154
- # placeholder = []
155
- # for i in range(len(count_df)):
156
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'False'])
157
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'True'])
158
-
159
- # count_df = pd.DataFrame(placeholder, columns = ['category','count','truth_value'])
160
- # fig = px.bar(count_df, x='category', y='count',
161
- # color='truth_value',
162
- # height=400)
163
- # st.write("")
164
- # st.plotly_chart(fig)
165
-
166
- # df['Validation'] = 'No'
167
- # df['Val-Mitigation'] = 'No'
168
- # df['Val-Adaptation'] = 'No'
169
- # df_xlsx = to_excel(df)
170
- # st.download_button(label='📥 Download Current Result',
171
- # data=df_xlsx ,
172
- # file_name= 'file_adaptation-mitigation.xlsx')
173
-
174
-
 
1
+ # set path
2
+ import glob, os, sys
3
+ sys.path.append('../utils')
4
+
5
+ #import needed libraries
6
+ import seaborn as sns
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import pandas as pd
10
+ import streamlit as st
11
+ from utils.adapmit_classifier import load_adapmitClassifier,adapmit_classification
12
+ # from utils.keyword_extraction import textrank
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+ from utils.config import get_classifier_params
16
+ from utils.preprocessing import paraLengthCheck
17
+ from io import BytesIO
18
+ import xlsxwriter
19
+ import plotly.express as px
20
+
21
+ # Declare all the necessary variables
22
+ classifier_identifier = 'adapmit'
23
+ params = get_classifier_params(classifier_identifier)
24
+
25
+
26
+ def app():
27
+ ### Main app code ###
28
+ with st.container():
29
+ if 'key1' in st.session_state:
30
+ df = st.session_state.key1
31
+
32
+ classifier = load_adapmitClassifier(classifier_name=params['model_name'])
33
+ st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
34
+
35
+ df = adapmit_classification(haystack_doc=df,
36
+ threshold= params['threshold'])
37
+
38
+ st.session_state.key1 = df