faizhalas committed
Commit 79c577b • 1 parent: 89a3b96

Update pages/3 Bidirected Network.py

Files changed (1)
  1. pages/3 Bidirected Network.py  +187 -182
pages/3 Bidirected Network.py CHANGED
@@ -51,7 +51,7 @@ st.subheader('Put your file here...', anchor=False)
 
 #===clear cache===
 def reset_all():
-    st.cache_data.clear()
+    st.cache_data.clear()
 
 #===check type===
 @st.cache_data(ttl=3600)
@@ -79,193 +79,198 @@ def conv_txt(extype):
 uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
 
 if uploaded_file is not None:
-    extype = get_ext(uploaded_file)
-    if extype.endswith('.csv'):
-        papers = upload(extype)
-    elif extype.endswith('.txt'):
-        papers = conv_txt(extype)
-
-    @st.cache_data(ttl=3600)
-    def get_data_arul(extype):
-        list_of_column_key = list(papers.columns)
-        list_of_column_key = [k for k in list_of_column_key if 'Keyword' in k]
-        return papers, list_of_column_key
-
-    papers, list_of_column_key = get_data_arul(extype)
-
-    col1, col2 = st.columns(2)
-    with col1:
-        method = st.selectbox(
-            'Choose method',
-            ('Lemmatization', 'Stemming'), on_change=reset_all)
-    with col2:
-        keyword = st.selectbox(
-            'Choose column',
-            (list_of_column_key), on_change=reset_all)
-
-
-    #===body===
-    @st.cache_data(ttl=3600)
-    def clean_arul(extype):
-        global keyword, papers
-        try:
-            arul = papers.dropna(subset=[keyword])
-        except KeyError:
-            st.error('Error: Please check your Author/Index Keywords column.')
-            sys.exit(1)
-        arul[keyword] = arul[keyword].map(lambda x: re.sub('-—–', ' ', x))
-        arul[keyword] = arul[keyword].map(lambda x: re.sub('; ', ' ; ', x))
-        arul[keyword] = arul[keyword].map(lambda x: x.lower())
-        arul[keyword] = arul[keyword].dropna()
-        return arul
-
-    arul = clean_arul(extype)
-
-    #===stem/lem===
-    @st.cache_data(ttl=3600)
-    def lemma_arul(extype):
-        lemmatizer = WordNetLemmatizer()
-        def lemmatize_words(text):
-            words = text.split()
-            words = [lemmatizer.lemmatize(word) for word in words]
-            return ' '.join(words)
-        arul[keyword] = arul[keyword].apply(lemmatize_words)
-        return arul
-
-    @st.cache_data(ttl=3600)
-    def stem_arul(extype):
-        stemmer = SnowballStemmer("english")
-        def stem_words(text):
-            words = text.split()
-            words = [stemmer.stem(word) for word in words]
-            return ' '.join(words)
-        arul[keyword] = arul[keyword].apply(stem_words)
-        return arul
-
-    if method is 'Lemmatization':
-        arul = lemma_arul(extype)
-    else:
-        arul = stem_arul(extype)
-
-    @st.cache_data(ttl=3600)
-    def arm(extype):
-        arule = arul[keyword].str.split(' ; ')
-        arule_list = arule.values.tolist()
-        te_ary = te.fit(arule_list).transform(arule_list)
-        df = pd.DataFrame(te_ary, columns=te.columns_)
-        return df
-    df = arm(extype)
-
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        supp = st.slider(
-            'Select value of Support',
-            0.001, 1.000, (0.010), on_change=reset_all)
-    with col2:
-        conf = st.slider(
-            'Select value of Confidence',
-            0.001, 1.000, (0.050), on_change=reset_all)
-    with col3:
-        maxlen = st.slider(
-            'Maximum length of the itemsets generated',
-            2, 8, (2), on_change=reset_all)
-
-    tab1, tab2, tab3 = st.tabs(["📈 Result & Generate visualization", "📃 Reference", "📓 Recommended Reading"])
-
-    with tab1:
-        #===Association rules===
-        @st.cache_data(ttl=3600)
-        def freqitem(extype):
-            freq_item = fpgrowth(df, min_support=supp, use_colnames=True, max_len=maxlen)
-            return freq_item
-
-        freq_item = freqitem(extype)
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write('🚨 The more data you have, the longer you will have to wait.')
-        with col2:
-            showall = st.checkbox('Show all nodes', value=True, on_change=reset_all)
-
-        @st.cache_data(ttl=3600)
-        def arm_table(extype):
-            restab = association_rules(freq_item, metric='confidence', min_threshold=conf)
-            restab = restab[['antecedents', 'consequents', 'antecedent support', 'consequent support', 'support', 'confidence', 'lift', 'conviction']]
-            restab['antecedents'] = restab['antecedents'].apply(lambda x: ', '.join(list(x))).astype('unicode')
-            restab['consequents'] = restab['consequents'].apply(lambda x: ', '.join(list(x))).astype('unicode')
-            if showall:
-                restab['Show'] = True
-            else:
-                restab['Show'] = False
-            return restab
-
-        if freq_item.empty:
-            st.error('Please lower your value.', icon="🚨")
-        else:
-            restab = arm_table(extype)
-            restab = st.data_editor(restab, use_container_width=True)
-            res = restab[restab['Show'] == True]
-
-            #===visualize===
-
-            if st.button('📈 Generate network visualization', on_click=reset_all):
-                with st.spinner('Visualizing, please wait ....'):
-                    @st.cache_data(ttl=3600)
-                    def map_node(extype):
-                        res['to'] = res['antecedents'] + ' → ' + res['consequents'] + '\n Support = ' + res['support'].astype(str) + '\n Confidence = ' + res['confidence'].astype(str) + '\n Conviction = ' + res['conviction'].astype(str)
-                        res_ant = res[['antecedents','antecedent support']].rename(columns={'antecedents': 'node', 'antecedent support': 'size'})
-                        res_con = res[['consequents','consequent support']].rename(columns={'consequents': 'node', 'consequent support': 'size'})
-                        res_node = pd.concat([res_ant, res_con]).drop_duplicates(keep='first')
-                        return res_node, res
-
-                    res_node, res = map_node(extype)
-
-                    @st.cache_data(ttl=3600)
-                    def arul_network(extype):
-                        nodes = []
-                        edges = []
-
-                        for w,x in zip(res_node['size'], res_node['node']):
-                            nodes.append( Node(id=x,
-                                               label=x,
-                                               size=50*w+10,
-                                               shape="dot",
-                                               labelHighlightBold=True,
-                                               group=x,
-                                               opacity=10,
-                                               mass=1)
-                                          )
-
-                        for y,z,a,b in zip(res['antecedents'],res['consequents'],res['confidence'],res['to']):
-                            edges.append( Edge(source=y,
-                                               target=z,
-                                               title=b,
-                                               width=a*2,
-                                               physics=True,
-                                               smooth=True
-                                               )
-                                          )
-                        return nodes, edges
-
-                    nodes, edges = arul_network(extype)
-                    config = Config(width=1200,
-                                    height=800,
-                                    directed=True,
-                                    physics=True,
-                                    hierarchical=False,
-                                    maxVelocity=5
-                                    )
-
-                    return_value = agraph(nodes=nodes,
-                                          edges=edges,
-                                          config=config)
-                    time.sleep(1)
-                    st.toast('Process completed', icon='📈')
-
-    with tab2:
-        st.markdown('**Santosa, F. A. (2023). Adding Perspective to the Bibliometric Mapping Using Bidirected Graph. Open Information Science, 7(1), 20220152.** https://doi.org/10.1515/opis-2022-0152')
-
-    with tab3:
-        st.markdown('**Agrawal, R., Imieliński, T., & Swami, A. (1993). Mining association rules between sets of items in large databases. In ACM SIGMOD Record (Vol. 22, Issue 2, pp. 207–216). Association for Computing Machinery (ACM).** https://doi.org/10.1145/170036.170072')
-        st.markdown('**Brin, S., Motwani, R., Ullman, J. D., & Tsur, S. (1997). Dynamic itemset counting and implication rules for market basket data. ACM SIGMOD Record, 26(2), 255–264.** https://doi.org/10.1145/253262.253325')
-        st.markdown('**Edmonds, J., & Johnson, E. L. (2003). Matching: A Well-Solved Class of Integer Linear Programs. Combinatorial Optimization — Eureka, You Shrink!, 27–30.** https://doi.org/10.1007/3-540-36478-1_3')
-        st.markdown('**Li, M. (2016, August 23). An exploration to visualise the emerging trends of technology foresight based on an improved technique of co-word analysis and relevant literature data of WOS. Technology Analysis & Strategic Management, 29(6), 655–671.** https://doi.org/10.1080/09537325.2016.1220518')
+    try:
+        extype = get_ext(uploaded_file)
+        if extype.endswith('.csv'):
+            papers = upload(extype)
+        elif extype.endswith('.txt'):
+            papers = conv_txt(extype)
+
+        @st.cache_data(ttl=3600)
+        def get_data_arul(extype):
+            list_of_column_key = list(papers.columns)
+            list_of_column_key = [k for k in list_of_column_key if 'Keyword' in k]
+            return papers, list_of_column_key
+
+        papers, list_of_column_key = get_data_arul(extype)
+
+        col1, col2 = st.columns(2)
+        with col1:
+            method = st.selectbox(
+                'Choose method',
+                ('Lemmatization', 'Stemming'), on_change=reset_all)
+        with col2:
+            keyword = st.selectbox(
+                'Choose column',
+                (list_of_column_key), on_change=reset_all)
+
+
+        #===body===
+        @st.cache_data(ttl=3600)
+        def clean_arul(extype):
+            global keyword, papers
+            try:
+                arul = papers.dropna(subset=[keyword])
+            except KeyError:
+                st.error('Error: Please check your Author/Index Keywords column.')
+                sys.exit(1)
+            arul[keyword] = arul[keyword].map(lambda x: re.sub('-—–', ' ', x))
+            arul[keyword] = arul[keyword].map(lambda x: re.sub('; ', ' ; ', x))
+            arul[keyword] = arul[keyword].map(lambda x: x.lower())
+            arul[keyword] = arul[keyword].dropna()
+            return arul
+
+        arul = clean_arul(extype)
+
+        #===stem/lem===
+        @st.cache_data(ttl=3600)
+        def lemma_arul(extype):
+            lemmatizer = WordNetLemmatizer()
+            def lemmatize_words(text):
+                words = text.split()
+                words = [lemmatizer.lemmatize(word) for word in words]
+                return ' '.join(words)
+            arul[keyword] = arul[keyword].apply(lemmatize_words)
+            return arul
+
+        @st.cache_data(ttl=3600)
+        def stem_arul(extype):
+            stemmer = SnowballStemmer("english")
+            def stem_words(text):
+                words = text.split()
+                words = [stemmer.stem(word) for word in words]
+                return ' '.join(words)
+            arul[keyword] = arul[keyword].apply(stem_words)
+            return arul
+
+        if method is 'Lemmatization':
+            arul = lemma_arul(extype)
+        else:
+            arul = stem_arul(extype)
+
+        @st.cache_data(ttl=3600)
+        def arm(extype):
+            arule = arul[keyword].str.split(' ; ')
+            arule_list = arule.values.tolist()
+            te_ary = te.fit(arule_list).transform(arule_list)
+            df = pd.DataFrame(te_ary, columns=te.columns_)
+            return df
+        df = arm(extype)
+
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            supp = st.slider(
+                'Select value of Support',
+                0.001, 1.000, (0.010), on_change=reset_all)
+        with col2:
+            conf = st.slider(
+                'Select value of Confidence',
+                0.001, 1.000, (0.050), on_change=reset_all)
+        with col3:
+            maxlen = st.slider(
+                'Maximum length of the itemsets generated',
+                2, 8, (2), on_change=reset_all)
+
+        tab1, tab2, tab3 = st.tabs(["📈 Result & Generate visualization", "📃 Reference", "📓 Recommended Reading"])
+
+        with tab1:
+            #===Association rules===
+            @st.cache_data(ttl=3600)
+            def freqitem(extype):
+                freq_item = fpgrowth(df, min_support=supp, use_colnames=True, max_len=maxlen)
+                return freq_item
+
+            freq_item = freqitem(extype)
+            col1, col2 = st.columns(2)
+            with col1:
+                st.write('🚨 The more data you have, the longer you will have to wait.')
+            with col2:
+                showall = st.checkbox('Show all nodes', value=True, on_change=reset_all)
+
+            @st.cache_data(ttl=3600)
+            def arm_table(extype):
+                restab = association_rules(freq_item, metric='confidence', min_threshold=conf)
+                restab = restab[['antecedents', 'consequents', 'antecedent support', 'consequent support', 'support', 'confidence', 'lift', 'conviction']]
+                restab['antecedents'] = restab['antecedents'].apply(lambda x: ', '.join(list(x))).astype('unicode')
+                restab['consequents'] = restab['consequents'].apply(lambda x: ', '.join(list(x))).astype('unicode')
+                if showall:
+                    restab['Show'] = True
+                else:
+                    restab['Show'] = False
+                return restab
+
+            if freq_item.empty:
+                st.error('Please lower your value.', icon="🚨")
+            else:
+                restab = arm_table(extype)
+                restab = st.data_editor(restab, use_container_width=True)
+                res = restab[restab['Show'] == True]
+
+                #===visualize===
+
+                if st.button('📈 Generate network visualization', on_click=reset_all):
+                    with st.spinner('Visualizing, please wait ....'):
+                        @st.cache_data(ttl=3600)
+                        def map_node(extype):
+                            res['to'] = res['antecedents'] + ' → ' + res['consequents'] + '\n Support = ' + res['support'].astype(str) + '\n Confidence = ' + res['confidence'].astype(str) + '\n Conviction = ' + res['conviction'].astype(str)
+                            res_ant = res[['antecedents','antecedent support']].rename(columns={'antecedents': 'node', 'antecedent support': 'size'})
+                            res_con = res[['consequents','consequent support']].rename(columns={'consequents': 'node', 'consequent support': 'size'})
+                            res_node = pd.concat([res_ant, res_con]).drop_duplicates(keep='first')
+                            return res_node, res
+
+                        res_node, res = map_node(extype)
+
+                        @st.cache_data(ttl=3600)
+                        def arul_network(extype):
+                            nodes = []
+                            edges = []
+
+                            for w,x in zip(res_node['size'], res_node['node']):
+                                nodes.append( Node(id=x,
+                                                   label=x,
+                                                   size=50*w+10,
+                                                   shape="dot",
+                                                   labelHighlightBold=True,
+                                                   group=x,
+                                                   opacity=10,
+                                                   mass=1)
+                                              )
+
+                            for y,z,a,b in zip(res['antecedents'],res['consequents'],res['confidence'],res['to']):
+                                edges.append( Edge(source=y,
+                                                   target=z,
+                                                   title=b,
+                                                   width=a*2,
+                                                   physics=True,
+                                                   smooth=True
+                                                   )
+                                              )
+                            return nodes, edges
+
+                        nodes, edges = arul_network(extype)
+                        config = Config(width=1200,
+                                        height=800,
+                                        directed=True,
+                                        physics=True,
+                                        hierarchical=False,
+                                        maxVelocity=5
+                                        )
+
+                        return_value = agraph(nodes=nodes,
+                                              edges=edges,
+                                              config=config)
+                        time.sleep(1)
+                        st.toast('Process completed', icon='📈')
+
+        with tab2:
+            st.markdown('**Santosa, F. A. (2023). Adding Perspective to the Bibliometric Mapping Using Bidirected Graph. Open Information Science, 7(1), 20220152.** https://doi.org/10.1515/opis-2022-0152')
+
+        with tab3:
+            st.markdown('**Agrawal, R., Imieliński, T., & Swami, A. (1993). Mining association rules between sets of items in large databases. In ACM SIGMOD Record (Vol. 22, Issue 2, pp. 207–216). Association for Computing Machinery (ACM).** https://doi.org/10.1145/170036.170072')
+            st.markdown('**Brin, S., Motwani, R., Ullman, J. D., & Tsur, S. (1997). Dynamic itemset counting and implication rules for market basket data. ACM SIGMOD Record, 26(2), 255–264.** https://doi.org/10.1145/253262.253325')
+            st.markdown('**Edmonds, J., & Johnson, E. L. (2003). Matching: A Well-Solved Class of Integer Linear Programs. Combinatorial Optimization — Eureka, You Shrink!, 27–30.** https://doi.org/10.1007/3-540-36478-1_3')
+            st.markdown('**Li, M. (2016, August 23). An exploration to visualise the emerging trends of technology foresight based on an improved technique of co-word analysis and relevant literature data of WOS. Technology Analysis & Strategic Management, 29(6), 655–671.** https://doi.org/10.1080/09537325.2016.1220518')
+
+    except:
+        st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
+        st.stop()
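
Note on the change: this commit wraps the whole upload-and-mine flow in a single try/except so that a malformed file ends the Streamlit run with st.error() and st.stop() instead of an uncaught traceback. Below is a minimal, self-contained sketch of that guard pattern around the same mlxtend pipeline the page uses; the CSV layout, the 'Author Keywords' column, and the inline parsing are illustrative stand-ins for the app's own upload()/conv_txt() helpers, not the app's actual code.

    import pandas as pd
    import streamlit as st
    from mlxtend.frequent_patterns import association_rules, fpgrowth
    from mlxtend.preprocessing import TransactionEncoder

    uploaded_file = st.file_uploader('Upload a keywords CSV', type=['csv'])

    if uploaded_file is not None:
        try:
            # Assumption: the file has an 'Author Keywords' column of
            # '; '-separated terms, as Scopus exports typically do.
            papers = pd.read_csv(uploaded_file)
            transactions = (papers['Author Keywords']
                            .dropna()
                            .str.lower()
                            .str.split('; ')
                            .tolist())

            # One-hot encode the keyword transactions, mine frequent itemsets
            # with FP-Growth, then derive association rules, mirroring the
            # arm()/freqitem()/arm_table() steps in the diff above.
            te = TransactionEncoder()
            onehot = pd.DataFrame(te.fit(transactions).transform(transactions),
                                  columns=te.columns_)
            freq_item = fpgrowth(onehot, min_support=0.01, use_colnames=True, max_len=2)
            rules = association_rules(freq_item, metric='confidence', min_threshold=0.05)
            st.dataframe(rules[['antecedents', 'consequents', 'support', 'confidence']])
        except Exception:
            # Same recovery path the commit adds: report and halt this run only.
            st.error('Please ensure that your file is correct.', icon='🚨')
            st.stop()

st.stop() halts only the current script run, so the uploader stays interactive and the user can retry with a corrected file; the bare except in the commit catches any failure in the pipeline, which is why its message asks the user to verify the file rather than naming a specific cause.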