faizhalas committed on
Commit
2b45b17
•
1 Parent(s): 908aad1

Create 0 FileChecker.py

Browse files
Files changed (1) hide show
  1. pages/0 FileChecker.py +139 -0
pages/0 FileChecker.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
#===config===
# Page-level settings: browser tab title/icon, wide layout, and a sidebar
# that starts collapsed.
st.set_page_config(
    page_title="Coconut",
    page_icon="🥥",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# CSS injected into the page: hides the element with id "MainMenu", the
# <footer>, and the element tagged data-testid="collapsedControl".
hide_streamlit_style = """
<style>
#MainMenu
{visibility: hidden;}
footer {visibility: hidden;}
[data-testid="collapsedControl"] {display: none}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Link to the main application. The icon must be a real emoji character;
# the previously garbled (mis-decoded) text is not a valid icon value.
st.page_link("https://www.coconut-libtool.com/the-app", label="Go to app", icon="🥥")
23
+
24
def reset_data():
    """Clear Streamlit's data cache.

    Passed as the ``on_change`` callback of the file uploader in this
    script, so cached results are dropped whenever the upload changes.
    """
    st.cache_data.clear()
26
+
27
#===check filetype===
@st.cache_data(ttl=3600)
def get_ext(extype):
    """Return the file name of the uploaded file passed in.

    Despite the function's name, the full file name is returned (not only
    the extension); the caller tests the suffix with ``str.endswith``.

    Args:
        extype: The uploaded-file object; only its ``name`` attribute is
            read. The script calls this with the file uploader's value.

    Returns:
        The ``name`` attribute of ``extype``.
    """
    # Read the name from the argument itself. The previous body overwrote
    # the parameter with the module-level ``uploaded_file``, so the result
    # did not depend on the argument the cached call was made with.
    return extype.name
32
+
33
#===upload===
@st.cache_data(ttl=3600)
def upload(extype):
    """Parse the currently uploaded file as CSV and return the DataFrame.

    Args:
        extype: Not used inside the body. The script passes the uploaded
            file's name here.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with default
        options.
    """
    # NOTE(review): the data comes from the module-level ``uploaded_file``,
    # not from the argument, so the cached result is presumably looked up
    # by file name only - confirm this is intended.
    return pd.read_csv(uploaded_file)
38
+
39
@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Parse the currently uploaded tab-separated text file.

    Two-letter column tags are renamed to the descriptive column names the
    rest of this script checks for (e.g. ``PY`` -> ``Year``).

    Args:
        extype: Not used inside the body. The script passes the uploaded
            file's name here.

    Returns:
        A ``pandas.DataFrame`` with the mapped columns renamed; columns
        not listed in the mapping keep their original names.
    """
    # Two-letter tag -> descriptive column name.
    # NOTE(review): these look like Web of Science field tags - confirm.
    field_names = dict(
        TI='Title',
        SO='Source title',
        DE='Author Keywords',
        DT='Document Type',
        AB='Abstract',
        TC='Cited by',
        PY='Year',
        ID='Keywords Plus',
    )
    # The data comes from the module-level ``uploaded_file``. Rows are
    # split on a bare carriage return.
    # NOTE(review): a CRLF-terminated file would leave a leading '\n' in
    # the first field of each row - confirm the expected export format.
    frame = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    return frame.rename(columns=field_names)
52
+
53
st.header('File Checker', anchor=False)
st.subheader('Put your file here...', anchor=False)


def _show_check(title, passed, failure_msg, success_msg=None):
    """Render one bordered pass/fail box at the current layout position.

    Args:
        title: Feature name shown in the box header.
        passed: Truthy when the uploaded data satisfies the feature's
            requirements.
        failure_msg: Text written into the box when ``passed`` is falsy.
        success_msg: Text written when ``passed`` is truthy; defaults to
            "Congratulations! You can use <title>".
    """
    box = st.container(border=True)
    if passed:
        box.subheader(f'✔️ {title}', divider='blue', anchor=False)
        box.write(success_msg or f'Congratulations! You can use {title}')
    else:
        box.subheader(f'❌ {title}', divider='red', anchor=False)
        box.write(failure_msg)


#===read data===
# A non-empty label is supplied and hidden visually instead of passing an
# empty string as the label.
uploaded_file = st.file_uploader(
    'Upload a file',
    type=['csv', 'txt'],
    on_change=reset_data,
    label_visibility='collapsed',
)

if uploaded_file is not None:
    extype = get_ext(uploaded_file)

    # Compare the suffix case-insensitively so names such as "DATA.CSV"
    # are recognised, and stop cleanly on anything else rather than
    # reaching the checks below with ``data`` undefined.
    lowered_name = extype.lower()
    if lowered_name.endswith('.csv'):
        data = upload(extype)
    elif lowered_name.endswith('.txt'):
        data = conv_txt(extype)
    else:
        st.error('Unsupported file type. Please upload a .csv or .txt file.')
        st.stop()

    # Failure messages shared by several checks.
    no_keyword_msg = (
        "Unfortunately, you don't have a column containing keywords in your data. "
        "Please check again. If you want to use it in another column, "
        "please rename it to 'Keywords'."
    )
    no_object_msg = (
        "Unfortunately, you don't have a column containing object in your data. "
        "Please check again."
    )

    col1, col2, col3 = st.columns(3)

    with col1:
        #===check keywords===
        # Columns whose name contains the substring 'Keyword'.
        keycheck = [k for k in data.columns if 'Keyword' in k]
        _show_check(
            'Keyword Stem',
            keycheck,
            no_keyword_msg,
            success_msg='Congratulations! You can use Keywords Stem',
        )

        #===Visualization===
        # Alternative export schema: map its column names onto the ones
        # checked below. This must happen before the 'Year' checks.
        if 'Publication Year' in data.columns:
            data = data.rename(columns={
                'Publication Year': 'Year',
                'Citing Works Count': 'Cited by',
                'Publication Type': 'Document Type',
                'Source Title': 'Source title',
            })

        col2check = ['Document Type', 'Source title', 'Cited by', 'Year']
        miss_col = [column for column in col2check if column not in data.columns]
        miss_col_str = ', '.join(miss_col)
        _show_check(
            'Sunburst',
            not miss_col,
            f"Unfortunately, you don't have: {miss_col_str}. Please check again.",
        )

    with col2:
        #===check any obj===
        # Names of all object-dtype columns, sorted.
        coldf = sorted(data.select_dtypes(include=['object']).columns.tolist())
        _show_check('Topic Modeling', coldf, no_object_msg)

        #===Burst===
        _show_check(
            'Burst Detection',
            coldf and 'Year' in data.columns,
            "Unfortunately, you don't have a column containing object in your data "
            "or a 'Year' column. Please check again.",
        )

    with col3:
        #===bidirected===
        _show_check('Bidirected Network', keycheck, no_keyword_msg)

        #===scattertext===
        _show_check('Scattertext', coldf, no_object_msg)
139
+