File size: 5,222 Bytes
8516ffb
 
 
 
 
2f2aa71
8516ffb
 
 
544dcf4
 
 
 
8516ffb
544dcf4
ede50ba
 
544dcf4
 
ede50ba
544dcf4
ede50ba
 
544dcf4
ede50ba
544dcf4
9102787
544dcf4
 
 
 
 
 
 
 
 
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
34db5c9
641920f
34db5c9
 
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544dcf4
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f2aa71
 
 
540545b
2f2aa71
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#===import module===
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
import sys

#===config===
# Page-level settings: title, icon, wide layout, sidebar collapsed.
_PAGE_SETTINGS = {
    "page_title": "Coconut",
    "page_icon": "🥥",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# CSS injected as raw HTML: hides #MainMenu, the footer and the
# collapsed-sidebar control.
hide_streamlit_style = """
            <style>
            #MainMenu 
            {visibility: hidden;}
            footer {visibility: hidden;}
            [data-testid="collapsedControl"] {display: none}
            </style>
            """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# (target, label, icon) for every entry of the navigation popover, in
# display order.
_MENU_LINKS = (
    ("https://www.coconut-libtool.com/", "Home", "🏠"),
    ("pages/1 Scattertext.py", "Scattertext", "1️⃣"),
    ("pages/2 Topic Modeling.py", "Topic Modeling", "2️⃣"),
    ("pages/3 Bidirected Network.py", "Bidirected Network", "3️⃣"),
    ("pages/4 Sunburst.py", "Sunburst", "4️⃣"),
    ("pages/5 Burst Detection.py", "Burst Detection", "5️⃣"),
    ("pages/6 Keywords Stem.py", "Keywords Stem", "6️⃣"),
)

with st.popover("🔗 Menu"):
    for _target, _label, _icon in _MENU_LINKS:
        st.page_link(_target, label=_label, icon=_icon)

st.header("Sunburst Visualization", anchor=False)
st.subheader('Put your file here...', anchor=False)

#===clear cache===
def reset_all():
    """Call ``st.cache_data.clear()``.

    Used in this script as the ``on_change`` callback of the file uploader
    and of the year slider.
    """
    st.cache_data.clear()

#===check type===
@st.cache_data(ttl=3600)
def get_ext(extype):
    """Return the file name of an uploaded file.

    Args:
        extype: The uploaded-file object; only its ``name`` attribute is
            read.

    Returns:
        The value of ``extype.name``.
    """
    # Read the name from the argument rather than from the module-level
    # ``uploaded_file`` so the returned value always belongs to the object
    # this call was made (and cached) for.
    return extype.name

@st.cache_data(ttl=3600)
def upload(extype):
    """Read the module-level ``uploaded_file`` as CSV.

    Args:
        extype: Not read by the body; it is the only argument the
            ``st.cache_data`` wrapper receives.

    Returns:
        The parsed DataFrame. When a 'Publication Year' column is present
        (lens.org layout), four columns are renamed to the names the rest
        of this script uses.
    """
    frame = pd.read_csv(uploaded_file)
    if 'Publication Year' not in frame.columns:
        return frame

    #lens.org
    lens_to_standard = {
        'Publication Year': 'Year',
        'Citing Works Count': 'Cited by',
        'Publication Type': 'Document Type',
        'Source Title': 'Source title',
    }
    return frame.rename(columns=lens_to_standard)

@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Read the module-level ``uploaded_file`` as tab-separated text.

    The file is parsed with ``sep='\\t'`` and ``lineterminator='\\r'``, then
    the two-letter column tags are renamed to descriptive column names.

    Args:
        extype: Not read by the body; it is the only argument the
            ``st.cache_data`` wrapper receives.

    Returns:
        The parsed DataFrame with renamed columns.
    """
    tag_names = dict(
        TI='Title',
        SO='Source title',
        DT='Document Type',
        DE='Author Keywords',
        ID='Keywords Plus',
        AB='Abstract',
        TC='Cited by',
        PY='Year',
    )
    raw = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    return raw.rename(columns=tag_names)

#===Read data===
# A non-empty label is supplied and hidden: Streamlit discourages empty
# widget labels, and the subheader above already prompts the user.
uploaded_file = st.file_uploader(
    'Upload a CSV or TXT file',
    type=['csv', 'txt'],
    on_change=reset_all,
    label_visibility='collapsed',
)

if uploaded_file is not None:
    extype = get_ext(uploaded_file)
    # Match the extension case-insensitively so a name such as "DATA.CSV"
    # still selects a loader; otherwise ``papers`` would stay unbound and the
    # code below would fail with NameError. The original-case name is still
    # what is passed on to the loaders.
    ext_lower = extype.lower()
    if ext_lower.endswith('.csv'):
        papers = upload(extype)

    elif ext_lower.endswith('.txt'):
        papers = conv_txt(extype)

    @st.cache_data(ttl=3600)
    def get_minmax(extype):
        """Return the module-level ``papers`` frame with its year bounds.

        Args:
            extype: Not read by the body; it is the only argument the
                ``st.cache_data`` wrapper receives.

        Returns:
            A tuple ``(papers, first_year, last_year, span)`` where the
            years are the ``int`` minimum and maximum of ``papers['Year']``
            and ``span`` is their difference.

        Raises:
            KeyError: If ``papers`` has no 'Year' column.
        """
        year_col = papers['Year']
        first_year = int(year_col.min())
        last_year = int(year_col.max())
        return papers, first_year, last_year, last_year - first_year
    
    tab1, tab2 = st.tabs(["📈 Generate visualization", "📓 Recommended Reading"])

    with tab1:
        #===sunburst===
        try:
            papers, MIN, MAX, GAP = get_minmax(extype)
        except (KeyError, ValueError):
            # KeyError: there is no 'Year' column.
            # ValueError: int() could not convert the min/max year
            # (for example when the value is NaN or non-numeric text).
            st.error('Error: Please check again your columns.')
            # st.stop() is Streamlit's documented way to end the current
            # script run; sys.exit() raises SystemExit inside the app
            # instead of going through Streamlit's own stop mechanism.
            st.stop()

        # Only offer a range slider when the data spans more than one year.
        if GAP != 0:
            YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
        else:
            st.write('You only have data in ', (MAX))
            YEAR = (MIN, MAX)

        @st.cache_data(ttl=3600)
        def listyear(extype):
            """Filter the module-level ``papers`` frame to the selected years.

            Reads the module-level ``YEAR`` pair (first year, last year) and
            keeps the rows of ``papers`` whose 'Year' value is one of the
            integers in that inclusive range.

            Args:
                extype: Not read by the body; it is the only argument the
                    ``st.cache_data`` wrapper receives.

            Returns:
                A tuple ``(years, filtered)``: the list of selected years and
                the filtered DataFrame.
            """
            years = list(range(YEAR[0], YEAR[1] + 1))
            # Return the filtered frame instead of rebinding the global
            # ``papers``: the body of a cached function only runs on a cache
            # miss, so a global side effect here would not be applied
            # consistently. The caller assigns the returned frame itself.
            filtered = papers.loc[papers['Year'].isin(years)]
            return years, filtered

        
        @st.cache_data(ttl=3600)
        def vis_sunbrust(extype):
            """Build the sunburst figure from the module-level ``papers`` frame.

            Rows are grouped by document type, source title and year. Each
            group's size is its number of documents and its colour is its
            summed 'Cited by' value (missing values counted as 0).

            Args:
                extype: Not read by the body; it is the only argument the
                    ``st.cache_data`` wrapper receives.

            Returns:
                The Plotly figure, laid out at 1200x800.

            Raises:
                KeyError: If any of 'Document Type', 'Source title',
                    'Cited by' or 'Year' is missing from ``papers``.
            """
            # Work on a private copy so the shared ``papers`` frame is not
            # modified from inside a cached function.
            vis = papers[['Document Type', 'Source title', 'Cited by', 'Year']].copy()
            vis.columns = ['doctype', 'source', 'citby', 'year']
            vis['citby'] = vis['citby'].fillna(0)

            viz = (
                vis.groupby(['doctype', 'source', 'year'])['citby']
                .agg(['sum', 'count'])
                .reset_index()
                .rename(columns={'sum': 'cited by', 'count': 'total docs'})
            )

            # np.average raises ZeroDivisionError when the weights sum to
            # zero (no rows to plot); in that case pass None, which is
            # px.sunburst's default for color_continuous_midpoint.
            if viz['total docs'].sum() > 0:
                midpoint = np.average(viz['cited by'], weights=viz['total docs'])
            else:
                midpoint = None

            fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
                          color='cited by',
                          color_continuous_scale='RdBu',
                          color_continuous_midpoint=midpoint)
            fig.update_layout(height=800, width=1200)
            return fig

        
        years, papers = listyear(extype)

        if {'Document Type','Source title','Cited by','Year'}.issubset(papers.columns):
            fig = vis_sunbrust(extype)
            st.plotly_chart(fig, height=800, width=1200) #use_container_width=True)
           
        else:
            st.error('We require these columns: Document Type, Source title, Cited by, Year', icon="🚨")
    
    with tab2:
        st.markdown('**numpy.average — NumPy v1.24 Manual. (n.d.). Numpy.Average — NumPy v1.24 Manual.** https://numpy.org/doc/stable/reference/generated/numpy.average.html')
        st.markdown('**Sunburst. (n.d.). Sunburst Charts in Python.** https://plotly.com/python/sunburst-charts/')