File size: 4,495 Bytes
8516ffb
 
 
 
 
2f2aa71
8516ffb
 
 
 
 
 
 
 
ede50ba
 
 
 
 
 
 
 
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34db5c9
641920f
34db5c9
 
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f2aa71
 
 
540545b
2f2aa71
8516ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#===import module===
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
import sys

#===config===
st.set_page_config(
     page_title="Coconut",
     page_icon="πŸ₯₯",
     layout="wide"
)
st.header("Data visualization")
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
st.markdown(hide_streamlit_style, unsafe_allow_html=True) 

st.subheader('Put your CSV file and choose a visualization')

#===clear cache===
def reset_all():
     st.cache_data.clear()

#===check type===
@st.cache_data(ttl=3600)
def get_ext(extype):
    extype = uploaded_file.name
    return extype

@st.cache_data(ttl=3600)
def upload(extype):
    papers = pd.read_csv(uploaded_file)
    #lens.org
    if 'Publication Year' in papers.columns:
               papers.rename(columns={'Publication Year': 'Year', 'Citing Works Count': 'Cited by',
                                     'Publication Type': 'Document Type', 'Source Title': 'Source title'}, inplace=True)
    return papers

@st.cache_data(ttl=3600)
def conv_txt(extype):
    col_dict = {'TI': 'Title',
            'SO': 'Source title',
            'DT': 'Document Type',
            'DE': 'Author Keywords',
            'ID': 'Keywords Plus',
            'AB': 'Abstract',
            'TC': 'Cited by',
            'PY': 'Year',}
    papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    papers.rename(columns=col_dict, inplace=True)
    return papers

#===Read data===
uploaded_file = st.file_uploader("Choose a file", type=['csv', 'txt'], on_change=reset_all)

if uploaded_file is not None:
    extype = get_ext(uploaded_file)
    if extype.endswith('.csv'):
         papers = upload(extype) 
   
    elif extype.endswith('.txt'):
         papers = conv_txt(extype)
    
    @st.cache_data(ttl=3600)
    def get_minmax(extype):
        extype = extype
        MIN = int(papers['Year'].min())
        MAX = int(papers['Year'].max())
        GAP = MAX - MIN
        return papers, MIN, MAX, GAP
    
    tab1, tab2 = st.tabs(["πŸ“ˆ Generate visualization", "πŸ““ Recommended Reading"])
    
    with tab1:    
        #===sunburst===
        try:
            papers, MIN, MAX, GAP = get_minmax(extype)
        except KeyError:
            st.error('Error: Please check again your columns.')
            sys.exit(1)
        
        if (GAP != 0):
            YEAR = st.slider('Year', min_value=MIN, max_value=MAX, value=(MIN, MAX), on_change=reset_all)
        else:
            st.write('You only have data in ', (MAX))
            YEAR = (MIN, MAX)
        
        @st.cache_data(ttl=3600)
        def listyear(extype):
            global papers
            years = list(range(YEAR[0],YEAR[1]+1))
            papers = papers.loc[papers['Year'].isin(years)]
            return years, papers
        
        @st.cache_data(ttl=3600)
        def vis_sunbrust(extype):
            papers['Cited by'] = papers['Cited by'].fillna(0)
            vis = pd.DataFrame()
            vis[['doctype','source','citby','year']] = papers[['Document Type','Source title','Cited by','Year']]
            viz=vis.groupby(['doctype', 'source', 'year'])['citby'].agg(['sum','count']).reset_index()  
            viz.rename(columns={'sum': 'cited by', 'count': 'total docs'}, inplace=True)
                            
            fig = px.sunburst(viz, path=['doctype', 'source', 'year'], values='total docs',
                          color='cited by', 
                          color_continuous_scale='RdBu',
                          color_continuous_midpoint=np.average(viz['cited by'], weights=viz['total docs']))
            fig.update_layout(height=800, width=1200)
            return fig
        
        years, papers = listyear(extype)

        if {'Document Type','Source title','Cited by','Year'}.issubset(papers.columns):
            fig = vis_sunbrust(extype)
            st.plotly_chart(fig, height=800, width=1200) #use_container_width=True)
           
        else:
            st.error('We require these columns: Document Type, Source title, Cited by, Year', icon="🚨")
    
    with tab2:
        st.markdown('**numpy.average β€” NumPy v1.24 Manual. (n.d.). Numpy.Average β€” NumPy v1.24 Manual.** https://numpy.org/doc/stable/reference/generated/numpy.average.html')
        st.markdown('**Sunburst. (n.d.). Sunburst Charts in Python.** https://plotly.com/python/sunburst-charts/')