ppsingh committed on
Commit
301af0b
1 Parent(s): 9a8eaaf

Create excel_convert.py

Browse files
Files changed (1) hide show
  1. appStore/excel_convert.py +145 -0
appStore/excel_convert.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # set path
2
+ import glob, os, sys;
3
+ sys.path.append('../utils')
4
+
5
+ #import needed libraries
6
+ import seaborn as sns
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import pandas as pd
10
+ import streamlit as st
11
+ from st_aggrid import AgGrid
12
+ import logging
13
+ logger = logging.getLogger(__name__)
14
+ from utils.config import get_classifier_params
15
+ from io import BytesIO
16
+ import xlsxwriter
17
+ import plotly.express as px
18
+ from pandas.api.types import (
19
+ is_categorical_dtype,
20
+ is_datetime64_any_dtype,
21
+ is_numeric_dtype,
22
+ is_object_dtype,
23
+ is_list_like)
24
+
25
+
26
def to_excel():
    """
    Build an in-memory Excel workbook from the results kept in session_state.

    The dataframe stored under 'key1' is written as-is to the 'rawdata'
    sheet. For each of 'target_hits', 'action_hits', 'policy_hits' and
    'plan_hits' that is present in session_state, the rows whose 'keep'
    column equals True are written (without the 'keep' column) to the
    'Target', 'Actions', 'Policy' and 'Plans' sheets respectively.

    Returns:
        The bytes of the generated xlsx file, or None when 'key1' is not
        in session_state.
    """
    if 'key1' not in st.session_state:
        # No raw data available, so there is nothing to export.
        return None

    raw_df = st.session_state['key1']

    base_cols = ['text', 'page', 'keep',
                 'MitigationLabel', 'AdaptationLabel', 'Sector']
    # (session_state key, columns to export, sheet name)
    sheets = (
        ('target_hits', base_cols + ['Sub-Target'], 'Target'),
        ('action_hits', base_cols, 'Actions'),
        ('policy_hits', base_cols, 'Policy'),
        ('plan_hits', base_cols, 'Plans'),
    )

    output = BytesIO()
    # The context manager closes (and thereby saves) the workbook even if
    # writing one of the sheets fails.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        raw_df.to_excel(writer, index=False, sheet_name='rawdata')

        for state_key, columns, sheet_name in sheets:
            if state_key not in st.session_state:
                continue
            hits = st.session_state[state_key][columns]
            # Export only the rows the user ticked. drop() returns a new
            # frame so the dataframe held in session_state is not touched.
            kept = hits[hits['keep'] == True]  # noqa: E712
            kept = kept.reset_index(drop=True).drop(columns=['keep'])
            kept.to_excel(writer, index=False, sheet_name=sheet_name)

    return output.getvalue()
57
+
58
+
59
def filter_dataframe(key, cols):
    """
    Adds a UI on top of a dataframe to let viewers filter columns.

    The dataframe is read from st.session_state[key]. When the
    "Add filters" checkbox is ticked, the user can pick columns from
    `cols` (except 'page' and 'keep') and narrow the rows down; the
    result is shown in an editable grid where only the 'keep' column can
    be changed, and the edited dataframe is written back to
    st.session_state[key].

    Args:
        key: key to look for in session_state
        cols: list of columns to use for filter in that order; they are
            also placed first in the displayed dataframe
    Returns:
        None
    """
    import re  # local import: only needed to report invalid user regexes

    modify = st.checkbox("Add filters")

    if not modify:
        return
    if key not in st.session_state:
        return

    df = st.session_state[key]
    # Put `cols` first and keep the remaining columns in their existing
    # order, so the layout is stable between reruns.
    df = df[cols + [c for c in df.columns if c not in cols]]
    if len(df) == 0:
        return

    modification_container = st.container()

    with modification_container:
        # 'page' and 'keep' are never offered as filter columns.
        filterable = [c for c in cols if c not in ('page', 'keep')]
        to_filter_columns = st.multiselect("Filter dataframe on", filterable)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")
            series = df[column]

            if isinstance(series.dtype, pd.CategoricalDtype):
                # Categorical columns: choose among the existing values.
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    series.unique(),
                    default=list(series.unique()),
                )
                df = df[series.isin(user_cat_input)]

            elif is_numeric_dtype(series):
                _min = float(series.min())
                _max = float(series.max())
                if _min < _max:
                    step = (_max - _min) / 100
                    user_num_input = right.slider(
                        f"Values for {column}",
                        _min,
                        _max,
                        (_min, _max),
                        step=step,
                    )
                    df = df[series.between(*user_num_input)]
                else:
                    # A single distinct value (or only NaN) would give a
                    # slider with step 0; there is nothing to narrow down.
                    right.write(f"No range to filter for {column}")

            elif len(series) > 0 and isinstance(series.iloc[0], list):
                # Column of lists: offer every distinct element once, in
                # first-seen order. Positional access (iloc) is required
                # because earlier filters may have removed index label 0.
                list_vals = list(dict.fromkeys(
                    x for lst in series.tolist() for x in lst))
                user_multi_input = right.multiselect(
                    f"Values for {column}",
                    list_vals,
                    default=list_vals,
                )
                selected = set(user_multi_input)
                # Keep rows whose list shares at least one selected value.
                mask = series.apply(
                    lambda vals: any(v in selected for v in vals)
                ).astype(bool)
                df = df[mask]

            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        # case=False makes upper-case input match too;
                        # na=False treats missing text as "no match".
                        mask = series.str.contains(
                            user_text_input, case=False, na=False)
                    except re.error:
                        right.warning("Invalid regular expression")
                    else:
                        df = df[mask]

        df = df.reset_index(drop=True)
        df = st.data_editor(
            df,
            column_config={
                "keep": st.column_config.CheckboxColumn(
                    help="Select which rows to keep",
                    default=False,
                )
            },
            # Only the 'keep' checkbox is editable.
            disabled=[c for c in df.columns if c != 'keep'],
            hide_index=True,
            key='editor' + key,
        )

        # NOTE(review): this stores the *filtered* dataframe, so rows that
        # were filtered out are no longer in session_state afterwards --
        # confirm with the callers that this is intended.
        st.session_state[key] = df

    return