File size: 7,817 Bytes
031e5e2 6d737a4 031e5e2 6d737a4 031e5e2 31fa6e4 031e5e2 31fa6e4 031e5e2 6d737a4 031e5e2 6d737a4 031e5e2 6d737a4 031e5e2 6d737a4 cc525c6 0e3ebc4 cc525c6 6d737a4 dc55918 e4174ea acff600 dc55918 acff600 dc55918 acff600 dc55918 e0984c6 6d737a4 d30004c d7ac6f9 8f06a72 e0984c6 f510aed 6118d20 144d528 e0984c6 acff600 6d737a4 d7ac6f9 8f06a72 144d528 8f06a72 6d737a4 8f06a72 b8f6c0c 8f06a72 31fa6e4 b8f6c0c 6d737a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# set path
import glob, os, sys;
sys.path.append('../utils')
#import needed libraries
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from utils.target_classifier import load_targetClassifier, target_classification
import logging
logger = logging.getLogger(__name__)
from utils.config import get_classifier_params
from io import BytesIO
import xlsxwriter
import plotly.express as px
# Declare all the necessary variables
# Identifier for this page's classifier; used to fetch its config and to
# namespace the cached model in st.session_state ('target_classifier').
classifier_identifier = 'target'
# Project-level settings for this classifier; the code below reads at least
# the keys 'model_name' and 'threshold' — confirm against utils.config.
params = get_classifier_params(classifier_identifier)
## Labels dictionary ###
# Maps raw classifier labels to human-readable display names.
_lab_dict = {
    'NEGATIVE':'NO TARGET INFO',
    'TARGET':'TARGET',
}
@st.cache_data
def to_excel(df):
    """Serialize *df* to an in-memory xlsx file for manual review and
    return the raw bytes (suitable for ``st.download_button``).

    Five review columns are added to *df* **in place** (callers see the
    mutation), each defaulted to 'No', and each gets a No/Yes/Discard
    drop-down list in the workbook. The drop-downs are attached to
    spreadsheet columns L..P — this assumes the upstream frame has
    exactly 11 columns before the five added here; TODO confirm against
    the columns produced by target_classification.

    Parameters
    ----------
    df : pd.DataFrame
        Classification results to export.

    Returns
    -------
    bytes
        The xlsx file content.
    """
    df['Target Validation'] = 'No'
    df['Netzero Validation'] = 'No'
    df['GHG Validation'] = 'No'
    df['Adapt-Mitig Validation'] = 'No'
    df['Sector'] = 'No'
    len_df = len(df)
    output = BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    df.to_excel(writer, index=False, sheet_name='Sheet1')
    worksheet = writer.sheets['Sheet1']
    # Row 1 holds the header, so data rows span 2 .. len_df + 1. The
    # original stopped at row len_df (off by one) and ended every range
    # in column L ('M2:L..', 'N2:L..', ...), a copy-paste bug that left
    # columns M-P without a working drop-down.
    last_row = len_df + 1
    for col in ('L', 'M', 'N', 'O', 'P'):
        worksheet.data_validation(
            '{0}2:{0}{1}'.format(col, last_row),
            {'validate': 'list',
             'source': ['No', 'Yes', 'Discard']})
    # writer.save() was deprecated and removed in pandas 2.0; close()
    # finalizes the workbook into the BytesIO buffer.
    writer.close()
    processed_data = output.getvalue()
    return processed_data
def app():
    """Classify the extracted paragraphs for economy-wide target info.

    Reads the paragraph DataFrame from ``st.session_state.key0``, loads
    the target classifier named in ``params['model_name']``, caches it
    in session state, runs the classification, and stores the labelled
    frame in ``st.session_state.key1`` for ``target_display`` to render.
    Does nothing when no document has been processed yet (no ``key0``).
    """
    with st.container():
        if 'key0' not in st.session_state:
            return  # nothing extracted yet — page stays empty
        paragraphs = st.session_state.key0
        # Load the model and keep it around under '<identifier>_classifier'.
        model = load_targetClassifier(classifier_name=params['model_name'])
        st.session_state['{}_classifier'.format(classifier_identifier)] = model
        # NOTE(review): warning_msg is computed but never shown to the
        # user anywhere in this function — confirm whether it was meant
        # to be passed to st.warning or similar.
        warning_msg = (": This might take sometime, please sit back and relax."
                       if len(paragraphs) > 100 else "")
        labelled = target_classification(haystack_doc=paragraphs,
                                         threshold=params['threshold'])
        st.session_state.key1 = labelled
def target_display():
    """Render the classified results stored in ``st.session_state.key1``:
    summary counts, the best netzero paragraph, the top target
    paragraphs, the full table, and a sidebar download button.

    Assumes target_classification() populated the columns
    'Target Label', 'Netzero Label', 'Netzero Score', 'GHG Label',
    'Sector Label', 'Adapt-Mitig Label', 'Relevancy', 'page' and
    'text' — TODO confirm against utils.target_classifier.
    """
    if 'key1' in st.session_state:
        df = st.session_state.key1
        # Keep only paragraphs the classifier flagged as target-related.
        hits = df[df['Target Label'] == 'TARGET']
        # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
        range_val = min(5,len(hits))
        if range_val !=0:
            # Summary counts for the two metric columns below.
            # NOTE(review): count_target always equals len(hits) since
            # hits is already filtered to 'TARGET' rows.
            count_target = sum(hits['Target Label'] == 'TARGET')
            count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
            count_ghg = sum(hits['GHG Label'] == 'GHG')
            count_economy = sum([True if 'Economy-wide' in x else False
                                 for x in hits['Sector Label']])
            # count_df = df['Target Label'].value_counts()
            # count_df = count_df.rename('count')
            # count_df = count_df.rename_axis('Target Label').reset_index()
            # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
            # fig = px.bar(count_df, y="Label_def", x="count", orientation='h', height=200)
            c1, c2 = st.columns([1,1])
            with c1:
                st.write('**Target Paragraphs**: `{}`'.format(count_target))
                st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
                # st.plotly_chart(fig,use_container_width= True)
                # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
                # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
                # count_economy = sum([True if 'Economy-wide' in x else False
                #                      for x in hits['Sector Label']])
            with c2:
                st.write('**GHG Related Paragraphs**: `{}`'.format(count_ghg))
                st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
            st.write('-------------------')
            # Most relevant paragraphs first.
            hits = hits.sort_values(by=['Relevancy'], ascending=False)
            netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
            if not netzerohit.empty:
                # Show only the single highest-scoring netzero paragraph.
                netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
                # st.write('-------------------')
                # st.markdown("###### Netzero paragraph ######")
                st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
                         netzerohit.iloc[0]['text'].replace("\n", " ")))
                st.write("")
            else:
                st.info("🤔 No Netzero paragraph found")
            # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
            st.write('-------------------')
            st.markdown("###### Top few Target Classified paragraph/text results ######")
            range_val = min(5,len(hits))
            for i in range(range_val):
                # the page number reflects the page that contains the main paragraph
                # according to split limit, the overlapping part can be on a separate page
                st.write('**Result {}** (Relevancy Score: {:.2f}): `page {}`, `Sector: {}`,\
`GHG: {}`, `Adapt-Mitig :{}`'\
                         .format(i+1,hits.iloc[i]['Relevancy'],
                                 hits.iloc[i]['page'], hits.iloc[i]['Sector Label'],
                                 hits.iloc[i]['GHG Label'],hits.iloc[i]['Adapt-Mitig Label']))
                st.write("\t Text: \t{}".format(hits.iloc[i]['text'].replace("\n", " ")))
            hits = hits.reset_index(drop =True)
            st.write('----------------')
            st.write('Explore the data')
            st.write(hits)
            # NOTE(review): to_excel mutates df in place (adds validation
            # columns), so the frame cached in st.session_state.key1 is
            # changed by this call — confirm that is intended.
            df_xlsx = to_excel(df)
            with st.sidebar:
                st.write('-------------')
                st.download_button(label='📥 Download Result',
                                   data=df_xlsx ,
                                   file_name= 'cpu_analysis.xlsx')
        else:
            st.info("🤔 No Targets found")