theekshana's picture
rouge score
400b4d5
raw
history blame
12.2 kB
import datetime
import os
import time
import logging
import nltk
import validators
import streamlit as st
from summarizer import summarizer_init, summarizer_summarize
from rouge_evaluate import get_rouge_scores
from config import MODELS
from warnings import filterwarnings
filterwarnings("ignore")
from utils import (
clean_text,
fetch_article_text,
preprocess_text_for_abstractive_summarization,
read_text_from_file,
)
# from rouge import Rouge
logger = logging.getLogger(__name__)
def initialize_app():
    """Make sure NLTK tokenizer data exists and seed the session defaults.

    Every key listed below is written to ``st.session_state`` only when it is
    not already present, so values set earlier in the session are preserved.
    """
    # Only invoke the downloader when the "punkt" data is actually missing,
    # instead of calling it unconditionally each time this function runs.
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

    # NOTE: the original literal listed "is_evaluate_pressed" twice; a dict
    # keeps only the last duplicate, so a single entry is equivalent.
    session_defaults = {
        "model_type": "local",
        "model_name": "Boardpac summarizer v1",
        "summarizer_type": "Map Reduce",
        "is_parameters_changed": False,
        "is_evaluate_pressed": False,
        "reference_summary": '',
        "generated_summary": '',
        "summary_time": '',
        'openai_api_key': '',
    }

    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default
@st.cache_resource
def init_summarizer(model_name, api_key=None):
    """Build the tokenizer and base summarizer for ``model_name``.

    Args:
        model_name: Key into ``MODELS``. The name ``"OpenAI"`` selects the
            ``"openai"`` model type; every other name is treated as
            ``"local"``.
        api_key: Optional OpenAI API key. When empty or ``None`` the key
            stored in ``st.session_state.openai_api_key`` is used instead.
            Ignored for local models.

    Returns:
        A ``(model_type, tokenizer, base_summarizer)`` tuple.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    with st.spinner(
        text="initialising the summarizer. This might take a few seconds ..."
    ):
        model_type = "openai" if model_name == "OpenAI" else "local"
        model_path = MODELS[model_name]

        if model_type == "openai":
            # Honour an explicitly supplied key; previously the argument was
            # always overwritten by the session value and therefore dead.
            if not api_key:
                api_key = st.session_state.openai_api_key
            tokenizer, base_summarizer = summarizer_init(
                model_path, model_type, api_key
            )
        else:
            logger.info(f"Model for summarization : {model_path}")
            tokenizer, base_summarizer = summarizer_init(model_path, model_type)

        # Show a short-lived confirmation, then clear it again.
        alert = st.success("summarizer initialised")
        time.sleep(1)
        alert.empty()
        return model_type, tokenizer, base_summarizer
def update_parameters_change():
    """Flag in session state that the model parameters have been changed."""
    st.session_state["is_parameters_changed"] = True
def parameters_change_button(model_name, summarizer_type):
    """Store the chosen model and summarizer type, then confirm briefly.

    Writes both selections to ``st.session_state``, resets the
    ``is_parameters_changed`` flag and shows a success message that is
    cleared after two seconds.
    """
    state = st.session_state
    state.model_name = model_name
    state.summarizer_type = summarizer_type
    state.is_parameters_changed = False

    notice = st.success("chat parameters updated")
    time.sleep(2)  # keep the confirmation visible for two seconds
    notice.empty()  # then remove it from the page
import re
def is_valid_open_ai_api_key(secretKey):
    """Return True when ``secretKey`` has the expected OpenAI key shape.

    The accepted shape is the literal prefix ``sk-`` followed by at least 32
    ASCII letters or digits and nothing else.

    Args:
        secretKey: Candidate key. Non-string values are rejected rather than
            raising.

    Returns:
        ``True`` if the whole string matches the pattern, otherwise ``False``.
    """
    if not isinstance(secretKey, str):
        return False
    # fullmatch (unlike search with "$") also rejects a trailing newline.
    # NOTE(review): key formats containing "-" or "_" after the prefix would
    # be rejected by this character class - confirm against current keys.
    return re.fullmatch(r"sk-[a-zA-Z0-9]{32,}", secretKey) is not None
def side_bar():
    """Render the sidebar: model parameters, optional API key form, help text.

    The parameter form's submit button is disabled, so the selections made in
    it are displayed but not written back to ``st.session_state``. The OpenAI
    key form is rendered only while the active model name is ``"OpenAI"``;
    saving it stores the key in ``st.session_state.openai_api_key``.
    """
    with st.sidebar:
        st.subheader("Model parameters")

        with st.form('param_form'):
            st.selectbox(
                "Summary model",
                MODELS,
                key="Model Name",
                help="Select the LLM model for summarization",
            )
            st.selectbox(
                "Summarizer Type for Long Text",
                # "Refine" is currently not offered as an option.
                options=["Map Reduce"]
            )
            # Saving is switched off for now, so the widget values above are
            # intentionally not captured.
            st.form_submit_button(
                "Save Parameters",
                disabled=True
            )

        st.markdown("\n")

        # Must match the model name init_summarizer checks ("OpenAI"); the
        # previous lowercase 'openai' comparison could never be true for it.
        if st.session_state.model_name == "OpenAI":
            with st.form('openai api key'):
                api_key = st.text_input(
                    "Enter openai api key",
                    type="password",
                    value=st.session_state.openai_api_key,
                    help="enter an openai api key created from 'https://platform.openai.com/account/api-keys'",
                )
                submit_key = st.form_submit_button(
                    "Save key",
                )
                if submit_key:
                    st.session_state.openai_api_key = api_key
                    alert = st.success("openai api key updated")
                    time.sleep(1)  # keep the confirmation visible for a second
                    alert.empty()  # then clear it

        st.markdown(
            "### How to use\n"
            "1. Select the Summarization model\n"  # noqa: E501
            "1. Enter the text to get the summary."
        )
        st.markdown("---")
        st.markdown("""
This app supports text in the following formats:
- Raw text in text box
- .txt, .pdf, .docx file formats
"""
        )
def load_app():
    """Render the main page: collect input, summarize it, optionally score it.

    Input comes from the text area or an uploaded file; text-area content
    that validates as a URL is fetched with ``fetch_article_text``. Pressing
    "Summarize" runs the summarizer and, when a reference summary was
    entered, shows ROUGE scores for the generated summary.
    """
    st.title("Text Summarizer 📝")

    inp_text = st.text_area(
        "Enter text here"
    )
    st.markdown(
        "<h4 style='text-align: center; color: green;'>OR</h4>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    is_url = validators.url(inp_text)
    if is_url:
        # fetch_article_text returns the complete text plus the pieces to
        # summarize; only the latter is used here.
        logger.info("Text Input Type: URL")
        _, cleaned_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        logger.info("Text Input Type: FILE")
        cleaned_txt = clean_text(read_text_from_file(uploaded_file))
    else:
        logger.info("Text Input Type: INPUT TEXT")
        cleaned_txt = inp_text

    # Let the user inspect what will be summarized.
    with st.expander("View input text"):
        if is_url:
            st.write(cleaned_txt[0])
        else:
            st.write(cleaned_txt)

    st.subheader('Optional - Evaluate summary against a reference')
    reference_summary = st.text_area(
        "Enter reference summary here"
    )

    submitted = st.button("Summarize")
    if not submitted:
        return

    text_to_summarize = " ".join(cleaned_txt) if is_url else cleaned_txt

    # Nothing to summarize: tell the user instead of calling the model.
    if isinstance(text_to_summarize, str) and not text_to_summarize.strip():
        st.warning("Please enter some text or upload a file to summarize.")
        return

    # The summary is read back from session state below, so the call's
    # return value is not needed here.
    submit_text_to_summarize(text_to_summarize)

    if reference_summary.strip() != '':
        summarized_text = st.session_state.generated_summary
        rouge_result = get_rouge_scores(summarized_text, clean_text(reference_summary))

        st.text("Evaluated scores:")
        col1, col2, col3 = st.columns(3)
        col1.metric('rouge1', f"{rouge_result['rouge1']:.2f}")
        col2.metric('rouge2', f"{rouge_result['rouge2']:.2f}")
        col3.metric('rougeL', f"{rouge_result['rougeL']:.2f}")
def submit_text_to_summarize(text_to_summarize):
    """Summarize the text, remember the result and display it.

    The generated summary is stored in
    ``st.session_state.generated_summary`` and rendered via
    ``display_output``.

    Args:
        text_to_summarize: Text passed on to ``get_summary``.

    Returns:
        The generated summary, so callers that bind the result receive the
        summary rather than ``None``.
    """
    # Named ``elapsed`` so the imported ``time`` module is not shadowed.
    summarized_text, elapsed = get_summary(text_to_summarize)
    st.session_state.generated_summary = summarized_text
    display_output(summarized_text, elapsed)
    return summarized_text
def submit_text_to_summarize2(text_to_summarize, reference_summary):
    """Summarize the text, log the outcome and render it on the page.

    ``reference_summary`` is accepted but not used by this function.
    """
    summary, seconds = get_summary(text_to_summarize)

    # Log the summary and how long it took, in that order.
    for message in (f"SUMMARY: {summary}", f"Summary took {seconds}s"):
        logger.info(message)

    st.subheader("Summarized text")
    st.info(f"{summary}")
    st.text(f"Time taken: {seconds}s")
def get_summary(text_to_summarize):
    """Summarize the text with the model selected in session state.

    Reads ``model_name`` and ``summarizer_type`` from ``st.session_state``,
    obtains the summarizer via ``init_summarizer`` and calls
    ``summarizer_summarize`` with ``"refine"`` when the type is "Refine" and
    ``"map_reduce"`` otherwise.

    Returns:
        The two values unpacked from ``summarizer_summarize``, as a tuple.
    """
    chosen_model = st.session_state.model_name
    long_text_mode = st.session_state.summarizer_type
    model_type, tokenizer, base_summarizer = init_summarizer(chosen_model, api_key=None)

    logger.info(f"Model Name: {chosen_model}")
    logger.info(f"Summarization Type for Long Text: {long_text_mode}")

    strategy = "refine" if long_text_mode == "Refine" else "map_reduce"

    with st.spinner(
        text="Creating summary. This might take a few seconds ..."
    ):
        summary, elapsed = summarizer_summarize(
            model_type,
            tokenizer,
            base_summarizer,
            text_to_summarize,
            summarizer_type=strategy,
        )
        return summary, elapsed
def evaluate_block(summarized_text):
    """Render a reference-summary input and an "Evaluate" button.

    Clicking the button invokes ``testtttt`` with the summary and the
    reference text that was in the text area when the button was created.
    """
    st.subheader('Evaluate summary against a reference')
    reference = st.text_area("Enter reference summary here")

    # The button's return value is not needed; the work happens in the
    # on_click callback.
    st.button(
        "Evaluate",
        on_click=testtttt,
        args=[summarized_text, reference],
    )
def testtttt(summarized_text, reference_summary):
    """Print both texts, then show their ROUGE scores on the page."""
    print(summarized_text, reference_summary)
    rouge = get_rouge_scores(summarized_text, reference_summary)
    st.text(f"evaluate scores-----: {rouge}")
def update_evaluate_button_change():
    """Record in session state that the evaluate button was pressed."""
    st.session_state["is_evaluate_pressed"] = True
# def evaluate(summarized_text, reference_summary):
# return get_rouge_scores(summarized_text, reference_summary)
def display_output(summarized_text, time):
    """Log the summary and its duration, then render both on the page.

    Args:
        summarized_text: The generated summary to show.
        time: Duration value, rendered with an "s" suffix.
    """
    for message in (f"SUMMARY: {summarized_text}", f"Summary took {time}s"):
        logger.info(message)

    st.subheader("Summarized text")
    st.info(f"{summarized_text}")
    st.text(f"Time taken: {time}s")
def main():
    """Run the app: initialise state, draw the sidebar, draw the main page."""
    # Order matters: session defaults must exist before any widget reads them.
    for step in (initialize_app, side_bar, load_app):
        step()
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()
# text_to_summarize, model_name, summarizer_type, summarize = load_app()
# summarized_text,time = get_summary(text_to_summarize, model_name, summarizer_type, summarize)
# display_output(summarized_text,time)