"""Streamlit front end for the text summarizer.

The page lets the user paste text (or a URL) or upload a file, produces a
summary with the configured model, and optionally scores the summary against
a user-supplied reference using ROUGE.
"""

import datetime
import logging
import os
import re
import time

import nltk
import validators
import streamlit as st

from summarizer import summarizer_init, summarizer_summarize
from rouge_evaluate import get_rouge_scores
from config import MODELS

from warnings import filterwarnings

# Silence warnings before the remaining project imports, as the original did.
filterwarnings("ignore")

from utils import (
    clean_text,
    fetch_article_text,
    preprocess_text_for_abstractive_summarization,
    read_text_from_file,
)

logger = logging.getLogger(__name__)

# Name under which the OpenAI backend is selected; every other model name is
# treated as a local model.
OPENAI_MODEL_NAME = "OpenAI"

# Values seeded into ``st.session_state`` the first time a session runs.
SESSION_DEFAULTS = {
    "model_type": "local",
    "model_name": "Boardpac summarizer v1",
    "summarizer_type": "Map Reduce",
    "is_parameters_changed": False,
    "is_evaluate_pressed": False,
    "reference_summary": '',
    "generated_summary": '',
    "summary_time": '',
    'openai_api_key': '',
}


def initialize_app():
    """Download the NLTK tokenizer data and seed missing session-state keys."""
    nltk.download("punkt")
    for key, default in SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = default


@st.cache_resource
def init_summarizer(model_name, api_key=None):
    """Build (and cache) the tokenizer and summarizer for ``model_name``.

    Args:
        model_name: Key into ``MODELS``.
        api_key: OpenAI API key. Only used for the OpenAI model. When ``None``
            the key stored in ``st.session_state.openai_api_key`` is used.
            Passing the key explicitly makes it part of the cache key, so a
            changed key yields a freshly initialised summarizer.

    Returns:
        Tuple ``(model_type, tokenizer, base_summarizer)`` where ``model_type``
        is ``"openai"`` or ``"local"``.

    Raises:
        KeyError: If ``model_name`` is not present in ``MODELS``.
    """
    model_type = "openai" if model_name == OPENAI_MODEL_NAME else "local"
    with st.spinner(
        text="initialising the summarizer. This might take a few seconds ..."
    ):
        model_path = MODELS[model_name]
        if model_type == "openai":
            if api_key is None:
                api_key = st.session_state.openai_api_key
            tokenizer, base_summarizer = summarizer_init(
                model_path, model_type, api_key
            )
        else:
            logger.info("Model for summarization : %s", model_path)
            tokenizer, base_summarizer = summarizer_init(model_path, model_type)

    alert = st.success("summarizer initialised")
    time.sleep(1)  # Keep the alert visible for 1 second
    alert.empty()  # Clear the alert
    return model_type, tokenizer, base_summarizer


def update_parameters_change():
    """Flag in session state that the sidebar parameters were modified."""
    st.session_state.is_parameters_changed = True


def parameters_change_button(model_name, summarizer_type):
    """Persist the chosen model and summarizer type and confirm to the user."""
    st.session_state.model_name = model_name
    st.session_state.summarizer_type = summarizer_type
    st.session_state.is_parameters_changed = False
    alert = st.success("chat parameters updated")
    time.sleep(2)  # Keep the alert visible for 2 seconds
    alert.empty()  # Clear the alert


def is_valid_open_ai_api_key(secretKey):
    """Return True if ``secretKey`` is ``sk-`` followed by 32+ alphanumerics."""
    return bool(re.search(r"^sk-[a-zA-Z0-9]{32,}$", secretKey))


def side_bar():
    """Render the sidebar: model parameters, optional API key form, and help."""
    with st.sidebar:
        st.subheader("Model parameters")
        with st.form('param_form'):
            model_name = st.selectbox(
                "Summary model",
                MODELS,
                key="Model Name",
                help="Select the LLM model for summarization",
            )
            # Only "Map Reduce" is currently offered.
            summarizer_type = st.selectbox(
                "Summarizer Type for Long Text",
                options=["Map Reduce"],
            )
            # Saving is disabled, so the selections above are not applied.
            submitted = st.form_submit_button(
                "Save Parameters",
                disabled=True,
            )

        st.markdown("\n")

        # Compare against the same name init_summarizer uses; the previous
        # lowercase 'openai' could never match a model name.
        if st.session_state.model_name == OPENAI_MODEL_NAME:
            with st.form('openai api key'):
                api_key = st.text_input(
                    "Enter openai api key",
                    type="password",
                    value=st.session_state.openai_api_key,
                    help="enter an openai api key created from 'https://platform.openai.com/account/api-keys'",
                )
                submit_key = st.form_submit_button("Save key")
                if submit_key:
                    st.session_state.openai_api_key = api_key
                    alert = st.success("openai api key updated")
                    time.sleep(1)  # Keep the alert visible for 1 second
                    alert.empty()  # Clear the alert

        st.markdown(
            "### How to use\n"
            "1. Select the Summarization model\n"
            "1. Enter the text to get the summary."
        )
        st.markdown("---")
        st.markdown(
            """
This app supports text in the following formats:
- Raw text in text box
- .txt, .pdf, .docx file formats
"""
        )


def _is_blank(text):
    """Return True for empty input or a string containing only whitespace."""
    if isinstance(text, str):
        return not text.strip()
    return not text


def _show_rouge_scores(reference_summary):
    """Score the stored generated summary against ``reference_summary``."""
    generated = st.session_state.generated_summary
    if not generated:
        # Nothing to evaluate (e.g. summarization produced no text).
        return
    rouge_result = get_rouge_scores(generated, clean_text(reference_summary))
    st.text("Evaluated scores:")
    col1, col2, col3 = st.columns(3)
    col1.metric('rouge1', "{:.2f}".format(rouge_result['rouge1']))
    col2.metric('rouge2', "{:.2f}".format(rouge_result['rouge2']))
    col3.metric('rougeL', "{:.2f}".format(rouge_result['rougeL']))


def load_app():
    """Render the main page: inputs, the summarize action, and ROUGE scores."""
    st.title("Text Summarizer 📝")

    inp_text = st.text_area("Enter text here")
    # Visual separator between the two input methods.
    st.markdown("\n\nOR\n\n", unsafe_allow_html=True)
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    is_url = validators.url(inp_text)
    if is_url:
        logger.info("Text Input Type: URL")
        # Only the second element of the returned pair is used below.
        _, cleaned_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        logger.info("Text Input Type: FILE")
        cleaned_txt = clean_text(read_text_from_file(uploaded_file))
    else:
        logger.info("Text Input Type: INPUT TEXT")
        cleaned_txt = inp_text

    with st.expander("View input text"):
        if is_url:
            st.write(cleaned_txt[0])
        else:
            st.write(cleaned_txt)

    st.subheader('Optional - Evaluate summary against a reference')
    reference_summary = st.text_area("Enter reference summary here")

    submitted = st.button("Summarize")
    if not submitted:
        return

    if is_url:
        text_to_summarize = " ".join(cleaned_txt)
    else:
        text_to_summarize = cleaned_txt

    if _is_blank(text_to_summarize):
        st.warning("Please enter some text or upload a file to summarize.")
        return

    submit_text_to_summarize(text_to_summarize)

    if reference_summary.strip():
        _show_rouge_scores(reference_summary)


def submit_text_to_summarize(text_to_summarize):
    """Summarize the text, store the result in session state, and display it.

    Returns:
        The generated summary.
    """
    summarized_text, elapsed = get_summary(text_to_summarize)
    st.session_state.generated_summary = summarized_text
    display_output(summarized_text, elapsed)
    return summarized_text


def submit_text_to_summarize2(text_to_summarize, reference_summary):
    """Summarize and display the text without touching session state.

    ``reference_summary`` is accepted for signature compatibility and is
    currently unused.
    """
    summarized_text, elapsed = get_summary(text_to_summarize)
    display_output(summarized_text, elapsed)


def get_summary(text_to_summarize):
    """Summarize ``text_to_summarize`` with the model chosen in session state.

    Returns:
        The ``(summarized_text, time)`` pair returned by
        ``summarizer_summarize``.
    """
    model_name = st.session_state.model_name
    summarizer_type = st.session_state.summarizer_type

    # Pass the key explicitly so the cached summarizer is rebuilt when the
    # user saves a different OpenAI key.
    api_key = None
    if model_name == OPENAI_MODEL_NAME:
        api_key = st.session_state.openai_api_key
    model_type, tokenizer, base_summarizer = init_summarizer(
        model_name, api_key=api_key
    )

    logger.info("Model Name: %s", model_name)
    logger.info("Summarization Type for Long Text: %s", summarizer_type)

    strategy = "refine" if summarizer_type == "Refine" else "map_reduce"
    with st.spinner(
        text="Creating summary. This might take a few seconds ..."
    ):
        summarized_text, elapsed = summarizer_summarize(
            model_type,
            tokenizer,
            base_summarizer,
            text_to_summarize,
            summarizer_type=strategy,
        )
    return summarized_text, elapsed


def evaluate_block(summarized_text):
    """Render a reference-summary input and an Evaluate button."""
    st.subheader('Evaluate summary against a reference')
    reference_summary = st.text_area("Enter reference summary here")
    Evaluate = st.button(
        "Evaluate",
        on_click=testtttt,
        args=[summarized_text, reference_summary],
    )


def testtttt(summarized_text, reference_summary):
    """Button callback: print both texts and show their ROUGE scores."""
    print(summarized_text, reference_summary)
    scores = get_rouge_scores(summarized_text, reference_summary)
    st.text(f"evaluate scores-----: {scores}")


def update_evaluate_button_change():
    """Flag in session state that the Evaluate button was pressed."""
    st.session_state.is_evaluate_pressed = True


def display_output(summarized_text, time):
    """Log and render the summary together with the time it took.

    Args:
        summarized_text: Summary to show.
        time: Value reported as the time taken. The parameter name shadows
            the ``time`` module inside this function only and is kept for
            caller compatibility.
    """
    logger.info("SUMMARY: %s", summarized_text)
    logger.info("Summary took %ss", time)
    st.subheader("Summarized text")
    st.info(f"{summarized_text}")
    st.text(f"Time taken: {time}s")


def main():
    """Entry point: initialise state, then render the sidebar and the page."""
    initialize_app()
    side_bar()
    load_app()


if __name__ == "__main__":
    main()