"""Streamlit front end for the text summarizer.

The page lets the user pick a summarization model in the sidebar, supply
text (typed, fetched from a URL, or uploaded as a file) and displays the
summary returned by ``summarizer_summarize``.
"""
import datetime
import logging
import os
import re
import time

import nltk
import validators
import streamlit as st

from summarizer import summarizer_init, summarizer_summarize
from config import MODELS
from warnings import filterwarnings

# Kept ahead of the ``utils`` import, as in the original module, so that any
# warning raised while importing ``utils`` is still suppressed.
filterwarnings("ignore")

from utils import (  # noqa: E402
    clean_text,
    fetch_article_text,
    preprocess_text_for_abstractive_summarization,
    read_text_from_file,
)

logger = logging.getLogger(__name__)

# Name under which the OpenAI backend is selected; this is the value
# ``init_summarizer`` compares the chosen model name against.
OPENAI_MODEL_NAME = "OpenAI"

# Values written into ``st.session_state`` the first time the app runs.
SESSION_DEFAULTS = {
    "model_type": "local",
    "model_name": "long-t5 v1",
    "summarizer_type": "Map Reduce",
    "is_parameters_changed": False,
    "openai_api_key": "",
}


def _ensure_punkt():
    """Download the NLTK ``punkt`` resource only when it is not installed."""
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")


def _flash_success(message, seconds):
    """Show a success alert, keep it visible for ``seconds``, then clear it."""
    alert = st.success(message)
    time.sleep(seconds)
    alert.empty()


def initialize_app():
    """Prepare NLTK data and seed missing ``st.session_state`` entries.

    Existing session values are left untouched so user choices survive
    Streamlit reruns.
    """
    _ensure_punkt()
    for key, default in SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = default


@st.cache_resource
def init_summarizer(model_name, api_key=None):
    """Create the tokenizer and summarizer for ``model_name``.

    The result is cached by Streamlit on ``(model_name, api_key)``.

    Args:
        model_name: Key into ``MODELS`` selecting the model to load.
        api_key: OpenAI API key. Only used when ``model_name`` is the OpenAI
            model; when ``None`` the key stored in ``st.session_state`` is
            used instead.

    Returns:
        A ``(model_type, tokenizer, base_summarizer)`` tuple where
        ``model_type`` is ``"openai"`` or ``"local"``.
    """
    with st.spinner(
        text="initialising the summarizer. This might take a few seconds ..."
    ):
        model_type = "openai" if model_name == OPENAI_MODEL_NAME else "local"
        model_path = MODELS[model_name]
        if model_type == "openai":
            # Prefer the explicit argument: it is part of the cache key, so a
            # changed key yields a freshly initialised summarizer.
            if api_key is None:
                api_key = st.session_state.openai_api_key
            tokenizer, base_summarizer = summarizer_init(
                model_path, model_type, api_key
            )
        else:
            logger.info("Model for summarization : %s", model_path)
            tokenizer, base_summarizer = summarizer_init(model_path, model_type)
    _flash_success("summarizer initialised", 1)
    return model_type, tokenizer, base_summarizer


def update_parameters_change():
    """Flag in the session state that the model parameters were edited."""
    st.session_state.is_parameters_changed = True


def parameters_change_button(model_name, summarizer_type):
    """Store the chosen parameters in the session state and confirm it.

    Args:
        model_name: Selected model name.
        summarizer_type: Selected long-text strategy label.
    """
    st.session_state.model_name = model_name
    st.session_state.summarizer_type = summarizer_type
    st.session_state.is_parameters_changed = False
    _flash_success("chat parameters updated", 2)


def is_valid_open_ai_api_key(secretKey):
    """Return ``True`` when ``secretKey`` matches ``sk-`` + 32 or more alphanumerics."""
    return bool(re.search(r"^sk-[a-zA-Z0-9]{32,}$", secretKey))


def side_bar():
    """Render the sidebar: model parameters, optional API-key form and help."""
    with st.sidebar:
        st.subheader("Model parameters")
        with st.form('param_form'):
            model_name = st.selectbox(
                "Chat model",
                MODELS,
                key="Model Name",
                help="Select the LLM model for summarization",
            )
            summarizer_type = st.selectbox(
                "Summarizer Type for Long Text",
                options=["Map Reduce", "Refine"],
            )
            submitted = st.form_submit_button("Save Parameters")
        if submitted:
            parameters_change_button(model_name, summarizer_type)

        st.markdown("\n")

        # The key form is only relevant for the OpenAI model. The comparison
        # uses the same spelling as init_summarizer so the form is reachable.
        if st.session_state.model_name == OPENAI_MODEL_NAME:
            with st.form('openai api key'):
                api_key = st.text_input(
                    "Enter openai api key",
                    type="password",
                    value=st.session_state.openai_api_key,
                    help="enter an openai api key created from 'https://platform.openai.com/account/api-keys'",
                )
                submit_key = st.form_submit_button("Save key")
            if submit_key:
                st.session_state.openai_api_key = api_key
                _flash_success("openai api key updated", 1)

        st.markdown(
            "### How to use\n"
            "1. Select the LLM model\n"  # noqa: E501
            "1. If selected model asks for a api key enter a valid api key.\n"  # noqa: E501
            "1. Enter a text or a url to get a summary."
        )
        st.markdown("---")
        st.markdown(
            "This app supports text in the following formats:\n"
            "- Raw text in text box\n"
            "- URL of article/news to be summarized\n"
            "- .txt, .pdf, .docx file formats\n"
        )


def load_app():
    """Render the main page, collect the input text and trigger summarization.

    The input is taken, in order of precedence, from a URL typed into the
    text box, an uploaded file, or the raw text typed into the text box.
    """
    st.title("Text Summarizer 📝")
    inp_text = st.text_input("Enter text or a url here")
    # NOTE(review): this literal is passed with unsafe_allow_html=True but
    # contains no markup — HTML may have been stripped at some point; confirm.
    st.markdown(
        "\n\nOR\n\n",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    # validators.url returns a falsy failure object (not False) on bad input.
    is_url = bool(validators.url(inp_text))
    if is_url:
        logger.info("Text Input Type: URL")
        # Second element is the cleaned text as a sequence of chunks.
        _, cleaned_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        logger.info("Text Input Type: FILE")
        cleaned_txt = clean_text(read_text_from_file(uploaded_file))
    else:
        logger.info("Text Input Type: INPUT TEXT")
        cleaned_txt = clean_text(inp_text)

    with st.expander("View input text"):
        if is_url:
            # Guard against an article that produced no chunks.
            st.write(cleaned_txt[0] if cleaned_txt else "")
        else:
            st.write(cleaned_txt)

    if not st.button("Summarize"):
        return

    text_to_summarize = " ".join(cleaned_txt) if is_url else cleaned_txt
    if not text_to_summarize:
        # Nothing to summarize; avoid loading a model for empty input.
        st.warning("Please enter text, a url or upload a file to summarize.")
        return
    submit_text_to_summarize(text_to_summarize)


def submit_text_to_summarize(text_to_summarize):
    """Summarize ``text_to_summarize`` and display the result."""
    summarized_text, elapsed = get_summary(text_to_summarize)
    display_output(summarized_text, elapsed)


def get_summary(text_to_summarize):
    """Summarize text with the model and strategy stored in the session.

    Args:
        text_to_summarize: Text handed to ``summarizer_summarize``.

    Returns:
        The ``(summarized_text, time)`` pair returned by
        ``summarizer_summarize``.
    """
    model_name = st.session_state.model_name
    summarizer_type = st.session_state.summarizer_type

    # Pass the key explicitly for OpenAI so the cached summarizer is rebuilt
    # when the user saves a different key; local models stay keyed on None.
    api_key = None
    if model_name == OPENAI_MODEL_NAME:
        api_key = st.session_state.openai_api_key
    model_type, tokenizer, base_summarizer = init_summarizer(
        model_name, api_key=api_key
    )

    logger.info("Model Name: %s", model_name)
    logger.info("Summarization Type for Long Text: %s", summarizer_type)

    # Any label other than "Refine" falls back to map-reduce.
    strategy = "refine" if summarizer_type == "Refine" else "map_reduce"
    with st.spinner(
        text="Creating summary. This might take a few seconds ..."
    ):
        summarized_text, elapsed = summarizer_summarize(
            model_type,
            tokenizer,
            base_summarizer,
            text_to_summarize,
            summarizer_type=strategy,
        )
    return summarized_text, elapsed


def display_output(summarized_text, time):
    """Log and render the summary together with the time it took.

    Args:
        summarized_text: Summary to show.
        time: Duration reported by the summarizer, displayed with an ``s``
            suffix.
    """
    logger.info("SUMMARY: %s", summarized_text)
    logger.info("Summary took %ss", time)
    st.subheader("Summarized text")
    st.info(f"{summarized_text}")
    st.info(f"Time: {time}s")


def main():
    """Run the app: initialise state, draw the sidebar, draw the main page."""
    initialize_app()
    side_bar()
    load_app()


if __name__ == "__main__":
    main()