|
import datetime |
|
import os |
|
import time |
|
import logging |
|
import nltk |
|
import validators |
|
import streamlit as st |
|
from summarizer import summarizer_init, summarizer_summarize |
|
from rouge_evaluate import get_rouge_scores |
|
from config import MODELS |
|
from warnings import filterwarnings |
|
|
|
# Ignore every Python warning from this point on. The call is placed before
# the ``utils`` import below, so warnings raised while importing it are
# suppressed as well.
filterwarnings("ignore")
|
from utils import ( |
|
clean_text, |
|
fetch_article_text, |
|
preprocess_text_for_abstractive_summarization, |
|
read_text_from_file, |
|
) |
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
def initialize_app():
    """Make NLTK tokenizer data available and seed the session state.

    The ``punkt`` resource is downloaded only when NLTK cannot already find
    it locally, so ordinary Streamlit reruns do not trigger a download call.

    Every key in the defaults table is then written to ``st.session_state``
    unless the key is already present, which keeps values set earlier in the
    session intact across reruns.
    """
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

    session_defaults = {
        "model_type": "local",
        "model_name": "Boardpac summarizer v1",
        "summarizer_type": "Map Reduce",
        "is_parameters_changed": False,
        "is_evaluate_pressed": False,
        "reference_summary": "",
        "generated_summary": "",
        "summary_time": "",
        "openai_api_key": "",
    }

    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default
|
|
|
|
|
|
|
@st.cache_resource
def init_summarizer(model_name, api_key=None):
    """Create the tokenizer and summarizer for ``model_name``.

    The function is wrapped in ``st.cache_resource``, so its arguments form
    the cache key for the returned objects.

    Args:
        model_name: Key looked up in ``MODELS``. The literal ``"OpenAI"``
            selects the ``"openai"`` model type; anything else is treated
            as ``"local"``.
        api_key: OpenAI API key. Only used for the ``"openai"`` model type.
            When ``None``, the key stored in
            ``st.session_state.openai_api_key`` is used instead.

    Returns:
        A ``(model_type, tokenizer, base_summarizer)`` tuple, where the last
        two items are whatever ``summarizer_init`` returns.
    """
    with st.spinner(
        text="initialising the summarizer. This might take a few seconds ..."
    ):
        model_type = "openai" if model_name == "OpenAI" else "local"
        model_path = MODELS[model_name]

        if model_type == "openai":
            # Respect an explicitly supplied key; only fall back to the
            # session value when the caller did not pass one.
            if api_key is None:
                api_key = st.session_state.openai_api_key
            tokenizer, base_summarizer = summarizer_init(
                model_path, model_type, api_key
            )
        else:
            logger.info("Model for summarization : %s", model_path)
            tokenizer, base_summarizer = summarizer_init(model_path, model_type)

    # Show a short-lived confirmation, then clear it again.
    alert = st.success("summarizer initialised")
    time.sleep(1)
    alert.empty()
    return model_type, tokenizer, base_summarizer
|
|
|
def update_parameters_change():
    """Set the ``is_parameters_changed`` session flag to ``True``."""
    st.session_state["is_parameters_changed"] = True
|
|
|
|
|
def parameters_change_button(model_name, summarizer_type):
    """Store the chosen parameters in the session and confirm briefly.

    Args:
        model_name: Value written to ``st.session_state.model_name``.
        summarizer_type: Value written to
            ``st.session_state.summarizer_type``.

    The ``is_parameters_changed`` flag is reset to ``False``, then a success
    message is shown for two seconds and removed.
    """
    state = st.session_state
    state["model_name"] = model_name
    state["summarizer_type"] = summarizer_type
    state["is_parameters_changed"] = False

    notice = st.success("chat parameters updated")
    time.sleep(2)
    notice.empty()
|
|
|
import re |
|
def is_valid_open_ai_api_key(secretKey):
    """Return ``True`` when ``secretKey`` has the expected key shape.

    A key is accepted when the whole string is ``sk-`` followed by at least
    32 ASCII letters or digits.

    Args:
        secretKey: Candidate key. Non-string values are rejected rather
            than raising.

    Returns:
        ``True`` if the entire string matches the pattern, else ``False``.
    """
    if not isinstance(secretKey, str):
        return False
    # fullmatch (rather than search with ^...$) also rejects a trailing
    # newline, which "$" would otherwise let through.
    return re.fullmatch(r"sk-[a-zA-Z0-9]{32,}", secretKey) is not None
|
|
|
def side_bar():
    """Render the sidebar: model parameters, OpenAI key form and usage help.

    The parameter form's submit button is created with ``disabled=True``, so
    the selections made in it are displayed but not written to the session
    state by this function.

    The OpenAI key form is only shown while the session's ``model_name`` is
    ``"OpenAI"``, the same literal ``init_summarizer`` uses to pick the
    OpenAI model type. Saving the form stores the entered key in
    ``st.session_state.openai_api_key``.
    """
    with st.sidebar:
        st.subheader("Model parameters")

        with st.form('param_form'):
            st.selectbox(
                "Summary model",
                MODELS,
                key="Model Name",
                help="Select the LLM model for summarization",
            )
            st.selectbox(
                "Summarizer Type for Long Text",
                options=["Map Reduce"],
            )
            st.form_submit_button(
                "Save Parameters",
                disabled=True,
            )

        st.markdown("\n")

        if st.session_state.model_name == "OpenAI":
            with st.form('openai api key'):
                api_key = st.text_input(
                    "Enter openai api key",
                    type="password",
                    value=st.session_state.openai_api_key,
                    help="enter an openai api key created from 'https://platform.openai.com/account/api-keys'",
                )
                submit_key = st.form_submit_button("Save key")

            if submit_key:
                st.session_state.openai_api_key = api_key

                # Show a short-lived confirmation, then clear it again.
                alert = st.success("openai api key updated")
                time.sleep(1)
                alert.empty()

        st.markdown(
            "### How to use\n"
            "1. Select the Summarization model\n"
            "1. Enter the text to get the summary."
        )
        st.markdown("---")
        st.markdown(
            "\n"
            "This app supports text in the following formats:\n"
            "- Raw text in text box\n"
            "- .txt, .pdf, .docx file formats\n"
        )
|
|
|
|
|
def load_app():
    """Render the main page: collect input, summarize it, optionally score it.

    Input is taken from, in order of precedence: a URL typed in the text
    area (fetched with ``fetch_article_text``), an uploaded file (read with
    ``read_text_from_file`` and passed through ``clean_text``), or the raw
    text-area content.

    When the "Summarize" button is pressed, the text is handed to
    ``submit_text_to_summarize``. If a reference summary was entered, the
    summary stored in ``st.session_state.generated_summary`` is scored
    against it with ``get_rouge_scores`` and the rouge1 / rouge2 / rougeL
    values are shown as metrics.
    """
    st.title("Text Summarizer 📝")

    inp_text = st.text_area("Enter text here")
    st.markdown(
        "<h4 style='text-align: center; color: green;'>OR</h4>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    is_url = validators.url(inp_text)
    if is_url:
        logger.info("Text Input Type: URL")
        # Only the second item of the returned pair is used here.
        _, cleaned_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        logger.info("Text Input Type: FILE")
        cleaned_txt = clean_text(read_text_from_file(uploaded_file))
    else:
        logger.info("Text Input Type: INPUT TEXT")
        cleaned_txt = inp_text

    with st.expander("View input text"):
        if is_url:
            # For URLs only the first element of the fetched text is previewed.
            st.write(cleaned_txt[0])
        else:
            st.write(cleaned_txt)

    st.subheader('Optional - Evaluate summary against a reference')
    reference_summary = st.text_area("Enter reference summary here")

    submitted = st.button("Summarize")
    if not submitted:
        return

    text_to_summarize = " ".join(cleaned_txt) if is_url else cleaned_txt

    # Do not invoke the model when there is nothing to summarize.
    if not str(text_to_summarize or "").strip():
        st.warning("Please enter text, a URL or upload a file to summarize.")
        return

    submit_text_to_summarize(text_to_summarize)

    # NOTE(review): scoring is done in the same run as summarization, using
    # the summary just stored in the session state - confirm this matches
    # the intended flow.
    if reference_summary.strip():
        rouge_result = get_rouge_scores(
            st.session_state.generated_summary,
            clean_text(reference_summary),
        )

        st.text("Evaluated scores:")
        col1, col2, col3 = st.columns(3)
        col1.metric('rouge1', f"{rouge_result['rouge1']:.2f}")
        col2.metric('rouge2', f"{rouge_result['rouge2']:.2f}")
        col3.metric('rougeL', f"{rouge_result['rougeL']:.2f}")
|
|
|
|
|
|
|
|
|
def submit_text_to_summarize(text_to_summarize):
    """Summarize the text, remember the result and display it.

    Args:
        text_to_summarize: Text passed on to ``get_summary``.

    Returns:
        The generated summary. It is also stored in
        ``st.session_state.generated_summary`` and rendered through
        ``display_output``.
    """
    # "elapsed" rather than "time", to avoid hiding the imported time module.
    summarized_text, elapsed = get_summary(text_to_summarize)
    st.session_state.generated_summary = summarized_text
    display_output(summarized_text, elapsed)
    return summarized_text
|
|
|
|
|
|
|
def submit_text_to_summarize2(text_to_summarize, reference_summary):
    """Summarize the text and display the result.

    Unlike ``submit_text_to_summarize``, the summary is not stored in the
    session state.

    Args:
        text_to_summarize: Text passed on to ``get_summary``.
        reference_summary: Accepted but not used by this function.
    """
    summarized_text, elapsed = get_summary(text_to_summarize)
    # Logging and rendering are exactly what display_output already does.
    display_output(summarized_text, elapsed)
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_summary(text_to_summarize):
    """Summarize text with the model selected in the session state.

    Reads ``model_name`` and ``summarizer_type`` from ``st.session_state``,
    obtains the summarizer through ``init_summarizer`` and calls
    ``summarizer_summarize``. A session ``summarizer_type`` of ``"Refine"``
    maps to the ``"refine"`` strategy; any other value maps to
    ``"map_reduce"``.

    Args:
        text_to_summarize: Text passed to ``summarizer_summarize``.

    Returns:
        The ``(summarized_text, time)`` pair returned by
        ``summarizer_summarize``.
    """
    model_name = st.session_state.model_name
    summarizer_type = st.session_state.summarizer_type

    # Pass the current key for the OpenAI model so that it is part of the
    # arguments init_summarizer is cached on; other models need no key.
    api_key = st.session_state.openai_api_key if model_name == "OpenAI" else None
    model_type, tokenizer, base_summarizer = init_summarizer(
        model_name, api_key=api_key
    )

    logger.info("Model Name: %s", model_name)
    logger.info("Summarization Type for Long Text: %s", summarizer_type)

    strategy = "refine" if summarizer_type == "Refine" else "map_reduce"

    with st.spinner(
        text="Creating summary. This might take a few seconds ..."
    ):
        summarized_text, elapsed = summarizer_summarize(
            model_type,
            tokenizer,
            base_summarizer,
            text_to_summarize,
            summarizer_type=strategy,
        )
    return summarized_text, elapsed
|
|
|
def evaluate_block(summarized_text):
    """Render a reference-summary input and an "Evaluate" button.

    Args:
        summarized_text: Summary handed to the button callback.

    The button's ``on_click`` callback is ``testtttt``, called with the
    summary and the current content of the reference text area.
    """
    st.subheader('Evaluate summary against a reference')

    reference_summary = st.text_area("Enter reference summary here")

    callback_args = [summarized_text, reference_summary]
    st.button("Evaluate", on_click=testtttt, args=callback_args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def testtttt(summarized_text, reference_summary):
    """Score a summary against a reference and show the raw scores.

    Args:
        summarized_text: First argument passed to ``get_rouge_scores``.
        reference_summary: Second argument passed to ``get_rouge_scores``.
    """
    # Use the module logger instead of a bare print for debug output.
    logger.debug(
        "Evaluating summary %r against reference %r",
        summarized_text,
        reference_summary,
    )
    scores = get_rouge_scores(summarized_text, reference_summary)
    st.text(f"evaluate scores-----: {scores}")
|
|
|
|
|
def update_evaluate_button_change():
    """Set the ``is_evaluate_pressed`` session flag to ``True``."""
    st.session_state["is_evaluate_pressed"] = True
|
|
|
|
|
|
|
|
|
|
|
def display_output(summarized_text, time):
    """Log a summary and its duration, then render both on the page.

    Args:
        summarized_text: Summary to log and show in an info box.
        time: Value reported as the time taken, followed by an ``s`` suffix.
    """
    summary_line = f"SUMMARY: {summarized_text}"
    duration_line = f"Summary took {time}s"
    logger.info(summary_line)
    logger.info(duration_line)

    st.subheader("Summarized text")
    summary_body = f"{summarized_text}"
    st.info(summary_body)

    st.text(f"Time taken: {time}s")
|
|
|
def main():
    """Run the app: session setup, then the sidebar, then the main page."""
    for step in (initialize_app, side_bar, load_app):
        step()
|
|
|
|
|
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|