import torch
import validators
import streamlit as st
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
# local modules
from extractive_summarizer.model_processors import Summarizer
from src.utils import clean_text, fetch_article_text
from src.abstractive_summarizer import abstractive_summarizer
# abstractive summarizer model
@st.cache(allow_output_mutation=True)
def load_abs_model():
    # load the T5 tokenizer and model once and cache them across Streamlit reruns
    tokenizer = T5Tokenizer.from_pretrained("t5-base")
    model = T5ForConditionalGeneration.from_pretrained("t5-base")
    return tokenizer, model
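
# A rough, commented sketch (an assumption, not the actual code) of what the
# local helper `abstractive_summarizer` in src/abstractive_summarizer.py is
# expected to do with the tokenizer/model pair returned above: prefix the
# input with "summarize: ", encode it, generate, and decode.
#
#   def abstractive_summarizer(tokenizer, model, text):
#       inputs = tokenizer(
#           "summarize: " + text, return_tensors="pt", max_length=512, truncation=True
#       )
#       summary_ids = model.generate(
#           inputs["input_ids"], max_length=150, min_length=40, num_beams=4
#       )
#       return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
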
if __name__ == "__main__":
    # ---------------------------------
    # Main Application
    # ---------------------------------
    st.title("Text Summarizer 📝")
    summarize_type = st.sidebar.selectbox(
        "Summarization type", options=["Extractive", "Abstractive"]
    )
    inp_text = st.text_input("Enter text or a url here")
    is_url = validators.url(inp_text)
    if is_url:
        # complete text, chunks to summarize (list of sentences for long docs)
        text, text_to_summarize = fetch_article_text(url=inp_text)
    else:
        text_to_summarize = clean_text(inp_text)

    # show the raw input text in an expander
    with st.expander("View input text"):
        st.write(inp_text)

    summarize = st.button("Summarize")

    # run summarization when the button is clicked
    if summarize:
        if summarize_type == "Extractive":
            # extractive summarizer
            with st.spinner(
                text="Creating extractive summary. This might take a few seconds ..."
            ):
                ext_model = Summarizer()
                summarized_text = ext_model(text_to_summarize, num_sentences=6)

        elif summarize_type == "Abstractive" and is_url:
            # for urls, text_to_summarize is a list of chunks, so summarize each
            # chunk with the default summarization pipeline and join the results
            abs_url_summarizer = pipeline("summarization")
            tmp_sum = abs_url_summarizer(
                text_to_summarize, max_length=120, min_length=30, do_sample=False
            )
            # the pipeline returns one dict per chunk with a "summary_text" key
            summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])

        elif summarize_type == "Abstractive":
            # abstractive summarizer (T5)
            with st.spinner(
                text="Creating abstractive summary. This might take a few seconds ..."
            ):
                abs_tokenizer, abs_model = load_abs_model()
                summarized_text = abstractive_summarizer(
                    abs_tokenizer, abs_model, text_to_summarize
                )

        # final summarized output
        st.subheader("Summarized text")
        st.info(summarized_text)
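
# To run this app locally (assuming the local packages extractive_summarizer/
# and src/ imported above sit next to this file; dependency versions are not
# pinned here):
#   pip install streamlit transformers torch sentencepiece validators
#   streamlit run app.py   # file name assumed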