File size: 5,338 Bytes
4910f63 3319c4e 4910f63 49c5f7e 4910f63 b5fcd23 4910f63 49c5f7e 4910f63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from newspaper import Article
import streamlit as st
import requests
import json
import time
import os
import nltk
# Fetch the NLTK 'punkt' tokenizer data every time the script starts
# (presumably needed by the `article.nlp()` call further down -- TODO confirm).
nltk.download("punkt")

## ............................................... ##
# Page configuration (call this once and adjust the values as needed).
st.set_page_config(
    page_title='Website Article Summarize',
    page_icon='📃',
    layout='wide',
)

## ............................................... ##
# Page header: title, one-line description and author credit, all written
# into a single container.
header = st.container()
header.title("📃 Website Article Summarize")
header.markdown(
    "**Generate summaries of articles from websites using abstractive "
    "summarization with Language Model and Library NewsPaper.**"
)
header.caption("Created by Bayhaqy.")
## ............................................... ##
# Sidebar: which models/libraries the app relies on, how to obtain a
# HuggingFace token, and a short usage reminder.
with st.container():
    st.sidebar.subheader("About the app")
    # The Pegasus model is published under the `thonyyy` namespace (the same
    # repository the Inference API URL of this app targets); without the
    # namespace the link leads nowhere.
    st.sidebar.info(
        "This app uses optional 🤗HuggingFace's Model "
        "[facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) "
        "or [pegasus_indonesian_base-finetune]"
        "(https://huggingface.co/thonyyy/pegasus_indonesian_base-finetune) "
        "model and Library NewsPaper."
    )
    st.sidebar.write("\n\n")

    # Step-by-step instructions, one markdown bullet per step.
    st.sidebar.markdown("**Get a free API key from HuggingFace:**")
    token_steps = (
        "* Create a [free account](https://huggingface.co/join) or [login](https://huggingface.co/login)",
        "* Go to **Settings** and then **Access Tokens**",
        "* Create a new Token (select 'read' role)",
        "* Paste your API key in the text box",
    )
    for step in token_steps:
        st.sidebar.markdown(step)

    st.sidebar.divider()
    st.sidebar.write("Please make sure you choose the correct model and is not behind a paywall.")
    st.sidebar.write("\n\n")
    st.sidebar.divider()
# Inputs: the article URL typed by the user, plus the browser-like request
# headers that are sent when the article page is fetched.
default_url = "https://"
headers_ = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.82 Safari/537.36'
    ),
}

input_area = st.container()
input_area.subheader("Enter the URL of the website article you want to summarize")
# The text box is pre-filled with the scheme so the user only adds the rest.
url = input_area.text_input("URL:", default_url)
input_area.markdown("You need to Fetch Article first, and then Submit to Summarize.")
## ............................................... ##
# Fetch step: download the page once with the browser-like headers, let
# newspaper parse that HTML, and show the extracted title and text so the user
# can check them before asking for a summary.
if st.button("Fetch article"):
    article_url = url
    article = None  # stays None when the server did not answer with HTTP 200
    try:
        # The spinner wraps the actual network and parsing work.
        with st.spinner('Fetching your article...'):
            # Context manager releases the session's pooled connections.
            with requests.Session() as session:
                response_ = session.get(article_url, headers=headers_, timeout=10)

            if response_.status_code == 200:
                # When the server declares no charset, requests assumes
                # ISO-8859-1 for text content; detect the encoding from the
                # body instead so non-Latin pages are decoded correctly.
                content_type = response_.headers.get("content-type", "")
                if "charset" not in content_type.lower():
                    response_.encoding = response_.apparent_encoding

                # Hand the already-fetched HTML to newspaper instead of
                # downloading the same page a second time.
                article = Article(article_url)
                article.download(input_html=response_.text)
                article.parse()
    except Exception as e:
        # UI boundary: report any network/parsing failure instead of crashing.
        st.write(f"Error occurred while fetching article: {e}")
    else:
        if article is None:
            st.write(f"Error occurred while fetching article (HTTP {response_.status_code}).")
        else:
            # Only announce success once the article really has been parsed.
            st.success('Your article is ready for summarization!')
            title = article.title
            text = article.text
            st.divider()
            st.subheader("Real Article")
            with st.expander("See Details"):
                st.markdown(f"Your article: **{title}**")
                st.markdown(f"**{text}**")
            st.divider()
# Inference API endpoint for each model offered in the select box.
_MODEL_ENDPOINTS = {
    "bart-large-cnn": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    "pegasus_indonesian_base-finetune": "https://api-inference.huggingface.co/models/thonyyy/pegasus_indonesian_base-finetune",
}

with st.container():
    # HuggingFace API token: read from the HF_TOKEN environment variable; when
    # it is not set, ask the user for it in a password field (this is the
    # "text box" the sidebar instructions refer to).
    HF_TOKEN = os.environ.get("HF_TOKEN")
    if not HF_TOKEN:
        HF_TOKEN = st.text_input("Enter your HuggingFace API key", type="password").strip()

    # Send an Authorization header only when a token is available; otherwise
    # the request would carry the literal value "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

    # Selectbox to choose between models; the endpoint follows the selection.
    selected_api_url = st.selectbox("Select Model", options=list(_MODEL_ENDPOINTS))
    API_URL = _MODEL_ENDPOINTS[selected_api_url]
def query_sum(payload):
    """POST *payload* to the selected Inference API model and return the decoded JSON.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: when the response body is not valid JSON.
    """
    # Bounded wait so a stalled request cannot hang the app indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()


def _extract_summary(output):
    """Return ``(summary, error)`` for a decoded Inference API response.

    A successful response is a non-empty list whose first item holds
    ``summary_text``; an error response is a dict with an ``error`` entry.
    ``error`` is ``None`` unless the API reported one. Anything else (empty,
    ``None``, unexpected shape) yields the "Summary not available" fallback.
    """
    fallback = "Summary not available"
    if isinstance(output, dict) and output.get("error"):
        return fallback, str(output["error"])
    if isinstance(output, list) and output and isinstance(output[0], dict):
        # '<n>' is replaced by a space so it does not show up in the rendered text.
        cleaned = output[0].get('summary_text', '').replace('<n>', " ")
        if cleaned:
            return cleaned, None
    return fallback, None


with st.container():
    # Summarize step: re-download and parse the article, build newspaper's own
    # summary, ask the selected HuggingFace model for an abstractive summary,
    # then show both summaries next to the full article text.
    if st.button("Submit to Summarize"):
        try:
            with st.spinner('Doing some AI magic, please wait...'):
                article = Article(url)
                article.download()
                article.parse()
                article.nlp()
                title = article.title
                text = article.text
                summ = article.summary
                # Skip the API call when no text could be extracted.
                output_sum = query_sum({"inputs": text}) if text.strip() else None
        except Exception as e:
            # UI boundary: report download/parsing/API failures instead of crashing.
            st.write(f"Error occurred while summarizing article: {e}")
        else:
            summary, api_error = _extract_summary(output_sum)
            if api_error:
                st.warning(f"HuggingFace API error: {api_error}")

            st.divider()
            st.subheader("Summary AI")
            with st.expander("See Details"):
                st.markdown(f"Your article: **{title}**")
                st.markdown(f"**{summary}**")
            st.divider()
            st.subheader("Summary Library NewsPaper")
            with st.expander("See Details"):
                st.markdown(f"Your article: **{title}**")
                st.markdown(f"**{summ}**")
            st.divider()
            st.subheader("Real Article")
            with st.expander("See Details"):
                st.markdown(f"Your article: **{title}**")
                st.markdown(f"**{text}**")
# Footer: a horizontal rule followed by the copyright line.
with st.container():
    st.markdown("----")
    st.markdown("© 2023 Website Article Summarize App")