Spaces:

Bayhaqy
/

Classification-News-Analysis-and-Prediction

Running

Classification-News-Analysis-and-Prediction

File size: 5,338 Bytes

from newspaper import Article
import streamlit as st
import requests
import json
import time
import os
import nltk
# Make sure the NLTK "punkt" tokenizer data is available (presumably needed by
# article.nlp() further down — confirm against the newspaper docs).
# Streamlit re-executes this script on every interaction, so only fall back to
# the download when the data is not already installed locally.
try:
  nltk.data.find('tokenizers/punkt')
except LookupError:
  nltk.download('punkt')

## ............................................... ##
# Page configuration: browser-tab title, wide layout and tab icon.
# Called exactly once, ahead of every other Streamlit element in this script.
st.set_page_config(
  page_title='Website Article Summarize',
  layout='wide',
  page_icon='📃',
)


## ............................................... ##
with st.container():
  # Page header: app title, a one-line description, then the author credit.
  _app_tagline = (
    "**Generate summaries of articles from websites using abstractive "
    "summarization with Language Model and Library NewsPaper.**"
  )
  st.title("📃 Website Article Summarize")
  st.markdown(_app_tagline)
  st.caption("Created by Bayhaqy.")

## ............................................... ##
with st.container():
  # Sidebar content: which models the app can use and how to get a
  # HuggingFace access token for them.
  st.sidebar.subheader("About the app")
  # The pegasus link includes the owning account ("thonyyy/"), matching the
  # inference endpoint this script calls for that model; without the owner
  # prefix the URL does not point at the model page.
  st.sidebar.info(
    "This app uses optional 🤗HuggingFace's Model "
    "[facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn) "
    "or [pegasus_indonesian_base-finetune](https://huggingface.co/thonyyy/pegasus_indonesian_base-finetune) "
    "model and Library NewsPaper."
  )
  st.sidebar.write("\n\n")
  st.sidebar.markdown("**Get a free API key from HuggingFace:**")
  st.sidebar.markdown("* Create a [free account](https://huggingface.co/join) or [login](https://huggingface.co/login)")
  st.sidebar.markdown("* Go to **Settings** and then **Access Tokens**")
  st.sidebar.markdown("* Create a new Token (select 'read' role)")
  # The script reads the key from the HF_TOKEN environment variable; it has no
  # text box for it, so the instruction names the variable instead.
  st.sidebar.markdown("* Set your API key as the `HF_TOKEN` environment variable")
  st.sidebar.divider()
  st.sidebar.write("Please make sure you choose the correct model and is not behind a paywall.")
  st.sidebar.write("\n\n")
  st.sidebar.divider()
    
with st.container():
  # Inputs: the article URL. `url` is also read by the summarize step below.
  st.subheader("Enter the URL of the website article you want to summarize")
  default_url = "https://"
  url = st.text_input("URL:", default_url)

  # Browser-like User-Agent sent with the reachability check below
  # (presumably because some sites reject the default client string).
  headers_ = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
  }

  st.markdown("You need to Fetch Article first, and then Submit to Summarize.")
  ## ............................................... ##
  if st.button("Fetch article"):
    article_url = url

    try:
      # Check that the page is reachable first. The session is used as a
      # context manager so its pooled connections are released afterwards.
      with requests.Session() as session:
        response_ = session.get(article_url, headers=headers_, timeout=10)

      if response_.status_code == 200:
        # The spinner now wraps the real download/parse work instead of an
        # artificial sleep.
        with st.spinner('Fetching your article...'):
          article = Article(url)
          article.download()
          article.parse()

        title = article.title
        text = article.text

        # Report success only after the article was actually downloaded and
        # parsed; a failure above jumps straight to the except branch.
        st.success('Your article is ready for summarization!')

        st.divider()
        st.subheader("Real Article")
        with st.expander("See Details"):
          st.markdown(f"Your article: **{title}**")
          st.markdown(f"**{text}**")
          st.divider()

      else:
        st.write("Error occurred while fetching article.")

    # Deliberately broad: any request or parsing failure is shown to the user
    # instead of crashing the page.
    except Exception as e:
      st.write(f"Error occurred while fetching article: {e}")

with st.container():
  # The HuggingFace access token is read from the environment.
  HF_TOKEN = os.environ.get("HF_TOKEN", None)

  # Only send an Authorization header when a token is configured; formatting
  # a missing token would otherwise send the literal string "Bearer None".
  if HF_TOKEN:
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
  else:
    headers = {}
    st.warning("HF_TOKEN is not set; summarization requests will be sent without an access token and may be rejected.")

  # Display name -> inference endpoint. The same mapping feeds the selectbox
  # options, so the two cannot drift apart.
  _MODEL_ENDPOINTS = {
    "bart-large-cnn": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    "pegasus_indonesian_base-finetune": "https://api-inference.huggingface.co/models/thonyyy/pegasus_indonesian_base-finetune",
  }

  # Selectbox to choose between models
  selected_api_url = st.selectbox("Select Model", options=list(_MODEL_ENDPOINTS))

  # Endpoint of the selected model, used by the summarize step.
  API_URL = _MODEL_ENDPOINTS[selected_api_url]

with st.container():
  # On submit: download and parse the article, summarize it with the selected
  # HuggingFace model and with the newspaper library, then show both
  # summaries followed by the full article text.
  if st.button("Submit to Summarize"):

    def query_sum(payload):
      """POST `payload` to the selected inference endpoint and return the decoded JSON body."""
      # A timeout keeps a stalled endpoint from hanging the Streamlit run.
      response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
      return response.json()

    def _summary_from_output(output):
      """Return the summary text from an inference response, or a readable fallback."""
      # The success shape read here is a list whose first item holds
      # 'summary_text'; '<n>' markers in it are replaced by spaces.
      if isinstance(output, list) and output and isinstance(output[0], dict):
        generated = output[0].get('summary_text', '').replace('<n>', " ")
        if generated:
          return generated
      # NOTE(review): the API is expected to report failures as a dict with an
      # 'error' key — confirm against the HuggingFace docs. Indexing such a
      # dict with [0] is what raised KeyError before.
      elif isinstance(output, dict) and output.get('error'):
        return f"Summary not available: {output['error']}"
      return "Summary not available"

    try:
      article = Article(url)
      article.download()
      article.parse()
      article.nlp()
    # Deliberately broad: any download/parse failure is shown to the user
    # instead of crashing the page.
    except Exception as e:
      st.write(f"Error occurred while fetching article: {e}")
    else:
      title = article.title
      text = article.text
      summ = article.summary

      with st.spinner('Doing some AI magic, please wait...'):
        if text.strip():
          try:
            summary = _summary_from_output(query_sum({"inputs": text}))
          # Network failures and non-JSON response bodies end up here.
          except (requests.RequestException, ValueError) as e:
            summary = f"Summary not available: {e}"
        else:
          # Nothing was extracted from the page, so there is nothing to send.
          summary = "Summary not available"

      # Three result sections with the same layout: AI summary, newspaper
      # summary, full article text.
      for heading, body in (
        ("Summary AI", summary),
        ("Summary Library NewsPaper", summ),
        ("Real Article", text),
      ):
        st.divider()
        st.subheader(heading)
        with st.expander("See Details"):
          st.markdown(f"Your article: **{title}**")
          st.markdown(f"**{body}**")

with st.container():
  # Footer: a horizontal rule followed by the copyright notice.
  for _footer_line in ("----", "© 2023 Website Article Summarize App"):
    st.markdown(_footer_line)