|
import feedparser |
|
from pathlib import Path |
|
import json |
|
from db import Database |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
from datetime import datetime |
|
|
|
# Scheduler instance that the periodic caching job further down is added to.
scheduler = BackgroundScheduler()

# Storage handle used by cache_news(); constructed with the relative path "./"
# (see db.Database for how that path is interpreted).
database = Database(Path("./"))
|
|
|
|
|
def get_feed(feed_url):
    """Parse the feed at ``feed_url`` and return its entries.

    Args:
        feed_url: Feed location, passed unchanged to ``feedparser.parse``.

    Returns:
        A dict with a single ``"entries"`` key holding the parsed entries.

    NOTE(review): feedparser usually signals malformed or unreachable feeds
    through the result's ``bozo`` flag rather than by raising, so a broken
    feed may come back here as an empty entry list -- confirm that callers
    are fine with that.
    """
    parsed = feedparser.parse(feed_url)
    return {"entries": parsed["entries"]}
|
|
|
|
|
def cache_news():
    """Fetch every feed in ``TOP_NEWS_FEEDS`` and hand the results to the database.

    Each feed that is fetched and serialized successfully contributes one
    ``(url, label, json_payload)`` tuple. A failure is printed and that feed
    is skipped, so one bad feed does not abort the whole run. All collected
    tuples are passed to ``database.insert`` in a single call after every
    feed has been tried.
    """
    print("Caching news")
    rows = []
    for source in TOP_NEWS_FEEDS:
        url = source["url"]
        label = source["label"]
        print(f"Fetching {label} from {url}")
        try:
            # Serialization stays inside the guard: a payload that cannot be
            # encoded is treated the same way as a failed fetch.
            payload = json.dumps(get_feed(url))
        except Exception as e:  # deliberate best-effort: report and move on
            print(f"Failed to fetch {label} from {url}: {e}")
        else:
            rows.append((url, label, payload))

    database.insert(rows)
|
|
|
|
|
# Register cache_news as an hourly interval job. Passing
# next_run_time=datetime.now() requests the first run immediately rather than
# one full interval after the scheduler starts.
scheduler.add_job(
    cache_news,
    "interval",
    hours=1,
    id="cache_news",
    next_run_time=datetime.now(),
)
|
|
|
|
|
# Feeds fetched by cache_news(). Each entry is a dict with a display "label"
# and the feed "url". Labels are not unique: several outlets appear more than
# once with different URLs (http/https variants, alternative hosts).
TOP_NEWS_FEEDS = [
    {"label": "BBC World News", "url": "http://feeds.bbci.co.uk/news/world/rss.xml"},
    {
        "label": "Reddit World News",
        "url": "https://www.reddit.com/r/worldnews/top/.rss",
    },
    {"label": "Vox", "url": "http://www.vox.com/rss/index.xml"},
    {"label": "CBS News", "url": "https://www.cbsnews.com/latest/rss/main"},
    {"label": "ABC News", "url": "http://abcnews.go.com/abcnews/topstories"},
    {"label": "CNN Top Stories", "url": "http://rss.cnn.com/rss/cnn_topstories.rss"},
    {"label": "CNN World News", "url": "http://rss.cnn.com/rss/cnn_world.rss"},
    {
        "label": "The New York Times",
        "url": "http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml",
    },
    {
        "label": "The Economist",
        "url": "http://www.economist.com/sections/business-finance/rss.xml",
    },
    {"label": "The Guardian", "url": "https://www.theguardian.com/international/rss"},
    {"label": "NPR", "url": "http://www.npr.org/rss/rss.php?id=1001"},
    {"label": "Al Jazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml"},
    {
        "label": "The Guardian World News",
        "url": "https://www.theguardian.com/world/rss",
    },
    {"label": "The Atlantic", "url": "https://www.theatlantic.com/feed/all/"},
    {"label": "Vice", "url": "http://www.vice.com/rss"},
    {
        "label": "The New York Times",
        "url": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
    },
    {
        "label": "The New Yorker",
        "url": "http://www.newyorker.com/services/rss/feeds/everything.xml",
    },
    {"label": "Pew Research Center", "url": "http://www.pewresearch.org/feed/"},
    {"label": "Fox News", "url": "http://feeds.feedburner.com/foxnews/latest"},
    {
        "label": "The Washington Post",
        "url": "http://feeds.washingtonpost.com/rss/world",
    },
    {"label": "The Guardian UK", "url": "https://www.theguardian.com/uk/rss"},
    {"label": "TIME", "url": "http://rss.time.com/web/time/rss/top/index.xml"},
    {
        "label": "The New York Times",
        "url": "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
    },
    {"label": "NPR", "url": "https://feeds.npr.org/1001/rss.xml"},
    {"label": "Fortune", "url": "http://fortune.com/feed/"},
    {"label": "Fox News", "url": "http://feeds.foxnews.com/foxnews/latest"},
    {
        "label": "BBC World News",
        "url": "http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml",
    },
    {"label": "Al Jazeera", "url": "http://www.aljazeera.com/xml/rss/all.xml"},
    {"label": "Le Monde", "url": "https://www.lemonde.fr/rss/une.xml"},
    {"label": "Vox", "url": "https://www.vox.com/rss/index.xml"},
    {
        "label": "The New York Times",
        "url": "http://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    },
    {"label": "The Guardian US", "url": "https://www.theguardian.com/us/rss"},
    {"label": "ProPublica", "url": "http://feeds.propublica.org/propublica/main"},
    {"label": "The Washington Post", "url": "https://feedx.net/rss/washingtonpost.xml"},
    {"label": "Axios", "url": "https://api.axios.com/feed/top/"},
    {"label": "RT", "url": "https://www.rt.com/rss/"},
    {"label": "ABC News US", "url": "http://feeds.abcnews.com/abcnews/usheadlines"},
    # NOTE(review): identical URL to "CNN Top Stories" above, so this feed is
    # fetched twice per run -- confirm whether both labels are needed.
    {"label": "CNN US", "url": "http://rss.cnn.com/rss/cnn_topstories.rss"},
    {"label": "CBS News", "url": "http://www.cbsnews.com/latest/rss/main"},
    {
        "label": "The Wall Street Journal",
        "url": "http://online.wsj.com/xml/rss/3_7085.xml",
    },
    {
        "label": "USA Today",
        "url": "http://content.usatoday.com/marketing/rss/rsstrans.aspx?feedId=news2",
    },
    {
        "label": "The Christian Science Monitor",
        "url": "http://rss.csmonitor.com/feeds/usa",
    },
    {
        "label": "NBC News Top Stories",
        "url": "http://feeds.nbcnews.com/feeds/topstories",
    },
    {"label": "NBC News World News", "url": "http://feeds.nbcnews.com/feeds/worldnews"},
    {
        "label": "Reuters World News",
        "url": "http://feeds.reuters.com/Reuters/worldNews",
    },
    {
        "label": "Reuters US News",
        "url": "http://feeds.reuters.com/Reuters/domesticNews",
    },
    {
        "label": "Associated Press US Headlines",
        "url": "http://hosted.ap.org/lineups/USHEADS.rss",
    },
    {
        "label": "Associated Press World Headlines",
        "url": "http://hosted.ap.org/lineups/WORLDHEADS.rss",
    },
    {
        "label": "HuffPost World News",
        "url": "http://www.huffingtonpost.com/feeds/verticals/world/index.xml",
    },
    {
        "label": "BBC News US and Canada",
        "url": "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml",
    },
    {"label": "Yahoo News US", "url": "http://news.yahoo.com/rss/us"},
    {"label": "Yahoo News World", "url": "http://rss.news.yahoo.com/rss/world"},
    {"label": "Newsweek", "url": "http://www.newsweek.com/rss"},
    {
        "label": "The Daily Beast",
        "url": "http://feeds.feedburner.com/thedailybeast/articles",
    },
    {"label": "Quartz", "url": "http://qz.com/feed"},
    {"label": "The Guardian USA", "url": "http://www.theguardian.com/world/usa/rss"},
    {"label": "Politico", "url": "http://www.politico.com/rss/politicopicks.xml"},
    {"label": "The New Yorker News", "url": "http://www.newyorker.com/feed/news"},
    {"label": "PBS NewsHour", "url": "http://feeds.feedburner.com/NationPBSNewsHour"},
    {"label": "PBS NewsHour World", "url": "http://feeds.feedburner.com/NewshourWorld"},
    {"label": "NPR Politics", "url": "http://www.npr.org/rss/rss.php?id=1003"},
    {"label": "NPR World", "url": "http://www.npr.org/rss/rss.php?id=1004"},
    {
        "label": "The Atlantic National",
        "url": "http://feeds.feedburner.com/AtlanticNational",
    },
    {
        "label": "The Atlantic Wire",
        "url": "http://feeds.feedburner.com/TheAtlanticWire",
    },
    {
        "label": "Los Angeles Times US",
        "url": "http://www.latimes.com/nation/rss2.0.xml",
    },
    {
        "label": "Los Angeles Times World",
        "url": "http://www.latimes.com/world/rss2.0.xml",
    },
    {
        "label": "Breaking News",
        "url": "http://api.breakingnews.com/api/v1/item/?format=rss",
    },
    {"label": "VICE News", "url": "https://news.vice.com/rss"},
    {
        "label": "Talking Points Memo",
        "url": "http://talkingpointsmemo.com/feed/livewire",
    },
    {"label": "TIME Newsfeed", "url": "http://time.com/newsfeed/feed/"},
    {"label": "Fox News", "url": "http://feeds.foxnews.com/foxnews/latest?format=xml"},
    {"label": "Mashable US & World", "url": "http://mashable.com/us-world/rss/"},
]
|
|