File size: 978 Bytes
2bdbf4a
 
 
5a2da72
2bdbf4a
 
 
 
 
 
 
 
 
 
 
 
 
 
5a2da72
2bdbf4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os

from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup

def run(text):
    """Search DuckDuckGo for ``text`` and scrape the first result.

    Args:
        text: The search query passed to ``ddg``.

    Returns:
        A ``(page_text, results)`` tuple. ``results`` is the list returned
        by ``ddg``; ``page_text`` is what ``bs4`` extracts from the ``href``
        of the first result. When the search returns no results,
        ``page_text`` is an empty string and ``results`` is the empty
        result list.
    """
    results = ddg(text)
    if not results:
        # Nothing to scrape: return early instead of failing with an
        # IndexError on results[0].
        return "", results

    page_text = bs4(results[0]['href'])
    return page_text, results

def ddg(text, max_results=5):
    """Run a DuckDuckGo text search for ``text`` and return the hits as a list.

    ``max_results`` is forwarded to the search client's ``text`` call.
    """
    with DDGS() as client:
        hits = list(client.text(text, max_results=max_results))
    return hits

def bs4(url, timeout=10):
    """Download ``url`` and return its text with markup and blank lines removed.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block the caller
            indefinitely. Defaults to 10.

    Returns:
        The page text as a single string, one non-empty chunk per line.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (for example on connection failure or when
            the timeout expires).
    """
    html = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(html, features="html.parser")

    # Remove <script> and <style> elements so their contents do not end up
    # in the extracted text.
    for tag in soup(["script", "style"]):
        tag.extract()

    raw_text = soup.get_text()

    # Strip leading/trailing whitespace from every line.
    lines = (line.strip() for line in raw_text.splitlines())
    # Split each line on double spaces so that phrases separated by wide
    # gaps each land on their own line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop empty chunks and rejoin with newlines.
    return '\n'.join(chunk for chunk in chunks if chunk)