acecalisto3 committed on
Commit
49199b4
1 Parent(s): 499690c

Create i_search.py

Browse files
Files changed (1) hide show
  1. i_search.py +59 -0
i_search.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
# Browser-like HTTP request headers (desktop Firefox user agent). Passed as
# ``headers=`` when this module issues its Google search request.
headers_Get = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) "
        "Gecko/20100101 Firefox/49.0"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}
13
+
14
def i_search(url, timeout=10):
    """Fetch *url* and return the text content of the page.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    The text of every ``<p>`` element is joined with single spaces; if that
    produces an empty string, the text of every ``<article>`` element is
    joined the same way and used instead.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The extracted text (possibly empty), or a fixed error-message string
        when the download, the HTTP status check, or the parsing fails.
    """
    try:
        # The request itself is inside the ``try`` so that connection errors
        # and timeouts yield the error message instead of propagating.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        out = ' '.join(p.text for p in soup.find_all('p'))
        if not out:
            # No paragraph text found: fall back to <article> elements.
            out = ' '.join(a.text for a in soup.find_all('article'))
        return out
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand the
        # caller a readable message rather than raising.
        print(e)
        return "An Error occured when fetching this website. Please check the URL and try again, or use a different URL"
28
+
29
+
30
+
31
def b_search(q, timeout=10):
    """Fetch the page at *q* and collect the first link of each ``<article>``.

    Args:
        q: URL of the page to fetch; it is passed to ``requests.get`` as-is.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'text': ..., 'url': ...}`` dicts, one for every
        ``<article>`` element whose first ``<a>`` carries an ``href``
        attribute. ``text`` is the anchor text with surrounding whitespace
        stripped.
    """
    r = requests.get(q, timeout=timeout)
    soup = BeautifulSoup(r.text, "html.parser")
    output = []
    for article in soup.find_all('article'):
        link = article.find('a')
        # Skip articles with no anchor, or whose anchor has no href, instead
        # of failing on the subscript.
        if link is None or not link.has_attr('href'):
            continue
        output.append({'text': link.text.strip(), 'url': link['href']})
    return output
45
def google(q, timeout=10):
    """Run a Google web search for *q* and return the parsed result links.

    Args:
        q: Free-text search query. Runs of whitespace are collapsed to a
            single space before the query is sent.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'text': ..., 'url': ...}`` dicts, one for every
        ``<h3 class="r">`` element whose first ``<a>`` carries an ``href``
        attribute.
    """
    # Let requests build the query string so that characters such as '&',
    # '#', '+' and '%' in the query are percent-encoded rather than
    # corrupting the URL. Spaces are still sent as '+'.
    params = {'q': ' '.join(q.split()), 'ie': 'utf-8', 'oe': 'utf-8'}
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as s:
        r = s.get(
            'https://www.google.com/search',
            params=params,
            headers=headers_Get,
            timeout=timeout,
        )

    soup = BeautifulSoup(r.text, "html.parser")
    output = []
    # NOTE: this selector depends on Google's result-page markup and may need
    # updating when that markup changes.
    for heading in soup.find_all('h3', {'class': 'r'}):
        link = heading.find('a')
        # Skip headings with no anchor, or whose anchor has no href, instead
        # of failing on the subscript.
        if link is None or not link.has_attr('href'):
            continue
        output.append({'text': link.text.strip(), 'url': link['href']})
    return output