Navanjana committed • commit d0a7f7b
1 Parent(s): 11db7b9
Create app.py
app.py
ADDED
@@ -0,0 +1,134 @@
import json
import requests
import wikipedia
import gradio as gr
import transformers
import spacy
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


# Set up Google SERP API credentials
serp_api_key = '5924c6cfe5fec240e39838ff06439c8d36d294a0'  # Replace with your actual Google SERP API key

# Load the BART summarization model
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

# Function to send a message and receive a response from the chatbot
def chat(message):
    try:
        # You can add your chatbot implementation here
        return "."
    except Exception as e:
        print("An error occurred:", e)
        return ""

# Function to get the latest answers from the Google SERP API
def get_latest_answers(query):
    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": query
    })
    headers = {
        'X-API-KEY': serp_api_key,
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    try:
        # Parse the response JSON
        data = json.loads(response.text)

        # Extract details from the response
        output = ""

        if 'knowledgeGraph' in data:
            knowledge_graph = data['knowledgeGraph']
            output += "Website: {}\n".format(knowledge_graph.get('website'))
            output += "Description: {}\n".format(knowledge_graph.get('description'))

        if 'organic' in data:
            organic_results = data['organic']
            for result in organic_results:
                output += "Snippet: {}\n".format(result.get('snippet'))

        if 'peopleAlsoAsk' in data:
            people_also_ask = data['peopleAlsoAsk']
            for question in people_also_ask:
                output += "Snippet: {}\n".format(question.get('snippet'))

        return output

    except json.JSONDecodeError:
        print("Could not parse the SERP API response as JSON.")
        return ""

    except Exception as e:
        print("An error occurred while querying the SERP API:", e)
        return ""

# Function to search Wikipedia for an answer and summarize it
def search_wikipedia(query):
    try:
        search_results = wikipedia.search(query)

        # Get the page summary of the first search result
        if search_results:
            page_title = search_results[0]
            page_summary = wikipedia.summary(page_title)
            return page_summary
        else:
            print("No Wikipedia results found for the query.")
            return None
    except wikipedia.exceptions.DisambiguationError as e:
        # Handle disambiguation error
        print("Wikipedia disambiguation error:", e)
        return None
    except wikipedia.exceptions.PageError as e:
        # Handle page not found error
        print("Wikipedia page not found:", e)
        return None
    except Exception as e:
        # Handle other exceptions
        print("An error occurred while searching Wikipedia:", e)
        return None

# Function to generate a summarized paragraph using BART summarization
def generate_summary(user_input):
    output = get_latest_answers(user_input)
    page_summary = search_wikipedia(user_input)
    chat_answer = chat(user_input)  # placeholder; not yet included in the combined input text

    # Combine the input text from various sources
    input_text = f"Google:\n{output}\nWikipedia:\n{page_summary}\n"

    # Tokenize and generate a summary
    input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(input_ids, max_length=200, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)

    # Decode the summary
    summarized_paragraph = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summarized_paragraph

# Define the Gradio interface
def summarizer_interface(user_input):
    summarized_text = generate_summary(user_input)
    return summarized_text

iface = gr.Interface(
    fn=summarizer_interface,
    inputs="text",
    outputs="text",
    title="Osana Web-GPT",
    description="Enter your query and get the latest and best answer.",
    theme="black",
    layout="horizontal",
)

# Launch the interface
iface.launch()
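
Once the Space is running, the interface can also be exercised programmatically instead of through the web UI. The following is a minimal sketch, not part of the commit, assuming the gradio_client package is installed, the app is serving at Gradio's default local address, and a valid Serper API key is configured; the query string is only an illustrative example.

# sketch: call the running summarizer interface from a separate script
from gradio_client import Client

client = Client("http://127.0.0.1:7860")  # local URL printed by iface.launch()
result = client.predict(
    "What is the James Webb Space Telescope?",  # hypothetical example query
    api_name="/predict",
)
print(result)

The same call works against the hosted Space by passing its public URL (or "owner/space-name") to Client instead of the local address.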