Spaces:
Sleeping
Sleeping
updates
Browse files
agent.py
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
|
4 |
+
from omegaconf import OmegaConf
|
5 |
+
import requests
|
6 |
+
from typing import Tuple
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
load_dotenv(override=True)
|
11 |
+
|
12 |
+
from pydantic import Field, BaseModel
|
13 |
+
from vectara_agent.agent import Agent
|
14 |
+
from vectara_agent.tools import ToolsFactory, VectaraToolFactory
|
15 |
+
from vectara_agent.tools_catalog import summarize_text
|
16 |
+
|
17 |
+
# Default opening prompt string exposed by this module.
initial_prompt = "How can I help you today?"

# Browser-like HTTP headers; get_story_text passes these to requests.get
# when it downloads a Hacker News item page.
get_headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) "
        "Gecko/20100101 Firefox/98.0"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}
|
26 |
+
|
27 |
+
|
28 |
+
def create_assistant_tools(cfg):
    """
    Build the list of tools handed to the Hacker News agent.

    Args:
        cfg: Configuration object. This function reads ``api_key``,
            ``customer_id``, ``corpus_id`` and ``tavily_api_key`` from it.

    Returns:
        A list made of, in order: tools created from the Hacker News helper
        functions defined below, the Tavily tools, the factory's standard
        tools, its guardrail tools, and the ``ask_hackernews`` RAG tool.
    """

    class QueryHackerNews(BaseModel):
        query: str = Field(..., description="The user query.")

    # Base URL of the Hacker News Firebase API, shared by every helper below.
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Seconds to wait on any single HTTP call; without a timeout requests can
    # block forever on a stalled connection.
    request_timeout = 30

    def _fetch_json(path: str):
        """Fetch ``path`` (relative to the API base URL) and return the decoded JSON."""
        response = requests.get(f"{db_url}{path}", timeout=request_timeout)
        # Fail with a clear HTTP error instead of trying to use an error payload.
        response.raise_for_status()
        return response.json()

    vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
                                     vectara_customer_id=cfg.customer_id,
                                     vectara_corpus_id=cfg.corpus_id)
    tools_factory = ToolsFactory()

    # The description text is kept flush-left so that no source indentation
    # ends up inside the string.
    ask_hackernews = vec_factory.create_rag_tool(
        tool_name="ask_hackernews",
        tool_description="""
Responds to query based on information and stories in hacker news from the last 6-9 months.
""",
        tool_args_schema=QueryHackerNews,
        reranker="multilingual_reranker_v1", rerank_k=100,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer='vectara-summary-ext-24-05-med-omni',
        include_citations=True,
    )

    # NOTE(review): the docstrings of the functions below are presumably
    # surfaced to the agent as tool descriptions - confirm before rewording.

    def get_top_stories(
        n_stories: int = Field(default=10, description="The number of top stories to return.")
    ) -> list[str]:
        """
        Get the top stories from hacker news.
        Returns a list of story IDS for the top stories right now. These are the top stories on hacker news.
        """
        return _fetch_json("topstories.json")[:n_stories]

    def get_show_stories(
        n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
    ) -> list[str]:
        """
        Get the top SHOW HN stories from hacker news.
        Returns a list of story IDS for the top SHOW HN stories right now. These are stories where users show their projects.
        """
        return _fetch_json("showstories.json")[:n_stories]

    def get_ask_stories(
        n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
    ) -> list[str]:
        """
        Get the top ASK HN stories from hacker news.
        Returns a list of story IDS for the top ASK HN stories right now. These are stories where users ask questions to the community.
        """
        return _fetch_json("askstories.json")[:n_stories]

    def get_story_details(
        story_id: str = Field(..., description="The story ID.")
    ) -> Tuple[str, str, str]:
        """
        Get the title and links of a story from hacker news.
        Returns:
        - The title of the story (str)
        - The main URL of the story (str)
        - The external link pointed to in the story (str), or an empty string if the story has no external link
        """
        story = _fetch_json(f"item/{story_id}.json")
        if not story:
            # An unknown ID decodes to an empty/None payload; report it clearly
            # rather than failing on the subscript below.
            raise ValueError(f"No Hacker News story found for ID {story_id}")
        story_url = f'https://news.ycombinator.com/item?id={story_id}'
        # 'url' is absent on text-only posts (e.g. Ask HN), so do not index it directly.
        return story['title'], story_url, story.get('url', '')

    def get_story_text(
        story_id: str = Field(..., description="The story ID.")
    ) -> str:
        """
        Get the text of the story from hacker news (original text + all comments)
        Returns the extracted text of the story as a string.
        """
        url = f'https://news.ycombinator.com/item?id={story_id}'
        html = requests.get(url, headers=get_headers, timeout=request_timeout).text
        soup = BeautifulSoup(html, 'html5lib')
        # Drop script/style elements so only human-readable text is extracted.
        for element in soup.find_all(['script', 'style']):
            element.decompose()
        text = soup.get_text(" ", strip=True).replace('\n', ' ')
        return text

    def whats_new(
        n_stories: int = Field(default=10, description="The number of new stories to return.")
    ) -> str:
        """
        Provides a succinct summary of what is new in the hackernews community
        by summarizing the content and comments of top stories.
        Returns a string with the summary.
        """
        # get_top_stories already truncates to n_stories, so no second slice is needed.
        stories = get_top_stories(n_stories)
        texts = [get_story_text(story_id) for story_id in stories]
        all_stories = '---------\n\n'.join(texts)
        return summarize_text(all_stories)

    hn_tools = [
        tools_factory.create_tool(tool)
        for tool in [
            get_top_stories,
            get_show_stories,
            get_ask_stories,
            get_story_details,
            get_story_text,
            whats_new,
        ]
    ]

    return (
        hn_tools +
        tools_factory.get_llama_index_tools("tavily_research", "TavilyToolSpec", api_key=cfg.tavily_api_key) +
        tools_factory.standard_tools() +
        tools_factory.guardrail_tools() +
        [ask_hackernews]
    )
|
143 |
+
|
144 |
+
def initialize_agent(_cfg, update_func):
    """
    Construct the Hacker News agent and return it.

    Args:
        _cfg: Configuration object forwarded to create_assistant_tools.
        update_func: Passed through unchanged as the Agent's ``update_func``.

    Returns:
        The Agent instance, after its ``report()`` method has been called.
    """
    # Instruction text is kept flush-left so that no source indentation ends
    # up inside the string.
    bot_instructions = """
- You are a helpful assistant, with expertise in answering user questions based on Hacker News stories and comments.
- Give slight preference to newer stories when answering questions.
- Use the ask_hackernews tool to find relevant Hacker News stories and respond to user queries based on that information.
- when you include links to Hacker News stories, use the actual title of the story as the link's displayed text.
Don't use text like "Source" which doesn't tell the user what the link is about.
- Don't include external links in your responses unless the user asks for them.
- The Tavily tools are available to help you find information on the web, but only use them with user request - don't lose your focus on HackerNews as a source.
"""

    assistant_tools = create_assistant_tools(_cfg)
    hn_agent = Agent(
        tools=assistant_tools,
        topic="hacker news",
        custom_instructions=bot_instructions,
        update_func=update_func,
    )
    hn_agent.report()
    return hn_agent
|
163 |
+
|
164 |
+
def get_agent_config() -> OmegaConf:
    """
    Assemble the agent configuration from environment variables.

    VECTARA_CUSTOMER_ID, VECTARA_CORPUS_ID, VECTARA_API_KEY and TAVILY_API_KEY
    are required (a missing one raises KeyError); QUERY_EXAMPLES is optional
    and becomes None when unset. The title and demo texts are fixed strings.

    Returns:
        The object produced by ``OmegaConf.create`` for these settings.
    """
    env = os.environ

    # Keys are inserted, and variables read, in the same order as before so
    # the first missing required variable is the one reported.
    settings = {}
    settings['customer_id'] = str(env['VECTARA_CUSTOMER_ID'])
    settings['corpus_id'] = str(env['VECTARA_CORPUS_ID'])
    settings['api_key'] = str(env['VECTARA_API_KEY'])
    settings['examples'] = env.get('QUERY_EXAMPLES', None)
    settings['title'] = "Hacker News Assistant"
    settings['demo_welcome'] = "Welcome to the Hacker News Assistant demo."
    settings['demo_description'] = "This demo can be used to ask about Hacker News."
    settings['tavily_api_key'] = str(env['TAVILY_API_KEY'])

    return OmegaConf.create(settings)
|