ofermend committed on
Commit
ecb6e4b
1 Parent(s): cc95ba5
Files changed (1) hide show
  1. agent.py +175 -0
agent.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
+ from omegaconf import OmegaConf
5
+ import requests
6
+ from typing import Tuple
7
+ from bs4 import BeautifulSoup
8
+
9
+ from dotenv import load_dotenv
10
+ load_dotenv(override=True)
11
+
12
+ from pydantic import Field, BaseModel
13
+ from vectara_agent.agent import Agent
14
+ from vectara_agent.tools import ToolsFactory, VectaraToolFactory
15
+ from vectara_agent.tools_catalog import summarize_text
16
+
17
# Opening message for a conversation.
# NOTE(review): not referenced elsewhere in the visible code; presumably read by the UI layer — confirm.
initial_prompt = "How can I help you today?"

# HTTP headers sent by get_story_text when it downloads a Hacker News HTML page.
# They present the request as coming from a desktop Firefox browser.
get_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}
26
+
27
+
28
def create_assistant_tools(cfg):
    """
    Build the list of tools available to the Hacker News assistant.

    Args:
        cfg: configuration object exposing `api_key`, `customer_id` and
            `corpus_id` (used for the Vectara RAG tool) and `tavily_api_key`
            (used for the Tavily research tools).

    Returns:
        A list made of: the Hacker News helper tools defined in this function,
        the Tavily research tools, the standard and guardrail tools provided by
        ToolsFactory, and the `ask_hackernews` RAG tool.
    """
    # Base URL shared by every call to the Hacker News Firebase API.
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Without an explicit timeout `requests` can block forever on a stalled connection.
    request_timeout = 30

    class QueryHackerNews(BaseModel):
        query: str = Field(..., description="The user query.")

    vec_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_customer_id=cfg.customer_id,
        vectara_corpus_id=cfg.corpus_id,
    )
    tools_factory = ToolsFactory()

    ask_hackernews = vec_factory.create_rag_tool(
        tool_name="ask_hackernews",
        tool_description="""
        Responds to query based on information and stories in hacker news from the last 6-9 months.
        """,
        tool_args_schema=QueryHackerNews,
        reranker="multilingual_reranker_v1", rerank_k=100,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer='vectara-summary-ext-24-05-med-omni',
        include_citations=True,
    )

    def _fetch_json(path: str):
        """Fetch `path` (relative to `db_url`) and return the decoded JSON payload."""
        response = requests.get(f"{db_url}{path}", timeout=request_timeout)
        # Fail loudly on HTTP errors instead of trying to use an error payload as data.
        response.raise_for_status()
        return response.json()

    def get_top_stories(
        n_stories: int = Field(default=10, description="The number of top stories to return.")
    ) -> list[str]:
        """
        Get the top stories from hacker news.
        Returns a list of story IDS for the top stories right now. These are the top stories on hacker news.
        """
        return _fetch_json("topstories.json")[:n_stories]

    def get_show_stories(
        n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
    ) -> list[str]:
        """
        Get the top SHOW HN stories from hacker news.
        Returns a list of story IDS for the top SHOW HN stories right now. These are stories where users show their projects.
        """
        return _fetch_json("showstories.json")[:n_stories]

    def get_ask_stories(
        n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
    ) -> list[str]:
        """
        Get the top ASK HN stories from hacker news.
        Returns a list of story IDS for the top ASK HN stories right now. These are stories where users ask questions to the community.
        """
        return _fetch_json("askstories.json")[:n_stories]

    def get_story_details(
        story_id: str = Field(..., description="The story ID.")
    ) -> Tuple[str, str, str]:
        """
        Get the title and links of a story from hacker news.
        Returns:
        - The title of the story (str)
        - The main URL of the story (str)
        - The external link pointed to in the story (str)
        """
        story = _fetch_json(f"item/{story_id}.json")
        # Guard against a payload that decodes to None (no such item) so the
        # caller gets a meaningful error instead of a TypeError on subscripting.
        if story is None:
            raise ValueError(f"No Hacker News item found with ID {story_id}.")
        story_url = f'https://news.ycombinator.com/item?id={story_id}'
        # Text-only posts (e.g. ASK HN) carry no 'url' field; fall back to the
        # Hacker News page itself rather than raising KeyError.
        return story.get('title', ''), story_url, story.get('url', story_url)

    def get_story_text(
        story_id: str = Field(..., description="The story ID.")
    ) -> str:
        """
        Get the text of the story from hacker news (original text + all comments)
        Returns the extracted text of the story as a string.
        """
        url = f'https://news.ycombinator.com/item?id={story_id}'
        response = requests.get(url, headers=get_headers, timeout=request_timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html5lib')
        # Drop non-visible content before extracting the page text.
        for element in soup.find_all(['script', 'style']):
            element.decompose()
        return soup.get_text(" ", strip=True).replace('\n', ' ')

    def whats_new(
        n_stories: int = Field(default=10, description="The number of new stories to return.")
    ) -> str:
        """
        Provides a succinct summary of what is new in the hackernews community
        by summarizing the content and comments of top stories.
        Returns a string with the summary.
        """
        # get_top_stories already limits the result to n_stories.
        stories = get_top_stories(n_stories)
        texts = [get_story_text(story_id) for story_id in stories]
        all_stories = '---------\n\n'.join(texts)
        return summarize_text(all_stories)

    hackernews_tools = [
        tools_factory.create_tool(tool)
        for tool in [
            get_top_stories,
            get_show_stories,
            get_ask_stories,
            get_story_details,
            get_story_text,
            whats_new,
        ]
    ]
    return (
        hackernews_tools +
        tools_factory.get_llama_index_tools("tavily_research", "TavilyToolSpec", api_key=cfg.tavily_api_key) +
        tools_factory.standard_tools() +
        tools_factory.guardrail_tools() +
        [ask_hackernews]
    )
143
+
144
def initialize_agent(_cfg, update_func):
    """
    Create the Hacker News assistant agent.

    Args:
        _cfg: configuration object forwarded to create_assistant_tools.
        update_func: callback handed to the Agent as its `update_func`.

    Returns:
        The constructed Agent, after its `report()` method has been called.
    """
    bot_instructions = """
    - You are a helpful assistant, with expertise in answering user questions based on Hacker News stories and comments.
    - Give slight preference to newer stories when answering questions.
    - Use the ask_hackernews tool to find relevant Hacker News stories and respond to user queries based on that information.
    - when you include links to Hacker News stories, use the actual title of the story as the link's displayed text.
      Don't use text like "Source" which doesn't tell the user what the link is about.
    - Don't include external links in your responses unless the user asks for them.
    - The Tavily tools are available to help you find information on the web, but only use them with user request - don't lose your focus on HackerNews as a source.
    """

    # Build the tool list first so the Agent construction reads as a plain wiring step.
    assistant_tools = create_assistant_tools(_cfg)
    hn_agent = Agent(
        tools=assistant_tools,
        topic="hacker news",
        custom_instructions=bot_instructions,
        update_func=update_func,
    )
    hn_agent.report()
    return hn_agent
163
+
164
def get_agent_config() -> OmegaConf:
    """
    Assemble the assistant configuration from environment variables.

    Required variables: VECTARA_CUSTOMER_ID, VECTARA_CORPUS_ID, VECTARA_API_KEY
    and TAVILY_API_KEY. QUERY_EXAMPLES is optional and is None when unset.

    Returns:
        The object produced by OmegaConf.create for these settings plus the
        fixed demo title and welcome/description texts.

    Raises:
        KeyError: if any of the required environment variables is missing.
    """
    env = os.environ
    settings = {
        'customer_id': str(env['VECTARA_CUSTOMER_ID']),
        'corpus_id': str(env['VECTARA_CORPUS_ID']),
        'api_key': str(env['VECTARA_API_KEY']),
        'examples': env.get('QUERY_EXAMPLES'),
        'title': "Hacker News Assistant",
        'demo_welcome': "Welcome to the Hacker News Assistant demo.",
        'demo_description': "This demo can be used to ask about Hacker News.",
        'tavily_api_key': str(env['TAVILY_API_KEY']),
    }
    return OmegaConf.create(settings)