Spaces:
Running
Running
import os | |
from omegaconf import OmegaConf | |
import requests | |
from typing import Tuple | |
from bs4 import BeautifulSoup | |
from dotenv import load_dotenv | |
load_dotenv(override=True) | |
from pydantic import Field, BaseModel | |
from vectara_agent.agent import Agent | |
from vectara_agent.tools import ToolsFactory, VectaraToolFactory | |
from vectara_agent.tools_catalog import summarize_text | |
# Opening assistant message. It is not referenced by the code visible in this
# module -- presumably it is consumed by the UI layer (TODO confirm).
initial_prompt = "How can I help you today?"

# Browser-like request headers. They are passed to `requests.get` when the
# story page HTML is downloaded from news.ycombinator.com (see get_story_text).
get_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}
def create_assistant_tools(cfg):
    """
    Build the list of tools made available to the Hacker News assistant.

    The returned list is the concatenation of:
      * local tools wrapping the Hacker News Firebase API and the
        news.ycombinator.com story pages (defined below as nested functions),
      * the Tavily research tools,
      * the standard and guardrail tools provided by ``ToolsFactory``,
      * the ``ask_hackernews`` Vectara RAG tool.

    Args:
        cfg: configuration object exposing the attributes ``api_key``,
            ``customer_id``, ``corpus_id`` and ``tavily_api_key``.

    Returns:
        A list of tool objects created by ``ToolsFactory`` /
        ``VectaraToolFactory``.
    """
    # Root of the Hacker News Firebase API, shared by all helpers below.
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Upper bound (seconds) for every HTTP call; without a timeout `requests`
    # waits indefinitely on a stalled connection and the agent would hang.
    request_timeout = 30

    class QueryHackerNews(BaseModel):
        query: str = Field(..., description="The user query.")

    vec_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_customer_id=cfg.customer_id,
        vectara_corpus_id=cfg.corpus_id,
    )
    tools_factory = ToolsFactory()

    ask_hackernews = vec_factory.create_rag_tool(
        tool_name="ask_hackernews",
        tool_description="""
        Responds to query based on information and stories in hacker news from the last 6-9 months.
        """,
        tool_args_schema=QueryHackerNews,
        reranker="multilingual_reranker_v1", rerank_k=100,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer='vectara-summary-ext-24-05-med-omni',
        include_citations=True,
    )

    def _fetch_story_ids(listing, n_stories):
        """Return the first ``n_stories`` IDs of the given listing endpoint (e.g. 'topstories')."""
        story_ids = requests.get(f"{db_url}{listing}.json", timeout=request_timeout).json()
        return story_ids[:n_stories]

    # NOTE: the docstrings of the tool functions below are presumably surfaced
    # to the LLM as tool descriptions by `create_tool` (TODO confirm), so their
    # wording is kept as in the original apart from one typo fix.

    def get_top_stories(
        n_stories: int = Field(default=10, description="The number of top stories to return.")
    ) -> list[str]:
        """
        Get the top stories from hacker news.
        Returns a list of story IDS for the top stories right now. These are the top stories on hacker news.
        """
        return _fetch_story_ids("topstories", n_stories)

    def get_show_stories(
        n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
    ) -> list[str]:
        """
        Get the top SHOW HN stories from hacker news.
        Returns a list of story IDS for the top SHOW HN stories right now. These are stories where users show their projects.
        """
        return _fetch_story_ids("showstories", n_stories)

    def get_ask_stories(
        n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
    ) -> list[str]:
        """
        Get the top ASK HN stories from hacker news.
        Returns a list of story IDS for the top ASK HN stories right now. These are stories where users ask questions to the community.
        """
        return _fetch_story_ids("askstories", n_stories)

    def get_story_details(
        story_id: str = Field(..., description="The story ID.")
    ) -> Tuple[str, str, str]:
        """
        Get the title of a story from hacker news.
        Returns:
        - The title of the story (str)
        - The main URL of the story (str)
        - The external link pointed to in the story (str)
        """
        story = requests.get(f"{db_url}item/{story_id}.json", timeout=request_timeout).json()
        story_url = f'https://news.ycombinator.com/item?id={story_id}'
        # Items without an external link (e.g. Ask HN posts) carry no 'url'
        # key; fall back to the Hacker News page instead of raising KeyError.
        external_url = story.get('url', story_url)
        return story['title'], story_url, external_url

    def get_story_text(
        story_id: str = Field(..., description="The story ID.")
    ) -> str:
        """
        Get the text of the story from hacker news (original text + all comments)
        Returns the extracted text of the story as a string.
        """
        url = f'https://news.ycombinator.com/item?id={story_id}'
        html = requests.get(url, headers=get_headers, timeout=request_timeout).text
        soup = BeautifulSoup(html, 'html5lib')
        # Drop non-visible content before extracting the page text.
        for element in soup.find_all(['script', 'style']):
            element.decompose()
        text = soup.get_text(" ", strip=True).replace('\n', ' ')
        return text

    def whats_new(
        n_stories: int = Field(default=10, description="The number of new stories to return.")
    ) -> str:
        """
        Provides a succinct summary of what is new in the hackernews community
        by summarizing the content and comments of top stories.
        Returns a string with the summary.
        """
        # get_top_stories already truncates to n_stories, so no second slice.
        stories = get_top_stories(n_stories)
        texts = [get_story_text(story_id) for story_id in stories]
        all_stories = '---------\n\n'.join(texts)
        return summarize_text(all_stories)

    local_tools = [
        tools_factory.create_tool(tool)
        for tool in (
            get_top_stories,
            get_show_stories,
            get_ask_stories,
            get_story_details,
            get_story_text,
            whats_new,
        )
    ]
    return (
        local_tools
        + tools_factory.get_llama_index_tools("tavily_research", "TavilyToolSpec", api_key=cfg.tavily_api_key)
        + tools_factory.standard_tools()
        + tools_factory.guardrail_tools()
        + [ask_hackernews]
    )
def initialize_agent(_cfg, update_func):
    """
    Create the Hacker News agent, print its report and return it.

    Args:
        _cfg: configuration object forwarded to ``create_assistant_tools``.
        update_func: callback forwarded unchanged to ``Agent`` as ``update_func``.

    Returns:
        The constructed ``Agent`` instance, after ``report()`` has been called on it.
    """
    bot_instructions = """
    - You are a helpful assistant, with expertise in answering user questions based on Hacker News stories and comments.
    - Give slight preference to newer stories when answering questions.
    - Use the ask_hackernews tool to find relevant Hacker News stories and respond to user queries based on that information.
    - when you include links to Hacker News stories, use the actual title of the story as the link's displayed text.
      Don't use text like "Source" which doesn't tell the user what the link is about.
    - Don't include external links in your responses unless the user asks for them.
    - The Tavily tools are available to help you find information on the web, but only use them with user request - don't lose your focus on HackerNews as a source.
    """

    # Build the tool list first, then hand everything to the Agent constructor.
    assistant_tools = create_assistant_tools(_cfg)
    agent_kwargs = {
        "tools": assistant_tools,
        "topic": "hacker news",
        "custom_instructions": bot_instructions,
        "update_func": update_func,
    }
    hn_agent = Agent(**agent_kwargs)

    hn_agent.report()
    return hn_agent
def get_agent_config() -> OmegaConf:
    """
    Assemble the agent configuration from environment variables.

    Required variables (a missing one raises ``KeyError``):
    ``VECTARA_CUSTOMER_ID``, ``VECTARA_CORPUS_ID``, ``VECTARA_API_KEY`` and
    ``TAVILY_API_KEY``. ``QUERY_EXAMPLES`` is optional and maps to ``None``
    when unset.

    Returns:
        The object produced by ``OmegaConf.create`` for the settings mapping.
    """
    env = os.environ
    # Key order and lookup order match the configuration layout expected by
    # the rest of the module; environment values are already strings.
    settings = {
        "customer_id": env["VECTARA_CUSTOMER_ID"],
        "corpus_id": env["VECTARA_CORPUS_ID"],
        "api_key": env["VECTARA_API_KEY"],
        "examples": env.get("QUERY_EXAMPLES"),
        "demo_welcome": "Welcome to the Hacker News Assistant demo.",
        "demo_description": "This demo can be used to ask about Hacker News.",
        "tavily_api_key": env["TAVILY_API_KEY"],
    }
    return OmegaConf.create(settings)