Spaces:
Sleeping
Sleeping
added tools
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🐨
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
@@ -8,7 +8,7 @@ app_port: 8501
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
-
short_description:
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Hacker News chat
|
3 |
emoji: 🐨
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
+
short_description: chatbot with HN data using vectara-agent
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -3,128 +3,127 @@ from omegaconf import OmegaConf
|
|
3 |
import streamlit as st
|
4 |
import os
|
5 |
from PIL import Image
|
6 |
-
import re
|
7 |
import sys
|
8 |
import datetime
|
9 |
-
import pandas as pd
|
10 |
import requests
|
11 |
from dotenv import load_dotenv
|
|
|
12 |
|
13 |
from pydantic import Field, BaseModel
|
14 |
from vectara_agent.agent import Agent, AgentStatusType
|
15 |
from vectara_agent.tools import ToolsFactory
|
16 |
|
17 |
-
|
18 |
-
tickers = {
|
19 |
-
"AAPL": "Apple Computer",
|
20 |
-
"GOOG": "Google",
|
21 |
-
"AMZN": "Amazon",
|
22 |
-
"SNOW": "Snowflake",
|
23 |
-
"TEAM": "Atlassian",
|
24 |
-
"TSLA": "Tesla",
|
25 |
-
"NVDA": "Nvidia",
|
26 |
-
"MSFT": "Microsoft",
|
27 |
-
"AMD": "Advanced Micro Devices",
|
28 |
-
"INTC": "Intel",
|
29 |
-
"NFLX": "Netflix",
|
30 |
-
}
|
31 |
-
years = [2020, 2021, 2022, 2023, 2024]
|
32 |
initial_prompt = "How can I help you today?"
|
33 |
|
34 |
load_dotenv(override=True)
|
35 |
|
36 |
def create_tools(cfg):
|
37 |
-
|
38 |
-
def get_company_info() -> list[str]:
|
39 |
-
"""
|
40 |
-
Returns a dictionary of companies you can query about. Always check this before using any other tool.
|
41 |
-
The output is a dictionary of valid ticker symbols mapped to company names.
|
42 |
-
You can use this to identify the companies you can query about, and their ticker information.
|
43 |
-
"""
|
44 |
-
return tickers
|
45 |
-
|
46 |
-
def get_valid_years() -> list[str]:
|
47 |
-
"""
|
48 |
-
Returns a list of the years for which financial reports are available.
|
49 |
-
Always check this before using any other tool.
|
50 |
-
"""
|
51 |
-
return years
|
52 |
|
53 |
-
|
54 |
-
def get_income_statement(
|
55 |
-
ticker=Field(description="the ticker symbol of the company."),
|
56 |
-
year=Field(description="the year for which to get the income statement."),
|
57 |
-
) -> str:
|
58 |
-
"""
|
59 |
-
Get the income statement for a given company and year using the FMP (https://financialmodelingprep.com) API.
|
60 |
-
Returns a dictionary with the income statement data. All data is in USD, but you can convert it to more compact form like K, M, B.
|
61 |
-
"""
|
62 |
-
fmp_api_key = os.environ.get("FMP_API_KEY", None)
|
63 |
-
if fmp_api_key is None:
|
64 |
-
return "FMP_API_KEY environment variable not set. This tool does not work."
|
65 |
-
url = f"https://financialmodelingprep.com/api/v3/income-statement/{ticker}?apikey={fmp_api_key}"
|
66 |
-
response = requests.get(url)
|
67 |
-
if response.status_code == 200:
|
68 |
-
data = response.json()
|
69 |
-
income_statement = pd.DataFrame(data)
|
70 |
-
income_statement["date"] = pd.to_datetime(income_statement["date"])
|
71 |
-
income_statement_specific_year = income_statement[
|
72 |
-
income_statement["date"].dt.year == int(year)
|
73 |
-
]
|
74 |
-
values_dict = income_statement_specific_year.to_dict(orient="records")[0]
|
75 |
-
return f"Financial results: {', '.join([f'{key}: {value}' for key, value in values_dict.items() if key not in ['date', 'cik', 'link', 'finalLink']])}"
|
76 |
-
else:
|
77 |
-
return "FMP API returned error. This tool does not work."
|
78 |
-
|
79 |
-
class QueryTranscriptsArgs(BaseModel):
|
80 |
query: str = Field(..., description="The user query.")
|
81 |
-
year: int = Field(..., description=f"The year. an integer between {min(years)} and {max(years)}.")
|
82 |
-
ticker: str = Field(..., description=f"The company ticker. Must be a valid ticket symbol from the list {tickers.keys()}.")
|
83 |
|
84 |
tools_factory = ToolsFactory(vectara_api_key=cfg.api_key,
|
85 |
vectara_customer_id=cfg.customer_id,
|
86 |
vectara_corpus_id=cfg.corpus_id)
|
87 |
-
|
88 |
-
tool_name = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
tool_description = """
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
""",
|
95 |
-
tool_args_schema =
|
96 |
-
tool_filter_template = "doc.year = {year} and doc.ticker = '{ticker}'",
|
97 |
reranker = "multilingual_reranker_v1", rerank_k = 100,
|
98 |
-
n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.
|
99 |
summary_num_results = 10,
|
100 |
vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
|
|
|
101 |
)
|
102 |
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
[
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
108 |
]
|
109 |
) +
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
[ask_transcripts]
|
114 |
)
|
115 |
|
116 |
def initialize_agent(_cfg):
|
117 |
date = datetime.datetime.now().strftime("%Y-%m-%d")
|
118 |
-
|
119 |
-
- You are a helpful
|
120 |
- Today's date is {date}.
|
121 |
-
-
|
122 |
-
-
|
123 |
-
|
124 |
-
|
125 |
- If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
|
126 |
-
- When querying a tool for a numeric value or KPI, use a concise and non-ambiguous description of what you are looking for.
|
127 |
-
- If you calculate a metric, make sure you have all the necessary information to complete the calculation. Don't guess.
|
128 |
- Be very careful not to report results you are not confident about.
|
129 |
"""
|
130 |
|
@@ -134,8 +133,8 @@ def initialize_agent(_cfg):
|
|
134 |
|
135 |
agent = Agent(
|
136 |
tools=create_tools(_cfg),
|
137 |
-
topic="
|
138 |
-
custom_instructions=
|
139 |
update_func=update_func
|
140 |
)
|
141 |
return agent
|
@@ -149,7 +148,7 @@ def launch_bot():
|
|
149 |
st.session_state.log_messages = []
|
150 |
st.session_state.show_logs = False
|
151 |
|
152 |
-
st.set_page_config(page_title="
|
153 |
if 'cfg' not in st.session_state:
|
154 |
cfg = OmegaConf.create({
|
155 |
'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
|
@@ -164,11 +163,7 @@ def launch_bot():
|
|
164 |
with st.sidebar:
|
165 |
image = Image.open('Vectara-logo.png')
|
166 |
st.image(image, width=250)
|
167 |
-
st.markdown("## Welcome to the
|
168 |
-
companies = ", ".join(tickers.values())
|
169 |
-
st.markdown(
|
170 |
-
f"This assistant can help you with any questions about the financials of several companies:\n\n **{companies}**.\n"
|
171 |
-
)
|
172 |
|
173 |
st.markdown("\n\n")
|
174 |
bc1, bc2 = st.columns([1, 1])
|
@@ -208,8 +203,8 @@ def launch_bot():
|
|
208 |
with st.chat_message("assistant", avatar='🤖'):
|
209 |
with st.spinner(st.session_state.thinking_message):
|
210 |
res = st.session_state.agent.chat(prompt)
|
211 |
-
|
212 |
-
message = {"role": "assistant", "content":
|
213 |
st.session_state.messages.append(message)
|
214 |
st.rerun()
|
215 |
|
|
|
3 |
import streamlit as st
|
4 |
import os
|
5 |
from PIL import Image
|
|
|
6 |
import sys
|
7 |
import datetime
|
|
|
8 |
import requests
|
9 |
from dotenv import load_dotenv
|
10 |
+
from typing import Tuple
|
11 |
|
12 |
from pydantic import Field, BaseModel
|
13 |
from vectara_agent.agent import Agent, AgentStatusType
|
14 |
from vectara_agent.tools import ToolsFactory
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
initial_prompt = "How can I help you today?"
|
17 |
|
18 |
load_dotenv(override=True)
|
19 |
|
20 |
def create_tools(cfg):
    """
    Build the list of tools made available to the Hacker News agent.

    Args:
        cfg: configuration object; its `api_key`, `customer_id` and
            `corpus_id` attributes are passed to `ToolsFactory`.

    Returns:
        The tools wrapping the four Hacker News API helpers defined below,
        followed by the factory's standard tools, its guardrail tools, and
        the two RAG tools (semantic and hybrid search).
    """
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Without an explicit timeout `requests` waits forever on a stalled
    # connection, which would freeze the chat UI mid-response.
    request_timeout = 10  # seconds

    def _fetch_json(path):
        # Single access point for the Hacker News API: bounded wait and an
        # explicit HTTP error instead of trying to use an error payload.
        response = requests.get(f"{db_url}{path}", timeout=request_timeout)
        response.raise_for_status()
        return response.json()

    def _fetch_story_ids(endpoint, n_stories):
        # The API may answer `null`; treat that as "no stories" so slicing
        # never fails on None.
        story_ids = _fetch_json(endpoint) or []
        return story_ids[:n_stories]

    class QueryHackerNews(BaseModel):
        query: str = Field(..., description="The user query.")

    tools_factory = ToolsFactory(vectara_api_key=cfg.api_key,
                                 vectara_customer_id=cfg.customer_id,
                                 vectara_corpus_id=cfg.corpus_id)

    # lambda_val = 0.0 -> no keyword weighting requested for this tool.
    ask_hackernews_semantic = tools_factory.create_rag_tool(
        tool_name = "ask_hackernews_semantic",
        tool_description = """
        Responds to query based on information in hacker news from the last 6 months.
        Performs a semantic search to find relevant information.
        Use this tool to perform pure semantic search.
        """,
        tool_args_schema = QueryHackerNews,
        reranker = "multilingual_reranker_v1", rerank_k = 100,
        n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.0,
        summary_num_results = 10,
        vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
        include_citations = True,
    )

    # NOTE(review): the tool is registered as "ask_hackernews_keyword" while
    # its description and variable name say "hybrid"; the name is left
    # untouched because the agent sees it at runtime - confirm which is meant.
    ask_hackernews_hybrid = tools_factory.create_rag_tool(
        tool_name = "ask_hackernews_keyword",
        tool_description = """
        Responds to query based on information in hacker news from the last 6 months
        performs a hybrid search (both semantic and keyword) to find relevant information.
        Use this tool when some amount of keyword search is expected to work better than semantic search,
        For example, when you are looking for specific keywords or use rare words in the query.
        """,
        tool_args_schema = QueryHackerNews,
        reranker = "multilingual_reranker_v1", rerank_k = 100,
        n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.1,
        summary_num_results = 10,
        vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
        include_citations = True,
    )

    # The signatures and docstrings of the four helpers below are kept as they
    # were: they are handed to `tools_factory.get_tools`, which presumably
    # derives each tool's schema and description from them - TODO confirm.
    # NOTE(review): story ids coming back from the API look like integers even
    # though the annotations say `list[str]`; annotations left unchanged.

    def get_top_stories(
        n_stories: int = Field(default=10, description="The number of top stories to return.")
    ) -> list[str]:
        """
        Get the top stories from hacker news.
        Returns a list of story IDS for the top stories right now
        """
        return _fetch_story_ids("topstories.json", n_stories)

    def get_show_stories(
        n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
    ) -> list[str]:
        """
        Get the top SHOW HN stories from hacker news.
        Returns a list of story IDS for the top SHOW HN stories right now
        """
        return _fetch_story_ids("showstories.json", n_stories)

    def get_ask_stories(
        n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
    ) -> list[str]:
        """
        Get the top ASK HN stories from hacker news.
        Returns a list of story IDS for the top ASK HN stories right now
        """
        return _fetch_story_ids("askstories.json", n_stories)

    def get_story_details(
        story_id: str = Field(..., description="The story ID.")
    ) -> Tuple[str, str]:
        """
        Get the title of a story from hacker news.
        Returns the title of the story, and the URL associated with it
        """
        story = _fetch_json(f"item/{story_id}.json")
        if not story:
            # Unknown ids come back as `null`; answer with a readable message
            # rather than failing on a subscript of None.
            return f"Story {story_id} not found.", ""
        # Not every item carries a 'url' (e.g. text-only posts, per the HN API
        # docs); fall back to the item's own discussion page instead of
        # raising KeyError.
        url = story.get('url') or f"https://news.ycombinator.com/item?id={story_id}"
        return story.get('title', ''), url

    return (
        tools_factory.get_tools(
            [
                get_top_stories,
                get_show_stories,
                get_ask_stories,
                get_story_details,
            ]
        ) +
        tools_factory.standard_tools() +
        tools_factory.guardrail_tools() +
        [ask_hackernews_semantic, ask_hackernews_hybrid]
    )
|
116 |
|
117 |
def initialize_agent(_cfg):
|
118 |
date = datetime.datetime.now().strftime("%Y-%m-%d")
|
119 |
+
bot_instructions = f"""
|
120 |
+
- You are a helpful assistant, answering user questions about content from hacker news.
|
121 |
- Today's date is {date}.
|
122 |
+
- Never discuss politics, and always respond politely.
|
123 |
+
- Use tools when available instead of depending on your own knowledge.
|
124 |
+
- For RAG tools, if the tool returns an 'fcs' score, consider that as a confidence score for the response not being a hallucination.
|
125 |
+
0 = high hallucination, 1 = low or no hallucination. Values below 0.5 might mean the text is hallucination.
|
126 |
- If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
|
|
|
|
|
127 |
- Be very careful not to report results you are not confident about.
|
128 |
"""
|
129 |
|
|
|
133 |
|
134 |
agent = Agent(
|
135 |
tools=create_tools(_cfg),
|
136 |
+
topic="hacker news",
|
137 |
+
custom_instructions=bot_instructions,
|
138 |
update_func=update_func
|
139 |
)
|
140 |
return agent
|
|
|
148 |
st.session_state.log_messages = []
|
149 |
st.session_state.show_logs = False
|
150 |
|
151 |
+
st.set_page_config(page_title="Hacker News Bot", layout="wide")
|
152 |
if 'cfg' not in st.session_state:
|
153 |
cfg = OmegaConf.create({
|
154 |
'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
|
|
|
163 |
with st.sidebar:
|
164 |
image = Image.open('Vectara-logo.png')
|
165 |
st.image(image, width=250)
|
166 |
+
st.markdown("## Welcome to the hacker news assistant demo.\n\n\n")
|
|
|
|
|
|
|
|
|
167 |
|
168 |
st.markdown("\n\n")
|
169 |
bc1, bc2 = st.columns([1, 1])
|
|
|
203 |
with st.chat_message("assistant", avatar='🤖'):
|
204 |
with st.spinner(st.session_state.thinking_message):
|
205 |
res = st.session_state.agent.chat(prompt)
|
206 |
+
res = res.replace('$', '\\$') # escape dollar sign for markdown
|
207 |
+
message = {"role": "assistant", "content": res, "avatar": '🤖'}
|
208 |
st.session_state.messages.append(message)
|
209 |
st.rerun()
|
210 |
|