hacker-news-chat

Running

App Files Files Community

ofermend committed on Jul 8

Commit

92937db

•

1 Parent(s): e01b95d

added functions

Browse files

Files changed (1) hide show

app.py +33 -4

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import datetime
 import requests
 from dotenv import load_dotenv
 from typing import Tuple
 from pydantic import Field, BaseModel
 from vectara_agent.agent import Agent, AgentStatusType
@@ -17,6 +18,14 @@ initial_prompt = "How can I help you today?"
 load_dotenv(override=True)
 def create_tools(cfg):
     class QueryHackerNews(BaseModel):
@@ -94,11 +103,31 @@ def create_tools(cfg):
     ) -> Tuple[str, str]:
         """
         Get the title of a story from hacker news.
-        Returns the title of the story, and the URL associated with it
         """
         db_url = 'https://hacker-news.firebaseio.com/v0/'
         story = requests.get(f"{db_url}item/{story_id}.json").json()
-        return story['title'], story['url']
     return (
         tools_factory.get_tools(
@@ -107,6 +136,7 @@ def create_tools(cfg):
                     get_show_stories,
                     get_ask_stories,
                     get_story_details,
                 ]
             ) +
         tools_factory.standard_tools() +
@@ -121,8 +151,7 @@ def initialize_agent(_cfg):
     - Today's date is {date}.
     - Never discuss politics, and always respond politely.
     - Use tools when available instead of depending on your own knowledge.
-    - For RAG tools, if the tool returns an 'fcs' score, consider that as a confidence score for the response not being a hallucination.
-      0 = high hallucination, 1 = low or no hallucination. Values below 0.5 might mean the text is hallucination.
     - If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
     - Be very careful not to report results you are not confident about.
     """

 import requests
 from dotenv import load_dotenv
 from typing import Tuple
+from bs4 import BeautifulSoup
 from pydantic import Field, BaseModel
 from vectara_agent.agent import Agent, AgentStatusType
 load_dotenv(override=True)
+# Browser-like HTTP request headers (Firefox User-Agent) passed to requests.get
+# when get_story_text fetches a Hacker News item page.
+get_headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.5",
+    "Accept-Encoding": "gzip, deflate",
+    "Connection": "keep-alive",
+}
 def create_tools(cfg):
     class QueryHackerNews(BaseModel):
     ) -> Tuple[str, str]:
         """
         Get the title of a story from hacker news.
+        Returns:
+         - The title of the story (str)
+         - The main URL of the story (str)
+         - The external link pointed to in the story (str)
         """
         db_url = 'https://hacker-news.firebaseio.com/v0/'
         story = requests.get(f"{db_url}item/{story_id}.json").json()
+        story_url = f'https://news.ycombinator.com/item?id={story_id}'
+        return story['title'], story_url, story['url'],
+    def get_story_text(
+            story_id: str = Field(..., description="The story ID.")
+    ) -> str:
+        """
+        Get the text of the story from hacker news (original text + all comments)
+        Returns the extracted text of the story as a string.
+        """
+        # NOTE(review): the docstring above is presumably surfaced to the agent
+        # as the tool description, so its wording is kept as-is -- confirm.
+        #
+        # Parameters: story_id -- the Hacker News item id, interpolated into
+        #             the item page URL.
+        # Returns:    all visible page text (script/style removed) as a single
+        #             space-separated line.
+        # Raises:     requests.HTTPError on a 4xx/5xx response;
+        #             requests.Timeout if the server does not answer in time.
+        url = f'https://news.ycombinator.com/item?id={story_id}'
+        # Bound the request: without a timeout, requests can block indefinitely
+        # on a stalled connection and hang the whole tool call.
+        response = requests.get(url, headers=get_headers, timeout=30)
+        # Fail loudly on HTTP errors so an error or rate-limit page is never
+        # returned as if it were the story's text.
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html5lib')
+        # Drop script/style elements so only human-readable text remains.
+        for element in soup.find_all(['script', 'style']):
+            element.decompose()
+        # Join text nodes with single spaces and flatten any remaining newlines.
+        text = soup.get_text(" ", strip=True).replace('\n', ' ')
+        return text
     return (
         tools_factory.get_tools(
                     get_show_stories,
                     get_ask_stories,
                     get_story_details,
+                    get_story_text,
                 ]
             ) +
         tools_factory.standard_tools() +
     - Today's date is {date}.
     - Never discuss politics, and always respond politely.
     - Use tools when available instead of depending on your own knowledge.
+    - If a tool provides citations, you can include them in your response to provide more context.
     - If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
     - Be very careful not to report results you are not confident about.
     """