Spaces:

BenjaminB
/

gistillery

Runtime error

App Files Files Community

Benjamin Bossan committed on May 22, 2023

Commit

71965fb

•

1 Parent(s): 31610d3

Add a source snippet to the results

Browse files

This way it is easier to trace the source of the entry.

Files changed (7) hide show

demo.py +11 -6
requests.org +15 -4
src/gistillery/base.py +1 -0
src/gistillery/db.py +1 -0
src/gistillery/webservice.py +27 -8
src/gistillery/worker.py +1 -1
tests/test_app.py +18 -0

demo.py CHANGED Viewed

@@ -19,14 +19,17 @@ def check_status():
 def check_tags():
     try:
         tag_counts = {
             key.strip("#"): val
-            for key, val in httpx.get("http://localhost:8080/tag_counts/").json().items()
             if key.strip("#")
         }
         sorted_tags = sorted(list(tag_counts.items()), key=lambda tup: -tup[1])
-        result = "Most common tags: " + ", ".join(f"{t} ({c})" for t, c in sorted_tags[:5])
     except httpx.ConnectError:
         result = ""
     return result
@@ -45,6 +48,7 @@ def get_results(inputs: list[str]):
         texts.append(
             f"## {i}. author: {entry['author']}\n\n"
             f"Date: _{entry['date']}_\n\n"
             f"**Summary**: {entry['summary']}\n\n"
             f"tags: _{' '.join(entry['tags'])}_"
         )
@@ -64,9 +68,6 @@ Input currently supports:
 - a URL to a webpage
 - a URL to a youtube video (the video will be transcribed)
 - a URL to an image (the image description will be used)
-Processing can take a couple of minutes, the info box below gives an update on
-how many jobs are in the queue.
 """
@@ -78,8 +79,12 @@ def get_demo():
         btn_submit = gr.Button("Submit")
         # check job status
         gr.HTML(value=check_status, label="Status", every=3)
-        gr.HTML(value=check_tags, label="Status", every=10)
         # check box of tags to filter on
         msg = "Enter hashtags to filter on (comma separated if multiple)"

 def check_tags():
+    url = "http://localhost:8080/tag_counts/"
     try:
         tag_counts = {
             key.strip("#"): val
+            for key, val in httpx.get(url).json().items()
             if key.strip("#")
         }
         sorted_tags = sorted(list(tag_counts.items()), key=lambda tup: -tup[1])
+        result = "Most common tags: " + ", ".join(
+            f"{t} ({c})" for t, c in sorted_tags[:5]
+        )
     except httpx.ConnectError:
         result = ""
     return result
         texts.append(
             f"## {i}. author: {entry['author']}\n\n"
             f"Date: _{entry['date']}_\n\n"
+            f"Source: _{entry['source_snippet']}_\n\n"
             f"**Summary**: {entry['summary']}\n\n"
             f"tags: _{' '.join(entry['tags'])}_"
         )
 - a URL to a webpage
 - a URL to a youtube video (the video will be transcribed)
 - a URL to an image (the image description will be used)
 """
         btn_submit = gr.Button("Submit")
         # check job status
+        gr.Markdown(
+            "Processing can take a couple of minutes, the info box below gives an "
+            "update on how many jobs are in the queue."
+        )
         gr.HTML(value=check_status, label="Status", every=3)
+        gr.HTML(value=check_tags, label="Status", every=9)
         # check box of tags to filter on
         msg = "Enter hashtags to filter on (comma separated if multiple)"

requests.org CHANGED Viewed

@@ -16,7 +16,7 @@ curl -X 'POST' \
   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
-  "content": "In literature discussing why ChatGPT is able to capture so much of our imagination, I often come across two narratives: Scale: throwing more data and compute at it. UX: moving from a prompt interface to a more natural chat interface. A narrative that is often glossed over in the demo frenzy is the incredible technical creativity that went into making models like ChatGPT work. One such cool idea is RLHF (Reinforcement Learning from Human Feedback): incorporating reinforcement learning and human feedback into NLP. RL has been notoriously difficult to work with, and therefore, mostly confined to gaming and simulated environments like Atari or MuJoCo. Just five years ago, both RL and NLP were progressing pretty much orthogonally – different stacks, different techniques, and different experimentation setups. It’s impressive to see it work in a new domain at a massive scale. So, how exactly does RLHF work? Why does it work? This post will discuss the answers to those questions."
 }'
 #+end_src
@@ -30,7 +30,7 @@ curl -X 'POST' \
   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
-  "content": "https://en.wikipedia.org/wiki/Goulburn_Street"
 }'
 #+end_src
@@ -44,12 +44,23 @@ curl -X 'POST' \
   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
-  "content": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e1/Cattle_tyrant_%28Machetornis_rixosa%29_on_Capybara.jpg/1920px-Cattle_tyrant_%28Machetornis_rixosa%29_on_Capybara.jpg"
 }'
 #+end_src
 #+RESULTS:
-: Submitted job dc3da7b1d5aa47c38dc6713952104f5f
 #+begin_src bash
 curl -X 'GET' \

   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
+  "content": "Diffusers is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Whether you’re looking for a simple inference solution or want to train your own diffusion model, 🤗 Diffusers is a modular toolbox that supports both. Our library is designed with a focus on usability over performance, simple over easy, and customizability over abstractions."
 }'
 #+end_src
   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
+  "content": "https://en.wikipedia.org/wiki/Hugging_Face"
 }'
 #+end_src
   -H 'Content-Type: application/json' \
   -d '{
   "author": "ben",
+  "content": "https://images.openai.com/blob/8a2b0833-55f2-44d6-bf4f-85f9471078f5/Anastronautridingahorseinaphotorealisticstyle6.jpg"
 }'
 #+end_src
 #+RESULTS:
+: Submitted job f37729bb36104ab4a23cefd0480e4862
+#+begin_src bash
+curl -X 'POST' \
+  'http://localhost:8080/submit/' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "author": "ben",
+  "content": "https://www.youtube.com/watch?v=H39Z_720T5s"
+}'
+#+end_src
 #+begin_src bash
 curl -X 'GET' \

src/gistillery/base.py CHANGED Viewed

@@ -13,6 +13,7 @@ class EntriesResult(BaseModel):
     id: str
     author: str
     summary: str
     tags: list[str]
     date: dt.datetime

     id: str
     author: str
     summary: str
+    source_snippet: str
     tags: list[str]
     date: dt.datetime

src/gistillery/db.py CHANGED Viewed

@@ -16,6 +16,7 @@ CREATE TABLE entries
     id TEXT PRIMARY KEY,
     author TEXT NOT NULL,
     source TEXT NOT NULL,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 )
 """

     id TEXT PRIMARY KEY,
     author TEXT NOT NULL,
     source TEXT NOT NULL,
+    source_snippet TEXT NOT NULL,
     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
 )
 """

src/gistillery/webservice.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import datetime as dt
 import logging
 import sqlite3
 import uuid
@@ -14,6 +15,8 @@ logger.setLevel(logging.DEBUG)
 app = FastAPI()
 # status
@@ -33,8 +36,12 @@ def submit_job(input: RequestInput) -> str:
         query = "INSERT INTO jobs (entry_id, status) VALUES (?, ?)"
         cursor.execute(query, (_id, "pending"))
         # create an entry
-        query = "INSERT INTO entries (id, author, source) VALUES (?, ?, ?)"
-        cursor.execute(query, (_id, input.author, input.content))
     return f"Submitted job {_id}"
@@ -81,7 +88,8 @@ def recent() -> list[EntriesResult]:
         # joined to a comma separated str
         cursor.execute("""
             SELECT
-              e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ",") tags, e.created_at
             FROM entries e
             JOIN summaries s ON e.id = s.entry_id
             JOIN tags t ON e.id = t.entry_id
@@ -92,9 +100,14 @@ def recent() -> list[EntriesResult]:
         results = cursor.fetchall()
     entries = []
-    for _id, author, summary, tags, date in results:
         entry = EntriesResult(
-            id=_id, author=author, summary=summary, tags=tags.split(","), date=date
         )
         entries.append(entry)
     return entries
@@ -110,7 +123,8 @@ def recent_tag(tag: str) -> list[EntriesResult]:
         cursor.execute(
             """
             SELECT
-              e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ",") tags, e.created_at
             FROM entries e
             JOIN summaries s ON e.id = s.entry_id
             JOIN tags t ON e.id = t.entry_id
@@ -126,9 +140,14 @@ def recent_tag(tag: str) -> list[EntriesResult]:
         results = cursor.fetchall()
     entries = []
-    for _id, author, summary, tag, date in results:
         entry = EntriesResult(
-            id=_id, author=author, summary=summary, tags=tag.split(","), date=date
         )
         entries.append(entry)
     return entries

 import datetime as dt
 import logging
+import reprlib
 import sqlite3
 import uuid
 app = FastAPI()
+aRepr = reprlib.Repr()
+aRepr.maxstring = 140
 # status
         query = "INSERT INTO jobs (entry_id, status) VALUES (?, ?)"
         cursor.execute(query, (_id, "pending"))
         # create an entry
+        source_snippet = aRepr.repr(input.content).strip("'")
+        query = (
+            "INSERT INTO entries (id, author, source, source_snippet) VALUES "
+            "(?, ?, ?, ?)"
+        )
+        cursor.execute(query, (_id, input.author, input.content, source_snippet))
     return f"Submitted job {_id}"
         # joined to a comma separated str
         cursor.execute("""
             SELECT
+              e.id, e.author, e.created_at, e.source_snippet,
+              s.summary, GROUP_CONCAT(t.tag, ",") tags
             FROM entries e
             JOIN summaries s ON e.id = s.entry_id
             JOIN tags t ON e.id = t.entry_id
         results = cursor.fetchall()
     entries = []
+    for row in results:
         entry = EntriesResult(
+            id=row.id,
+            author=row.author,
+            summary=row.summary,
+            source_snippet=row.source_snippet,
+            tags=row.tags.split(","),
+            date=row.created_at,
         )
         entries.append(entry)
     return entries
         cursor.execute(
             """
             SELECT
+              e.id, e.author, e.source_snippet, e.created_at,
+              s.summary, GROUP_CONCAT(t.tag, ",") tags
             FROM entries e
             JOIN summaries s ON e.id = s.entry_id
             JOIN tags t ON e.id = t.entry_id
         results = cursor.fetchall()
     entries = []
+    for row in results:
         entry = EntriesResult(
+            id=row.id,
+            author=row.author,
+            summary=row.summary,
+            source_snippet=row.source_snippet,
+            tags=row.tags.split(","),
+            date=row.created_at,
         )
         entries.append(entry)
     return entries

src/gistillery/worker.py CHANGED Viewed

@@ -1,5 +1,5 @@
-import time
 import sys
 from dataclasses import dataclass
 from gistillery.base import JobInput

 import sys
+import time
 from dataclasses import dataclass
 from gistillery.base import JobInput

tests/test_app.py CHANGED Viewed

@@ -213,6 +213,24 @@ class TestWebservice:
         assert resp1["summary"] == "this would"
         assert resp1["tags"] == sorted(["#this", "#would", "#be"])
     def test_recent_tag_with_entries(self, client, registry):
         # submit 2 entries
         client.post(

         assert resp1["summary"] == "this would"
         assert resp1["tags"] == sorted(["#this", "#would", "#be"])
+    def test_recent_source_snippet_shortened(self, client, registry):
+        # submit 2 entries
+        client.post("/submit", json={"author": "alice", "content": "this is short"})
+        client.post(
+            "/submit",
+            json={"author": "bob", "content": "this is long " * 100},
+        )
+        self.process_jobs(registry)
+        resp = client.get("/recent").json()
+        resp = sorted(resp, key=lambda x: x["author"])
+        assert resp[0]["source_snippet"] == "this is short"
+        expected_shortened = (
+            "this is long this is long this is long this is long this is long th"
+            "...ng this is long this is long this is long this is long this is long "
+        )
+        assert resp[1]["source_snippet"] == expected_shortened
     def test_recent_tag_with_entries(self, client, registry):
         # submit 2 entries
         client.post(