Spaces:
Runtime error
Runtime error
Benjamin Bossan
commited on
Commit
•
6ac056a
1
Parent(s):
ba8f25e
Store processed inputs in db
Browse filesAlso, use named tuples for row factory.
- src/gistillery/db.py +26 -4
- src/gistillery/webservice.py +2 -2
- src/gistillery/worker.py +6 -1
- tests/{test_webservice.py → test_app.py} +42 -19
src/gistillery/db.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import logging
|
2 |
import os
|
3 |
import sqlite3
|
|
|
4 |
from contextlib import contextmanager
|
5 |
from typing import Generator
|
6 |
|
@@ -57,20 +58,41 @@ CREATE TABLE jobs
|
|
57 |
)
|
58 |
"""
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
TABLES = {
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
65 |
}
|
66 |
TABLES_CREATED = False
|
67 |
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def _get_db_connection() -> sqlite3.Connection:
|
70 |
global TABLES_CREATED
|
71 |
|
72 |
# sqlite cannot deal with concurrent access, so we set a big timeout
|
73 |
conn = sqlite3.connect(db_file, timeout=30)
|
|
|
74 |
if TABLES_CREATED:
|
75 |
return conn
|
76 |
|
|
|
1 |
import logging
|
2 |
import os
|
3 |
import sqlite3
|
4 |
+
from collections import namedtuple
|
5 |
from contextlib import contextmanager
|
6 |
from typing import Generator
|
7 |
|
|
|
58 |
)
|
59 |
"""
|
60 |
|
61 |
+
# store the processed inputs
|
62 |
+
schema_inputs = """
|
63 |
+
CREATE TABLE inputs
|
64 |
+
(
|
65 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
66 |
+
entry_id TEXT NOT NULL,
|
67 |
+
input TEXT NOT NULL,
|
68 |
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
69 |
+
FOREIGN KEY(entry_id) REFERENCES entries(id)
|
70 |
+
)
|
71 |
+
"""
|
72 |
+
|
73 |
TABLES = {
|
74 |
+
'entries': schema_entries,
|
75 |
+
'summaries': schema_summary,
|
76 |
+
'tags': schema_tag,
|
77 |
+
'jobs': schema_job,
|
78 |
+
'inputs': schema_inputs,
|
79 |
}
|
80 |
TABLES_CREATED = False
|
81 |
|
82 |
|
83 |
+
# https://docs.python.org/3/library/sqlite3.html#how-to-create-and-use-row-factories
|
84 |
+
def namedtuple_factory(cursor, row):
|
85 |
+
fields = [column[0] for column in cursor.description]
|
86 |
+
cls = namedtuple("Row", fields)
|
87 |
+
return cls._make(row)
|
88 |
+
|
89 |
+
|
90 |
def _get_db_connection() -> sqlite3.Connection:
|
91 |
global TABLES_CREATED
|
92 |
|
93 |
# sqlite cannot deal with concurrent access, so we set a big timeout
|
94 |
conn = sqlite3.connect(db_file, timeout=30)
|
95 |
+
conn.row_factory = namedtuple_factory
|
96 |
if TABLES_CREATED:
|
97 |
return conn
|
98 |
|
src/gistillery/webservice.py
CHANGED
@@ -58,7 +58,7 @@ def recent() -> list[EntriesResult]:
|
|
58 |
# get the last 10 entries, join summary and tag, where each tag is
|
59 |
# joined to a comma separated str
|
60 |
cursor.execute("""
|
61 |
-
SELECT e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ","), e.created_at
|
62 |
FROM entries e
|
63 |
JOIN summaries s ON e.id = s.entry_id
|
64 |
JOIN tags t ON e.id = t.entry_id
|
@@ -86,7 +86,7 @@ def recent_tag(tag: str) -> list[EntriesResult]:
|
|
86 |
with get_db_cursor() as cursor:
|
87 |
cursor.execute(
|
88 |
"""
|
89 |
-
SELECT e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ","), e.created_at
|
90 |
FROM entries e
|
91 |
JOIN summaries s ON e.id = s.entry_id
|
92 |
JOIN tags t ON e.id = t.entry_id
|
|
|
58 |
# get the last 10 entries, join summary and tag, where each tag is
|
59 |
# joined to a comma separated str
|
60 |
cursor.execute("""
|
61 |
+
SELECT e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ",") tags, e.created_at
|
62 |
FROM entries e
|
63 |
JOIN summaries s ON e.id = s.entry_id
|
64 |
JOIN tags t ON e.id = t.entry_id
|
|
|
86 |
with get_db_cursor() as cursor:
|
87 |
cursor.execute(
|
88 |
"""
|
89 |
+
SELECT e.id, e.author, s.summary, GROUP_CONCAT(t.tag, ",") tags, e.created_at
|
90 |
FROM entries e
|
91 |
JOIN summaries s ON e.id = s.entry_id
|
92 |
JOIN tags t ON e.id = t.entry_id
|
src/gistillery/worker.py
CHANGED
@@ -33,6 +33,7 @@ def check_pending_jobs() -> list[JobInput]:
|
|
33 |
|
34 |
@dataclass
|
35 |
class JobOutput:
|
|
|
36 |
summary: str
|
37 |
tags: list[str]
|
38 |
processor_name: str
|
@@ -54,6 +55,7 @@ def _process_job(job: JobInput, registry: MlRegistry) -> JobOutput:
|
|
54 |
summary = summarizer(processed)
|
55 |
|
56 |
return JobOutput(
|
|
|
57 |
summary=summary,
|
58 |
tags=tags,
|
59 |
processor_name=processor_name,
|
@@ -64,7 +66,10 @@ def _process_job(job: JobInput, registry: MlRegistry) -> JobOutput:
|
|
64 |
|
65 |
def store(job: JobInput, output: JobOutput) -> None:
|
66 |
with get_db_cursor() as cursor:
|
67 |
-
|
|
|
|
|
|
|
68 |
cursor.execute(
|
69 |
(
|
70 |
"INSERT INTO summaries (entry_id, summary, summarizer_name)"
|
|
|
33 |
|
34 |
@dataclass
|
35 |
class JobOutput:
|
36 |
+
processed: str
|
37 |
summary: str
|
38 |
tags: list[str]
|
39 |
processor_name: str
|
|
|
55 |
summary = summarizer(processed)
|
56 |
|
57 |
return JobOutput(
|
58 |
+
processed=processed,
|
59 |
summary=summary,
|
60 |
tags=tags,
|
61 |
processor_name=processor_name,
|
|
|
66 |
|
67 |
def store(job: JobInput, output: JobOutput) -> None:
|
68 |
with get_db_cursor() as cursor:
|
69 |
+
cursor.execute(
|
70 |
+
"INSERT INTO inputs (entry_id, input) VALUES (?, ?)",
|
71 |
+
(job.id, output.processed),
|
72 |
+
)
|
73 |
cursor.execute(
|
74 |
(
|
75 |
"INSERT INTO summaries (entry_id, summary, summarizer_name)"
|
tests/{test_webservice.py → test_app.py}
RENAMED
@@ -19,6 +19,13 @@ class TestWebservice:
|
|
19 |
filename = tmp_path / "test-db.sqlite"
|
20 |
os.environ["DB_FILE_NAME"] = str(filename)
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
@pytest.fixture
|
23 |
def client(self):
|
24 |
from gistillery.webservice import app
|
@@ -117,6 +124,31 @@ class TestWebservice:
|
|
117 |
}
|
118 |
assert last_updated is None
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
def test_submitted_job_status_done(self, client, mlregistry, monkeypatch):
|
121 |
# monkeypatch uuid4 to return a known value
|
122 |
job_id = "abc1234"
|
@@ -182,26 +214,17 @@ class TestWebservice:
|
|
182 |
assert resp0["summary"] == "this would"
|
183 |
assert resp0["tags"] == sorted(["#this", "#would", "#be"])
|
184 |
|
185 |
-
def
|
186 |
-
# monkeypatch uuid4 to return a known value
|
187 |
-
job_id = "abc1234"
|
188 |
-
monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
|
189 |
client.post("/submit", json={"author": "ben", "content": "this is a test"})
|
190 |
-
|
|
|
191 |
|
192 |
-
|
193 |
-
|
194 |
|
195 |
-
|
196 |
-
|
197 |
-
lambda job, registry: raise_(RuntimeError("something went wrong")),
|
198 |
-
)
|
199 |
self.process_jobs(mlregistry)
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
output.pop("last_updated")
|
204 |
-
assert output == {
|
205 |
-
"id": job_id,
|
206 |
-
"status": "failed",
|
207 |
-
}
|
|
|
19 |
filename = tmp_path / "test-db.sqlite"
|
20 |
os.environ["DB_FILE_NAME"] = str(filename)
|
21 |
|
22 |
+
@pytest.fixture
|
23 |
+
def cursor(self):
|
24 |
+
from gistillery.db import get_db_cursor
|
25 |
+
|
26 |
+
with get_db_cursor() as cursor:
|
27 |
+
yield cursor
|
28 |
+
|
29 |
@pytest.fixture
|
30 |
def client(self):
|
31 |
from gistillery.webservice import app
|
|
|
124 |
}
|
125 |
assert last_updated is None
|
126 |
|
127 |
+
def test_submitted_job_failed(self, client, mlregistry, monkeypatch):
|
128 |
+
# monkeypatch uuid4 to return a known value
|
129 |
+
job_id = "abc1234"
|
130 |
+
monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
|
131 |
+
client.post("/submit", json={"author": "ben", "content": "this is a test"})
|
132 |
+
# patch gistillery.worker._process_job to raise an exception
|
133 |
+
|
134 |
+
def raise_(ex):
|
135 |
+
raise ex
|
136 |
+
|
137 |
+
# make the job processing fail
|
138 |
+
monkeypatch.setattr(
|
139 |
+
"gistillery.worker._process_job",
|
140 |
+
lambda job, registry: raise_(RuntimeError("something went wrong")),
|
141 |
+
)
|
142 |
+
self.process_jobs(mlregistry)
|
143 |
+
|
144 |
+
resp = client.get(f"/check_job_status/{job_id}")
|
145 |
+
output = resp.json()
|
146 |
+
output.pop("last_updated")
|
147 |
+
assert output == {
|
148 |
+
"id": job_id,
|
149 |
+
"status": "failed",
|
150 |
+
}
|
151 |
+
|
152 |
def test_submitted_job_status_done(self, client, mlregistry, monkeypatch):
|
153 |
# monkeypatch uuid4 to return a known value
|
154 |
job_id = "abc1234"
|
|
|
214 |
assert resp0["summary"] == "this would"
|
215 |
assert resp0["tags"] == sorted(["#this", "#would", "#be"])
|
216 |
|
217 |
+
def test_clear(self, client, cursor, mlregistry):
|
|
|
|
|
|
|
218 |
client.post("/submit", json={"author": "ben", "content": "this is a test"})
|
219 |
+
self.process_jobs(mlregistry)
|
220 |
+
assert cursor.execute("SELECT COUNT(*) c FROM entries").fetchone()[0] == 1
|
221 |
|
222 |
+
client.get("/clear")
|
223 |
+
assert cursor.execute("SELECT COUNT(*) c FROM entries").fetchone()[0] == 0
|
224 |
|
225 |
+
def test_inputs_stored(self, client, cursor, mlregistry):
|
226 |
+
client.post("/submit", json={"author": "ben", "content": " this is a test\n"})
|
|
|
|
|
227 |
self.process_jobs(mlregistry)
|
228 |
+
rows = cursor.execute("SELECT * FROM inputs").fetchall()
|
229 |
+
assert len(rows) == 1
|
230 |
+
assert rows[0].input == "this is a test"
|
|
|
|
|
|
|
|
|
|