neuralworm committed on
Commit
84b09e2
1 Parent(s): a8a8bc2

really quit async experiment

Browse files
Files changed (1) hide show
  1. app.py +196 -209
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import json
3
  import re
4
- import asyncio
5
- import aiosqlite
6
  import logging
7
  from collections import defaultdict
8
  from util import process_json_files
@@ -13,11 +12,10 @@ from urllib.parse import quote_plus
13
  # Set up logging
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
- # Global variables
17
- conn = None # Database connection (will be initialized asynchronously)
18
  translator = None
19
  book_names = {} # Dictionary to store book names
20
- ongoing_search_task = None # Track ongoing search tasks
21
 
22
  def flatten_text(text):
23
  """Helper function to flatten nested lists into a single list."""
@@ -25,30 +23,29 @@ def flatten_text(text):
25
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
26
  return text
27
 
28
- async def initialize_database():
29
- """Initializes the SQLite database asynchronously."""
30
  global conn
31
- conn = await aiosqlite.connect('gematria.db')
32
- async with conn:
33
- c = await conn.cursor()
34
- await c.execute('''
35
- CREATE TABLE IF NOT EXISTS results (
36
- gematria_sum INTEGER,
37
- words TEXT UNIQUE,
38
- translation TEXT,
39
- book INTEGER,
40
- chapter INTEGER,
41
- verse INTEGER,
42
- PRIMARY KEY (words, book, chapter, verse)
43
- )
44
- ''')
45
- await c.execute('''
46
- CREATE TABLE IF NOT EXISTS processed_books (
47
- book INTEGER PRIMARY KEY,
48
- max_phrase_length INTEGER
49
- )
50
- ''')
51
- await conn.commit()
52
  logging.info("Database initialized.")
53
 
54
  def initialize_translator():
@@ -57,89 +54,88 @@ def initialize_translator():
57
  translator = GoogleTranslator(source='iw', target='en')
58
  logging.info("Translator initialized.")
59
 
60
- async def insert_phrase_to_db(conn, gematria_sum, phrase_candidate, book, chapter, verse):
61
  """Inserts a phrase and its Gematria value into the database."""
62
- async with conn:
63
- c = await conn.cursor()
64
- try:
65
- await c.execute('''
66
- INSERT INTO results (gematria_sum, words, book, chapter, verse)
67
- VALUES (?, ?, ?, ?, ?)
68
- ''', (gematria_sum, phrase_candidate, book, chapter, verse))
69
- await conn.commit()
70
- logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
71
- except aiosqlite.IntegrityError:
72
- logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
73
-
74
- async def populate_database_async(conn, tanach_texts, max_phrase_length=1):
75
- """Asynchronous version of populate_database using aiosqlite."""
76
- global book_names
77
  logging.info("Populating database...")
78
- async with conn:
79
- c = await conn.cursor()
80
-
81
- for book_id, text in tanach_texts:
82
- await c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
83
- result = await c.fetchone()
84
- if result and result[0] >= max_phrase_length:
85
- logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  continue
87
-
88
- logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
89
- if 'text' not in text or not isinstance(text['text'], list):
90
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
91
- continue
92
-
93
- title = text.get('title', 'Unknown')
94
- book_names[book_id] = title
95
-
96
- chapters = text['text']
97
- for chapter_id, chapter in enumerate(chapters):
98
- if not isinstance(chapter, list):
99
- logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
100
- continue
101
- for verse_id, verse in enumerate(chapter):
102
- verse_text = flatten_text(verse)
103
- verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
104
- verse_text = re.sub(r" +", " ", verse_text)
105
- words = verse_text.split()
106
- for length in range(1, max_phrase_length + 1):
107
- for start in range(len(words) - length + 1):
108
- phrase_candidate = " ".join(words[start:start + length])
109
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
110
- await insert_phrase_to_db(conn, gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
111
- try:
112
- await c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
113
- except aiosqlite.IntegrityError:
114
- await c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id))
115
- await conn.commit()
116
  logging.info("Database population complete.")
117
 
118
- async def get_translation(phrase):
119
  """Retrieves or generates the English translation of a Hebrew phrase."""
120
  global translator, conn
121
- async with conn:
122
- c = await conn.cursor()
123
- await c.execute('''
124
- SELECT translation FROM results
 
 
 
 
 
 
 
 
 
125
  WHERE words = ?
126
- ''', (phrase,))
127
- result = await c.fetchone()
128
- if result and result[0]:
129
- return result[0]
130
- else:
131
- translation = translate_and_store(phrase)
132
- await c.execute('''
133
- UPDATE results
134
- SET translation = ?
135
- WHERE words = ?
136
- ''', (translation, phrase))
137
- await conn.commit()
138
- return translation
139
 
140
  def translate_and_store(phrase):
141
  global translator
142
- max_retries = 3
143
  retries = 0
144
 
145
  while retries < max_retries:
@@ -148,123 +144,116 @@ def translate_and_store(phrase):
148
  logging.debug(f"Translated phrase: {translation}")
149
  return translation
150
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
151
- exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
152
  retries += 1
153
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
154
 
155
  logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
156
- return "[Translation Error]"
157
 
158
- async def search_gematria_in_db(conn, gematria_sum):
159
  """Searches the database for phrases with a given Gematria value."""
160
- async with conn:
161
- c = await conn.cursor()
162
- await c.execute('''
163
- SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
164
- ''', (gematria_sum,))
165
- results = await c.fetchall()
166
- logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
167
- return results
168
-
169
- async def gematria_search_interface(phrase, request: gr.Request):
170
  """The main function for the Gradio interface."""
171
- global ongoing_search_task, conn, book_names
172
-
173
  if not phrase.strip():
174
  return "Please enter a phrase."
175
 
176
- # Cancel any ongoing search task
177
- if ongoing_search_task is not None and not ongoing_search_task.done():
178
- ongoing_search_task.cancel()
179
-
180
- # Start the search asynchronously
181
- async def search_task():
182
- matching_phrases = await search_gematria_in_db(conn, calculate_gematria(phrase.replace(" ", "")))
183
-
184
- if not matching_phrases:
185
- return "No matching phrases found."
186
-
187
- # Sort and group results
188
- sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))
189
- results_by_book = defaultdict(list)
190
- for words, book, chapter, verse in sorted_phrases:
191
- results_by_book[book].append((words, chapter, verse))
192
-
193
- # Format results for display
194
- results = []
195
- results.append("<div class='results-container'>")
196
- for book, phrases in results_by_book.items():
197
- results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
198
- for words, chapter, verse in phrases:
199
- translation = await get_translation(words) # Await the translation here
200
- book_name_english = book_names.get(book, 'Unknown')
201
- link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
202
-
203
- results.append(f"""
204
- <div class='result-item'>
205
- <p>Chapter: {chapter}, Verse: {verse}</p>
206
- <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
207
- <p>Translation: {translation}</p>
208
- <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
209
- </div>
210
- """)
211
- results.append("</div>")
212
-
213
- # Add CSS styling
214
- style = """
215
- <style>
216
- .results-container {
217
- display: grid;
218
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
219
- gap: 20px;
220
- }
221
-
222
- .result-item {
223
- border: 1px solid #ccc;
224
- padding: 15px;
225
- border-radius: 5px;
226
- box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
227
- }
228
-
229
- .hebrew-phrase {
230
- font-family: 'SBL Hebrew', 'Ezra SIL', serif;
231
- direction: rtl;
232
- }
233
-
234
- .bible-link {
235
- display: block;
236
- margin-top: 10px;
237
- color: #007bff;
238
- text-decoration: none;
239
- }
240
- </style>
241
- """
242
-
243
- return style + "\n".join(results)
244
-
245
- ongoing_search_task = request.app.get_blocks().queue.insert(fn=search_task, queue_id="gematria")
246
- result = request.app.get_blocks().queue.get_output(queue_id="gematria", job_hash=ongoing_search_task.job_hash)
247
- return result
248
-
249
- async def run_app():
250
- """Initializes, populates the database, and launches the Gradio app."""
251
- global conn
252
- await initialize_database()
253
  initialize_translator()
254
 
255
- # Move database population to a separate function
256
- async def populate_database():
257
- tanach_texts_1_1_1 = process_json_files(1, 1)
258
- tanach_texts_1_39_1 = process_json_files(1, 39)
259
- tanach_texts_27_27_4 = process_json_files(27, 27)
260
- await populate_database_async(conn, tanach_texts_1_1_1, max_phrase_length=1)
261
- await populate_database_async(conn, tanach_texts_1_39_1, max_phrase_length=1)
262
- await populate_database_async(conn, tanach_texts_27_27_4, max_phrase_length=4)
263
-
264
- # Start database population in the background
265
- asyncio.create_task(populate_database())
266
 
267
- # Create the main Gradio interface
268
  iface = gr.Interface(
269
  fn=gematria_search_interface,
270
  inputs=gr.Textbox(label="Enter phrase"),
@@ -272,11 +261,9 @@ async def run_app():
272
  title="Gematria Search in Tanach",
273
  description="Search for phrases in the Tanach that have the same Gematria value.",
274
  live=False,
275
- allow_flagging="never",
276
- concurrency_limit=3
277
  )
278
-
279
- iface.launch(max_threads=10)
280
 
281
  if __name__ == "__main__":
282
- asyncio.run(run_app())
 
1
  import gradio as gr
2
  import json
3
  import re
4
+ import sqlite3
 
5
  import logging
6
  from collections import defaultdict
7
  from util import process_json_files
 
12
  # Set up logging
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
+ # Global variables for database connection and translator
16
+ conn = None
17
  translator = None
18
  book_names = {} # Dictionary to store book names
 
19
 
20
  def flatten_text(text):
21
  """Helper function to flatten nested lists into a single list."""
 
23
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
24
  return text
25
 
26
def initialize_database():
    """Open ``gematria.db`` and create the application tables if missing.

    The opened connection is stored in the module-level ``conn``.

    Tables:
        results: ``gematria_sum``, ``words``, ``translation``, ``book``,
            ``chapter``, ``verse``.  Note that ``words`` is declared UNIQUE
            in addition to the composite primary key, so a given phrase
            text can be stored only once regardless of its location.
        processed_books: ``book`` (primary key) and ``max_phrase_length``.
    """
    global conn
    conn = sqlite3.connect('gematria.db')

    schema_statements = (
        '''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT UNIQUE,
            translation TEXT,
            book INTEGER,
            chapter INTEGER,
            verse INTEGER,
            PRIMARY KEY (words, book, chapter, verse)
        )
        ''',
        '''
        CREATE TABLE IF NOT EXISTS processed_books (
            book INTEGER PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''',
    )

    cursor = conn.cursor()
    for statement in schema_statements:
        cursor.execute(statement)
    conn.commit()

    logging.info("Database initialized.")
50
 
51
  def initialize_translator():
 
54
  translator = GoogleTranslator(source='iw', target='en')
55
  logging.info("Translator initialized.")
56
 
57
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
    """Store one phrase and its Gematria value in the ``results`` table.

    The row is committed immediately.  If the insert violates a table
    constraint (the phrase is already stored), the resulting
    ``sqlite3.IntegrityError`` is logged at debug level and swallowed.
    """
    global conn
    row = (gematria_sum, phrase_candidate, book, chapter, verse)
    try:
        # Connection.execute creates the temporary cursor for us.
        conn.execute(
            '''
            INSERT INTO results (gematria_sum, words, book, chapter, verse)
            VALUES (?, ?, ?, ?, ?)
            ''',
            row,
        )
        conn.commit()
        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
    except sqlite3.IntegrityError:
        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
70
+
71
def populate_database(tanach_texts, max_phrase_length=1):
    """Populates the database with phrases from the Tanach and their Gematria values.

    For every verse, each run of 1..``max_phrase_length`` consecutive words
    is stored together with its Gematria sum.  A book is skipped when
    ``processed_books`` already records it with an equal or larger
    ``max_phrase_length``.

    Args:
        tanach_texts: Iterable of ``(book_id, text)`` pairs.  ``text`` is a
            mapping with a ``'text'`` list of chapters (each a list of
            verses) and an optional ``'title'``.
        max_phrase_length: Longest phrase, in words, to index per verse.
    """
    global conn, book_names
    logging.info("Populating database...")
    c = conn.cursor()
    # Everything except Hebrew letters (U+05D0..U+05EA) and spaces is dropped.
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    for book_id, text in tanach_texts:  # Unpack the tuple (book_id, text)
        # Record the title BEFORE the skip check: book_names lives only in
        # memory, so on a restart with an already-populated database every
        # book is skipped and the map would otherwise stay empty.
        if isinstance(text, dict):
            book_names[book_id] = text.get('title', 'Unknown')

        c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
        result = c.fetchone()
        if result and result[0] >= max_phrase_length:
            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
            continue

        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
        if not isinstance(text, dict) or not isinstance(text.get('text'), list):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
            continue

        title = text.get('title', 'Unknown')

        for chapter_id, chapter in enumerate(text['text']):
            if not isinstance(chapter, list):
                logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                continue
            for verse_id, verse in enumerate(chapter):
                # str.split() already collapses runs of whitespace, so no
                # separate space-normalisation pass is needed.
                words = non_hebrew.sub("", flatten_text(verse)).split()
                for length in range(1, max_phrase_length + 1):
                    for start in range(len(words) - length + 1):
                        window = words[start:start + length]
                        phrase_candidate = " ".join(window)
                        gematria_sum = calculate_gematria("".join(window))
                        insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)

        # Upsert: ``book`` is the primary key, so this inserts a new row or
        # overwrites the previously recorded max_phrase_length.
        c.execute(
            '''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''',
            (book_id, max_phrase_length),
        )
        conn.commit()

    logging.info("Database population complete.")
113
 
114
def get_translation(phrase):
    """Retrieves or generates the English translation of a Hebrew phrase.

    A translation already stored in ``results`` is returned directly.
    Otherwise ``translate_and_store`` is called and its result is written
    back to every row whose ``words`` equals ``phrase``.

    The failure marker is never treated as a cached translation and is
    never persisted, so a phrase whose translation failed is retried on the
    next lookup instead of showing the error forever.

    Args:
        phrase: The Hebrew phrase exactly as stored in ``results.words``.

    Returns:
        The translation, or the failure marker when translation failed.
    """
    global conn
    # Must match the value translate_and_store returns after its retries
    # are exhausted.
    failure_marker = "[Translation Error]"

    c = conn.cursor()
    c.execute('''
        SELECT translation FROM results
        WHERE words = ?
    ''', (phrase,))
    row = c.fetchone()
    cached = row[0] if row else None
    if cached and cached != failure_marker:
        return cached

    translation = translate_and_store(phrase)
    if translation != failure_marker:
        c.execute('''
            UPDATE results
            SET translation = ?
            WHERE words = ?
        ''', (translation, phrase))
        conn.commit()
    return translation
134
+
 
 
 
 
 
 
 
 
 
135
 
136
  def translate_and_store(phrase):
137
  global translator
138
+ max_retries = 3 # You can adjust the number of retries
139
  retries = 0
140
 
141
  while retries < max_retries:
 
144
  logging.debug(f"Translated phrase: {translation}")
145
  return translation
146
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
147
+ exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e: # Add ConnectionError
148
  retries += 1
149
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
150
 
151
  logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
152
+ return "[Translation Error]"
153
 
154
def search_gematria_in_db(gematria_sum):
    """Look up all stored phrases whose Gematria value equals ``gematria_sum``.

    Returns:
        A list of ``(words, book, chapter, verse)`` tuples read from the
        ``results`` table; empty when nothing matches.
    """
    global conn
    query = '''
        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
    '''
    matches = conn.execute(query, (gematria_sum,)).fetchall()
    logging.debug(f"Found {len(matches)} matching phrases for Gematria: {gematria_sum}")
    return matches
164
+
165
def gematria_search_interface(phrase):
    """The main function for the Gradio interface.

    Computes the Gematria value of ``phrase`` (spaces removed), looks up all
    stored phrases with the same value and renders them as an HTML fragment
    grouped by book, each with its translation and a Bible Gateway link.

    Args:
        phrase: Text entered by the user.

    Returns:
        An HTML string (CSS block followed by the results), or a plain
        message when the input is empty or nothing matches.
    """
    # Local import keeps this block self-contained; html is standard library.
    import html

    global conn, book_names

    if not phrase or not phrase.strip():
        return "Please enter a phrase."

    # The lookup helpers read the module-level ``conn``, so a connection
    # opened for this request is published there.  The local reference
    # guarantees we close the connection *we* opened.
    # NOTE(review): concurrent requests still share the global; passing the
    # connection to the helpers explicitly would remove that race.
    connection = sqlite3.connect('gematria.db')
    conn = connection
    try:
        phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
        logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")

        matching_phrases = search_gematria_in_db(phrase_gematria)
        if not matching_phrases:
            return "No matching phrases found."

        # Sort results by book, chapter, and verse
        sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))

        # Group results by book
        results_by_book = defaultdict(list)
        for words, book, chapter, verse in sorted_phrases:
            results_by_book[book].append((words, chapter, verse))

        # Format results for display; all text that originates outside this
        # function (book titles, stored phrases, translator output) is
        # HTML-escaped before being interpolated into markup.
        results = ["<div class='results-container'>"]
        for book, phrases in results_by_book.items():
            book_name_english = book_names.get(book, 'Unknown')
            results.append(f"<h4>Book: {html.escape(str(book_name_english))}</h4>")
            for words, chapter, verse in phrases:
                translation = get_translation(words)
                link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"

                results.append(f"""
                <div class='result-item'>
                    <p>Chapter: {chapter}, Verse: {verse}</p>
                    <p class='hebrew-phrase'>Hebrew Phrase: {html.escape(str(words))}</p>
                    <p>Translation: {html.escape(str(translation))}</p>
                    <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
                </div>
                """)
        results.append("</div>")  # Close results-container div
    finally:
        # Runs on every exit path, including the early "no matches" return
        # and any exception raised while searching or translating.
        connection.close()

    # Add CSS styling
    style = """
    <style>
        .results-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
            gap: 20px;
        }

        .result-item {
            border: 1px solid #ccc;
            padding: 15px;
            border-radius: 5px;
            box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
        }

        .hebrew-phrase {
            font-family: 'SBL Hebrew', 'Ezra SIL', serif;
            direction: rtl;
        }

        .bible-link {
            display: block;
            margin-top: 10px;
            color: #007bff;
            text-decoration: none;
        }
    </style>
    """

    return style + "\n".join(results)  # Concatenate style and results
243
+
244
+ def run_app():
245
+ """Initializes and launches the Gradio app."""
246
+ initialize_database()
247
  initialize_translator()
248
 
249
+ # Pre-populate the database
250
+ tanach_texts = process_json_files(1, 39) # Process all books
251
+ populate_database(tanach_texts, max_phrase_length=1)
252
+ tanach_texts = process_json_files(1, 1) # Process all books
253
+ populate_database(tanach_texts, max_phrase_length=4)
254
+ tanach_texts = process_json_files(27, 27) # Process all books
255
+ populate_database(tanach_texts, max_phrase_length=4)
 
 
 
 
256
 
 
257
  iface = gr.Interface(
258
  fn=gematria_search_interface,
259
  inputs=gr.Textbox(label="Enter phrase"),
 
261
  title="Gematria Search in Tanach",
262
  description="Search for phrases in the Tanach that have the same Gematria value.",
263
  live=False,
264
+ allow_flagging="never"
 
265
  )
266
+ iface.launch()
 
267
 
268
  if __name__ == "__main__":
269
+ run_app()