gematria_date_sums

Running

App Files Community

neuralworm committed on Jul 12

Commit

e8bf2aa

•

1 Parent(s): 6c0aa26

every result is now saved, no links

Browse files

Files changed (3) hide show

app.py +169 -170
gematria.db +2 -2
util.py +6 -9

app.py CHANGED Viewed

@@ -7,206 +7,205 @@ from collections import defaultdict
 from util import process_json_files
 from gematria import calculate_gematria
 from deep_translator import GoogleTranslator, exceptions
 # Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Global variables for database connection and translator
 conn = None
 translator = None
 def flatten_text(text):
-  """Helper function to flatten nested lists into a single list."""
-  if isinstance(text, list):
-    return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
-  return text
 def initialize_database():
-  """Initializes the SQLite database."""
-  global conn
-  conn = sqlite3.connect('gematria.db')
-  c = conn.cursor()
-  c.execute('''
     CREATE TABLE IF NOT EXISTS results (
-      gematria_sum INTEGER,
-      words TEXT UNIQUE,
-      translation TEXT,
-      occurrences TEXT,
-      PRIMARY KEY (words)
     )
-  ''')
-  c.execute('''
     CREATE TABLE IF NOT EXISTS processed_books (
-      book INTEGER PRIMARY KEY,
-      max_phrase_length INTEGER
     )
-  ''')
-  conn.commit()
-  logging.info("Database initialized.")
-def initialize_translator():
-  """Initializes the Google Translator."""
-  global translator
-  translator = GoogleTranslator(source='iw', target='en')
-  logging.info("Translator initialized.")
-def insert_phrase_to_db(gematria_sum, phrase_candidate, translation, occurrence):
-  """Inserts a phrase and its Gematria value into the database."""
-  global conn
-  c = conn.cursor()
-  try:
-    c.execute('''
-      INSERT INTO results (gematria_sum, words, translation, occurrences)
-      VALUES (?, ?, ?, ?)
-    ''', (gematria_sum, phrase_candidate, translation, occurrence))
-    conn.commit()
-    logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum})")
-  except sqlite3.IntegrityError:
-    logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum})")
-    c.execute('''
-      SELECT occurrences FROM results WHERE words = ?
-    ''', (phrase_candidate,))
-    existing_occurrences = c.fetchone()[0]
-    updated_occurrences = existing_occurrences + ';' + occurrence
-    c.execute('''
-      UPDATE results
-      SET occurrences = ?
-      WHERE words = ?
-    ''', (updated_occurrences, phrase_candidate))
     conn.commit()
-def populate_database(tanach_texts, max_phrase_length=3):
-  """Populates the database with phrases from the Tanach and their Gematria values."""
-  global conn
-  logging.info("Populating database...")
-  c = conn.cursor()
-  for book_id, text in enumerate(tanach_texts):
-    c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id + 1,))
-    result = c.fetchone()
-    if result and result[0] >= max_phrase_length:
-      logging.info(f"Skipping book {book_id+1}: Already processed with max_phrase_length {result[0]}")
-      continue
-    logging.info(f"Processing book {book_id+1} with max_phrase_length {max_phrase_length}")
-    if 'text' not in text or not isinstance(text['text'], list):
-      logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
-      continue
-    title = text.get('title', 'Unknown')
-    chapters = text['text']
-    for chapter_id, chapter in enumerate(chapters):
-      if not isinstance(chapter, list):
-        logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
-        continue
-      for verse_id, verse in enumerate(chapter):
-        verse_text = flatten_text(verse)
-        verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
-        verse_text = re.sub(r" +", " ", verse_text)
-        words = verse_text.split()
-        for length in range(1, max_phrase_length + 1):
-          for start in range(len(words) - length + 1):
-            phrase_candidate = " ".join(words[start:start + length])
-            gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
-            occurrence = f"{book_id+1}:{title}:{chapter_id+1}:{verse_id+1}"
-            insert_phrase_to_db(gematria_sum, phrase_candidate, None, occurrence)  # No translation initially
     try:
-      c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id + 1, max_phrase_length))
     except sqlite3.IntegrityError:
-      c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id + 1))
-    conn.commit()
-  logging.info("Database population complete.")
 def get_translation(phrase):
-  """Retrieves or generates the English translation of a Hebrew phrase."""
-  global translator, conn
-  c = conn.cursor()
-  c.execute('''
     SELECT translation FROM results
     WHERE words = ?
-  ''', (phrase,))
-  result = c.fetchone()
-  if result and result[0]:
-    return result[0]
-  else:
-    translation = translate_and_store(phrase)
-    c.execute('''
-      UPDATE results
-      SET translation = ?
-      WHERE words = ?
-    ''', (translation, phrase))
-    conn.commit()
-    return translation
 def translate_and_store(phrase):
-  """Translates a phrase using Google Translate."""
-  global translator
-  try:
-    translation = translator.translate(phrase)
-    logging.debug(f"Translated phrase: {translation}")
-    return translation
-  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
-          exceptions.ServerException, exceptions.RequestError) as e:
-    logging.error(f"Error translating phrase '{phrase}': {e}")
-    return "[Translation Error]"
 def search_gematria_in_db(gematria_sum):
-  """Searches the database for phrases with a given Gematria value."""
-  global conn
-  c = conn.cursor()
-  c.execute('''
-    SELECT words, occurrences FROM results WHERE gematria_sum = ?
-  ''', (gematria_sum,))
-  results = c.fetchall()
-  logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
-  return results
 def gematria_search_interface(phrase):
-  """The main function for the Gradio interface."""
-  if not phrase.strip():
-    return "Please enter a phrase."
-  # Datenbankverbindung innerhalb der Funktion erstellen
-  global conn
-  conn = sqlite3.connect('gematria.db')
-  c = conn.cursor()
-  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
-  logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
-  matching_phrases = search_gematria_in_db(phrase_gematria)
-  if not matching_phrases:
-    return "No matching phrases found."
-  # Format results for display
-  results = []
-  for words, occurrences in matching_phrases:
-    translation = get_translation(words)
-    for occurrence in occurrences.split(';'):
-      book, title, chapter, verse = occurrence.split(':')
-      results.append(f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n")
-  conn.close()
-  return "\n".join(results)
 def run_app():
-  """Initializes and launches the Gradio app."""
-  initialize_database()
-  initialize_translator()
-  # Pre-populate the database
-  tanach_texts = process_json_files(1, 39)
-  populate_database(tanach_texts)
-  iface = gr.Interface(
-    fn=gematria_search_interface,
-    inputs=gr.Textbox(label="Enter phrase"),
-    outputs=gr.Textbox(label="Results"),
-    title="Gematria Search in Tanach",
-    description="Search for phrases in the Tanach that have the same Gematria value.",
-    live=False,
-    allow_flagging="never"
-  )
-  iface.launch()
 if __name__ == "__main__":
-  run_app()

 from util import process_json_files
 from gematria import calculate_gematria
 from deep_translator import GoogleTranslator, exceptions
+from urllib.parse import quote_plus
 # Set up logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 # Global variables for database connection and translator
 conn = None
 translator = None
+book_names = {}  # Dictionary to store book names
 def flatten_text(text):
+    """Helper function to flatten nested lists into a single list."""
+    if isinstance(text, list):
+        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
+    return text
 def initialize_database():
+    """Initializes the SQLite database."""
+    global conn
+    conn = sqlite3.connect('gematria.db')
+    c = conn.cursor()
+    c.execute('''
     CREATE TABLE IF NOT EXISTS results (
+        gematria_sum INTEGER,
+        words TEXT UNIQUE,
+        translation TEXT,
+        book INTEGER,
+        chapter INTEGER,
+        verse INTEGER,
+        PRIMARY KEY (words, book, chapter, verse)
     )
+    ''')
+    c.execute('''
     CREATE TABLE IF NOT EXISTS processed_books (
+        book INTEGER PRIMARY KEY,
+        max_phrase_length INTEGER
     )
+    ''')
     conn.commit()
+    logging.info("Database initialized.")
+def initialize_translator():
+    """Initializes the Google Translator."""
+    global translator
+    translator = GoogleTranslator(source='iw', target='en')
+    logging.info("Translator initialized.")
+def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
+    """Inserts a phrase and its Gematria value into the database."""
+    global conn
+    c = conn.cursor()
     try:
+        c.execute('''
+            INSERT INTO results (gematria_sum, words, book, chapter, verse)
+            VALUES (?, ?, ?, ?, ?)
+        ''', (gematria_sum, phrase_candidate, book, chapter, verse))
+        conn.commit()
+        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
     except sqlite3.IntegrityError:
+        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
+def populate_database(tanach_texts, max_phrase_length=3):
+    """Populates the database with phrases from the Tanach and their Gematria values."""
+    global conn, book_names
+    logging.info("Populating database...")
+    c = conn.cursor()
+    for book_id, text in tanach_texts:  # Unpack the tuple (book_id, text)
+        c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
+        result = c.fetchone()
+        if result and result[0] >= max_phrase_length:
+            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
+            continue
+        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
+        if 'text' not in text or not isinstance(text['text'], list):
+            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
+            continue
+        title = text.get('title', 'Unknown')
+        book_names[book_id] = title  # Store book name
+        chapters = text['text']
+        for chapter_id, chapter in enumerate(chapters):
+            if not isinstance(chapter, list):
+                logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
+                continue
+            for verse_id, verse in enumerate(chapter):
+                verse_text = flatten_text(verse)
+                verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
+                verse_text = re.sub(r" +", " ", verse_text)
+                words = verse_text.split()
+                for length in range(1, max_phrase_length + 1):
+                    for start in range(len(words) - length + 1):
+                        phrase_candidate = " ".join(words[start:start + length])
+                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
+                        insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
+        try:
+            c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
+        except sqlite3.IntegrityError:
+            c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id))
+        conn.commit()
+    logging.info("Database population complete.")
 def get_translation(phrase):
+    """Retrieves or generates the English translation of a Hebrew phrase."""
+    global translator, conn
+    c = conn.cursor()
+    c.execute('''
     SELECT translation FROM results
     WHERE words = ?
+    ''', (phrase,))
+    result = c.fetchone()
+    if result and result[0]:
+        return result[0]
+    else:
+        translation = translate_and_store(phrase)
+        c.execute('''
+            UPDATE results
+            SET translation = ?
+            WHERE words = ?
+        ''', (translation, phrase))
+        conn.commit()
+        return translation
 def translate_and_store(phrase):
+    """Translates a phrase using Google Translate."""
+    global translator
+    try:
+        translation = translator.translate(phrase)
+        logging.debug(f"Translated phrase: {translation}")
+        return translation
+    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
+            exceptions.ServerException, exceptions.RequestError) as e:
+        logging.error(f"Error translating phrase '{phrase}': {e}")
+        return "[Translation Error]"
 def search_gematria_in_db(gematria_sum):
+    """Searches the database for phrases with a given Gematria value."""
+    global conn
+    c = conn.cursor()
+    c.execute('''
+    SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
+    ''', (gematria_sum,))
+    results = c.fetchall()
+    logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
+    return results
 def gematria_search_interface(phrase):
+    """The main function for the Gradio interface."""
+    if not phrase.strip():
+        return "Please enter a phrase."
+    # Create database connection inside the function
+    global conn, book_names
+    conn = sqlite3.connect('gematria.db')
+    c = conn.cursor()
+    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
+    logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
+    matching_phrases = search_gematria_in_db(phrase_gematria)
+    if not matching_phrases:
+        return "No matching phrases found."
+    # Format results for display
+    results = []
+    for words, book, chapter, verse in matching_phrases:
+        translation = get_translation(words)
+        book_name_english = book_names.get(book, 'Unknown')  # Get book name
+        link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
+        results.append(
+            f"Book: {book_name_english}\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n<a href='{link}' target='_blank'>[See on Bible Gateway]</a>\n\n")
+    conn.close()
+    return "\n".join(results)
 def run_app():
+    """Initializes and launches the Gradio app."""
+    initialize_database()
+    initialize_translator()
+    # Pre-populate the database
+    tanach_texts = process_json_files(1, 1)  # Process all books
+    populate_database(tanach_texts, max_phrase_length=4)
+    tanach_texts = process_json_files(27, 27)  # Process all books
+    populate_database(tanach_texts, max_phrase_length=4)
+    iface = gr.Interface(
+        fn=gematria_search_interface,
+        inputs=gr.Textbox(label="Enter phrase"),
+        outputs=gr.HTML(label="Results"),
+        title="Gematria Search in Tanach",
+        description="Search for phrases in the Tanach that have the same Gematria value.",
+        live=False,
+        allow_flagging="never"
+    )
+    iface.launch()
 if __name__ == "__main__":
+    run_app()

gematria.db CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fcf5c1564217654d09ee0f57dc0025114c5df51a352f829932e86a10570ce09
-size 47960064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4203619e032a9d31dd2232d2ac4960f26ce6af8fe04728bb0a53aac0ff05a5e7
+size 7172096

util.py CHANGED Viewed

@@ -11,17 +11,14 @@ def process_json_files(start, end):
             with open(file_name, 'r', encoding='utf-8') as file:
                 data = json.load(file)
                 if data:
-                    results.append({
-                        "book": i,
-                        "title": data.get("title", "No title"),
-			"text": data.get("text", "No text"),
-                    })
         except FileNotFoundError:
-            results.append({"error": f"File {file_name} not found."})
         except json.JSONDecodeError as e:
-            results.append({"error": f"File {file_name} could not be read as JSON: {e}"})
         except KeyError as e:
-            results.append({"error": f"Expected key 'text' is missing in {file_name}: {e}"})
-    return results

             with open(file_name, 'r', encoding='utf-8') as file:
                 data = json.load(file)
                 if data:
+                    # Return a tuple of book_id and text data
+                    results.append((i, {"title": data.get("title", "No title"), "text": data.get("text", [])}))
         except FileNotFoundError:
+            results.append((i, {"error": f"File {file_name} not found."}))  # Use a tuple here
         except json.JSONDecodeError as e:
+            results.append((i, {"error": f"File {file_name} could not be read as JSON: {e}"}))  # Use a tuple here
         except KeyError as e:
+            results.append((i, {"error": f"Expected key 'text' is missing in {file_name}: {e}"}))  # Use a tuple here
+    return results