gematria_date_sums

Running

App Files Community

neuralworm committed on Jul 13

Commit

d07d4c9

•

1 Parent(s): 21790a2

better populate speed, refactor, fix

Browse files

Files changed (3) hide show

app.py +237 -237
gematria.db +0 -3
util.py +18 -6

app.py CHANGED Viewed

@@ -10,263 +10,263 @@ from deep_translator import GoogleTranslator, exceptions
 from urllib.parse import quote_plus
 # Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Global variables for database connection and translator
 conn = None
 translator = None
-book_names = {}  # Dictionary to store book names
-def flatten_text(text):
-  """Helper function to flatten nested lists into a single list."""
-  if isinstance(text, list):
-    return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
-  return text
 def initialize_database():
-  """Initializes the SQLite database."""
-  global conn
-  conn = sqlite3.connect('gematria.db')
-  c = conn.cursor()
-  c.execute('''
-  CREATE TABLE IF NOT EXISTS results (
-    gematria_sum INTEGER,
-    words TEXT,
-    translation TEXT,
-    book INTEGER,
-    chapter INTEGER,
-    verse INTEGER,
-    PRIMARY KEY (gematria_sum, book, chapter, verse)
-  )
-  ''')
-  c.execute('''
-  CREATE TABLE IF NOT EXISTS processed_books (
-    book INTEGER PRIMARY KEY,
-    max_phrase_length INTEGER
-  )
-  ''')
-  conn.commit()
-  logging.info("Database initialized.")
 def initialize_translator():
-  """Initializes the Google Translator."""
-  global translator
-  translator = GoogleTranslator(source='iw', target='en')
-  logging.info("Translator initialized.")
-def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
-  """Inserts a phrase and its Gematria value into the database."""
-  global conn
-  c = conn.cursor()
-  try:
-    c.execute('''
-      INSERT INTO results (gematria_sum, words, book, chapter, verse)
-      VALUES (?, ?, ?, ?, ?)
-    ''', (gematria_sum, phrase_candidate, book, chapter, verse))
-    conn.commit()
-    logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
-  except sqlite3.IntegrityError:
-    logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
 def populate_database(tanach_texts, max_phrase_length=1):
-  """Populates the database with phrases from the Tanach and their Gematria values."""
-  global conn, book_names
-  logging.info("Populating database...")
-  c = conn.cursor()
-  for book_id, text in tanach_texts:  # Unpack the tuple (book_id, text)
-    c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
-    result = c.fetchone()
-    if result and result[0] >= max_phrase_length:
-      logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
-      continue
-    logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
-    if 'text' not in text or not isinstance(text['text'], list):
-      logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
-      continue
-    title = text.get('title', 'Unknown')
-    book_names[book_id] = title  # Store book name
-    chapters = text['text']
-    for chapter_id, chapter in enumerate(chapters):
-      if not isinstance(chapter, list):
-        logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
-        continue
-      for verse_id, verse in enumerate(chapter):
-        verse_text = flatten_text(verse)
-        verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
-        verse_text = re.sub(r" +", " ", verse_text)
-        words = verse_text.split()
-        for length in range(1, max_phrase_length + 1):
-          for start in range(len(words) - length + 1):
-            phrase_candidate = " ".join(words[start:start + length])
-            gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
-            insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
     try:
-      c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
     except sqlite3.IntegrityError:
-      c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id))
-    conn.commit()
-  logging.info("Database population complete.")
 def get_translation(phrase):
-  """Retrieves or generates the English translation of a Hebrew phrase."""
-  global translator, conn
-  c = conn.cursor()
-  c.execute('''
-  SELECT translation FROM results
-  WHERE words = ?
-  ''', (phrase,))
-  result = c.fetchone()
-  if result and result[0]:
-    return result[0]
-  else:
-    translation = translate_and_store(phrase)
-    c.execute('''
-      UPDATE results
-      SET translation = ?
-      WHERE words = ?
-    ''', (translation, phrase))
-    conn.commit()
-    return translation
 def translate_and_store(phrase):
-  global translator
-  max_retries = 3  # You can adjust the number of retries
-  retries = 0
-  while retries < max_retries:
-    try:
-      translation = translator.translate(phrase)
-      logging.debug(f"Translated phrase: {translation}")
-      return translation
-    except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
-        exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:  # Add ConnectionError
-      retries += 1
-      logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
-  logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
-  return "[Translation Error]"
 def search_gematria_in_db(gematria_sum):
-  """Searches the database for phrases with a given Gematria value."""
-  global conn
-  c = conn.cursor()
-  c.execute('''
-  SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
-  ''', (gematria_sum,))
-  results = c.fetchall()
-  logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
-  return results
 def gematria_search_interface(phrase):
-  """The main function for the Gradio interface."""
-  if not phrase.strip():
-    return "Please enter a phrase."
-  # Create database connection inside the function
-  global conn, book_names
-  conn = sqlite3.connect('gematria.db')
-  c = conn.cursor()
-  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
-  logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
-  matching_phrases = search_gematria_in_db(phrase_gematria)
-  if not matching_phrases:
-    return "No matching phrases found."
-  # Sort results by book, chapter, and verse
-  sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))
-  # Group results by book
-  results_by_book = defaultdict(list)
-  for words, book, chapter, verse in sorted_phrases:
-    results_by_book[book].append((words, chapter, verse))
-  # Format results for display with enhanced structure
-  results = []
-  results.append("<div class='results-container'>")
-  for book, phrases in results_by_book.items():
-    results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
-    for words, chapter, verse in phrases:
-      translation = get_translation(words)
-      book_name_english = book_names.get(book, 'Unknown')
-      link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
-      results.append(f"""
-      <div class='result-item'>
-        <p>Chapter: {chapter}, Verse: {verse}</p>
-        <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
-        <p>Translation: {translation}</p>
-        <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
-      </div>
-      """)
-  results.append("</div>") # Close results-container div
-  conn.close()
-  # Add CSS styling
-  style = """
-  <style>
-    .results-container {
-      display: grid;
-      grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
-      gap: 20px;
-    }
-    .result-item {
-      border: 1px solid #ccc;
-      padding: 15px;
-      border-radius: 5px;
-      box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
-    }
-    .hebrew-phrase {
-      font-family: 'SBL Hebrew', 'Ezra SIL', serif;
-      direction: rtl;
-    }
-    .bible-link {
-      display: block;
-      margin-top: 10px;
-      color: #007bff;
-      text-decoration: none;
-    }
-  </style>
-  """
-  return style + "\n".join(results) # Concatenate style and results
 def run_app():
-  """Initializes and launches the Gradio app."""
-  initialize_database()
-  initialize_translator()
-  # Pre-populate the database
-  tanach_texts = process_json_files(27, 27) # Process all books
-  populate_database(tanach_texts, max_phrase_length=12)
-  tanach_texts = process_json_files(1, 39) # Process all books
-  populate_database(tanach_texts, max_phrase_length=1)
-  #tanach_texts = process_json_files(1, 1) # Process all books
-  #populate_database(tanach_texts, max_phrase_length=4)
-  #tanach_texts = process_json_files(27, 27) # Process all books
-  #populate_database(tanach_texts, max_phrase_length=4)
-  iface = gr.Interface(
-    fn=gematria_search_interface,
-    inputs=gr.Textbox(label="Enter phrase"),
-    outputs=gr.HTML(label="Results"),
-    title="Gematria Search in Tanach",
-    description="Search for phrases in the Tanach that have the same Gematria value.",
-    live=False,
-    allow_flagging="never"
-  )
-  iface.launch()
 if __name__ == "__main__":
-  run_app()

 from urllib.parse import quote_plus
 # Set up logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+# Global variables for database connection, translator and book names
 conn = None
 translator = None
+book_names = {}
 def initialize_database():
+    """Initializes the SQLite database."""
+    global conn
+    conn = sqlite3.connect('gematria.db')
+    cursor = conn.cursor()
+    # Create tables if they don't exist
+    cursor.execute('''
+    CREATE TABLE IF NOT EXISTS results (
+        gematria_sum INTEGER,
+        words TEXT,
+        translation TEXT,
+        book INTEGER,
+        chapter INTEGER,
+        verse INTEGER,
+        PRIMARY KEY (gematria_sum, book, chapter, verse)
+    )
+    ''')
+    cursor.execute('''
+    CREATE TABLE IF NOT EXISTS processed_books (
+        book INTEGER PRIMARY KEY,
+        max_phrase_length INTEGER
+    )
+    ''')
+    conn.commit()
+    logging.info("Database initialized.")
 def initialize_translator():
+    """Initializes the Google Translator."""
+    global translator
+    translator = GoogleTranslator(source='iw', target='en')
+    logging.info("Translator initialized.")
 def populate_database(tanach_texts, max_phrase_length=1):
+    """Populates the database with phrases from the Tanach and their Gematria values."""
+    global conn, book_names
+    logging.info("Populating database...")
+    cursor = conn.cursor()
+    for book_id, book_data in tanach_texts.items():
+        # Check if the book is already processed for this max_phrase_length
+        cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
+        result = cursor.fetchone()
+        if result and result[0] >= max_phrase_length:
+            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
+            continue
+        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
+        if 'text' not in book_data or not isinstance(book_data['text'], list):
+            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
+            continue
+        title = book_data.get('title', 'Unknown')
+        book_names[book_id] = title
+        chapters = book_data['text']
+        for chapter_id, chapter in enumerate(chapters):
+            if not isinstance(chapter, list):
+                logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
+                continue
+            for verse_id, verse in enumerate(chapter):
+                verse_text = flatten_text(verse)
+                # Remove text in square brackets
+                verse_text = re.sub(r'\[.*?\]', '', verse_text)
+                verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
+                verse_text = re.sub(r" +", " ", verse_text)
+                words = verse_text.split()
+                # Iterate through phrases of different lengths
+                for length in range(1, max_phrase_length + 1):
+                    for start in range(len(words) - length + 1):
+                        phrase_candidate = " ".join(words[start:start + length])
+                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
+                        insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
+        # Mark the book as processed for this max_phrase_length
+        cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
+        conn.commit()
+    logging.info("Database population complete.")
+def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
+    """Inserts a phrase and its Gematria value into the database."""
+    global conn
+    cursor = conn.cursor()
     try:
+        cursor.execute('''
+            INSERT INTO results (gematria_sum, words, book, chapter, verse)
+            VALUES (?, ?, ?, ?, ?)
+        ''', (gematria_sum, phrase_candidate, book, chapter, verse))
+        conn.commit()
+        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
     except sqlite3.IntegrityError:
+        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
 def get_translation(phrase):
+    """Retrieves or generates the English translation of a Hebrew phrase."""
+    global translator, conn
+    cursor = conn.cursor()
+    cursor.execute('''
+    SELECT translation FROM results
+    WHERE words = ?
+    ''', (phrase,))
+    result = cursor.fetchone()
+    if result and result[0]:
+        return result[0]
+    else:
+        translation = translate_and_store(phrase)
+        cursor.execute('''
+            UPDATE results
+            SET translation = ?
+            WHERE words = ?
+        ''', (translation, phrase))
+        conn.commit()
+        return translation
 def translate_and_store(phrase):
+    """Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
+    global translator
+    max_retries = 3
+    retries = 0
+    while retries < max_retries:
+        try:
+            translation = translator.translate(phrase)
+            logging.debug(f"Translated phrase: {translation}")
+            return translation
+        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
+            exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
+            retries += 1
+            logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
+    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
+    return "[Translation Error]"
 def search_gematria_in_db(gematria_sum):
+    """Searches the database for phrases with a given Gematria value."""
+    global conn
+    cursor = conn.cursor()
+    cursor.execute('''
+    SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
+    ''', (gematria_sum,))
+    results = cursor.fetchall()
+    logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
+    return results
 def gematria_search_interface(phrase):
+    """The main function for the Gradio interface."""
+    if not phrase.strip():
+        return "Please enter a phrase."
+    global conn, book_names
+    conn = sqlite3.connect('gematria.db')
+    cursor = conn.cursor()
+    phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
+    logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
+    matching_phrases = search_gematria_in_db(phrase_gematria)
+    if not matching_phrases:
+        return "No matching phrases found."
+    # Sort results by book, chapter, and verse
+    sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))
+    # Group results by book
+    results_by_book = defaultdict(list)
+    for words, book, chapter, verse in sorted_phrases:
+        results_by_book[book].append((words, chapter, verse))
+    # Format results for display
+    results = []
+    results.append("<div class='results-container'>")
+    for book, phrases in results_by_book.items():
+        results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
+        for words, chapter, verse in phrases:
+            translation = get_translation(words)
+            book_name_english = book_names.get(book, 'Unknown')
+            link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}&version=CJB"
+            results.append(f"""
+            <div class='result-item'>
+                <p>Chapter: {chapter}, Verse: {verse}</p>
+                <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
+                <p>Translation: {translation}</p>
+                <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
+            </div>
+            """)
+    results.append("</div>") # Close results-container div
+    conn.close()
+    # Add CSS styling
+    style = """
+    <style>
+        .results-container {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+            gap: 20px;
+        }
+        .result-item {
+            border: 1px solid #ccc;
+            padding: 15px;
+            border-radius: 5px;
+            box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
+        }
+        .hebrew-phrase {
+            font-family: 'SBL Hebrew', 'Ezra SIL', serif;
+            direction: rtl;
+        }
+        .bible-link {
+            display: block;
+            margin-top: 10px;
+            color: #007bff;
+            text-decoration: none;
+        }
+    </style>
+    """
+    return style + "\n".join(results)
+def flatten_text(text):
+    """Helper function to flatten nested lists into a single list."""
+    if isinstance(text, list):
+        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
+    return text
 def run_app():
+    """Initializes and launches the Gradio app."""
+    initialize_database()
+    initialize_translator()
+    # Pre-populate the database
+    tanach_texts = process_json_files(1, 39)
+    populate_database(tanach_texts, max_phrase_length=12)
+    tanach_texts = process_json_files(27, 27)
+    populate_database(tanach_texts, max_phrase_length=24)
+    iface = gr.Interface(
+        fn=gematria_search_interface,
+        inputs=gr.Textbox(label="Enter phrase"),
+        outputs=gr.HTML(label="Results"),
+        title="Gematria Search in Tanach",
+        description="Search for phrases in the Tanach that have the same Gematria value.",
+        live=False,
+        allow_flagging="never"
+    )
+    iface.launch()
 if __name__ == "__main__":
+    run_app()

gematria.db DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:635af9e89ddd0c0ebb0c43f05db64b2014f3ce6390cc85e9c7f51201bef214ac
-size 16007168

util.py CHANGED Viewed

@@ -2,8 +2,20 @@ import json
 import re
 def process_json_files(start, end):
     base_path = "texts"
-    results = []
     for i in range(start, end + 1):
         file_name = f"{base_path}/{i:02}.json"
@@ -11,14 +23,14 @@ def process_json_files(start, end):
             with open(file_name, 'r', encoding='utf-8') as file:
                 data = json.load(file)
                 if data:
-                    # Return a tuple of book_id and text data
-                    results.append((i, {"title": data.get("title", "No title"), "text": data.get("text", [])}))
         except FileNotFoundError:
-            results.append((i, {"error": f"File {file_name} not found."}))  # Use a tuple here
         except json.JSONDecodeError as e:
-            results.append((i, {"error": f"File {file_name} could not be read as JSON: {e}"}))  # Use a tuple here
         except KeyError as e:
-            results.append((i, {"error": f"Expected key 'text' is missing in {file_name}: {e}"}))  # Use a tuple here
     return results

 import re
 def process_json_files(start, end):
+    """
+    Processes JSON files containing Tanach text and returns a dictionary
+    mapping book IDs to their data.
+    Args:
+        start: The starting book ID (inclusive).
+        end: The ending book ID (inclusive).
+    Returns:
+        A dictionary where keys are book IDs and values are dictionaries
+        containing 'title' and 'text' fields.
+    """
     base_path = "texts"
+    results = {}  # Use a dictionary to store results
     for i in range(start, end + 1):
         file_name = f"{base_path}/{i:02}.json"
             with open(file_name, 'r', encoding='utf-8') as file:
                 data = json.load(file)
                 if data:
+                    # Store book ID as key and book data as value
+                    results[i] = {"title": data.get("title", "No title"), "text": data.get("text", [])}
         except FileNotFoundError:
+            logging.warning(f"File {file_name} not found.")
         except json.JSONDecodeError as e:
+            logging.warning(f"File {file_name} could not be read as JSON: {e}")
         except KeyError as e:
+            logging.warning(f"Expected key 'text' is missing in {file_name}: {e}")
     return results