Spaces:
Sleeping
Sleeping
import json
import logging
import re
def process_json_files(start, end, *, base_path="texts"):
    """
    Load a range of numbered Tanach JSON files and index them by book ID.

    Each book is read from ``{base_path}/{book_id:02}.json``. A file that is
    missing, is not valid JSON, or whose top-level JSON value is not an
    object is skipped with a logged warning, so that one bad file does not
    abort the rest of the range. A file whose parsed content is empty
    (for example ``{}``) is skipped silently.

    Args:
        start: The starting book ID (inclusive).
        end: The ending book ID (inclusive).
        base_path: Directory that holds the numbered JSON files.
            Defaults to "texts".

    Returns:
        A dictionary where keys are book IDs and values are dictionaries
        containing 'title' and 'text' fields. A missing 'title' falls back
        to "No title" and a missing 'text' falls back to an empty list.
        The dictionary is empty when ``start > end`` or no file could be
        loaded.
    """
    results = {}
    for book_id in range(start, end + 1):
        file_name = f"{base_path}/{book_id:02}.json"

        # Keep the try body limited to the I/O and parsing that can raise.
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            logging.warning(f"File {file_name} not found.")
            continue
        except json.JSONDecodeError as e:
            logging.warning(f"File {file_name} could not be read as JSON: {e}")
            continue

        # Empty documents carry no book data; skip them without noise.
        if not data:
            continue

        # json.load may return a list, string or number for valid JSON;
        # only an object supports the key lookups below.
        if not isinstance(data, dict):
            logging.warning(
                f"File {file_name} does not contain a JSON object "
                f"(got {type(data).__name__}); skipping."
            )
            continue

        # .get() supplies defaults, so absent keys never raise here.
        results[book_id] = {
            "title": data.get("title", "No title"),
            "text": data.get("text", []),
        }
    return results