File size: 5,143 Bytes
613c93d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import time
from pathlib import Path

import streamlit as st

from src import bible_loader
from src.embeddings import EmbeddingsManager
from src.reranker import (
    CombinedScoreAndNumberReranker,
    MaxVerseReranker,
    Reranker,
    SemanticSimScoreReranker,
)
from src.retriever import Retriever, SemanticRetriever


def display_chapter(chapter):
    """Render a single chapter result on the page.

    Writes a header whose text is ``str(chapter)`` hyperlinked to the URL
    returned by ``chapter.get_biblegateway_url()``, followed by the output of
    ``chapter.get_formatted_text()`` rendered as markdown with raw HTML
    allowed.
    """
    title = str(chapter)
    url = chapter.get_biblegateway_url()
    st.header(f"[{title}]({url})")

    # Raw HTML is enabled for the body; presumably the formatted text embeds
    # HTML markup for highlighting -- confirm against the chapter class.
    body = chapter.get_formatted_text()
    st.markdown(body, unsafe_allow_html=True)


def config():
    """Draw the sidebar controls and collect the search settings.

    Returns:
        A tuple ``(n_results, new_testament, old_testament, bible_version)``:
        the slider value for the maximum number of results, the two testament
        checkbox states, and the Bible version (currently fixed to ``"NIV"``).
    """
    sidebar = st.sidebar

    n_results = sidebar.slider(
        "Maximum Results?", min_value=5, max_value=30, value=10
    )
    new_testament = sidebar.checkbox("Search New Testament?", value=True)
    old_testament = sidebar.checkbox("Search Old Testament?", value=False)

    # TODO: let the user pick the version, e.g. a selectbox over ["NIV", "ESV"].
    bible_version = "NIV"

    return n_results, new_testament, old_testament, bible_version


def main():
    """Run the Streamlit verse-similarity search page.

    Reads the sidebar settings, loads the Bible data and its embeddings,
    narrows the verses to the selected testament(s), renders the query form
    and, when a search is triggered, retrieves, re-ranks and displays the
    matching chapters.
    """

    st.set_page_config(page_title="Bible Search", layout="wide")

    n_results, new_testament, old_testament, bible_version = config()

    # Config
    ROOT_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
    DATA_DIR = ROOT_DIR / "data"

    # Fetch twice as many candidates as will be shown; the list is trimmed
    # back to n_results after re-ranking.
    n_candidates = n_results * 2
    metadata_csv = DATA_DIR / "key_english.csv"
    verses_csv = DATA_DIR / f"{bible_version}.csv"

    semantic_sim_model = "msmarco-distilbert-base-v4"

    # Initialize / Index
    # The embeddings manager is given the texts of the full (unfiltered)
    # Bible; the testament filter below is applied only afterwards.
    bible_df = bible_loader.load_bible(metadata_csv, verses_csv)
    embeddings_manager = EmbeddingsManager(
        model_name=semantic_sim_model,
        bible_version=bible_version,
        embeddings_cache_dir=DATA_DIR,
        texts=bible_df["text"].tolist(),
    )

    # Trim down search space if needed
    if not new_testament:
        bible_df = bible_df[bible_df["testament"] != "NT"]
    if not old_testament:
        bible_df = bible_df[bible_df["testament"] != "OT"]

    # Initialize retriever and reranker based on filtered texts
    retriever = SemanticRetriever(bible_df, embeddings_manager)
    reranker = CombinedScoreAndNumberReranker()
    # Alternative rerankers that can be swapped in:
    # reranker = SemanticSimScoreReranker()
    # reranker = MaxVerseReranker()

    # Three columns with the content in the (wider) middle one.
    _, main_col, _ = st.columns([1, 2, 1])

    with main_col:

        # Get user input
        st.title("Verse Similarity Search")
        # The blank line before the closing sentence ends the bullet list;
        # without it markdown folds that sentence into the last bullet.
        st.markdown(
            "- Have you ever been stumped by a verse and wondered what related things the Bible says about it?\n"
            "- Or you have a verse of interest and you simply want to find related ones?\n"
            "- Or you vaguely recall a verse's idea, but can't recall the exact text?\n"
            "\n"
            "This tool was made just for that!"
        )

        st.markdown("---")

        demo_query = st.selectbox(
            "Try some demo queries...",
            [
                "",
                "For God so loved the world that he gave his one and only Son, that whoever believes in him shall not perish but have eternal life.",
                "In the same way, faith by itself, if it is not accompanied by action, is dead.",
                "I tell you the truth, no one can enter the kingdom of God unless he is born of water and the Spirit.",
                "the Lord is patient with us, not wanting us to perish",
                "is it ok for believers to continue in sin?",
                "it is possible to resist every temptation",
                "heavenly rewards",
                "the old is gone, the new has come",
                "suffering for Christ",
                "rejoicing in trials",
                "Be careful of false prophets, wolves in sheep skin",
                "will there be marriage in heaven?",
            ],
            index=1,
        )

        # The selected demo query (if any) pre-fills the free-text box.
        query = st.text_area(
            "Or type a verse's text here to find similar verses",
            demo_query if demo_query.strip() else "",
        )

        clicked_search = st.button("Search", type="primary")

        if query or clicked_search:

            if not query.strip():
                # Nothing meaningful to search for (empty or whitespace-only
                # text), so ask for input instead of running a retrieval.
                st.markdown(
                    "---\n:red[Please enter a verse or some text to search for. :)]"
                )
            elif len(bible_df) == 0:
                # Both testament checkboxes are off, so no verses remain.
                st.markdown(
                    "---\n:red[Please select at least one testament to search through (left hand side of the screen). :)]"
                )
            else:
                with st.spinner("Searching..."):

                    # perf_counter is monotonic, so the reported duration
                    # cannot be skewed by system clock adjustments.
                    start = time.perf_counter()

                    # Retrieve and re-rank
                    candidate_chapters = retriever.retrieve(query, n=n_candidates)
                    candidate_chapters = reranker.rerank(candidate_chapters)

                    # Trim because candidates can be more than the desired results
                    final_chapter_results = candidate_chapters[:n_results]

                    # Display quick stats
                    elapsed = time.perf_counter() - start
                    st.markdown(
                        f"_{len(final_chapter_results)} results found in {elapsed:.2f}s_"
                    )
                    st.markdown("---")

                    # Display results
                    for chapter in final_chapter_results:
                        display_chapter(chapter)
                        st.markdown("---")


# Script entry point: render the page only when this file is executed as the
# main script, not when it is imported as a module.
if __name__ == "__main__":
    main()