# bible-search / src / reranker.py
# alronlam's picture
# Add app and data files
# 613c93d
# raw
# history blame contribute delete
# No virus
# 2.07 kB
from typing import List
import numpy as np
import streamlit as st
from src.models import Chapter
class Reranker:
    """Base reranker: its ``rerank`` hands the chapters back unchanged."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` as received (same object, same order).

        The other reranker classes in this module override this method
        with an actual ordering.
        """
        # TODO: no reranking logic is implemented at this level yet.
        return chapters
# Rerankers applicable to SemanticRetriever results
def sort_chapters(chapters, scores):
    """Return the chapters ordered from highest to lowest score.

    ``chapters`` and ``scores`` are paired positionally with ``zip``, so any
    surplus items in the longer of the two are dropped. Chapters whose scores
    compare equal keep their relative input order. A new list is returned.
    """
    scored = list(zip(chapters, scores))
    # list.sort is stable, including with reverse=True, so ties keep input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [chapter for chapter, _score in scored]
class CombinedScoreAndNumberReranker(Reranker):
    """Rerank by a weighted sum of two per-chapter scores.

    The two components come from ``compute_num_verse_scores`` and
    ``compute_sem_sim_scores``; each is multiplied by its weight and the
    results are added to form the final sort key.
    """

    def __init__(self, num_verse_weight=0.3, semantic_sim_weight=0.7):
        """Store the weights applied to each score component."""
        self.semantic_sim_weight = semantic_sim_weight
        self.num_verse_weight = num_verse_weight

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by the combined score, highest first."""
        verse_scores = compute_num_verse_scores(chapters)
        similarity_scores = compute_sem_sim_scores(chapters)

        weighted_verse = self.num_verse_weight * verse_scores
        weighted_similarity = self.semantic_sim_weight * similarity_scores
        combined = weighted_verse + weighted_similarity

        return sort_chapters(chapters, combined)
class SemanticSimScoreReranker(Reranker):
    """Rerank chapters by ``highlight_verses_df["score"].max()``, highest first."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by each chapter's maximum score.

        The per-chapter value is produced by ``compute_sem_sim_scores``,
        which evaluates ``chapter.highlight_verses_df["score"].max()`` for
        every chapter and collects the results in a NumPy array.
        """
        return sort_chapters(chapters, compute_sem_sim_scores(chapters))
class MaxVerseReranker(Reranker):
    """Rerank chapters by ``get_num_unique_highlight_verse()``, highest first."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by their raw (un-normalised) verse count."""
        verse_counts = [
            candidate.get_num_unique_highlight_verse() for candidate in chapters
        ]
        ordered = sort_chapters(chapters, verse_counts)
        return ordered
def compute_num_verse_scores(chapters):
    """Return each chapter's highlighted-verse count relative to the maximum.

    Args:
        chapters: Iterable of objects exposing
            ``get_num_unique_highlight_verse()``.

    Returns:
        A 1-D float NumPy array with one entry per chapter, equal to that
        chapter's count divided by the largest count. An empty input yields
        an empty array, and if the largest count is 0 every score is 0.0.
    """
    num_verses = np.array(
        [chapter.get_num_unique_highlight_verse() for chapter in chapters],
        dtype=float,
    )

    # Nothing to normalise; taking a max over an empty array would raise.
    if num_verses.size == 0:
        return num_verses

    max_verses = num_verses.max()

    # Avoid 0/0 -> NaN, which would make any later score-based sort meaningless.
    if max_verses == 0:
        return np.zeros_like(num_verses)

    return num_verses / max_verses
def compute_sem_sim_scores(chapters):
    """Collect ``highlight_verses_df["score"].max()`` for every chapter.

    Returns a NumPy array with one entry per chapter, in input order.
    """
    best_scores = [
        candidate.highlight_verses_df["score"].max() for candidate in chapters
    ]
    return np.array(best_scores)