File size: 2,068 Bytes
613c93d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from typing import List

import numpy as np
import streamlit as st

from src.models import Chapter


class Reranker:
    """Base reranker whose ``rerank`` hands the chapters back unchanged."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` as received, without reordering or copying.

        Args:
            chapters: The chapters to rank.

        Returns:
            The very same list object that was passed in.
        """
        # TODO: no ranking strategy is implemented at this level yet.
        return chapters


# Rerankers applicable to SemanticRetriever results


def sort_chapters(chapters, scores):
    """Order chapters from the highest score to the lowest.

    Args:
        chapters: Items to reorder.
        scores: Scores matched to ``chapters`` by position.

    Returns:
        A new list with the chapters in descending score order. Chapters
        with equal scores keep their relative input order.
    """
    ranked = list(zip(chapters, scores))
    # Sort on the score only, so the chapter objects are never compared.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [chapter for chapter, _ in ranked]


class CombinedScoreAndNumberReranker(Reranker):
    """Rank chapters by a weighted blend of two per-chapter scores.

    The blend adds the output of ``compute_num_verse_scores`` and the output
    of ``compute_sem_sim_scores``, each multiplied by its own weight.
    """

    def __init__(self, num_verse_weight=0.3, semantic_sim_weight=0.7):
        """Store the two blending weights.

        Args:
            num_verse_weight: Multiplier for the verse-count scores.
            semantic_sim_weight: Multiplier for the semantic-similarity scores.
        """
        self.semantic_sim_weight = semantic_sim_weight
        self.num_verse_weight = num_verse_weight

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending blended score."""
        verse_part = self.num_verse_weight * compute_num_verse_scores(chapters)
        similarity_part = self.semantic_sim_weight * compute_sem_sim_scores(chapters)
        return sort_chapters(chapters, verse_part + similarity_part)


class SemanticSimScoreReranker(Reranker):
    """Rank chapters by the largest "score" among their highlight verses."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending best verse score."""
        # One value per chapter: the maximum of its highlight "score" entries.
        per_chapter_best = [
            entry.highlight_verses_df["score"].max() for entry in chapters
        ]
        return sort_chapters(chapters, np.array(per_chapter_best))


class MaxVerseReranker(Reranker):
    """Rank chapters by ``get_num_unique_highlight_verse()``, largest first."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return the chapters sorted by descending highlight-verse count."""
        verse_counts = []
        for entry in chapters:
            verse_counts.append(entry.get_num_unique_highlight_verse())
        return sort_chapters(chapters, verse_counts)


def compute_num_verse_scores(chapters):
    """Scale each chapter's highlight-verse count by the largest count.

    Args:
        chapters: Objects exposing ``get_num_unique_highlight_verse()``.

    Returns:
        A NumPy array with one entry per chapter: its count divided by the
        maximum count across ``chapters``. An empty input yields an empty
        array, and when the maximum count is zero every score is 0.0.
    """
    num_verses = np.array(
        [chapter.get_num_unique_highlight_verse() for chapter in chapters]
    )
    # Taking the maximum of nothing raises, and there is nothing to scale.
    if num_verses.size == 0:
        return np.zeros(0)

    max_verses = num_verses.max()
    # A zero maximum would turn the division into NaN scores, which cannot
    # be ordered reliably by the sort that consumes them.
    if max_verses == 0:
        return np.zeros(num_verses.shape)

    return num_verses / max_verses


def compute_sem_sim_scores(chapters):
    """Collect the top highlight-verse score of every chapter.

    Args:
        chapters: Objects whose ``highlight_verses_df["score"]`` supports
            ``.max()``.

    Returns:
        A NumPy array holding, in input order, the maximum of each chapter's
        ``"score"`` entries.
    """
    best_per_chapter = []
    for chapter in chapters:
        verse_scores = chapter.highlight_verses_df["score"]
        best_per_chapter.append(verse_scores.max())
    return np.array(best_per_chapter)