; Parameter file with one section per feature:
; [lexical_search], [semantic_search], [sdg], [tfidf], [coherence].
; Values are plain strings; any int/float/flag typing is done by the consumer.

; Parameters for the lexical search feature.
[lexical_search]
; NOTE(review): presumably the number of top-ranked hits to return - confirm.
TOP_K = 20
; Document splitting: unit, chunk length and overlap between chunks.
; NOTE(review): length/overlap are presumably counted in SPLIT_BY units - confirm.
SPLIT_BY = word
SPLIT_LENGTH = 120
SPLIT_OVERLAP = 0
; NOTE(review): looks like a 0/1 flag for stripping punctuation - confirm.
REMOVE_PUNC = 0

; Parameters for the semantic search feature (retriever + reader keys).
[semantic_search]
; Retriever keys.
; NOTE(review): RETRIEVER_TOP_K presumably caps the retrieved candidates - confirm.
RETRIEVER_TOP_K = 10
; NOTE(review): 384 and 768 below match the published sequence limit and
; embedding size of all-mpnet-base-v2; revisit both if RETRIEVER changes.
MAX_SEQ_LENGTH = 384
RETRIEVER = all-mpnet-base-v2
RETRIEVER_FORMAT = sentence_transformers
EMBEDDING_DIM = 768
; NOTE(review): -1 presumably selects the last model layer - confirm.
RETRIEVER_EMB_LAYER = -1
; Reader keys.
READER = deepset/tinyroberta-squad2
READER_TOP_K = 10
; NOTE(review): which score this cut-off applies to is not visible here - confirm.
THRESHOLD = 0.1
; Document splitting: unit, chunk length and overlap between chunks.
; NOTE(review): length/overlap are presumably counted in SPLIT_BY units - confirm.
SPLIT_BY = sentence
SPLIT_LENGTH = 4
SPLIT_OVERLAP = 1
; NOTE(review): both look like 0/1 flags - confirm how the consumer parses them.
RESPECT_SENTENCE_BOUNDARY = 1
REMOVE_PUNC = 0

; Parameters for the SDG feature.
[sdg]
; NOTE(review): presumably the minimum score for a prediction to be kept - confirm.
THRESHOLD = 0.85
; NOTE(review): looks like a model-hub identifier (owner/name) - confirm.
MODEL = jonas/sdg_classifier_osdg
; Document splitting: unit, chunk length and overlap between chunks.
; NOTE(review): length/overlap are presumably counted in SPLIT_BY units - confirm.
SPLIT_BY = word
; NOTE(review): looks like a 0/1 flag for stripping punctuation - confirm.
REMOVE_PUNC = 0
SPLIT_LENGTH = 120
SPLIT_OVERLAP = 10
; NOTE(review): looks like a 0/1 flag - confirm how the consumer parses it.
RESPECT_SENTENCE_BOUNDARY = 1
; NOTE(review): the key is TOP_KEY here, unlike TOP_K / TOP_N in other sections.
; Left unchanged because the consumer looks keys up by name - confirm it is
; intentional before renaming.
TOP_KEY = 15

; Parameters for the TF-IDF feature.
[tfidf]
; NOTE(review): presumably the number of top-scoring terms to keep - confirm.
TOP_N = 20

; Parameters for the coherence feature (retriever-based).
[coherence]
; NOTE(review): RETRIEVER_TOP_K presumably caps the retrieved candidates - confirm.
RETRIEVER_TOP_K = 10
; NOTE(review): 512 and 768 below match the published sequence limit and
; embedding size of msmarco-distilbert-dot-v5; revisit both if RETRIEVER changes.
MAX_SEQ_LENGTH = 512
RETRIEVER = msmarco-distilbert-dot-v5
RETRIEVER_FORMAT = sentence_transformers
; NOTE(review): -1 presumably selects the last model layer - confirm.
RETRIEVER_EMB_LAYER = -1
EMBEDDING_DIM = 768
; NOTE(review): which score this cut-off applies to is not visible here - confirm.
THRESHOLD = 0.55
; Document splitting: unit, chunk length and overlap between chunks.
; NOTE(review): length/overlap are presumably counted in SPLIT_BY units - confirm.
SPLIT_BY = word
SPLIT_LENGTH = 120
SPLIT_OVERLAP = 10
; NOTE(review): both look like 0/1 flags - confirm how the consumer parses them.
RESPECT_SENTENCE_BOUNDARY = 1
REMOVE_PUNC = 0