# Configuration constants for the AIR-Bench leaderboard Space
# (Hugging Face Space: AIR-Bench/leaderboard).
import os
from src.display.formatting import model_hyperlink
from huggingface_hub import HfApi

# Info to change for your repository
# ----------------------------------
TOKEN = os.environ.get("TOKEN", "")  # A read/write token for your org

OWNER = "AIR-Bench"  # Change to your org - don't forget to create the results and search-results datasets, with the correct format!
# ----------------------------------

REPO_ID = f"{OWNER}/leaderboard"
# repo for storing the evaluation results
RESULTS_REPO = f"{OWNER}/eval_results"
# repo for storing the submitted search results
SEARCH_RESULTS_REPO = f"{OWNER}/search_results"

# If you set up a cache later, just change HF_HOME
CACHE_PATH = os.getenv("HF_HOME", ".")

# Local caches
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval_results")

API = HfApi(token=TOKEN)
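
# Hedged sketch (assumption, not part of the original file): leaderboard apps
# built on this template typically pull RESULTS_REPO into the local cache at
# startup via snapshot_download, roughly like this illustrative helper.
def sync_eval_results() -> str:
    from huggingface_hub import snapshot_download

    return snapshot_download(
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        local_dir=EVAL_RESULTS_PATH,
        token=TOKEN,
    )
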

BM25_LINK = model_hyperlink("https://github.com/castorini/pyserini", "BM25")
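# model_hyperlink(link, name) comes from the leaderboard template's
# src/display/formatting.py; it returns an HTML anchor string such as
# '<a target="_blank" href="https://github.com/castorini/pyserini">BM25</a>'
# (exact markup may differ by template version).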

BENCHMARK_VERSION_LIST = [
    "AIR-Bench_24.04",
    "AIR-Bench_24.05",
]

LATEST_BENCHMARK_VERSION = BENCHMARK_VERSION_LIST[-1]  # versions are listed oldest first
DEFAULT_METRIC_QA = "ndcg_at_10"
DEFAULT_METRIC_LONG_DOC = "recall_at_10"
# All metric/cutoff combinations reported by the benchmark, e.g. "ndcg_at_10"
METRIC_LIST = [
    f"{metric}_at_{k}"
    for metric in ("ndcg", "map", "recall", "precision", "mrr")
    for k in (1, 3, 5, 10, 100, 1000)
]
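
# Hedged helper (assumption, not in the original file): split a metric id
# like "ndcg_at_10" into its family and cutoff, e.g. ("ndcg", 10).
def split_metric(metric: str) -> tuple[str, int]:
    name, _, k = metric.partition("_at_")
    return name, int(k)

# Display column names for the leaderboard table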
COL_NAME_AVG = "Average ⬆️"
COL_NAME_RETRIEVAL_MODEL = "Retrieval Method"
COL_NAME_RERANKING_MODEL = "Reranking Model"
COL_NAME_RETRIEVAL_MODEL_LINK = "Retrieval Model LINK"
COL_NAME_RERANKING_MODEL_LINK = "Reranking Model LINK"
COL_NAME_RANK = "Rank 🏆"
COL_NAME_REVISION = "Revision"
COL_NAME_TIMESTAMP = "Submission Date"
COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
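
# Hedged sketch (assumption): one leaderboard row keyed by the column
# constants above. The real app derives rows from RESULTS_REPO; every value
# here is a placeholder for illustration only.
def example_row() -> dict:
    return {
        COL_NAME_RANK: 1,
        COL_NAME_RETRIEVAL_MODEL: BM25_LINK,
        COL_NAME_RETRIEVAL_MODEL_LINK: "https://github.com/castorini/pyserini",
        COL_NAME_RERANKING_MODEL: "NoReranker",
        COL_NAME_RERANKING_MODEL_LINK: "",
        COL_NAME_AVG: 0.0,
        COL_NAME_REVISION: "",
        COL_NAME_TIMESTAMP: "2024-05-01T00:00:00Z",
        COL_NAME_IS_ANONYMOUS: False,
    }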