Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
0af261c
1 Parent(s): 729aa2a

test: add unit tests for envs

Browse files
Files changed (2) hide show
  1. src/envs.py +2 -2
  2. tests/src/test_envs.py +15 -0
src/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
6
  # ----------------------------------
7
  TOKEN = os.environ.get("TOKEN", "") # A read/write token for your org
8
 
9
- OWNER = "AIR-Bench" # "nan" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"
@@ -15,7 +15,7 @@ RESULTS_REPO = f"{OWNER}/eval_results"
15
  # repo for submitting the evaluation
16
  SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
17
 
18
- # If you setup a cache later, just change HF_HOME
19
  CACHE_PATH = os.getenv("HF_HOME", ".")
20
 
21
  # Local caches
 
6
  # ----------------------------------
7
  TOKEN = os.environ.get("TOKEN", "") # A read/write token for your org
8
 
9
+ OWNER = "AIR-Bench" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"
 
15
  # repo for submitting the evaluation
16
  SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
17
 
18
+ # If you set up a cache later, just change HF_HOME
19
  CACHE_PATH = os.getenv("HF_HOME", ".")
20
 
21
  # Local caches
tests/src/test_envs.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from air_benchmark.tasks import BenchmarkTable
2
+
3
+ from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST
4
+
5
+
6
+ def test_benchmark_version_list():
7
+ leaderboard_versions = frozenset(BENCHMARK_VERSION_LIST)
8
+ available_versions = frozenset([k for k in BenchmarkTable.keys()])
9
+ assert leaderboard_versions.issubset(
10
+ available_versions)
11
+
12
+
13
+ def test_default_metrics():
14
+ assert DEFAULT_METRIC_QA in METRIC_LIST
15
+ assert DEFAULT_METRIC_LONG_DOC in METRIC_LIST