Spaces:
Restarting
Restarting
upgrade transformers
Browse files
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23a0fd5602af5241fb2f8d005924180b/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{cc0c3d4788195b117302c7e49e5e6d2f → 23a0fd5602af5241fb2f8d005924180b}/output.pkl +2 -2
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{74c03d080f7935bad0054064b2595518 → 2ec974c87eca0a06aea982a02ac46031}/output.pkl +2 -2
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/3d66489be6398e5b5b21a6b0a08f15b3/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/74c03d080f7935bad0054064b2595518/metadata.json +0 -1
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/7bae5d318de18eac6d87e1ec54218733/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{81c700664b9416c1564a63e2eeb7b859 → 7bae5d318de18eac6d87e1ec54218733}/output.pkl +2 -2
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/97bf2401f2957f6381272e0e8b844a60/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{d04e5f9d53ccdec3f2df6f97b1e7e77c → 97bf2401f2957f6381272e0e8b844a60}/output.pkl +2 -2
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/cc0c3d4788195b117302c7e49e5e6d2f/metadata.json +0 -1
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d04e5f9d53ccdec3f2df6f97b1e7e77c/metadata.json +0 -1
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e175e10ba74320a4086e347a0e1a6862/output.pkl +3 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/metadata.json +1 -0
- is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/output.pkl +3 -0
- requirements.txt +1 -1
- src/leaderboard/read_evals.py +14 -4
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23a0fd5602af5241fb2f8d005924180b/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.0002968311309814453, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001_e2'", "revision": "''", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799950.899789}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{cc0c3d4788195b117302c7e49e5e6d2f → 23a0fd5602af5241fb2f8d005924180b}/output.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a1fb7b246f8007481abf14bc208069cd51769a48f64455abc1f83d41c9c93d9
|
3 |
+
size 213
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.1410212516784668, "input_args": {"model_name": "'piotr-ai/polanka-7b-v0.1'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799919.9540784}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/23c8246269ba365a1ae561a7b989d991/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3a20e156a5e4fd6ca69b0093e4568506780fa865d012ffefedc4a608cc3c4d9
|
3 |
+
size 2050
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{74c03d080f7935bad0054064b2595518 → 2ec974c87eca0a06aea982a02ac46031}/output.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4411e5d8c00ef326aabc5d5009e0e7cfff43b3dcfa542e783b32b2acd39203de
|
3 |
+
size 220
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/3d66489be6398e5b5b21a6b0a08f15b3/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:936397f16e56c7fe947168df4949eb52522087e41be00e0468b26d41be6bcf80
|
3 |
+
size 1941
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.2852437496185303, "input_args": {"model_name": "'google/gemma-2-2b-it'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799938.6140876}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/4d9c362734452593bb8a51ce9ed946cc/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:451a1a0a65280fcc7988e008e7602fbe8ce816d2ad9e06407bef92e696cf09d6
|
3 |
+
size 1951
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/74c03d080f7935bad0054064b2595518/metadata.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"duration": 0.0005774497985839844, "input_args": {"model_name": "'model=mistralai/Mistral-Large-Instruct-2407'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432813.2754002}
|
|
|
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/7bae5d318de18eac6d87e1ec54218733/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.0002970695495605469, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001-sft7a12k_e3'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799950.961776}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{81c700664b9416c1564a63e2eeb7b859 → 7bae5d318de18eac6d87e1ec54218733}/output.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e40faeafe06c3c74a55e5bc8fef1b2b6a94b87214d5a10c16f00e88942e28a38
|
3 |
+
size 222
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 1.3236052989959717, "input_args": {"model_name": "'THUDM/glm-4-9b-chat'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799927.7191012}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/829584a23f83469286cb95d376b70c03/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:992a9ffe516d3a3b1218edb22f8cb8a0bcb13ff4b95485bd7223a2227b86b557
|
3 |
+
size 2562
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/97bf2401f2957f6381272e0e8b844a60/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.21419787406921387, "input_args": {"model_name": "'gpt-3.5-turbo-instruct'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799910.6515977}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/{d04e5f9d53ccdec3f2df6f97b1e7e77c → 97bf2401f2957f6381272e0e8b844a60}/output.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42ca0aa1866b90f59e18d92eb331a61a4388338ade36adecaeba8a4461bc0342
|
3 |
+
size 338
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.16406464576721191, "input_args": {"model_name": "'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799917.5419252}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b21add535236b801b30265b4f54bd6d9/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42866bd579e2954212d3ad7b9dae26befb527144295d782075ef90b5650e9bb4
|
3 |
+
size 19962
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.2595674991607666, "input_args": {"model_name": "'gpt-4-turbo-2024-04-09'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799926.336528}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/b57a6734f8edf0cd078bb309ef289bcf/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05fe4b62096374aa9f869cc12c8cbf796a8b4a5e62701c22b4eedd0f01fb2f70
|
3 |
+
size 338
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/cc0c3d4788195b117302c7e49e5e6d2f/metadata.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"duration": 0.0007023811340332031, "input_args": {"model_name": "'model=gpt-3.5-turbo-instruct'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432787.2373662}
|
|
|
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d04e5f9d53ccdec3f2df6f97b1e7e77c/metadata.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"duration": 0.0002880096435546875, "input_args": {"model_name": "'model=gpt-4-turbo-2024-04-09'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1722432800.258018}
|
|
|
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.20917677879333496, "input_args": {"model_name": "'Nexusflow/Athene-70B'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799943.0636876}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/d729fe08a059410d3935f92cb2dbcbd9/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:607b128531bc5ccbd39fdac4330c4d3a622235bf77985dc8ca5a4f8e7784bf14
|
3 |
+
size 1777
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e175e10ba74320a4086e347a0e1a6862/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcad58286697cca342fbb5964762265c57f099076db96920553d22f2595e79d5
|
3 |
+
size 1791
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"duration": 0.00033974647521972656, "input_args": {"model_name": "'speakleash/plgchriso/models/bielik_11B-v2_dpo/dpo5-001_e2'", "revision": "'main'", "token": "None", "trust_remote_code": "True", "test_tokenizer": "False"}, "time": 1723799951.1384425}
|
is_model_on_hub_cache/joblib/src/submission/check_validity/is_model_on_hub/e8594cd72ace59135678f6b5e84f5f4c/output.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a1fb7b246f8007481abf14bc208069cd51769a48f64455abc1f83d41c9c93d9
|
3 |
+
size 213
|
requirements.txt
CHANGED
@@ -11,6 +11,6 @@ pandas==2.0.0
|
|
11 |
python-dateutil==2.8.2
|
12 |
requests==2.28.2
|
13 |
tqdm==4.65.0
|
14 |
-
transformers[cpu]==4.
|
15 |
git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
|
16 |
joblib
|
|
|
11 |
python-dateutil==2.8.2
|
12 |
requests==2.28.2
|
13 |
tqdm==4.65.0
|
14 |
+
transformers[cpu]==4.44.0
|
15 |
git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463#egg=lm-eval
|
16 |
joblib
|
src/leaderboard/read_evals.py
CHANGED
@@ -68,6 +68,7 @@ class EvalResult:
|
|
68 |
org_and_model = org_and_model.replace("models/hf_v7_e2", "APT3-1B-Instruct-e2")
|
69 |
|
70 |
org_and_model = re.sub(r"^pretrained=", "", org_and_model)
|
|
|
71 |
org_and_model = org_and_model.replace(",trust_remote_code=True", "")
|
72 |
org_and_model = org_and_model.replace(",parallelize=True", "")
|
73 |
org_and_model = org_and_model.replace(",tokenizer_backend=huggingface", "")
|
@@ -76,10 +77,19 @@ class EvalResult:
|
|
76 |
org_and_model = re.sub(",prefix_token_id=\d+", "", org_and_model)
|
77 |
org_and_model = re.sub("/$", "", org_and_model)
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
if chat_template:
|
85 |
org_and_model += ",chat"
|
|
|
68 |
org_and_model = org_and_model.replace("models/hf_v7_e2", "APT3-1B-Instruct-e2")
|
69 |
|
70 |
org_and_model = re.sub(r"^pretrained=", "", org_and_model)
|
71 |
+
org_and_model = re.sub(r"^model=", "", org_and_model)
|
72 |
org_and_model = org_and_model.replace(",trust_remote_code=True", "")
|
73 |
org_and_model = org_and_model.replace(",parallelize=True", "")
|
74 |
org_and_model = org_and_model.replace(",tokenizer_backend=huggingface", "")
|
|
|
77 |
org_and_model = re.sub(",prefix_token_id=\d+", "", org_and_model)
|
78 |
org_and_model = re.sub("/$", "", org_and_model)
|
79 |
|
80 |
+
model_mapping={
|
81 |
+
'speakleash/mistral_7B-v2/spkl-only-e1_333887a5':'speakleash/Bielik-7B-v0.1',
|
82 |
+
'speakleash/mistral_7B-v2/spkl-only_sft_v2/e1_base/spkl-only_v10wa_7e6-e2_bbc67e89':'speakleash/Bielik-7B-Instruct-v0.1',
|
83 |
+
'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,API': 'meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,API'
|
84 |
+
}
|
85 |
+
#map org_and_model using model_mapping
|
86 |
+
if org_and_model in model_mapping:
|
87 |
+
org_and_model=model_mapping[org_and_model]
|
88 |
+
# if org_and_model=='speakleash/mistral_7B-v2/spkl-only-e1_333887a5':
|
89 |
+
# org_and_model='speakleash/Bielik-7B-v0.1'
|
90 |
+
# elif org_and_model=='speakleash/mistral_7B-v2/spkl-only_sft_v2/e1_base/spkl-only_v10wa_7e6-e2_bbc67e89':
|
91 |
+
# org_and_model='speakleash/Bielik-7B-Instruct-v0.1'
|
92 |
+
|
93 |
|
94 |
if chat_template:
|
95 |
org_and_model += ",chat"
|