eduagarcia committed on
Commit
de3b367
•
1 Parent(s): 811ded7

add env variables: REQUIRE_MODEL_CARD and REQUIRE_MODEL_LICENSE

Browse files
src/envs.py CHANGED
@@ -66,5 +66,7 @@ ORIGINAL_HF_LEADERBOARD_RESULTS_REPO = get_config("ORIGINAL_HF_LEADERBOARD_RESUL
66
  ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, 'original_results')
67
 
68
  SHOW_INCOMPLETE_EVALS = str2bool(get_config("SHOW_INCOMPLETE_EVALS", False))
 
 
69
 
70
  API = HfApi(token=H4_TOKEN)
 
66
  ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, 'original_results')
67
 
68
  SHOW_INCOMPLETE_EVALS = str2bool(get_config("SHOW_INCOMPLETE_EVALS", False))
69
+ REQUIRE_MODEL_CARD = str2bool(get_config("REQUIRE_MODEL_CARD", True))
70
+ REQUIRE_MODEL_LICENSE = str2bool(get_config("REQUIRE_MODEL_LICENSE", True))
71
 
72
  API = HfApi(token=H4_TOKEN)
src/scripts/update_all_request_files.py CHANGED
@@ -46,9 +46,9 @@ def update_models(file_path, models, original_leaderboard_files=None):
46
 
47
  # Is the model still on the hub?
48
  model_name = model_id
49
-
50
  if model_cfg.card_data is not None and hasattr(model_cfg.card_data, "base_model") and model_cfg.card_data.base_model is not None:
51
- model_name = model_cfg.card_data.base_model # for adapters, we look at the parent model
 
52
  still_on_hub, _, _ = is_model_on_hub(
53
  model_name=model_name, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN
54
  )
@@ -57,7 +57,7 @@ def update_models(file_path, models, original_leaderboard_files=None):
57
  tags = []
58
 
59
  if still_on_hub:
60
- status, _, model_card = check_model_card(model_id)
61
  tags = get_model_tags(model_card, model_id)
62
 
63
 
 
46
 
47
  # Is the model still on the hub?
48
  model_name = model_id
 
49
  if model_cfg.card_data is not None and hasattr(model_cfg.card_data, "base_model") and model_cfg.card_data.base_model is not None:
50
+ if isinstance(model_cfg.card_data.base_model, str):
51
+ model_name = model_cfg.card_data.base_model # for adapters, we look at the parent model
52
  still_on_hub, _, _ = is_model_on_hub(
53
  model_name=model_name, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN
54
  )
 
57
  tags = []
58
 
59
  if still_on_hub:
60
+ status, _, _, model_card = check_model_card(model_id)
61
  tags = get_model_tags(model_card, model_id)
62
 
63
 
src/submission/check_validity.py CHANGED
@@ -20,23 +20,27 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
20
  try:
21
  card = ModelCard.load(repo_id)
22
  except huggingface_hub.utils.EntryNotFoundError:
23
- return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
24
  except Exception as e:
25
- return False, f"Error while loading the model card. Exception: {str(e)}", None
26
 
27
- # Enforce license metadata
28
  if card.data.license is None:
29
  if not ("license_name" in card.data and "license_link" in card.data):
30
- return False, (
31
- "License not found. Please add a license to your model card using the `license` metadata or a"
32
- " `license_name`/`license_link` pair."
33
- ), None
34
 
35
  # Enforce card content
36
  if len(card.text) < 200:
37
- return False, "Please add a description to your model card, it is too short.", None
38
 
39
- return True, "", card
 
 
 
 
 
 
 
40
 
41
 
42
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=TRUST_REMOTE_CODE, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
@@ -149,19 +153,19 @@ def get_model_tags(model_card, model: str):
149
 
150
  # Storing the model tags
151
  tags = []
152
- moe_keywords = ["moe", "mixture of experts", "mixtral"]
 
 
153
  if model_card is not None:
154
  if model_card.data.tags:
155
  is_merge_from_metadata = "merge" in model_card.data.tags
156
  is_moe_from_metadata = "moe" in model_card.data.tags
157
- merge_keywords = ["merged model", "merge model"]
158
  # If the model is a merge but not saying it in the metadata, we flag it
159
  is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
160
  if is_merge_from_model_card or is_merge_from_metadata:
161
  tags.append("merge")
162
  if not is_merge_from_metadata:
163
  tags.append("flagged:undisclosed_merge")
164
- moe_keywords = ["moe", "mixtral"]
165
  is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
166
  is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
167
  if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
 
20
  try:
21
  card = ModelCard.load(repo_id)
22
  except huggingface_hub.utils.EntryNotFoundError:
23
+ return False, False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
24
  except Exception as e:
25
+ return False, False, f"Error while loading the model card. Exception: {str(e)}", None
26
 
27
+ license = True
28
  if card.data.license is None:
29
  if not ("license_name" in card.data and "license_link" in card.data):
30
+ license = False
 
 
 
31
 
32
  # Enforce card content
33
  if len(card.text) < 200:
34
+ return False, license, "Please add a description to your model card bigger than 200 characters, it is too short.", None
35
 
36
+ # Enforce license metadata
37
+ if not license:
38
+ return True, False, (
39
+ "License not found. Please add a license to your model card using the `license` metadata or a"
40
+ " `license_name`/`license_link` pair."
41
+ ), None
42
+
43
+ return True, True, "", card
44
 
45
 
46
  def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=TRUST_REMOTE_CODE, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
 
153
 
154
  # Storing the model tags
155
  tags = []
156
+ merge_keywords = ["merged model", "merge model"]
157
+ moe_keywords = ["moe", "mixtral"]
158
+
159
  if model_card is not None:
160
  if model_card.data.tags:
161
  is_merge_from_metadata = "merge" in model_card.data.tags
162
  is_moe_from_metadata = "moe" in model_card.data.tags
 
163
  # If the model is a merge but not saying it in the metadata, we flag it
164
  is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
165
  if is_merge_from_model_card or is_merge_from_metadata:
166
  tags.append("merge")
167
  if not is_merge_from_metadata:
168
  tags.append("flagged:undisclosed_merge")
 
169
  is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
170
  is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
171
  if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
src/submission/submit.py CHANGED
@@ -5,7 +5,7 @@ from datetime import datetime, timezone
5
  from huggingface_hub import ModelCard, snapshot_download
6
 
7
  from src.display.formatting import styled_error, styled_message, styled_warning
8
- from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
9
  from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
10
  from src.submission.check_validity import (
11
  already_submitted_models,
@@ -99,9 +99,11 @@ def add_new_eval(
99
  license = None
100
  #return styled_error("Please select a license for your model")
101
 
102
- modelcard_OK, error_msg, model_card = check_model_card(model)
103
- #if not modelcard_OK:
104
- # return styled_error(error_msg)
 
 
105
 
106
  tags = get_model_tags(model_card, model)
107
 
 
5
  from huggingface_hub import ModelCard, snapshot_download
6
 
7
  from src.display.formatting import styled_error, styled_message, styled_warning
8
+ from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA, REQUIRE_MODEL_LICENSE, REQUIRE_MODEL_CARD
9
  from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
10
  from src.submission.check_validity import (
11
  already_submitted_models,
 
99
  license = None
100
  #return styled_error("Please select a license for your model")
101
 
102
+ modelcard_OK, license_OK, error_msg, model_card = check_model_card(model)
103
+ if not modelcard_OK and REQUIRE_MODEL_CARD:
104
+ return styled_error(error_msg)
105
+ if not license_OK and REQUIRE_MODEL_LICENSE:
106
+ return styled_error(error_msg)
107
 
108
  tags = get_model_tags(model_card, model)
109
 
tasks_config/pt_config.yaml CHANGED
@@ -11,6 +11,8 @@ config:
11
  GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS: true
12
  TRUST_REMOTE_CODE: true
13
  SHOW_INCOMPLETE_EVALS: false
 
 
14
  readme:
15
  general_description: |
16
  πŸ“ The πŸš€ Open PT LLM Leaderboard aims to provide a benchmark for the evaluation of
 
11
  GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS: true
12
  TRUST_REMOTE_CODE: true
13
  SHOW_INCOMPLETE_EVALS: false
14
+ REQUIRE_MODEL_CARD: true
15
+ REQUIRE_MODEL_LICENSE: false
16
  readme:
17
  general_description: |
18
  πŸ“ The πŸš€ Open PT LLM Leaderboard aims to provide a benchmark for the evaluation of