Nathan Habib committed
Commit adb0416
1 Parent(s): 5491f2d

reformat files, put metadata in request files

app.py CHANGED
@@ -1,11 +1,12 @@
import json
import os
+import re
from datetime import datetime, timezone

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, snapshot_download

from src.assets.css_html_js import custom_css, get_window_url_params
from src.assets.text_content import (
@@ -26,7 +27,7 @@ from src.display_models.utils import (
    styled_message,
    styled_warning,
)
-from src.load_from_hub import get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub, load_all_info_from_hub
+from src.load_from_hub import get_all_requested_models, get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub
from src.rate_limiting import user_submission_permission

pd.set_option("display.precision", 1)
@@ -82,32 +83,21 @@ BENCHMARK_COLS = [
    ]
]

-## LOAD INFO FROM HUB
-eval_queue, requested_models, eval_results, users_to_submission_dates = load_all_info_from_hub(
-    QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
-)
+snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None)
+snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None)
+requested_models, users_to_submission_dates = get_all_requested_models(EVAL_REQUESTS_PATH)

-if not IS_PUBLIC:
-    (eval_queue_private, requested_models_private, eval_results_private, _) = load_all_info_from_hub(
-        PRIVATE_QUEUE_REPO,
-        PRIVATE_RESULTS_REPO,
-        EVAL_REQUESTS_PATH_PRIVATE,
-        EVAL_RESULTS_PATH_PRIVATE,
-    )
-else:
-    eval_queue_private, eval_results_private = None, None
-
-original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
+original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
+leaderboard_df = original_df.copy()
+
models = original_df["model_name_for_query"].tolist()  # needed for model backlinks in their to the leaderboard
-
to_be_dumped = f"models = {repr(models)}\n"

-leaderboard_df = original_df.copy()
(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
-) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)


## INTERACTION FUNCTIONS
@@ -155,6 +145,27 @@ def add_new_eval(
    if not model_on_hub:
        return styled_error(f'Model "{model}" {error}')

+    model_info = api.model_info(repo_id=model, revision=revision)
+
+    size_pattern = size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
+    try:
+        model_size = round(model_info.safetensors["total"] / 1e9, 3)
+    except AttributeError:
+        try:
+            size_match = re.search(size_pattern, model.lower())
+            model_size = size_match.group(0)
+            model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
+        except AttributeError:
+            return 65
+
+    size_factor = 8 if (precision == "GPTQ" or "GPTQ" in model) else 1
+    model_size = size_factor * model_size
+
+    try:
+        license = model_info.cardData["license"]
+    except Exception:
+        license = "?"
+
    # Were the model card and license filled?
    modelcard_OK, error_msg = check_model_card(model)
    if not modelcard_OK:
@@ -173,6 +184,9 @@ def add_new_eval(
        "status": "PENDING",
        "submitted_time": current_time,
        "model_type": model_type,
+        "likes": model_info.likes,
+        "params": model_size,
+        "license": license,
    }

    user_name = ""
@@ -240,6 +254,7 @@ def update_table(
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    return df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]

+
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    always_here_cols = [
        AutoEvalColumn.model_type_symbol.name,
@@ -277,10 +292,13 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
            final_df.append(temp_filtered_df)
    if len(final_df) > 0:
        filtered_df = pd.concat(final_df)
-        filtered_df = filtered_df.drop_duplicates(subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name])
+        filtered_df = filtered_df.drop_duplicates(
+            subset=[AutoEvalColumn.model.name, AutoEvalColumn.precision.name, AutoEvalColumn.revision.name]
+        )

    return filtered_df

+
def filter_models(
    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
) -> pd.DataFrame:
@@ -288,7 +306,7 @@ def filter_models(
    if show_deleted:
        filtered_df = df
    else:  # Show only still on the hub models
-        filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
+        filtered_df = df[df[AutoEvalColumn.still_on_hub.name] is True]

    type_emoji = [t[0] for t in type_query]
    filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
@@ -599,7 +617,8 @@ with demo:
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
-            ).style(show_copy_button=True)
+                show_copy_button=True,
+            )

            dummy = gr.Textbox(visible=False)
            demo.load(
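Note: the size-estimation logic added to add_new_eval above can be read in isolation. The sketch below mirrors it as a standalone helper; it assumes a public model and a huggingface_hub version where model_info.safetensors behaves like a dict (as the hunk above relies on), and the function name is illustrative, not part of the Space.

import re

from huggingface_hub import HfApi

api = HfApi()
size_pattern = re.compile(r"(\d\.)?\d+(b|m)")


def estimate_params_in_billions(model: str, revision: str = "main", precision: str = "float16") -> float:
    # Prefer the parameter count reported in the repo's safetensors metadata.
    model_info = api.model_info(repo_id=model, revision=revision)
    try:
        size = round(model_info.safetensors["total"] / 1e9, 3)
    except (AttributeError, TypeError):
        # Fall back to a "7b" / "350m" style hint in the repo name.
        match = re.search(size_pattern, model.lower())
        if match is None:
            return 65.0  # fallback value the app uses when nothing can be inferred
        raw = match.group(0)
        size = round(float(raw[:-1]) if raw[-1] == "b" else float(raw[:-1]) / 1e3, 3)
    # Mirror the app's x8 factor for GPTQ submissions.
    factor = 8 if precision == "GPTQ" or "GPTQ" in model else 1
    return factor * size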
model_info_cache.pkl CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:f98e18ea0fcf11737e108f966f6d8b09120c6a2c231b70e9e242e9bba3145a47
-size 3780284
+oid sha256:15ee9a3cdd3ffdfa4d46497b829fbb43ea5a66222a17d34dfef5ad1111a8eb18
+size 3789941
requirements.txt CHANGED
@@ -60,7 +60,7 @@ sniffio==1.3.0
starlette==0.26.1
toolz==0.12.0
tqdm==4.65.0
-transformers@git+https://github.com/huggingface/transformers
+transformers==4.34.0
typing_extensions==4.5.0
tzdata==2023.3
tzlocal==4.3
@@ -69,3 +69,4 @@ urllib3==1.26.15
uvicorn==0.21.1
websockets==11.0.1
yarl==1.8.2
+hf_transfer==0.1.3
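The new hf_transfer pin only takes effect if the Rust transfer backend is actually switched on; huggingface_hub reads the HF_HUB_ENABLE_HF_TRANSFER environment variable for that. A minimal sketch, with a placeholder repo id and local dir rather than the Space's constants:

import os

# Must be set before huggingface_hub reads its settings.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import snapshot_download

# Placeholder repo id; app.py uses QUEUE_REPO / RESULTS_REPO.
snapshot_download(repo_id="my-org/eval-requests", local_dir="eval-queue", repo_type="dataset", tqdm_class=None)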
src/display_models/get_model_metadata.py CHANGED
@@ -1,15 +1,10 @@
import glob
import json
import os
-import re
-import pickle
from typing import List

-import huggingface_hub
from huggingface_hub import HfApi
from tqdm import tqdm
-from transformers import AutoModel, AutoConfig
-from accelerate import init_empty_weights

from src.display_models.model_metadata_flags import DO_NOT_SUBMIT_MODELS, FLAGGED_MODELS
from src.display_models.model_metadata_type import MODEL_TYPE_METADATA, ModelType, model_type_from_str
@@ -18,86 +13,8 @@ from src.display_models.utils import AutoEvalColumn, model_hyperlink
api = HfApi(token=os.environ.get("H4_TOKEN", None))


-def get_model_infos_from_hub(leaderboard_data: List[dict]):
-    # load cache from disk
-    try:
-        with open("model_info_cache.pkl", "rb") as f:
-            model_info_cache = pickle.load(f)
-    except (EOFError, FileNotFoundError):
-        model_info_cache = {}
-    try:
-        with open("model_size_cache.pkl", "rb") as f:
-            model_size_cache = pickle.load(f)
-    except (EOFError, FileNotFoundError):
-        model_size_cache = {}
-
+def get_model_metadata(leaderboard_data: List[dict]):
    for model_data in tqdm(leaderboard_data):
-        model_name = model_data["model_name_for_query"]
-
-        if model_name in model_info_cache:
-            model_info = model_info_cache[model_name]
-        else:
-            try:
-                model_info = api.model_info(model_name)
-                model_info_cache[model_name] = model_info
-            except (huggingface_hub.utils._errors.RepositoryNotFoundError, huggingface_hub.utils._errors.HfHubHTTPError):
-                print("Repo not found!", model_name)
-                model_data[AutoEvalColumn.license.name] = None
-                model_data[AutoEvalColumn.likes.name] = None
-                if model_name not in model_size_cache:
-                    size_factor = 8 if model_data["Precision"] == "GPTQ" else 1
-                    model_size_cache[model_name] = size_factor * get_model_size(model_name, None)
-                model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
-
-        model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
-        model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
-        if model_name not in model_size_cache:
-            size_factor = 8 if model_data["Precision"] == "GPTQ" else 1
-            model_size_cache[model_name] = size_factor * get_model_size(model_name, model_info)
-        model_data[AutoEvalColumn.params.name] = model_size_cache[model_name]
-
-    # save cache to disk in pickle format
-    with open("model_info_cache.pkl", "wb") as f:
-        pickle.dump(model_info_cache, f)
-    with open("model_size_cache.pkl", "wb") as f:
-        pickle.dump(model_size_cache, f)
-
-
-def get_model_license(model_info):
-    try:
-        return model_info.cardData["license"]
-    except Exception:
-        return "?"
-
-
-def get_model_likes(model_info):
-    return model_info.likes
-
-
-size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
-
-
-def get_model_size(model_name, model_info):
-    # In billions
-    try:
-        return round(model_info.safetensors["total"] / 1e9, 3)
-    except AttributeError:
-        try:
-            config = AutoConfig.from_pretrained(model_name, trust_remote_code=False)
-            with init_empty_weights():
-                model = AutoModel.from_config(config, trust_remote_code=False)
-            return round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e9, 3)
-        except (EnvironmentError, ValueError, KeyError):  # model config not found, likely private
-            try:
-                size_match = re.search(size_pattern, model_name.lower())
-                size = size_match.group(0)
-                return round(float(size[:-1]) if size[-1] == "b" else float(size[:-1]) / 1e3, 3)
-            except AttributeError:
-                return 0
-
-
-def get_model_type(leaderboard_data: List[dict]):
-    for model_data in leaderboard_data:
        request_files = os.path.join(
            "eval-queue",
            model_data["model_name_for_query"] + "_eval_request_*" + ".json",
@@ -125,6 +42,9 @@ def get_model_type(leaderboard_data: List[dict]):
            model_type = model_type_from_str(request["model_type"])
            model_data[AutoEvalColumn.model_type.name] = model_type.value.name
            model_data[AutoEvalColumn.model_type_symbol.name] = model_type.value.symbol  # + ("🔺" if is_delta else "")
+            model_data[AutoEvalColumn.license.name] = request["license"]
+            model_data[AutoEvalColumn.likes.name] = request["likes"]
+            model_data[AutoEvalColumn.params.name] = request["params"]
        except Exception:
            if model_data["model_name_for_query"] in MODEL_TYPE_METADATA:
                model_data[AutoEvalColumn.model_type.name] = MODEL_TYPE_METADATA[
@@ -164,6 +84,5 @@ def remove_forbidden_models(leaderboard_data: List[dict]):

def apply_metadata(leaderboard_data: List[dict]):
    leaderboard_data = remove_forbidden_models(leaderboard_data)
-    get_model_type(leaderboard_data)
-    get_model_infos_from_hub(leaderboard_data)
+    get_model_metadata(leaderboard_data)
    flag_models(leaderboard_data)
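With this refactor, get_model_metadata no longer queries the Hub (or pickle caches) per model; it reads back the license, likes, and params fields that add_new_eval now writes into each request file. A rough sketch of that lookup, using a hypothetical request filename (the real code globs on the pattern shown in the hunk above):

import glob
import json
import os

# Hypothetical example; real files are named <org>/<model>_eval_request_<...>.json under eval-queue/.
pattern = os.path.join("eval-queue", "my-org/my-model_eval_request_*.json")
for path in glob.glob(pattern):
    with open(path) as f:
        request = json.load(f)
    print(request["model_type"], request["license"], request["likes"], request["params"])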
src/display_models/read_results.py CHANGED
@@ -116,10 +116,10 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
    return result_key, eval_results


-def get_eval_results() -> List[EvalResult]:
+def get_eval_results(results_path: str) -> List[EvalResult]:
    json_filepaths = []

-    for root, dir, files in os.walk("eval-results"):
+    for root, dir, files in os.walk(results_path):
        # We should only have json files in model results
        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
            continue
@@ -149,7 +149,7 @@ def get_eval_results() -> List[EvalResult]:
    return eval_results


-def get_eval_results_dicts() -> List[Dict]:
-    eval_results = get_eval_results()
+def get_eval_results_dicts(results_path: str) -> List[Dict]:
+    eval_results = get_eval_results(results_path)

    return [e.to_dict() for e in eval_results]
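The only behavioural change here is that the results directory is now a parameter instead of the hardcoded "eval-results" string, so app.py can point it at the snapshot it just downloaded. A small sketch of the directory walk that get_eval_results performs, with the path as a placeholder:

import os

results_path = "eval-results"  # placeholder; app.py passes EVAL_RESULTS_PATH
json_filepaths = []
for root, _dirs, files in os.walk(results_path):
    # Mirror the guard in get_eval_results: skip directories holding non-JSON content.
    if len(files) == 0 or any(not f.endswith(".json") for f in files):
        continue
    json_filepaths.extend(os.path.join(root, f) for f in files)
print(len(json_filepaths), "result files found")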
src/load_from_hub.py CHANGED
@@ -1,10 +1,9 @@
import json
import os
+from collections import defaultdict

import pandas as pd
-from huggingface_hub import Repository
from transformers import AutoConfig
-from collections import defaultdict

from src.assets.hardcoded_evals import baseline, gpt4_values, gpt35_values
from src.display_models.get_model_metadata import apply_metadata
@@ -38,43 +37,8 @@ def get_all_requested_models(requested_models_dir: str) -> set[str]:
    return set(file_names), users_to_submission_dates


-def load_all_info_from_hub(QUEUE_REPO: str, RESULTS_REPO: str, QUEUE_PATH: str, RESULTS_PATH: str) -> list[Repository]:
-    eval_queue_repo = None
-    eval_results_repo = None
-    requested_models = None
-
-    print("Pulling evaluation requests and results.")
-
-    eval_queue_repo = Repository(
-        local_dir=QUEUE_PATH,
-        clone_from=QUEUE_REPO,
-        repo_type="dataset",
-    )
-    eval_queue_repo.git_pull()
-
-    eval_results_repo = Repository(
-        local_dir=RESULTS_PATH,
-        clone_from=RESULTS_REPO,
-        repo_type="dataset",
-    )
-    eval_results_repo.git_pull()
-
-    requested_models, users_to_submission_dates = get_all_requested_models("eval-queue")
-
-    return eval_queue_repo, requested_models, eval_results_repo, users_to_submission_dates
-
-
-def get_leaderboard_df(
-    eval_results: Repository, eval_results_private: Repository, cols: list, benchmark_cols: list
-) -> pd.DataFrame:
-    if eval_results:
-        print("Pulling evaluation results for the leaderboard.")
-        eval_results.git_pull()
-    if eval_results_private:
-        print("Pulling evaluation results for the leaderboard.")
-        eval_results_private.git_pull()
-
-    all_data = get_eval_results_dicts()
+def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+    all_data = get_eval_results_dicts(results_path)

    if not IS_PUBLIC:
        all_data.append(gpt4_values)
@@ -92,16 +56,7 @@
    return df


-def get_evaluation_queue_df(
-    eval_queue: Repository, eval_queue_private: Repository, save_path: str, cols: list
-) -> list[pd.DataFrame]:
-    if eval_queue:
-        print("Pulling changes for the evaluation queue.")
-        eval_queue.git_pull()
-    if eval_queue_private:
-        print("Pulling changes for the evaluation queue.")
-        eval_queue_private.git_pull()
-
+def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

@@ -147,6 +102,5 @@ def is_model_on_hub(model_name: str, revision: str) -> bool:
            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
        )

-    except Exception as e:
-        print(f"Could not get the model config from the hub.: {e}")
+    except Exception:
        return False, "was not found on hub!"
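Design note: with Repository gone, the Space no longer keeps writable git clones of the queue and results datasets just to read JSON files; the helpers above are read-only consumers of whatever snapshot_download placed on disk in app.py. A hedged sketch of how those local copies could be refreshed, with placeholder repo ids rather than the Space's QUEUE_REPO / RESULTS_REPO constants:

from huggingface_hub import snapshot_download


def refresh_local_snapshots() -> None:
    # Placeholder repo ids; re-running snapshot_download refreshes the local copies
    # (already-downloaded files are typically reused from the cache).
    snapshot_download(repo_id="my-org/eval-requests", local_dir="eval-queue", repo_type="dataset", tqdm_class=None)
    snapshot_download(repo_id="my-org/eval-results", local_dir="eval-results", repo_type="dataset", tqdm_class=None)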
src/rate_limiting.py CHANGED
@@ -1,4 +1,4 @@
-from datetime import datetime, timezone, timedelta
+from datetime import datetime, timedelta, timezone


def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period):