meg-huggingface committed
Commit 24b20ff
1 Parent(s): 75324f0

Adds CPU/Float32 support; adds user access Token passing; fixes hard-coded environment variable.

app.py CHANGED
@@ -26,7 +26,7 @@ from src.display.utils import (
     WeightType,
     Precision
 )
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.envs import API, DEVICE, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 
@@ -309,7 +309,7 @@ with demo:
             choices=[i.value.name for i in Precision if i != Precision.Unknown],
             label="Precision",
             multiselect=False,
-            value="float16",
+            value="float16" if DEVICE != "cpu" else "float32",
             interactive=True,
         )
         weight_type = gr.Dropdown(
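The Precision default now follows the configured device: float16 kernels are only partially supported on CPU, so CPU runs default to float32. A minimal sketch of the same idea at model-load time, assuming transformers and torch are installed; the model id is only an example, not one used by this Space:

import torch
from transformers import AutoModelForCausalLM

DEVICE = "cpu"  # mirrors src/envs.py
dtype = torch.float16 if DEVICE != "cpu" else torch.float32  # fp16 is a poor fit for CPU inference

# "gpt2" is an illustrative model id.
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=dtype).to(DEVICE)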
main_backend.py CHANGED
@@ -9,7 +9,7 @@ from src.backend.run_eval_suite import run_evaluation
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
 from src.backend.sort_queue import sort_models_by_priority
 
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT
+from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
 from src.about import Tasks, NUM_FEWSHOT
 TASKS_HARNESS = [task.value.benchmark for task in Tasks]
 
@@ -21,8 +21,8 @@ RUNNING_STATUS = "RUNNING"
 FINISHED_STATUS = "FINISHED"
 FAILED_STATUS = "FAILED"
 
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
+snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
 
 def run_auto_eval():
     current_pending_status = [PENDING_STATUS]
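Passing token=TOKEN lets the backend pull the requests and results datasets even when they are private. A hedged sketch of the pattern with a placeholder repo id; without a valid token, private repos are reported as not found:

import os
from huggingface_hub import snapshot_download
from huggingface_hub.utils import RepositoryNotFoundError

try:
    snapshot_download(
        repo_id="my-org/requests",       # placeholder, not this Space's repo
        repo_type="dataset",
        revision="main",
        local_dir="eval-queue-bk",
        token=os.environ.get("TOKEN"),   # same env var as src/envs.py
    )
except RepositoryNotFoundError:
    # Private or gated datasets surface as "repository not found" when the token is missing or lacks access.
    raise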
scripts/create_request_file.py CHANGED
@@ -7,11 +7,9 @@ from datetime import datetime, timezone
 import click
 from colorama import Fore
 from huggingface_hub import HfApi, snapshot_download
+from src.envs import TOKEN, EVAL_REQUESTS_PATH, QUEUE_REPO
 
-EVAL_REQUESTS_PATH = "eval-queue"
-QUEUE_REPO = "open-llm-leaderboard/requests"
-
-precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
+precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ", "float32")
 model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
 weight_types = ("Original", "Delta", "Adapter")
 
@@ -36,7 +34,7 @@ def get_model_size(model_info, precision: str):
 def main():
     api = HfApi()
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset")
+    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
 
     model_name = click.prompt("Enter model name")
     revision = click.prompt("Enter revision", default="main")
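With "float32" added to precisions, CPU-only submissions can be created from this script. For illustration only, a request entry for that precision might look like the dict below; the field names are assumptions, not copied from the repo:

eval_entry = {
    "model": "my-org/my-model",   # hypothetical model id
    "revision": "main",
    "precision": "float32",       # newly valid choice
    "weight_type": "Original",
    "model_type": "pretrained",
    "status": "PENDING",
}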
src/backend/manage_requests.py CHANGED
@@ -26,7 +26,7 @@ class EvalRequest:
     def get_model_args(self):
         model_args = f"pretrained={self.model},revision={self.revision}"
 
-        if self.precision in ["float16", "bfloat16"]:
+        if self.precision in ["float16", "bfloat16", "float32"]:
             model_args += f",dtype={self.precision}"
         # Quantized models need some added config, the install of bits and bytes, etc
         #elif self.precision == "8bit":
@@ -71,7 +71,7 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     Returns:
         `list[EvalRequest]`: a list of model info dicts.
     """
-    snapshot_download(repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60)
+    snapshot_download(repo_id=hf_repo, revision="main", local_dir=local_dir, repo_type="dataset", max_workers=60, token=TOKEN)
     json_files = glob.glob(f"{local_dir}/**/*.json", recursive=True)
 
     eval_requests = []
@@ -97,7 +97,7 @@ def check_completed_evals(
     local_dir_results: str,
 ):
     """Checks if the currently running evals are completed, if yes, update their status on the hub."""
-    snapshot_download(repo_id=hf_repo_results, revision="main", local_dir=local_dir_results, repo_type="dataset", max_workers=60)
+    snapshot_download(repo_id=hf_repo_results, revision="main", local_dir=local_dir_results, repo_type="dataset", max_workers=60, token=TOKEN)
 
     running_evals = get_eval_requests(checked_status, hf_repo=hf_repo, local_dir=local_dir)
 
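Accepting "float32" here is what carries the precision through to the evaluation harness, since it is encoded in the model_args string. A small sketch of the string this produces for a hypothetical float32 request:

model, revision, precision = "my-org/my-model", "main", "float32"  # placeholders
model_args = f"pretrained={model},revision={revision}"
if precision in ["float16", "bfloat16", "float32"]:
    model_args += f",dtype={precision}"
print(model_args)  # pretrained=my-org/my-model,revision=main,dtype=float32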
src/display/utils.py CHANGED
@@ -94,6 +94,7 @@ class WeightType(Enum):
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
+    float32 = ModelDetails("float32")
     #qt_8bit = ModelDetails("8bit")
     #qt_4bit = ModelDetails("4bit")
     #qt_GPTQ = ModelDetails("GPTQ")
@@ -104,6 +105,8 @@ class Precision(Enum):
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
+        if precision in ["float32"]:
+            return Precision.float32
         #if precision in ["8bit"]:
         #    return Precision.qt_8bit
         #if precision in ["4bit"]:
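The new member and the extra from_str branch let float32 requests round-trip through the display layer. A minimal usage sketch, assuming from_str keeps its current signature:

precision = Precision.from_str("float32")
assert precision is Precision.float32
assert precision.value.name == "float32"  # the string shown in the app's Precision dropdown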
src/envs.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import HfApi
 TOKEN = os.environ.get("TOKEN") # A read/write token for your org
 
 OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request file
-DEVICE = "cpu" # cuda:0 if you add compute
+DEVICE = "cpu" # "cuda:0" if you add compute
 LIMIT = 20 # !!!! Should be None for actual evaluations!!!
 # ----------------------------------
 
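DEVICE remains a hard-coded string; this hunk only touches the comment. If compute is added later, one possible way (not part of this commit) to pick the device automatically would be:

import torch

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"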
src/submission/check_validity.py CHANGED
@@ -8,7 +8,7 @@ import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
-from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
+from transformers.models.auto.tokenization_auto import AutoTokenizer
 
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
src/submission/submit.py CHANGED
@@ -50,7 +50,7 @@ def add_new_eval(
         return styled_error(f'Base model "{base_model}" {error}')
 
     if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
 