Clémentine committed
Commit 7689092
1 Parent(s): 816b1dc
custom_tasks.py CHANGED
@@ -1,6 +1,6 @@
 # ruff: noqa: F405, F403, F401
 """
-Custom evaluation tasks for lighteval. Copy this file and complete it with the info for your task.
+Custom evaluation tasks for lighteval. Complete this task with your own configuration if you want to use a custom lighteval task.
 
 This file generally create just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
 

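For orientation, the custom task file that this docstring describes only needs to expose a TASKS_TABLE (and optionally TASKS_GROUPS) at module level. A minimal sketch, assuming nothing beyond what the docstring states; the exact task-construction API depends on the lighteval version you install, and everything below is a placeholder:

# custom_tasks.py -- hypothetical skeleton only; fill in real lighteval task
# configurations for the evaluation suite you chose.
TASKS_TABLE = []   # list of custom task configurations imported by LightEval
TASKS_GROUPS = {}  # optional: map a group name to the tasks it bundles

if __name__ == "__main__":
    # quick sanity check that the table is populated before wiring it to the backend
    print(len(TASKS_TABLE), "custom task(s) defined")
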
src/backend/manage_requests.py CHANGED
@@ -11,27 +11,32 @@ logger = setup_logger(__name__)
 
 @dataclass
 class EvalRequest:
+    """This class represents one evaluation request file.
+    """
     model: str
-    private: bool
     status: str
     json_filepath: str
     weight_type: str = "Original"
     model_type: str = "" # pretrained, finetuned, with RL
     precision: str = "" # float16, bfloat16
-    base_model: Optional[str] = None # for adapter models
-    revision: str = "main" # commit
+    revision: str = "main" # commit hash
     submitted_time: Optional[str] = "2022-05-18T11:40:22.519222" # random date just so that we can still order requests by date
-    model_type: Optional[str] = None
+    model_type: Optional[str] = None # pretrained, fine-tuned, etc - define your own categories in
     likes: Optional[int] = 0
     params: Optional[int] = None
     license: Optional[str] = ""
 
     def get_model_args(self):
+        """Edit this function if you want to manage more complex quantization issues. You'll need to map it to
+        the evaluation suite you chose.
+        """
         model_args = f"pretrained={self.model},revision={self.revision}"
 
         if self.precision in ["float16", "bfloat16", "float32"]:
             model_args += f",dtype={self.precision}"
+
         # Quantized models need some added config, the install of bits and bytes, etc
+
         #elif self.precision == "8bit":
         #    model_args += ",load_in_8bit=True"
         #elif self.precision == "4bit":
@@ -39,7 +44,6 @@ class EvalRequest:
         #elif self.precision == "GPTQ":
         # A GPTQ model does not need dtype to be specified,
         # it will be inferred from the config
-            pass
         else:
             raise Exception(f"Unknown precision {self.precision}.")
 
@@ -67,7 +71,7 @@ def set_eval_request(api: HfApi, eval_request: EvalRequest, set_to_status: str,
 
 
 def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
-    """Get all pending evaluation requests and return a list in which private
+    """Gets all pending evaluation requests and return a list in which private
     models appearing first, followed by public models sorted by the number of
     likes.
 

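To make the dataclass above concrete, here is a hedged usage sketch of how an evaluation request maps to the model-argument string; all values are invented, only the fields and the f-string come from the diff above:

from src.backend.manage_requests import EvalRequest

# Hypothetical request; the model id, status and file path are placeholders
request = EvalRequest(
    model="my-org/my-model",
    status="PENDING",
    json_filepath="eval-queue/my-org/my-model_eval_request.json",
    precision="float16",
    revision="main",
)

# get_model_args() concatenates model, revision and (for float16/bfloat16/float32) the dtype
print(request.get_model_args())
# -> pretrained=my-org/my-model,revision=main,dtype=float16
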
src/backend/run_eval_suite_harness.py CHANGED
@@ -12,7 +12,23 @@ from src.logging import setup_logger
 logging.getLogger("openai").setLevel(logging.WARNING)
 logger = setup_logger(__name__)
 
-def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
+def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int, batch_size: int, device: str, local_dir: str, results_repo: str, no_cache: bool =True, limit: int =None):
+    """Runs one evaluation for the current evaluation request file, then pushes the results to the hub.
+
+    Args:
+        eval_request (EvalRequest): Input evaluation request file representation
+        task_names (list): Tasks to launch
+        num_fewshot (int): Number of few shots to use
+        batch_size (int): Selected batch size
+        device (str): "cpu" or "gpu:0", depending on what you assigned to the space
+        local_dir (str): Where to save the results locally
+        results_repo (str): To which repository to upload the results
+        no_cache (bool, optional): Whether to use a cache or not.
+        limit (int, optional): Whether to use a number of samples only for the evaluation - only for debugging
+
+    Returns:
+        _type_: _description_
+    """
     if limit:
         logger.info(
             "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."

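A hedged call sketch for this harness entry point; the signature comes from the diff above, while every concrete value (tasks, paths, repositories) is a placeholder:

from src.backend.manage_requests import EvalRequest
from src.backend.run_eval_suite_harness import run_evaluation

# Hypothetical invocation for local testing; repos and task names are assumptions
results = run_evaluation(
    eval_request=EvalRequest(
        model="my-org/my-model",
        status="RUNNING",
        json_filepath="eval-queue/my-org/my-model_eval_request.json",
        precision="float16",
    ),
    task_names=["hellaswag"],      # tasks known to the harness you installed
    num_fewshot=0,
    batch_size=1,
    device="cpu",                  # the docstring also mentions "gpu:0" for GPU Spaces
    local_dir="./eval-results",
    results_repo="my-org/results", # made-up results repository
    no_cache=True,
    limit=10,                      # debugging only, as the WARNING above insists
)
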
src/backend/run_eval_suite_lighteval.py CHANGED
@@ -13,6 +13,22 @@ logging.getLogger("openai").setLevel(logging.WARNING)
 logger = setup_logger(__name__)
 
 def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int, local_dir: str, accelerator: str, region: str, vendor: str, instance_size: str, instance_type: str, limit=None):
+    """Runs one evaluation for the current evaluation request file using lighteval, then pushes the results to the hub.
+
+    Args:
+        eval_request (EvalRequest): Input evaluation request file representation
+        task_names (list): Tasks to launch
+        batch_size (int): Selected batch size
+        accelerator (str): Inference endpoint parameter for running the evaluation
+        region (str): Inference endpoint parameter for running the evaluation
+        vendor (str): Inference endpoint parameter for running the evaluation
+        instance_size (str): Inference endpoint parameter for running the evaluation
+        instance_type (str): Inference endpoint parameter for running the evaluation
+        local_dir (str): Where to save the results locally
+        no_cache (bool, optional): Whether to use a cache or not.
+        limit (int, optional): Whether to use a number of samples only for the evaluation - only for debugging
+    """
+
     if limit:
         logger.info("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 

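And a hedged call sketch for the lighteval entry point; the inference-endpoint arguments mirror the docstring added above, and the concrete values (task string, region, vendor, instance size/type) are illustrative assumptions, not values from this commit:

from src.backend.manage_requests import EvalRequest
from src.backend.run_eval_suite_lighteval import run_evaluation

# Hypothetical invocation; adjust the endpoint parameters to what your account supports
run_evaluation(
    eval_request=EvalRequest(
        model="my-org/my-model",
        status="RUNNING",
        json_filepath="eval-queue/my-org/my-model_eval_request.json",
        precision="float16",
    ),
    task_names="custom|mytask|0|0",  # format depends on your lighteval version and custom_tasks.py
    batch_size=1,
    local_dir="./eval-results",
    accelerator="cpu",
    region="us-east-1",
    vendor="aws",
    instance_size="medium",
    instance_type="c6i",
    limit=10,                        # debugging only
)
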
src/backend/sort_queue.py CHANGED
@@ -11,7 +11,7 @@ class ModelMetadata:
     likes: int = 0
     size: int = 15
 
-
+# All the functions below sort the models in the queue based on different parameters
 def sort_models_by_priority(api: HfApi, models: list[EvalRequest]) -> list[EvalRequest]:
     private_models = [model for model in models if model.private]
     public_models = [model for model in models if not model.private]
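
As a worked illustration of the comment added in this hunk, a likes-based sorter could look like the sketch below; this is an assumption about shape only, not the implementation actually shipped in sort_queue.py:

from src.backend.manage_requests import EvalRequest

# Hypothetical helper in the spirit of "sort the models in the queue based on different parameters"
def sort_models_by_likes(models: list[EvalRequest]) -> list[EvalRequest]:
    # most-liked models first; missing like counts are treated as zero
    return sorted(models, key=lambda m: m.likes or 0, reverse=True)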