open_pt_llm_leaderboard

Running on CPU Upgrade

Clémentine committed on Jan 22

Commit

05bda40

•

1 Parent(s): c2cc6bf

change model types available at submission time

Files changed (7) hide show

app.py CHANGED Viewed

@@ -102,7 +102,7 @@ def update_table(
     hide_models: list,
     query: str,
 ):
-    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, hide_models)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df

     hide_models: list,
     query: str,
 ):
+    filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df

src/display/about.py CHANGED Viewed

@@ -56,9 +56,8 @@ Side note on the baseline scores:
 ## Icons
 - {ModelType.PT.to_str(" : ")} model: new, base models, trained on a given corpora
 - {ModelType.FT.to_str(" : ")} model: pretrained models finetuned on more data
-Specific fine-tune subcategories (more adapted to chat):
-- {ModelType.IFT.to_str(" : ")} model: instruction fine-tunes, which are model fine-tuned specifically on datasets of task instruction
-- {ModelType.RL.to_str(" : ")} model: reinforcement fine-tunes, which usually change the model loss a bit with an added policy.
 If there is no icon, we have not uploaded the information on the model yet, feel free to open an issue with the model information!
 "Flagged" indicates that this model has been flagged by the community, and should probably be ignored! Clicking the link will redirect you to the discussion about the model.

 ## Icons
 - {ModelType.PT.to_str(" : ")} model: new, base models, trained on a given corpora
 - {ModelType.FT.to_str(" : ")} model: pretrained models finetuned on more data
+- {ModelType.chat.to_str(" : ")} model: chat like fine-tunes, either using IFT (datasets of task instruction), RLHF or DPO (changing the model loss a bit with an added policy), etc
+- {ModelType.merges.to_str(" : ")} model: merges or MoErges, models which have been merged or fused without additional fine-tuning.
 If there is no icon, we have not uploaded the information on the model yet, feel free to open an issue with the model information!
 "Flagged" indicates that this model has been flagged by the community, and should probably be ignored! Clicking the link will redirect you to the discussion about the model.

src/display/utils.py CHANGED Viewed

@@ -120,9 +120,9 @@ class ModelDetails:
 class ModelType(Enum):
     PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
     Unknown = ModelDetails(name="", symbol="?")
     def to_str(self, separator=" "):
@@ -134,10 +134,10 @@ class ModelType(Enum):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
         return ModelType.Unknown
 class WeightType(Enum):

 class ModelType(Enum):
     PT = ModelDetails(name="pretrained", symbol="🟢")
+    FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="🔶")
+    chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="💬")
+    merges = ModelDetails(name="merges and moerges", symbol="🤝")
     Unknown = ModelDetails(name="", symbol="?")
     def to_str(self, separator=" "):
             return ModelType.FT
         if "pretrained" in type or "🟢" in type:
             return ModelType.PT
+        if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "⭕", "💬"]]):
+            return ModelType.chat
+        if "merge" in type or "🤝" in type:
+            return ModelType.merges
         return ModelType.Unknown
 class WeightType(Enum):

src/leaderboard/filter_models.py CHANGED Viewed

@@ -133,6 +133,6 @@ def remove_forbidden_models(leaderboard_data: list[dict]):
     return leaderboard_data
-def filter_models(leaderboard_data: list[dict]):
     leaderboard_data = remove_forbidden_models(leaderboard_data)
     flag_models(leaderboard_data)

     return leaderboard_data
+def filter_models_flags(leaderboard_data: list[dict]):
     leaderboard_data = remove_forbidden_models(leaderboard_data)
     flag_models(leaderboard_data)

src/populate.py CHANGED Viewed

@@ -5,7 +5,7 @@ import pandas as pd
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
-from src.leaderboard.filter_models import filter_models
 from src.leaderboard.read_evals import get_raw_eval_results
@@ -13,7 +13,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
-    filter_models(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
+from src.leaderboard.filter_models import filter_models_flags
 from src.leaderboard.read_evals import get_raw_eval_results
     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
+    filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)

src/scripts/update_all_request_files.py CHANGED Viewed

@@ -3,6 +3,7 @@ from huggingface_hub import ModelCard
 import json
 import time
 from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
 from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
@@ -85,3 +86,4 @@ def update_dynamic_files():
         commit_message=f"Daily request file update.",
     )
     print(f"UPDATE_DYNAMIC: pushed to hub")

 import json
 import time
 from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
 from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
         commit_message=f"Daily request file update.",
     )
     print(f"UPDATE_DYNAMIC: pushed to hub")

update_dynamic.py ADDED Viewed

+from src.scripts.update_all_request_files import update_dynamic_files
+if __name__ == "__main__":
+    update_dynamic_files()