mteb_leaderboard_mtr

Runtime error

App Files Files Community

orionweller committed on May 7

Commit

0d0563c

•

1 Parent(s): cf7ddc6

update

Browse files

Files changed (1) hide show

app.py +28 -3

app.py CHANGED Viewed

@@ -331,6 +331,19 @@ TASK_TO_METRIC = {
     "InstructionRetrieval": "p-MRR",
 }
 def make_clickable_model(model_name, link=None):
     if link is None:
         link = "https://huggingface.co/" + model_name
@@ -1170,6 +1183,15 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
     for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
 }
 MODELS_TO_SKIP = {
     "baseplate/instructor-large-1", # Duplicate
     "radames/e5-large", # Duplicate
@@ -1493,7 +1515,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
     df = pd.DataFrame(df_list)
     # If there are any models that are the same, merge them
     # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
-    df = df.groupby("Model", as_index=False).first()
     # Put 'Model' column first
     cols = sorted(list(df.columns))
     cols.insert(0, cols.pop(cols.index("Model")))
@@ -1502,6 +1524,9 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
         df = add_rank(df)
     if fillna:
         df.fillna("", inplace=True)
     return df
 def get_mteb_average():
@@ -2196,7 +2221,7 @@ function(goalUrlObject) {
 def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
     current_task_language["task"] = event.target.id
     # Either use the cached language for this task or the 1st language
-    current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[0].children[0].id)
     return current_task_language, language_per_task
 def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
@@ -2300,7 +2325,7 @@ with gr.Blocks(css=css) as block:
             with gr.Tab(task, id=task_tab_id) as task_tab:
                 # For updating the 'task' in the URL
                 task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
                 with gr.Tabs() as task_tabs:
                     # Store the task tabs for updating them on load based on URL parameters
                     tabs.append(task_tabs)

     "InstructionRetrieval": "p-MRR",
 }
+TASK_DESCRIPTION = {
     # Maps a task name to the one-sentence description rendered with
     # gr.Markdown(TASK_DESCRIPTION[task]) inside each task's gr.Tab.
     # The lookup is a plain [] index (no .get), so a task tab whose name has
     # no key here raises KeyError while the UI is being built.
     # NOTE(review): TASK_TO_METRIC spells this task "InstructionRetrieval",
     # while the key below is "Retrieval w/Instructions" -- confirm which
     # spelling the tab-building loop actually passes as `task`.
+    "Bitext Mining": "Bitext mining is the task of finding parallel sentences in two languages.",
+    "Clustering": "Clustering is the task of grouping similar documents together.",
+    "Classification": "Classification is the task of assigning a label to a text.",
+    "Pair Classification": "Pair classification is the task of determining whether two texts are similar.",
+    "Reranking": "Reranking is the task of reordering a list of documents to improve relevance.",
+    "Retrieval": "Retrieval is the task of finding relevant documents for a query.",
+    "STS": "Semantic Textual Similarity is the task of determining how similar two texts are.",
+    "Summarization": "Summarization is the task of generating a summary of a text.",
+    "Retrieval w/Instructions": "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions.",
+    "Overall": "Overall performance across MTEB tasks.",
+}
 def make_clickable_model(model_name, link=None):
     if link is None:
         link = "https://huggingface.co/" + model_name
     for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
 }
+CROSS_ENCODERS = {
     # Model names that get_mteb_data marks by prepending a cross-mark emoji
     # to the "Model" cell when "instruction" occurs in task.lower().
     # Membership is tested against x.split(">")[1].split("<")[0], i.e. the
     # text between the first ">" and the following "<" of the cell, so each
     # entry must equal that text exactly.
     # NOTE(review): that split raises IndexError for a "Model" value that
     # contains no ">" -- confirm every cell is an HTML link at that point.
+    "FollowIR-7B",
+    "flan-t5-base",
+    "flan-t5-large",
+    "monobert-large-msmarco",
+    "monot5-3b-msmarco-10k",
+    "monot5-base-msmarco-10k",
+}
 MODELS_TO_SKIP = {
     "baseplate/instructor-large-1", # Duplicate
     "radames/e5-large", # Duplicate
     df = pd.DataFrame(df_list)
     # If there are any models that are the same, merge them
     # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
+    df = df.groupby("Model", as_index=False).first()
     # Put 'Model' column first
     cols = sorted(list(df.columns))
     cols.insert(0, cols.pop(cols.index("Model")))
         df = add_rank(df)
     if fillna:
         df.fillna("", inplace=True)
+    if "instruction" in task.lower():
+        df["Model"] = df.Model.apply(lambda x: "❎" + x if x.split(">")[1].split("<")[0] in CROSS_ENCODERS else x)
     return df
 def get_mteb_average():
 def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
     # Registered through task_tab.select(...): stores the selected tab's id as
     # the current task, picks a language for it, and returns both dicts, which
     # are bound as the handler's outputs. language_per_task is returned as
     # received; this function does not modify it.
     current_task_language["task"] = event.target.id
     # Either use the cached language for this task or the 1st language
     # children[1] rather than children[0]: gr.Markdown(TASK_DESCRIPTION[task])
     # is now created before gr.Tabs() inside the task tab, so the Tabs block
     # is presumably the tab's second child (assumes children follow creation
     # order -- TODO confirm against Gradio); its children[0] is then the
     # first language tab.
     # The default passed to .get() is evaluated even when a cached language
     # exists, so the children[1].children[0] lookup must always succeed.
+    current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id)
     return current_task_language, language_per_task
 def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
             with gr.Tab(task, id=task_tab_id) as task_tab:
                 # For updating the 'task' in the URL
                 task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
+                gr.Markdown(TASK_DESCRIPTION[task])
                 with gr.Tabs() as task_tabs:
                     # Store the task tabs for updating them on load based on URL parameters
                     tabs.append(task_tabs)