IL-TUR-Leaderboard

Running

App Files Files Community

abhinav-joshi committed on Jul 8

Commit

e22e877

•

1 Parent(s): a6011ea

clean codebase

Browse files

Files changed (6) hide show

dummy.py +0 -15
eval_utils.py +155 -3
evaluation_results.json +0 -38
labels.txt +0 -12
ner_helpers.py +0 -141
uploads.py +41 -91

dummy.py DELETED Viewed

@@ -1,15 +0,0 @@
-import json
-# load the results json file
-with open("submissions/baseline/results.json") as f:
-    results = json.load(f)
-# update the results
-with open("submissions/baseline/submission.json") as f:
-    submission = json.load(f)
-breakpoint()
-# update the results
-results.append(submission[0])

eval_utils.py CHANGED Viewed

@@ -13,7 +13,147 @@ from sklearn.metrics import f1_score
 from tqdm import tqdm
 from transformers import AutoTokenizer
-from ner_helpers import span2bio
 def load_json(file_path):
@@ -76,8 +216,20 @@ def evaluate_cjpe(gold_data, pred_data):
 def evaluate_lner(gold_data, pred_data, text_data):
-    with open("labels.txt") as f:
-        labels = f.read().strip().split("\n")
     results_per_fold = {}
     for fold in range(1, 4):

 from tqdm import tqdm
 from transformers import AutoTokenizer
+from transformers import AutoTokenizer
+import re
+import string
+class TF_Tokenizer:
+    def __init__(self, model_str):
+        tok = AutoTokenizer.from_pretrained(model_str)
+    def __call__(self, txt):
+        return self.tok.tokenize(txt)
+class WS_Tokenizer:
+    def __init__(self):
+        pass
+    def __call__(self, txt):
+        return re.findall(r"[{}]|\w+".format(string.punctuation), txt)
+def convert_spans_to_bio(txt, roles, tokenizer_func):
+    roles = sorted(roles, key=lambda x: x["start"])
+    roles_left = [r["start"] for r in roles]
+    ttxt = tokenizer_func(txt)
+    c = 0
+    cr = -1
+    prev = "O"
+    troles = []
+    for tok in ttxt:
+        if c >= len(txt):
+            break
+        while txt[c] == " ":
+            c += 1
+        else:
+            if c in roles_left:  # Start of a new role
+                ind = roles_left.index(c)
+                cr = roles[ind]["end"]
+                prev = "I-" + roles[ind]["label"]
+                troles.append("B-" + roles[ind]["label"])
+            else:
+                if c < cr:  # Assign previous role
+                    troles.append(prev)
+                else:  # Assign 'O'
+                    troles.append("O")
+            c += len(tok)
+    if len(ttxt) != len(troles):
+        troles += ["O"] * (len(ttxt) - len(troles))
+    assert len(ttxt) == len(troles)
+    return troles
+def convert_bio_to_spans(txt, troles, tokenizer_func):
+    c = 0
+    c2 = 0
+    cr = -1
+    cs = -1
+    prev = "O"
+    roles = []
+    ttxt = tokenizer_func(txt)
+    if len(ttxt) != len(troles):
+        ttxt = ttxt[: len(troles)]
+    for j, tok in enumerate(ttxt):
+        if c >= len(txt):
+            break
+        while c < len(txt) and txt[c].isspace():
+            c += 1
+        if tok[:2] == "##" or tok == "[UNK]":
+            c += len(tok) - 2 if tok[:2] == "##" else 1
+        else:
+            if troles[j].startswith("B-"):
+                if cs >= cr:
+                    cr = c
+                    if cs >= 0:
+                        roles.append({"start": cs, "end": c2, "label": prev})
+                cs = c
+                prev = troles[j][2:]
+            else:
+                if troles[j] == "O":
+                    if cs >= cr:
+                        cr = c
+                        if cs >= 0:
+                            roles.append({"start": cs, "end": c2, "label": prev})
+            c += len(tok)
+        c2 = c
+    if cs >= cr:
+        if cs >= 0:
+            roles.append({"start": cs, "end": c2, "label": prev})
+    return roles
+def span2bio(txt, labels):
+    roles = sorted(labels, key=lambda x: x["label"])
+    roles_left = [r["start"] for r in roles]
+    ttxt = re.findall(r"[{}]|\w+".format(string.punctuation), txt)
+    c = 0
+    cr = -1
+    prev = "O"
+    troles = []
+    for tok in ttxt:
+        if c >= len(txt):
+            break
+        while txt[c] == " ":
+            c += 1
+        else:
+            if c in roles_left:  # Start of a new role
+                ind = roles_left.index(c)
+                cr = roles[ind]["end"]
+                prev = "I-" + roles[ind]["label"]
+                troles.append("B-" + roles[ind]["label"])
+            else:
+                if c < cr:  # Assign previous role
+                    troles.append(prev)
+                else:  # Assign 'O'
+                    troles.append("O")
+            c += len(tok)
+    if len(ttxt) != len(troles):
+        troles += ["O"] * (len(ttxt) - len(troles))
+    assert len(ttxt) == len(troles)
+    return ttxt, troles
 def load_json(file_path):
 def evaluate_lner(gold_data, pred_data, text_data):
+    labels = [
+        "APP",
+        "RESP",
+        "A.COUNSEL",
+        "R.COUNSEL",
+        "JUDGE",
+        "WIT",
+        "AUTH",
+        "COURT",
+        "STAT",
+        "PREC",
+        "DATE",
+        "CASENO",
+    ]
     results_per_fold = {}
     for fold in range(1, 4):

evaluation_results.json DELETED Viewed

@@ -1,38 +0,0 @@
-[
-  {
-    "Method": "GPT-5 (2-shot)",
-    "Submitted By": "IL-TUR",
-    "Github Link": "dummy submission",
-    "L-NER": {
-      "strict mF1": "-"
-    },
-    "RR": {
-      "mF1": {
-        "mF1": "0.10"
-      }
-    },
-    "CJPE": {
-      "mF1": "-",
-      "ROUGE-L": "-",
-      "BLEU": "-"
-    },
-    "BAIL": {
-      "mF1": "0.02"
-    },
-    "LSI": {
-      "mF1": "0.26"
-    },
-    "PCR": {
-      "muF1@K": "0.63"
-    },
-    "SUMM": {
-      "ROUGE-L": "-",
-      "BERTSCORE": "-"
-    },
-    "L-MT": {
-      "BLEU": "-",
-      "GLEU": "-",
-      "chrF++": "-"
-    }
-  }
-]

labels.txt DELETED Viewed

@@ -1,12 +0,0 @@
-APP
-RESP
-A.COUNSEL
-R.COUNSEL
-JUDGE
-WIT
-AUTH
-COURT
-STAT
-PREC
-DATE
-CASENO

ner_helpers.py DELETED Viewed

@@ -1,141 +0,0 @@
-from transformers import AutoTokenizer
-import re
-import string
-class TF_Tokenizer:
-    def __init__(self, model_str):
-        tok = AutoTokenizer.from_pretrained(model_str)
-    def __call__(self, txt):
-        return self.tok.tokenize(txt)
-class WS_Tokenizer:
-    def __init__(self):
-        pass
-    def __call__(self, txt):
-        return re.findall(r"[{}]|\w+".format(string.punctuation), txt)
-def convert_spans_to_bio(txt, roles, tokenizer_func):
-    roles = sorted(roles, key=lambda x: x["start"])
-    roles_left = [r["start"] for r in roles]
-    ttxt = tokenizer_func(txt)
-    c = 0
-    cr = -1
-    prev = "O"
-    troles = []
-    for tok in ttxt:
-        if c >= len(txt):
-            break
-        while txt[c] == " ":
-            c += 1
-        else:
-            if c in roles_left:  # Start of a new role
-                ind = roles_left.index(c)
-                cr = roles[ind]["end"]
-                prev = "I-" + roles[ind]["label"]
-                troles.append("B-" + roles[ind]["label"])
-            else:
-                if c < cr:  # Assign previous role
-                    troles.append(prev)
-                else:  # Assign 'O'
-                    troles.append("O")
-            c += len(tok)
-    if len(ttxt) != len(troles):
-        troles += ["O"] * (len(ttxt) - len(troles))
-    assert len(ttxt) == len(troles)
-    return troles
-def convert_bio_to_spans(txt, troles, tokenizer_func):
-    c = 0
-    c2 = 0
-    cr = -1
-    cs = -1
-    prev = "O"
-    roles = []
-    ttxt = tokenizer_func(txt)
-    if len(ttxt) != len(troles):
-        ttxt = ttxt[: len(troles)]
-    for j, tok in enumerate(ttxt):
-        if c >= len(txt):
-            break
-        while c < len(txt) and txt[c].isspace():
-            c += 1
-        if tok[:2] == "##" or tok == "[UNK]":
-            c += len(tok) - 2 if tok[:2] == "##" else 1
-        else:
-            if troles[j].startswith("B-"):
-                if cs >= cr:
-                    cr = c
-                    if cs >= 0:
-                        roles.append({"start": cs, "end": c2, "label": prev})
-                cs = c
-                prev = troles[j][2:]
-            else:
-                if troles[j] == "O":
-                    if cs >= cr:
-                        cr = c
-                        if cs >= 0:
-                            roles.append({"start": cs, "end": c2, "label": prev})
-            c += len(tok)
-        c2 = c
-    if cs >= cr:
-        if cs >= 0:
-            roles.append({"start": cs, "end": c2, "label": prev})
-    return roles
-def span2bio(txt, labels):
-    roles = sorted(labels, key=lambda x: x["label"])
-    roles_left = [r["start"] for r in roles]
-    ttxt = re.findall(r"[{}]|\w+".format(string.punctuation), txt)
-    c = 0
-    cr = -1
-    prev = "O"
-    troles = []
-    for tok in ttxt:
-        if c >= len(txt):
-            break
-        while txt[c] == " ":
-            c += 1
-        else:
-            if c in roles_left:  # Start of a new role
-                ind = roles_left.index(c)
-                cr = roles[ind]["end"]
-                prev = "I-" + roles[ind]["label"]
-                troles.append("B-" + roles[ind]["label"])
-            else:
-                if c < cr:  # Assign previous role
-                    troles.append(prev)
-                else:  # Assign 'O'
-                    troles.append("O")
-            c += len(tok)
-    if len(ttxt) != len(troles):
-        troles += ["O"] * (len(ttxt) - len(troles))
-    assert len(ttxt) == len(troles)
-    return ttxt, troles

uploads.py CHANGED Viewed

@@ -1,33 +1,38 @@
-from email.utils import parseaddr
-from huggingface_hub import HfApi
 import os
-import datetime
 import json
-import pandas as pd
 import gradio as gr
 from eval_utils import get_evaluation_scores
 LEADERBOARD_PATH = "Exploration-Lab/IL-TUR-Leaderboard"
 SUBMISSION_FORMAT = "predictions"
-# RESULTS_PATH = "Exploration-Lab/IL-TUR-Leaderboard-results"
 TOKEN = os.environ.get("TOKEN", None)
 YEAR_VERSION = "2024"
 api = HfApi(token=TOKEN)
 def format_error(msg):
-    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"
 def format_warning(msg):
-    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"
 def format_log(msg):
-    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"
 def model_hyperlink(link, model_name):
@@ -35,26 +40,22 @@ def model_hyperlink(link, model_name):
 def input_verification(method_name, url, path_to_file, organisation, mail):
-    for input in [method_name, url, path_to_file, organisation, mail]:
-        if input == "":
-            return format_warning("Please fill all the fields.")
-    # Very basic email parsing
     _, parsed_mail = parseaddr(mail)
-    if not "@" in parsed_mail:
-        return format_warning("Please provide a valid email adress.")
     if path_to_file is None:
         return format_warning("Please attach a file.")
-    # check the required fields
-    required_fields = ["Method", "Submitted By", "url", "organisation", "mail"]
-    # Check if the required_fields are not blank
-    for field in required_fields:
-        if field not in locals():
-            raise gr.Error(f"{field} cannot be blank")
     return parsed_mail
@@ -66,98 +67,47 @@ def add_new_eval(
     organisation: str,
     mail: str,
 ):
-    parsed_mail = input_verification(
-        method_name,
-        url,
-        path_to_file,
-        organisation,
-        mail,
-    )
-    # # load the file
-    # df = pd.read_csv(path_to_file)
-    # submission_df = pd.read_csv(path_to_file)
-    # # modify the df to include metadata
-    # df["Method"] = method_name
-    # df["url"] = url
-    # df["organisation"] = organisation
-    # df["mail"] = parsed_mail
-    # df["timestamp"] = datetime.datetime.now()
-    # submission_df = pd.read_csv(path_to_file)
-    # submission_df["Method"] = method_name
-    # submission_df["Submitted By"] = organisation
-    # # upload to spaces using the hf api at
-    # path_in_repo = f"submissions/{method_name}"
-    # file_name = f"{method_name}-{organisation}-{datetime.datetime.now().strftime('%Y-%m-%d')}.csv"
-    # upload the df to spaces
-    import io
     if SUBMISSION_FORMAT == "predictions":
-        # read the submission json file
         with open(path_to_file, "r") as f:
             submission_data = json.load(f)
-        # read the gold json file
         with open("submissions/baseline/IL_TUR_eval_gold_small.json", "r") as f:
             gold_data = json.load(f)
         submission = get_evaluation_scores(gold_data, submission_data)
     else:
-        # read the submission json file
         with open(path_to_file, "r") as f:
             submission = json.load(f)
     with open("submissions/baseline/results.json", "r") as f:
         results = json.load(f)
-    # update the results
     results.append(submission[0])
-    leaderboard_buffer = io.BytesIO()
-    # df.to_csv(buffer, index=False)  # Write the DataFrame to a buffer in CSV format
-    # buffer.seek(0)  # Rewind the buffer to the beginning
-    # save the results to buffer
-    leaderboard_buffer.write(json.dumps(results).encode())
     leaderboard_buffer.seek(0)
-    # api.upload_file(
-    #     repo_id=RESULTS_PATH,
-    #     path_in_repo=f"{path_in_repo}/{file_name}",
-    #     path_or_fileobj=buffer,
-    #     token=TOKEN,
-    #     repo_type="dataset",
-    # )
-    # # read the leaderboard
-    # leaderboard_df = pd.read_csv(f"submissions/baseline/baseline.csv")
-    # # append the new submission_df csv to the leaderboard
-    # # leaderboard_df = leaderboard_df._append(submission_df)
-    # # leaderboard_df = pd.concat([leaderboard_df, submission_df], ignore_index=True)
-    # # save the new leaderboard
-    # # leaderboard_df.to_csv(f"submissions/baseline/baseline.csv", index=False)
-    # leaderboard_buffer = io.BytesIO()
-    # leaderboard_df.to_csv(leaderboard_buffer, index=False)
-    # leaderboard_buffer.seek(0)
-    # with open("submissions/baseline/results.json", "w") as f:
-    #     json.dump(results, f)
     api.upload_file(
         repo_id=LEADERBOARD_PATH,
-        # path_in_repo=f"submissions/baseline/baseline.csv",
-        path_in_repo=f"submissions/baseline/results.json",
         path_or_fileobj=leaderboard_buffer,
         token=TOKEN,
         repo_type="space",
     )
     return format_log(
-        f"Method {method_name} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed"
     )

 import os
 import json
+import datetime
+from email.utils import parseaddr
+from io import BytesIO
+from huggingface_hub import HfApi
 import gradio as gr
 from eval_utils import get_evaluation_scores
+# Constants
 LEADERBOARD_PATH = "Exploration-Lab/IL-TUR-Leaderboard"
 SUBMISSION_FORMAT = "predictions"
 TOKEN = os.environ.get("TOKEN", None)
 YEAR_VERSION = "2024"
 api = HfApi(token=TOKEN)
+# Helper functions for formatting messages
+def format_message(msg, color):
+    return f"<p style='color: {color}; font-size: 20px; text-align: center;'>{msg}</p>"
 def format_error(msg):
+    return format_message(msg, "red")
 def format_warning(msg):
+    return format_message(msg, "orange")
 def format_log(msg):
+    return format_message(msg, "green")
 def model_hyperlink(link, model_name):
 def input_verification(method_name, url, path_to_file, organisation, mail):
+    """Verify the input fields for submission."""
+    # Check if any field is empty
+    if any(
+        input == "" for input in [method_name, url, path_to_file, organisation, mail]
+    ):
+        return format_warning("Please fill all the fields.")
+    # Verify email format
     _, parsed_mail = parseaddr(mail)
+    if "@" not in parsed_mail:
+        return format_warning("Please provide a valid email address.")
+    # Check if file is attached
     if path_to_file is None:
         return format_warning("Please attach a file.")
     return parsed_mail
     organisation: str,
     mail: str,
 ):
+    """Add a new evaluation to the leaderboard."""
+    # Verify input
+    parsed_mail = input_verification(method_name, url, path_to_file, organisation, mail)
+    if parsed_mail.startswith("<p"):  # If it's a warning message
+        return parsed_mail
+    # Process submission
     if SUBMISSION_FORMAT == "predictions":
+        # Read submission and gold data
         with open(path_to_file, "r") as f:
             submission_data = json.load(f)
         with open("submissions/baseline/IL_TUR_eval_gold_small.json", "r") as f:
             gold_data = json.load(f)
+        # Get evaluation scores
         submission = get_evaluation_scores(gold_data, submission_data)
     else:
+        # Read submission directly if it's not in predictions format
         with open(path_to_file, "r") as f:
             submission = json.load(f)
+    # Update results
     with open("submissions/baseline/results.json", "r") as f:
         results = json.load(f)
     results.append(submission[0])
+    # Prepare buffer for upload
+    leaderboard_buffer = BytesIO(json.dumps(results).encode())
     leaderboard_buffer.seek(0)
+    # Upload to Hugging Face
     api.upload_file(
         repo_id=LEADERBOARD_PATH,
+        path_in_repo="submissions/baseline/results.json",
         path_or_fileobj=leaderboard_buffer,
         token=TOKEN,
         repo_type="space",
     )
     return format_log(
+        f"Method {method_name} submitted by {organisation} successfully. \n"
+        "Please refresh the leaderboard, and wait a bit to see the score displayed"
     )