Yeyito committed
Commit ca453e8
1 Parent(s): 98d650b

Load tokenizer from parent model & app.py fixes

app.py CHANGED
@@ -6,6 +6,8 @@ import time
 import pandas as pd
 from threading import Thread
 import numpy as np
+import discord
+from discord.ext import commands
 
 # Add the path to the "src" directory of detect-pretrain-code-contamination to the sys.path
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "detect-pretrain-code-contamination"))
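Note: the two new discord imports are not referenced by any other change in this commit.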
@@ -52,6 +54,9 @@ def save_to_txt(model, results, model_type,ref_model):
 
     with open(file_path, "a") as f:
         f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
+
+        print(f"Finished evaluation of model: {model} using ref_model: {ref_model}")
+        print(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
         f.close()
 
 def run_test(model,ref_model,data):
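Each appended row is one comma-separated record in the column order model_type, model, arc, hellaswag, mmlu, truthfulQA, winogrande, gsm8k, ref_model; the two new print calls echo the same record to the log as soon as an evaluation finishes.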
@@ -88,7 +93,9 @@ def worker_thread():
     for submission in modelQueue:
         #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
         #modelQueue.pop(modelQueue.index(submission))
-
+        #exit()
+
+        #The exit above is temporary while I figure out how to unload a model from a thread or similar.
         # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
         # I highly encourage you to try to reproduce the results I get using your own implementation.
         # Do NOT take anything listed here as fact, as I'm not 100% sure my implementation works as intended.
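The thread-side unload problem flagged in the comment above is the same gap as the stubbed unload_model() in run.py. Below is a minimal sketch of one common way to release a transformers model's GPU memory, assuming the module-level models cache used by run.py's load_model(); the function itself is illustrative and not part of this commit:

    import gc
    import torch

    def unload_model(name1, models):
        # Drop the cached model and tokenizer so nothing keeps a reference alive.
        model = models.pop(name1, None)
        models.pop(name1 + "_tokenizer", None)
        del model                     # release the local reference as well
        gc.collect()                  # reclaim the now-unreferenced weights
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # return cached CUDA blocks to the driver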
@@ -105,16 +112,10 @@ def queue(model,model_type,ref_model):
     file_path = "data/queue.csv"
     with open(file_path, "a") as f:
         model = model.strip()
+        ref_model = ref_model.strip()
         f.write(f"\n{model_type},{model},{ref_model}")
         f.close()
     print(f"QUEUE:\n{modelQueue}")
-
-    eval_entry = {
-        "model": model,
-        "model_type": model_type,
-        "ref_model": ref_model,
-    }
-
 
 ### bigcode/bigcode-models-leaderboard
 def add_new_eval(
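Two small hygiene fixes in queue(): ref_model is now stripped of stray whitespace before the row is written to data/queue.csv, and the unused eval_entry dict is deleted as dead code.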
 
detect-pretrain-code-contamination/src/run.py CHANGED
@@ -40,15 +40,11 @@ def load_data(filename):
 def unload_model(model,tokenizer):
     print("[X] Cannot unload model! Functionality not implemented!")
 
-def load_model(name1):
+def load_model(name1,ref_model):
     if name1 not in models:
         model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
         model1.eval()
-        if "mistral" in name1 or "Mistral" in name1: #Loading default mistral tokenizers as some tokenizers don't work out of the box.
-            tokenizer1 = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-        else:
-            tokenizer1 = AutoTokenizer.from_pretrained(name1)
-
+        tokenizer1 = AutoTokenizer.from_pretrained(ref_model)
         tokenizer1.pad_token = tokenizer1.eos_token
         models[name1] = model1
         models[name1 + "_tokenizer"] = tokenizer1
@@ -124,7 +120,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
     except:
         ### MODEL 2 likelihoods
-        model2, tokenizer2 = load_model(ref_model)
+        model2, tokenizer2 = load_model(ref_model,ref_model)
         inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
         for ex in tqdm(test_data):
             text = ex[col_name]
@@ -147,7 +143,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
     print("Saved ref data, exiting.")
 
     ### MODEL 1 likelihoods
-    model1, tokenizer1 = load_model(target_model)
+    model1, tokenizer1 = load_model(target_model,ref_model)
     inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
     for ex in tqdm(test_data):
         text = ex[col_name]
@@ -155,7 +151,6 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         inference1_pass.append(new_ex)
 
     ### RIMA results
-    model1, tokenizer1 = load_model(target_model)
     counter = 0
     results = []
     for ex in tqdm(test_data):
 
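Taken together, the run.py changes make the tokenizer come from the reference ("parent") model rather than special-casing Mistral repos: load_model() gains a ref_model parameter, the reference pass calls load_model(ref_model,ref_model), the target pass calls load_model(target_model,ref_model), and the second load_model(target_model) before the RIMA pass is dropped because the models dict already caches the first load. Hardcoding the parent's tokenizer assumes the fine-tuned target shares its vocabulary; a more defensive variant (purely illustrative, not what the commit does) would try the model's own tokenizer first and fall back to the parent's:

    from transformers import AutoTokenizer

    def load_tokenizer(name1, ref_model):
        # Hypothetical fallback: prefer the fine-tune's own tokenizer and only
        # reach for the parent/reference model's tokenizer if that load fails.
        try:
            return AutoTokenizer.from_pretrained(name1)
        except Exception:
            return AutoTokenizer.from_pretrained(ref_model)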