Load tokenizer from parent model & app.py fixes
- app.py +9 -8
- detect-pretrain-code-contamination/src/run.py +4 -9
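In short: app.py gains discord and discord.ext.commands imports, prints a log line once an evaluation finishes, strips whitespace from ref_model before it is written to the queue, and drops an unused eval_entry dict; run.py's load_model now takes the reference model as a second argument and always loads the tokenizer from it (previously the tokenizer came from a hard-coded mistralai/Mistral-7B-v0.1 whenever the model was not yet cached), and a redundant second load of the target model is removed.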
app.py
CHANGED
@@ -6,6 +6,8 @@ import time
 import pandas as pd
 from threading import Thread
 import numpy as np
+import discord
+from discord.ext import commands
 
 # Add the path to the "src" directory of detect-pretrain-code-contamination to the sys.path
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "detect-pretrain-code-contamination"))
@@ -52,6 +54,9 @@ def save_to_txt(model, results, model_type,ref_model):
 
     with open(file_path, "a") as f:
         f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
+
+        print(f"Finished evaluation of model: {model} using ref_model: {ref_model}")
+        print(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
         f.close()
 
 def run_test(model,ref_model,data):
@@ -88,7 +93,9 @@ def worker_thread():
     for submission in modelQueue:
         #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
         #modelQueue.pop(modelQueue.index(submission))
-
+        #exit()
+
+        #The exit above is temporal while I figure out how to unload a model from a thread or similar.
         # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
         # I highly encourage you to try to reproduce the results I get using your own implementation.
         # Do NOT take anything listed here as fact, as I'm not 100% my implementation works as intended.
@@ -105,16 +112,10 @@ def queue(model,model_type,ref_model):
     file_path = "data/queue.csv"
     with open(file_path, "a") as f:
         model = model.strip()
+        ref_model = ref_model.strip()
         f.write(f"\n{model_type},{model},{ref_model}")
         f.close()
     print(f"QUEUE:\n{modelQueue}")
-
-    eval_entry = {
-        "model": model,
-        "model_type": model_type,
-        "ref_model": ref_model,
-    }
-
 
 ### bigcode/bigcode-models-leaderboard
 def add_new_eval(
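The f.write call in save_to_txt appends one comma-separated row per evaluation. As a minimal sketch of how such rows could be read back — the path data/results.csv and the absence of a header row are assumptions, only the column order comes from the f.write call above:

    import csv

    # Column order taken from the f.write call in save_to_txt;
    # file path and headerless layout are hypothetical.
    COLUMNS = ["model_type", "model", "arc", "hellaswag", "mmlu",
               "truthfulQA", "winogrande", "gsm8k", "ref_model"]

    with open("data/results.csv", newline="") as f:
        for row in csv.reader(f):
            if len(row) != len(COLUMNS):
                continue  # each write starts with "\n", so blank rows appear
            record = dict(zip(COLUMNS, row))
            print(record["model"], "gsm8k:", record["gsm8k"])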
detect-pretrain-code-contamination/src/run.py
CHANGED
@@ -40,15 +40,11 @@ def load_data(filename):
 def unload_model(model,tokenizer):
     print("[X] Cannot unload model! Functionality not implemented!")
 
-def load_model(name1):
+def load_model(name1,ref_model):
     if name1 not in models:
         model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
         model1.eval()
-
-        tokenizer1 = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-    else:
-        tokenizer1 = AutoTokenizer.from_pretrained(name1)
-
+        tokenizer1 = AutoTokenizer.from_pretrained(ref_model)
         tokenizer1.pad_token = tokenizer1.eos_token
         models[name1] = model1
         models[name1 + "_tokenizer"] = tokenizer1
@@ -124,7 +120,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
     except:
         ### MODEL 2 likelihoods
-        model2, tokenizer2 = load_model(ref_model)
+        model2, tokenizer2 = load_model(ref_model,ref_model)
         inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
         for ex in tqdm(test_data):
             text = ex[col_name]
@@ -147,7 +143,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
     print("Saved ref data, exiting.")
 
     ### MODEL 1 likelihoods
-    model1, tokenizer1 = load_model(target_model)
+    model1, tokenizer1 = load_model(target_model,ref_model)
     inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
     for ex in tqdm(test_data):
         text = ex[col_name]
@@ -155,7 +151,6 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         inference1_pass.append(new_ex)
 
     ### RIMA results
-    model1, tokenizer1 = load_model(target_model)
     counter = 0
     results = []
     for ex in tqdm(test_data):