import logging
logging.basicConfig(level='ERROR')
import numpy as np
from pathlib import Path
import openai
import torch
import zlib
import statistics
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import math
from datasets import load_dataset
from options import Options
from ipdb import set_trace as bp
from eval import *
from utils import evaluate_model
from analyze import analyze_data
import argparse
import os
import sys
import gc
import pickle
import random  # needed for random.seed / random.shuffle in evaluate_data

# Cache of loaded models and tokenizers, keyed by model name (and name + "_tokenizer").
models = {}


def save_data(filename, data):
    # Store the data as a binary pickle stream.
    with open(filename, 'wb') as filehandle:
        pickle.dump(data, filehandle)


def load_data(filename):
    # Read the data back from a binary pickle stream.
    with open(filename, 'rb') as filehandle:
        loaded_data = pickle.load(filehandle)
    return loaded_data


def load_model(name1):
    # Load (and cache) a causal LM and its tokenizer.
    if name1 not in models:
        model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
        model1.eval()
        tokenizer1 = AutoTokenizer.from_pretrained(name1)
        tokenizer1.pad_token = tokenizer1.eos_token
        models[name1] = model1
        models[name1 + "_tokenizer"] = tokenizer1
    return models[name1], models[name1 + "_tokenizer"]


def calculatePerplexity(sentence, model, tokenizer, gpu):
    """
    Return (perplexity = exp(loss), per-token log-probabilities, loss) for `sentence`.
    """
    input_ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0)
    input_ids = input_ids.to(gpu)
    with torch.no_grad():
        outputs = model(input_ids, labels=input_ids)
    loss, logits = outputs[:2]

    # Apply log-softmax to the logits to get per-token log-probabilities.
    probabilities = torch.nn.functional.log_softmax(logits, dim=-1)
    all_prob = []
    input_ids_processed = input_ids[0][1:]
    for i, token_id in enumerate(input_ids_processed):
        probability = probabilities[0, i, token_id].item()
        all_prob.append(probability)
    return torch.exp(loss).item(), all_prob, loss.item()


def sample_generation(sentence, model, tokenizer, args, data_name):
    # Keep the first `prefix_length` fraction of the words as a prefix and let the model
    # generate `num_z` continuations of roughly the remaining length.
    half_sentence_index = math.ceil(len(sentence.split()) * args['prefix_length'])
    if half_sentence_index > 0:
        prefix = " ".join(sentence.split()[:half_sentence_index])
    else:
        prefix = '<|startoftext|> '

    input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)
    input_ids = input_ids.to(model.device)

    output = model.generate(
        input_ids,
        max_new_tokens=(len(sentence.split()) - half_sentence_index),
        min_new_tokens=1,
        num_return_sequences=int(args['num_z']),
        pad_token_id=tokenizer.eos_token_id,
        **args['generate_args'],
    )
    complete_generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
    return complete_generated_text


def RMIA_1(text, target_loss, ref_loss, model1, tokenizer1, ratio_gen, neighbors_dl):
    # Score the target text by the fraction of neighbor samples on which the target
    # model's loss is lower than its loss on the original text.
    target_losses_z = evaluate_model(model1, tokenizer1, neighbors_dl)
    result = torch.count_nonzero(target_losses_z < target_loss).item() / len(target_losses_z)
    return result


def get_neighbors(text, ref_loss, model2, tokenizer2, ratio_gen, data_name):
    # Sample neighbor texts from the reference model and wrap them in a DataLoader.
    cur_args = {'prefix_length': ratio_gen, 'num_z': 50, 'generate_args': {'do_sample': True}}
    neighbors = sample_generation(text, model2, tokenizer2, cur_args, data_name)
    neighbors_dl = DataLoader(neighbors, batch_size=32, shuffle=False)
    return neighbors_dl
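# Illustrative sketch (not part of the original pipeline): how the helpers above fit
# together to produce an RMIA-style score for a single text. The function name, the
# example ratio_gen value, and loading both models at once are assumptions made only
# for illustration; the real pipeline batches this work in evaluate_data() below.
def example_rmia_score(text, target_model_name, ref_model_name, ratio_gen=0.3):
    # Sample neighbor texts z from the reference model (ref_loss is unused by get_neighbors).
    ref_model, ref_tokenizer = load_model(ref_model_name)
    neighbors_dl = get_neighbors(text, None, ref_model, ref_tokenizer, ratio_gen, data_name="example")
    # Loss of the target model on the original text.
    target_model, target_tokenizer = load_model(target_model_name)
    _, _, target_loss = calculatePerplexity(text, target_model, target_tokenizer, gpu=target_model.device)
    # Fraction of neighbors on which the target model's loss is lower than on `text`;
    # a small value suggests the text itself is unusually "easy" for the target model.
    return RMIA_1(text, target_loss, None, target_model, target_tokenizer, ratio_gen, neighbors_dl)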
def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_name):
    global model1, model2, tokenizer1, tokenizer2
    print(f"all data size: {len(test_data)}")

    # Subsample a fixed set of 100 examples.
    random.seed(0)
    random.shuffle(test_data)
    test_data = test_data[:100]

    inference2_pass = None
    neighbors_dls = None
    ref_model_clean = ref_model.replace("/", "-")
    data_name_clean = data_name.replace("/", "-")
    os.makedirs(os.path.join(f"saves/{ref_model_clean}", f"{data_name_clean}"), exist_ok=True)

    try:
        # Reuse cached reference-model statistics and neighbor samples if available.
        inference2_pass = load_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt')
        neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
    except Exception:
        ### MODEL 2 (reference model) likelihoods
        model2, tokenizer2 = load_model(ref_model)
        inference2_pass = []  # 0: p_ref, 1: all_prob_ref, 2: p_ref_likelihood
        for ex in tqdm(test_data):
            text = ex[col_name]
            new_ex = inference_model2(model2, tokenizer2, text)
            inference2_pass.append(new_ex)  # Invariant: does not depend on model1.

        ### Neighbors sampled from the reference model
        neighbors_dls = []
        counter = 0
        for ex in tqdm(test_data):
            text = ex[col_name]
            new_ex = get_neighbors(text, inference2_pass[counter][2], model2, tokenizer2, ratio_gen, data_name)
            counter = counter + 1
            neighbors_dls.append(new_ex)

        # Free the reference model before loading the target model.
        del models[ref_model]
        del models[ref_model + "_tokenizer"]
        model2.cpu()
        del model2
        del tokenizer2
        gc.collect()
        torch.cuda.empty_cache()

        # Sampling uses temperature, so the neighbors are not invariant across runs;
        # caching a snapshot in time is fine for our purposes.
        save_data(f'saves/{ref_model_clean}/{data_name_clean}/inference2_pass.txt', inference2_pass)
        save_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt', neighbors_dls)
        print("Saved reference-model data.")

    ### MODEL 1 (target model) likelihoods
    model1, tokenizer1 = load_model(target_model)
    inference1_pass = []  # 0: p1, 1: all_prob, 2: p1_likelihood, 3: p_lower, 4: p_lower_likelihood
    for ex in tqdm(test_data):
        text = ex[col_name]
        new_ex = inference_model1(model1, tokenizer1, text)
        inference1_pass.append(new_ex)

    ### RMIA results
    counter = 0
    results = []
    for ex in tqdm(test_data):
        text = ex[col_name]
        new_ex = RMIA_1(text, inference1_pass[counter][2], inference2_pass[counter][2],
                        model1, tokenizer1, ratio_gen, neighbors_dls[counter])
        counter = counter + 1
        results.append(new_ex)

    # Free the target model.
    del models[target_model]
    del models[target_model + "_tokenizer"]
    model1.cpu()
    del model1
    del tokenizer1
    gc.collect()
    torch.cuda.empty_cache()

    ### Collect per-example scores
    all_output = []
    counter = 0
    for ex in tqdm(test_data):
        text = ex[col_name]
        pred = {}
        pred["minkprob_w/_ref"] = results[counter]
        pred["ppl"] = inference1_pass[counter][0]
        pred["ppl/Ref_ppl (calibrate PPL to the reference model)"] = inference1_pass[counter][2] - inference2_pass[counter][2]
        pred["ppl/lowercase_ppl"] = -(np.log(inference1_pass[counter][3]) / np.log(inference1_pass[counter][0])).item()
        zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
        pred["ppl/zlib"] = np.log(inference1_pass[counter][0]) / zlib_entropy
        ex["pred"] = pred
        counter = counter + 1
        all_output.append(ex)
    return all_output


def inference_model1(model1, tokenizer1, text):
    # Target-model perplexity/likelihood on the original and lowercased text.
    p1, all_prob, p1_likelihood = calculatePerplexity(text, model1, tokenizer1, gpu=model1.device)
    p_lower, _, p_lower_likelihood = calculatePerplexity(text.lower(), model1, tokenizer1, gpu=model1.device)
    return [p1, all_prob, p1_likelihood, p_lower, p_lower_likelihood]


def inference_model2(model2, tokenizer2, text):
    # Reference-model perplexity/likelihood on the text.
    p_ref, all_prob_ref, p_ref_likelihood = calculatePerplexity(text, model2, tokenizer2, gpu=model2.device)
    return [p_ref, all_prob_ref, p_ref_likelihood]
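# Illustrative sketch (not from the original code): how the per-example scores produced by
# evaluate_data() could be summarized into a ROC-AUC if ground-truth membership labels were
# available. The "label" field and the scikit-learn dependency are assumptions made for
# illustration only; the repository's own aggregation lives in analyze_data().
def example_auc(all_output, score_key="ppl"):
    from sklearn.metrics import roc_auc_score
    labels = [ex["label"] for ex in all_output]              # hypothetical: 1 = member, 0 = non-member
    scores = [-ex["pred"][score_key] for ex in all_output]   # lower perplexity -> more member-like
    return roc_auc_score(labels, scores)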
split="validation") data = convert_huggingface_data_to_list_dic(dataset) data = process_truthful_qa(data) elif data == "cais/mmlu": dataset = load_dataset(data, "all", split="test") data = convert_huggingface_data_to_list_dic(dataset) data = process_mmlu(data) elif data == "ai2_arc": dataset = load_dataset(data, "ARC-Challenge", split="test") data = convert_huggingface_data_to_list_dic(dataset) data = process_arc(data) elif data == "gsm8k": dataset = load_dataset(data, "main", split="test") data = convert_huggingface_data_to_list_dic(dataset) data = process_gsm8k(data) elif data == "Rowan/hellaswag": dataset = load_dataset(data, "default", split="validation") # We use validation since labels for the test set are not available? data = convert_huggingface_data_to_list_dic(dataset) data = process_hellaswag(data) elif data == "winogrande": dataset = load_dataset(data,"winogrande_debiased", split="validation") data = convert_huggingface_data_to_list_dic(dataset) data = process_winogrande(data) #model1, model2, tokenizer1, tokenizer2 = load_model(target_model, ref_model) all_output = evaluate_data(data,key_name, target_model, ref_model,ratio_gen,data_name) dump_jsonl(all_output, f"{output_dir}/all_output.jsonl") return analyze_data(all_output) # fig_fpr_tpr(all_output, output_dir)