Spaces:

Yeyito
/

llm_contamination_detector

Runtime error

File size: 5,281 Bytes

import logging
logging.basicConfig(level='ERROR')
import numpy as np
from tqdm import tqdm
import json
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve
import matplotlib
import random
from ipdb import set_trace as bp
import time

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42


matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

# plot data 
def sweep(score, x):
    """
    Compute a ROC curve and then return the FPR, TPR, AUC, and ACC.
    """
    fpr, tpr, _ = roc_curve(x, -score)
    acc = np.max(1-(fpr+(1-tpr))/2)
    return fpr, tpr, auc(fpr, tpr), acc


def do_plot(prediction, answers, sweep_fn=sweep, metric='auc', legend="", output_dir=None):
    """
    Generate the ROC curves by using ntest models as test models and the rest to train.
    """
    fpr, tpr, auc, acc = sweep_fn(np.array(prediction), np.array(answers, dtype=bool))

    low = tpr[np.where(fpr<.05)[0][-1]]
    # bp()
    print('Attack %s   AUC %.4f, Accuracy %.4f, TPR@5%%FPR of %.4f\n'%(legend, auc,acc, low))

    metric_text = ''
    if metric == 'auc':
        metric_text = 'auc=%.3f'%auc
    elif metric == 'acc':
        metric_text = 'acc=%.3f'%acc

    plt.plot(fpr, tpr, label=legend+metric_text)
    return legend, auc,acc, low


def fig_fpr_tpr(all_output, output_dir):
    print("output_dir", output_dir)
    answers = []
    metric2predictions = defaultdict(list)
    for ex in all_output:
        answers.append(ex["label"])
        for metric in ex["pred"].keys():
            if ("raw" in metric) and ("clf" not in metric):
                continue
            metric2predictions[metric].append(ex["pred"][metric])
    
    plt.figure(figsize=(4,3))
    with open(f"{output_dir}/auc.txt", "w") as f:
        for metric, predictions in metric2predictions.items():
            legend, auc, acc, low = do_plot(predictions, answers, legend=metric, metric='auc', output_dir=output_dir)
            f.write('%s   AUC %.4f, Accuracy %.4f, [email protected]%%FPR of %.4f\n'%(legend, auc, acc, low))

    plt.semilogx()
    plt.semilogy()
    plt.xlim(1e-5,1)
    plt.ylim(1e-5,1)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.plot([0, 1], [0, 1], ls='--', color='gray')
    plt.subplots_adjust(bottom=.18, left=.18, top=.96, right=.96)
    plt.legend(fontsize=8)
    plt.savefig(f"{output_dir}/auc.png")


def load_jsonl(input_path):
    with open(input_path, 'r') as f:
        data = [json.loads(line) for line in tqdm(f)]
    random.seed(0)
    random.shuffle(data)
    return data

def dump_jsonl(data, path):
    with open(path, 'w') as f:
        for line in tqdm(data):
            f.write(json.dumps(line) + "\n")

def read_jsonl(path):
    with open(path, 'r') as f:
        return [json.loads(line) for line in tqdm(f)]

def convert_huggingface_data_to_list_dic(dataset):
    all_data = []
    for i in range(len(dataset)):
        ex = dataset[i]
        all_data.append(ex)
    return all_data


def process_truthful_qa(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = ex["mc2_targets"]["labels"].index(1)
        output = ex["mc2_targets"]["choices"][label]
        # We change to mc2 instead of mc1 as it's those that open llm lead uses. (check about)
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data



def process_mmlu(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = ex["choices"][ex["answer"]]
        output = label
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data


def process_arc(data):
    new_data = []
    choice2label = {"A": 0, "B": 1, "C": 2, "D": 3}
    for ex in data:
        new_ex = {}
        # bp()
        # print(ex["answerKey"])
        if ex["answerKey"] not in choice2label:
            continue
        label = choice2label[ex["answerKey"]]
        output = ex["choices"]["text"][label]
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data

def process_gsm8k(data):
    new_data = []
    for ex in data:
        new_ex = {}
        output = ex["answer"].split('####')[0].strip()
        new_ex["output"] = output
        new_ex["input"] = ex["question"] + " " + output
        new_data.append(new_ex)
    return new_data

def process_winogrande(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = int(ex["answer"])
        output = ex[f"option{label}"]
        new_ex["output"] = output
        new_ex["input"] = ex["sentence"] + " " + output
        new_data.append(new_ex)
    return new_data
# I'm not sure if that's the correct format for winogrande given how the dataset works.

def process_hellaswag(data):
    new_data = []
    for ex in data:
        new_ex = {}
        label = int(ex["label"]) # For some reason label is in str and not int?
        output = ex["endings"][label]
        new_ex["output"] = output
        new_ex["input"] = ex["ctx"] + " " + output
        new_data.append(new_ex)
    return new_data