# PTP / load_data.py
"""Model metadata maps (size bucket, family, training type) and the loader
for the main leaderboard results table."""

import pandas as pd

# Parameter-count bucket for each model on the leaderboard.
SIZE_MAP = {
'Airavata': '7b',
'CCK-v2.0-DPO': '13b',
'GEITje-7B': '7b',
'Heimer-dpo-TinyLlama-1.1B': '1b',
'Heimer-kto-TinyLlama-1.1B': '1b',
'Heimer-ipo-TinyLlama-1.1B': '1b',
'HuggingFaceH4': '7b',
'Llama-2-13b-chat-hf': '13b',
'Llama-2-13b-hf': '13b',
'Llama-2-70b-chat-hf': '70b',
'Llama-2-7b-chat-hf': '7b',
'Llama-2-7b-hf': '7b',
'Mistral-7B-Instruct-v0.1': '7b',
'Mistral-7B-Instruct-v0.2': '7b',
'OLMo-7B-Instruct': '7b',
'OpenHathi-7B-Hi-v0.1-Base': '7b',
'Qwen-7B-Chat': '7b',
'Swallow-13b-instruct-hf': '13b',
'Swallow-70b-instruct-hf': '70b',
'Swallow-7b-hf': '7b',
'Swallow-7b-instruct-hf': '7b',
'Yi-6B-Chat': '1b-7b',
'archangel_dpo_llama13b': '13b',
'archangel_dpo_llama7b': '7b',
'archangel_kto_llama13b': '13b',
'archangel_kto_llama7b': '7b',
'archangel_ppo_llama13b': '13b',
'archangel_ppo_llama7b': '7b',
'archangel_sft-dpo_llama13b': '13b',
'archangel_sft-dpo_llama7b': '7b',
'archangel_sft-kto_llama13b': '13b',
'archangel_sft-kto_llama7b': '7b',
'archangel_sft-ppo_llama13b': '13b',
'archangel_sft-ppo_llama7b': '7b',
'bloomz-1b1': '1b',
'bloomz-1b7': '1b-7b',
'bloomz-3b': '1b-7b',
'bloomz-560m': '<1b',
'bloomz-7b1': '7b',
'gemma-7b-it': '7b',
'llama-30b': '30b',
'mpt-7b': '7b',
'mpt-7b-instruct': '7b',
'pythia-1.4b': '1b-7b',
'pythia-12b': '13b',
'pythia-160m': '<1b',
'pythia-1b': '1b',
'pythia-2.8b': '1b-7b',
'pythia-410m': '<1b',
'pythia-6.9b': '7b',
'pythia-70m': '<1b',
'ruGPT-3.5-13B': '13b',
'stablelm-2-1_6b': '1b-7b',
'stablelm-2-zephyr-1_6b': '1b-7b',
'tulu-2-13b': '13b',
'tulu-2-7b': '7b',
'tulu-2-dpo-13b': '13b',
'tulu-2-dpo-70b': '70b',
'tulu-2-dpo-7b': '7b',
'zephyr-7b-beta': '7b',
'gpt-35-turbo': "Unknown",
"Aya101": '13b',
"zephyr-7b-gemma-v0.1": "7b",
"Mistral-7B-v0.1": '7b',
"Meta-Llama-3-8B-Instruct": "8b",
"Meta-Llama-3-8B": '8b',
}
# Model family / base architecture for each model.
MODEL_FAMILY = {
'Airavata': 'OpenHathi',
'CCK-v2.0-DPO': 'NA',
'GEITje-7B': 'Mistral-GEITje',
'Heimer-dpo-TinyLlama-1.1B': 'Llama-Tiny',
'Heimer-kto-TinyLlama-1.1B': 'Llama-Tiny',
'Heimer-ipo-TinyLlama-1.1B': 'Llama-Tiny',
'HuggingFaceH4': 'Mistral-CAI',
'Llama-2-13b-chat-hf': 'Llama',
'Llama-2-13b-hf': 'Llama',
'Llama-2-70b-chat-hf': 'Llama',
'Llama-2-7b-chat-hf': 'Llama',
'Llama-2-7b-hf': 'Llama',
'Mistral-7B-Instruct-v0.1': 'Mistral',
'Mistral-7B-Instruct-v0.2': 'Mistral',
'OLMo-7B-Instruct': 'OLMo',
'OpenHathi-7B-Hi-v0.1-Base': 'OpenHathi',
'Qwen-7B-Chat': 'Qwen',
'Swallow-13b-instruct-hf': 'Llama-Swallow',
'Swallow-70b-instruct-hf': 'Llama-Swallow',
'Swallow-7b-hf': 'Llama-Swallow',
'Swallow-7b-instruct-hf': 'Llama-Swallow',
'Yi-6B-Chat': 'Yi',
'archangel_dpo_llama13b': 'Llama-Archangel',
'archangel_dpo_llama7b': 'Llama-Archangel',
'archangel_kto_llama13b': 'Llama-Archangel',
'archangel_kto_llama7b': 'Llama-Archangel',
'archangel_ppo_llama13b': 'Llama-Archangel',
'archangel_ppo_llama7b': 'Llama-Archangel',
'archangel_sft-dpo_llama13b': 'Llama-Archangel',
'archangel_sft-dpo_llama7b': 'Llama-Archangel',
'archangel_sft-kto_llama13b': 'Llama-Archangel',
'archangel_sft-kto_llama7b': 'Llama-Archangel',
'archangel_sft-ppo_llama13b': 'Llama-Archangel',
'archangel_sft-ppo_llama7b': 'Llama-Archangel',
'bloomz-1b1': 'Bloomz',
'bloomz-1b7': 'Bloomz',
'bloomz-3b': 'Bloomz',
'bloomz-560m': 'Bloomz',
'bloomz-7b1': 'Bloomz',
'gemma-7b-it': 'Gemma',
'llama-30b': 'Llama',
'mpt-7b': 'MPT',
'mpt-7b-instruct': 'MPT',
'pythia-1.4b': 'Pythia',
'pythia-12b': 'Pythia',
'pythia-160m': 'Pythia',
'pythia-1b': 'Pythia',
'pythia-2.8b': 'Pythia',
'pythia-410m': 'Pythia',
'pythia-6.9b': 'Pythia',
'pythia-70m': 'Pythia',
'ruGPT-3.5-13B': 'GPT',
'stablelm-2-1_6b': 'StableLM',
'stablelm-2-zephyr-1_6b': 'StableLM',
'tulu-2-13b': 'Llama-Tulu',
'tulu-2-7b': 'Llama-Tulu',
'tulu-2-dpo-13b': 'Llama-Tulu',
'tulu-2-dpo-70b': 'Llama-Tulu',
'tulu-2-dpo-7b': 'Llama-Tulu',
'zephyr-7b-beta': 'Mistral',
'gpt-35-turbo': "GPT-OAI",
'Aya101': 'Aya101',
"zephyr-7b-gemma-v0.1": 'Gemma',
"Mistral-7B-v0.1": 'Mistral',
"Meta-Llama-3-8B-Instruct": "Llama",
"Meta-Llama-3-8B": 'Llama',
}
# Training regime of each model: 'base', 'instruct', or 'preference'-tuned.
MODEL_TYPE = {
'Airavata': 'instruct',
'CCK-v2.0-DPO': 'preference',
'GEITje-7B': 'base',
'Heimer-dpo-TinyLlama-1.1B': 'preference',
'Heimer-kto-TinyLlama-1.1B': 'preference',
'Heimer-ipo-TinyLlama-1.1B': 'preference',
'HuggingFaceH4': 'preference',
'Llama-2-13b-chat-hf': 'preference',
'Llama-2-13b-hf': 'base',
'Llama-2-70b-chat-hf': 'preference',
'Llama-2-7b-chat-hf': 'preference',
'Llama-2-7b-hf': 'base',
'Mistral-7B-Instruct-v0.1': 'instruct',
'Mistral-7B-Instruct-v0.2': 'instruct',
'OLMo-7B-Instruct': 'preference',
'OpenHathi-7B-Hi-v0.1-Base': 'instruct',
'Qwen-7B-Chat': 'preference',
'Swallow-13b-instruct-hf': 'instruct',
'Swallow-70b-instruct-hf': 'instruct',
'Swallow-7b-hf': 'base',
'Swallow-7b-instruct-hf': 'instruct',
'Yi-6B-Chat': 'preference',
'archangel_dpo_llama13b': 'preference',
'archangel_dpo_llama7b': 'preference',
'archangel_kto_llama13b': 'preference',
'archangel_kto_llama7b': 'preference',
'archangel_ppo_llama13b': 'preference',
'archangel_ppo_llama7b': 'preference',
'archangel_sft-dpo_llama13b': 'preference',
'archangel_sft-dpo_llama7b': 'preference',
'archangel_sft-kto_llama13b': 'preference',
'archangel_sft-kto_llama7b': 'preference',
'archangel_sft-ppo_llama13b': 'preference',
'archangel_sft-ppo_llama7b': 'preference',
'bloomz-1b1': 'base',
'bloomz-1b7': 'base',
'bloomz-3b': 'base',
'bloomz-560m': 'base',
'bloomz-7b1': 'base',
'gemma-7b-it': 'instruct',
'llama-30b': 'base',
'mpt-7b': 'base',
'mpt-7b-instruct': 'instruct',
'pythia-1.4b': 'base',
'pythia-12b': 'base',
'pythia-160m': 'base',
'pythia-1b': 'base',
'pythia-2.8b': 'base',
'pythia-410m': 'base',
'pythia-6.9b': 'base',
'pythia-70m': 'base',
'ruGPT-3.5-13B': 'base',
'stablelm-2-1_6b': 'instruct',
'stablelm-2-zephyr-1_6b': 'preference',
'tulu-2-13b': 'preference',
'tulu-2-7b': 'preference',
'tulu-2-dpo-13b': 'preference',
'tulu-2-dpo-70b': 'preference',
'tulu-2-dpo-7b': 'preference',
'zephyr-7b-beta': 'preference',
'gpt-35-turbo': "preference",
'Aya101': 'instruct',
'zephyr-7b-gemma-v0.1': 'preference',
'Mistral-7B-v0.1': 'base',
"Meta-Llama-3-8B-Instruct": "preference",
"Meta-Llama-3-8B": 'base',
}
# Families treated as multilingual when building the 'Multilingual' column below.
MULTILINGUAL_FAMILY = ['Aya101', 'GPT-OAI', 'Bloomz']
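
# Optional sanity check (not in the original file): the three metadata maps are
# meant to describe the same set of models, so their key sets should match.
# Sketched here as a helper; it is not called anywhere by default.
def _check_metadata_maps():
    assert set(SIZE_MAP) == set(MODEL_FAMILY) == set(MODEL_TYPE), \
        "SIZE_MAP, MODEL_FAMILY and MODEL_TYPE must cover the same model names"
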
def load_main_table():
    """Load the main results table and attach model metadata columns."""
    df = pd.read_csv("./data/data.csv").round(3)
    # Exclude this model from the table.
    df = df[df.Model != 'CCK-v2.0-DPO']
    # Every remaining model must have an entry in the metadata maps.
    assert set(df['Model'].unique()) <= set(SIZE_MAP), \
        "data.csv contains models missing from SIZE_MAP"
    df['Model Size'] = df['Model'].map(SIZE_MAP)
    df['Model Type'] = df['Model'].map(MODEL_TYPE)
    df['Model Family'] = df['Model'].map(MODEL_FAMILY)
    df['Multilingual'] = df['Model Family'].isin(MULTILINGUAL_FAMILY)
    df = df.sort_values(by="Average Toxicity")
    return df
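
# Minimal usage sketch, assuming the script is run from the repository root so
# that ./data/data.csv (with 'Model' and 'Average Toxicity' columns) exists.
if __name__ == "__main__":
    table = load_main_table()
    print(table[['Model', 'Model Size', 'Model Family',
                 'Model Type', 'Average Toxicity']].head())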