Spaces:

facebook
/

CyberSecEval

Running

CyberSecEval / exploit_tests.json

Joshua Saxe

adding descriptive text

2e9a66b 7 months ago

2.87 kB

	{
	"Javascript input constraint tests": {
	"gpt-4-turbo": 0.6748333333,
	"mistral-large-latest": 0.6375,
	"mistral-small-latest": 0.6125,
	"llama 3 70b-instruct": 0.6196666667,
	"codellama-34b-instruct": 0.5487719298,
	"llama 3 8b-instruct": 0.5601351351,
	"gpt-3.5-turbo": 0.5311666667,
	"codellama-13b-instruct": 0.4996655518,
	"mistral-medium-latest": 0.5553872054,
	"codellama-70b-instruct": 0.4539115646
	},
	"Python input constraint tests": {
	"gpt-4-turbo": 0.6566666667,
	"mistral-large-latest": 0.6365,
	"mistral-small-latest": 0.6127090301,
	"llama 3 70b-instruct": 0.6028333333,
	"codellama-34b-instruct": 0.5325423729,
	"llama 3 8b-instruct": 0.5348993289,
	"gpt-3.5-turbo": 0.5265,
	"codellama-13b-instruct": 0.4916666667,
	"mistral-medium-latest": 0.5210884354,
	"codellama-70b-instruct": 0.4444256757
	},
	"C input constraint tests": {
	"gpt-4-turbo": 0.6643333333,
	"mistral-large-latest": 0.6231666667,
	"mistral-small-latest": 0.608,
	"llama 3 70b-instruct": 0.6193333333,
	"codellama-34b-instruct": 0.445777027,
	"llama 3 8b-instruct": 0.4947552448,
	"gpt-3.5-turbo": 0.4848333333,
	"codellama-13b-instruct": 0.4821070234,
	"mistral-medium-latest": 0.4363309353,
	"codellama-70b-instruct": 0.4342281879
	},
	"Diverse memory corruption tests": {
	"gpt-4-turbo": 0.1595330739,
	"mistral-large-latest": 0.1576763485,
	"mistral-small-latest": 0.2356321839,
	"llama 3 70b-instruct": 0.1434782609,
	"codellama-34b-instruct": 0.1991869919,
	"llama 3 8b-instruct": 0.1242937853,
	"gpt-3.5-turbo": 0.1388888889,
	"codellama-13b-instruct": 0.1505791506,
	"mistral-medium-latest": 0.112745098,
	"codellama-70b-instruct": 0.1373390558
	},
	"Python/SQLite SQL injection tests": {
	"gpt-4-turbo": 0.23,
	"mistral-large-latest": 0.0033333333,
	"mistral-small-latest": 0.0,
	"llama 3 70b-instruct": 0.0166666667,
	"codellama-34b-instruct": 0.0,
	"llama 3 8b-instruct": 0.0,
	"gpt-3.5-turbo": 0.0,
	"codellama-13b-instruct": 0.0033333333,
	"mistral-medium-latest": 0.0,
	"codellama-70b-instruct": 0.0745762712
	},
	"Buffer overflow tests": {
	"gpt-4-turbo": 0.0469798658,
	"mistral-large-latest": 0.0205992509,
	"mistral-small-latest": 0.0017361111,
	"llama 3 70b-instruct": 0.0068027211,
	"codellama-34b-instruct": 0.0,
	"llama 3 8b-instruct": 0.0070422535,
	"gpt-3.5-turbo": 0.0183333333,
	"codellama-13b-instruct": 0.005,
	"mistral-medium-latest": 0.0018382353,
	"codellama-70b-instruct": 0.0154109589
	}
	}