Spaces:
Running
Running
{ | |
"pass_1": [ | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Computation", | |
"Pass_at_k": 0.9038123167155425 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8340175953079179 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8686217008797654 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8392961876832845 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8604105571847507 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8351906158357771 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.7607038123167156 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8029325513196481 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.7712609970674487 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8516129032258064 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.7554252199413489 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8093841642228738 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Network", | |
"Pass_at_k": 0.703125 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Network", | |
"Pass_at_k": 0.58984375 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Network", | |
"Pass_at_k": 0.66796875 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.64453125 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.62109375 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.58984375 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.6015625 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.62109375 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.60546875 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Network", | |
"Pass_at_k": 0.609375 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.6015625 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.53125 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5967741935483871 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.489247311827957 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.4946236559139785 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5053763440860215 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.45698924731182794 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.41935483870967744 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.42473118279569894 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.43548387096774194 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.478494623655914 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.45161290322580644 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.34946236559139787 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6915887850467289 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5607476635514018 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6915887850467289 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5981308411214953 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6542056074766355 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5794392523364486 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5514018691588785 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5887850467289719 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.5233644859813084 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6074766355140186 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.616822429906542 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.4485981308411215 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "System", | |
"Pass_at_k": 0.51 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "System", | |
"Pass_at_k": 0.32 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "System", | |
"Pass_at_k": 0.41 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.46 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.41 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.36 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.35 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.34 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.36 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "System", | |
"Pass_at_k": 0.37 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.42 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.19 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.43 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.31 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.36 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.35 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.38 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.4 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.31 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.27 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.32 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.37 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.35 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.12 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6392167158851098 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5173093867812127 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5821338153067455 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5662224371484669 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5709516524435644 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5369104775806756 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.49883717003087863 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5079237551407199 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5025963506694164 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5478265270659565 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5325705088456162 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.4081157767758989 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Std", | |
"Pass_at_k": 0.16679801914758088 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Std", | |
"Pass_at_k": 0.1950117243115276 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Std", | |
"Pass_at_k": 0.19393547652062595 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.1693855278154664 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.17923951210025596 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.17321135521991954 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.17089125215414938 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.19904938629943747 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.16815110445446094 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Std", | |
"Pass_at_k": 0.18313053955353828 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.15105015549350911 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.24973689844845592 | |
} | |
], | |
"pass_5": [ | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Computation", | |
"Pass_at_k": 0.9126099706744868 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8733137829912023 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Computation", | |
"Pass_at_k": 0.9014662756598241 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8979472140762463 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8891495601173021 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8979472140762463 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8510263929618769 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.898533724340176 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8680351906158358 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Computation", | |
"Pass_at_k": 0.9102639296187683 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8510263929618769 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Computation", | |
"Pass_at_k": 0.8768328445747801 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Network", | |
"Pass_at_k": 0.7265625 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Network", | |
"Pass_at_k": 0.62890625 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Network", | |
"Pass_at_k": 0.70703125 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.70703125 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.65625 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.63671875 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.6328125 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.65625 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.63671875 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Network", | |
"Pass_at_k": 0.640625 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Network", | |
"Pass_at_k": 0.67578125 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Network", | |
"Pass_at_k": 0.55859375 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.6182795698924731 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.521505376344086 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5483870967741935 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.553763440860215 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5376344086021505 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.553763440860215 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.4838709677419355 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5161290322580645 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.5161290322580645 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.553763440860215 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.543010752688172 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Visualization", | |
"Pass_at_k": 0.3978494623655914 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Basic", | |
"Pass_at_k": 0.7102803738317757 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6074766355140186 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Basic", | |
"Pass_at_k": 0.7383177570093458 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6822429906542056 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6822429906542056 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6728971962616822 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6261682242990654 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6635514018691588 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6448598130841121 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6822429906542056 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Basic", | |
"Pass_at_k": 0.6728971962616822 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Basic", | |
"Pass_at_k": 0.48598130841121495 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "System", | |
"Pass_at_k": 0.57 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "System", | |
"Pass_at_k": 0.36 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "System", | |
"Pass_at_k": 0.5 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.57 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.49 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.49 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.41 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.38 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.43 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "System", | |
"Pass_at_k": 0.45 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "System", | |
"Pass_at_k": 0.47 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "System", | |
"Pass_at_k": 0.26 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.49 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.34 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.46 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.42 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.44 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.44 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.42 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.35 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.4 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.42 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.44 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Cryptography", | |
"Pass_at_k": 0.21 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6712887357331225 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5552003408082177 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6425337299072272 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6384974825984445 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.615879493228943 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6152211001996906 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5706463475004796 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5774106930778998 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.5826237976596688 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6094825601888648 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Mean", | |
"Pass_at_k": 0.6087859319852885 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Mean", | |
"Pass_at_k": 0.46487622755859775 | |
}, | |
{ | |
"Model": "gpt-4o-mini", | |
"Domain": "Std", | |
"Pass_at_k": 0.14747641211035856 | |
}, | |
{ | |
"Model": "gpt-3.5-turbo", | |
"Domain": "Std", | |
"Pass_at_k": 0.19743922837233668 | |
}, | |
{ | |
"Model": "Qwen2-72B-Instruct-GPTQ-Int4", | |
"Domain": "Std", | |
"Pass_at_k": 0.169043537848292 | |
}, | |
{ | |
"Model": "deepseek-coder-33b-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.1634243695210041 | |
}, | |
{ | |
"Model": "DeepSeek-Coder-V2-Lite-Instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.16346984877152868 | |
}, | |
{ | |
"Model": "deepseek-coder-6.7b-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.16363528852513812 | |
}, | |
{ | |
"Model": "CodeLlama-34b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.16828060893964333 | |
}, | |
{ | |
"Model": "CodeLlama-13b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.2055227025195004 | |
}, | |
{ | |
"Model": "CodeLlama-7b-Instruct-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.17281566921046676 | |
}, | |
{ | |
"Model": "CodeQwen1.5-7B-Chat", | |
"Domain": "Std", | |
"Pass_at_k": 0.17954181233010988 | |
}, | |
{ | |
"Model": "Phi-3-medium-4k-instruct", | |
"Domain": "Std", | |
"Pass_at_k": 0.15450285935340832 | |
}, | |
{ | |
"Model": "Llama-2-13b-chat-hf", | |
"Domain": "Std", | |
"Pass_at_k": 0.2409835679041833 | |
} | |
] | |
} |