|
import os |
|
import random |
|
import glob |
|
import json |
|
|
|
import numpy as np |
|
from flask import Flask, render_template, request |
|
|
|
app = Flask(__name__)


# Problems are loaded once at import time, from the current working directory —
# the app must be launched from the directory containing problems.json.
with open("problems.json") as f:

    problems = json.load(f)

# NOTE(review): problem_choices is never referenced below — looks dead; confirm
# the templates don't use it before removing.
problem_choices = [q["question_title"] for q in problems]


# Shuffle the display order. random_idxs maps shuffled position -> original
# index, so after the rebinding below:
#   problems[pos] == original_problems[random_idxs[pos]]
random_idxs = list(range(len(problems)))

random.shuffle(random_idxs)

problems = [problems[idx] for idx in random_idxs]


# Per-model generation results; each model maps to a list aligned with the
# ORIGINAL (unshuffled) problem order — index it with original indices,
# not shuffled positions.
with open("all_outputs.json") as f:

    all_outputs = json.load(f)

all_models = list(all_outputs.keys())


num_questions_filtered = len(problems)


# Mean pass@1 per (problem, model). Keys are ORIGINAL problem indices (iterating
# random_idxs only changes dict insertion order, not the key set), matching the
# indexing of all_outputs rather than the shuffled `problems` list.
all_correctness_by_problem = {

    idx: {model: np.mean(all_outputs[model][idx]["pass1_list"]) for model in all_models}

    for idx in random_idxs

}
|
|
|
|
|
def calculate_color(performance):
    """Map a pass@1 fraction in [0, 1] to a CSS ``rgba()`` color string.

    Buckets: > 0.75 fixed translucent green; > 0.5 green with alpha equal to
    the score; > 0.25 red with alpha equal to ``1 - score``; otherwise fixed
    translucent red.

    Args:
        performance: mean pass@1 for one (problem, model) cell.

    Returns:
        An ``rgba(r, g, b, a)`` string usable as a CSS background color.
    """
    if performance > 0.75:
        # Fixed color: plain string — the original used a pointless f-prefix
        # with no placeholders (ruff F541).
        return "rgba(0, 150, 0, 0.5)"
    elif performance > 0.5:
        # More opaque green as the score rises.
        return f"rgba(50, 150, 0, {performance})"
    elif performance > 0.25:
        # More opaque red as the score falls.
        return f"rgba(150, 50, 0, {1-performance})"
    else:
        return "rgba(150, 0, 0, 0.5)"
|
|
|
|
|
# One entry per shuffled position: (position, {model: {"correctness" percent
# string, "correctness_color" rgba string}}, difficulty). The correctness map
# is keyed by the ORIGINAL index (idx), matching all_correctness_by_problem.
all_evaluations_by_problem_colored = [
    (
        trueidx,
        {
            model: {
                "correctness": f"{all_correctness_by_problem[idx][model]*100:.1f}",
                "correctness_color": calculate_color(
                    all_correctness_by_problem[idx][model]
                ),
            }
            for model in all_models
        },
        # BUG FIX: `problems` was rebound to shuffled order above, so it must
        # be indexed by shuffled position (trueidx), not the original index
        # (idx) — the old problems[idx] reported a different problem's
        # difficulty whenever the shuffle was not the identity.
        problems[trueidx]["difficulty"],
    )
    for trueidx, idx in enumerate(random_idxs)
]
|
|
|
# For each model, the per-problem samples in shuffled display order: one list
# of {"code", "pass1"} dicts per problem, pairing every generated solution
# with its pass@1 result.
all_data_for_view_formatted = {
    model_name: [
        [
            {"code": code, "pass1": passed}
            for code, passed in zip(
                outputs[orig_idx]["code_list"], outputs[orig_idx]["pass1_list"]
            )
        ]
        for orig_idx in random_idxs
    ]
    for model_name, outputs in all_outputs.items()
}
|
|
|
|
|
@app.route("/")
def home():
    """Render the full leaderboard grid (all models x all problems)."""
    # Removed leftover debug print of the model list that spammed the server
    # log on every page load.
    return render_template(
        "index.html", models=all_models, problems=all_evaluations_by_problem_colored
    )
|
|
|
|
|
@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render the detail page for one problem.

    ``problem_idx`` is the position in the shuffled display order, matching
    the rows of the home-page grid.
    """
    # Per-model generated samples for this problem.
    samples_by_model = {
        name: all_data_for_view_formatted[name][problem_idx] for name in all_models
    }
    # The precomputed grid entry is (position, evaluation dict, difficulty);
    # only the evaluation dict is needed here.
    _, evaluation, _ = all_evaluations_by_problem_colored[problem_idx]
    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=all_models,
        question=problems[problem_idx],
        data=samples_by_model,
    )
|
|
|
|
|
# Curated subset of models shown on the /mini pages. Every name must exactly
# match a key of all_outputs.json, since /problem_mini indexes
# all_data_for_view_formatted with these strings — TODO(review): confirm all
# entries exist there.
mini_models = [

    "DeepSeekCoder-V2",

    "LLama3.1-70b-Ins",
    "LLama3.1-405b-Ins-FP8",

    "GPT-4O-2024-05-13",
    "Claude-3-Opus",

    "Gemini-Pro-1.5-August",
    "O1-Mini (N=1)",
    "O1-Preview (N=1)",
]
|
|
|
|
|
@app.route("/mini")
def mini():
    """Render the reduced leaderboard covering only the mini_models subset."""
    context = {
        "models": mini_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index_mini.html", **context)
|
|
|
|
|
@app.route("/problem_mini/<int:problem_idx>")
def problem_mini(problem_idx):
    """Render the single-problem detail page restricted to mini_models.

    ``problem_idx`` is the position in the shuffled display order, matching
    the rows of the /mini grid.
    """
    # Grid entry is (position, evaluation dict, difficulty); slot 1 holds the
    # per-model correctness cells.
    entry = all_evaluations_by_problem_colored[problem_idx]
    per_model_samples = {
        name: all_data_for_view_formatted[name][problem_idx] for name in mini_models
    }
    return render_template(
        "problem_mini.html",
        problem_idx=problem_idx,
        evaluation=entry[1],
        models=mini_models,
        question=problems[problem_idx],
        data=per_model_samples,
    )
|
|
|
|
|
if __name__ == "__main__":
    # NOTE(review): binds the Flask development server to all interfaces
    # (0.0.0.0:7860) — fine for a local/containerized demo, but do not expose
    # this directly to the public internet.
    app.run(host="0.0.0.0", port=7860)
|
|