# StringChaos's picture
# release 3 and strawberry
# ab90864
import glob
import json
import os
import random

import numpy as np
from flask import Flask, abort, render_template, request
app = Flask(__name__)

# Problem metadata. JSON text is UTF-8 by specification, so decode it
# explicitly instead of depending on the platform's locale encoding.
with open("problems.json", encoding="utf-8") as f:
    problems = json.load(f)

# Titles are collected before the shuffle below, i.e. in file order.
problem_choices = [q["question_title"] for q in problems]

# Shuffle the display order. After these three lines, random_idxs[k] is the
# file-order index of the problem stored at problems[k].
random_idxs = list(range(len(problems)))
random.shuffle(random_idxs)
problems = [problems[idx] for idx in random_idxs]

# Per-model outputs, indexed by file-order problem index.
# NOTE(review): assumes each model's list is in the same order as
# problems.json -- confirm against the script that writes these files.
with open("all_outputs.json", encoding="utf-8") as f:
    all_outputs = json.load(f)
all_models = list(all_outputs.keys())
num_questions_filtered = len(problems)

# Mean of "pass1_list" for every (file-order problem index, model) pair.
all_correctness_by_problem = {
    idx: {model: np.mean(all_outputs[model][idx]["pass1_list"]) for model in all_models}
    for idx in random_idxs
}
def calculate_color(performance):
    """Return the CSS ``rgba(...)`` string used to shade a score cell.

    ``performance`` is compared against three thresholds:

    * above 0.75            -> fixed green, alpha 0.5
    * above 0.5, up to 0.75 -> yellow-green, alpha equal to ``performance``
    * above 0.25, up to 0.5 -> orange-red, alpha equal to ``1 - performance``
    * anything else         -> fixed red, alpha 0.5 (this includes NaN,
      because every comparison with NaN is false)
    """
    if performance > 0.75:
        return "rgba(0, 150, 0, 0.5)"
    if performance > 0.5:
        return f"rgba(50, 150, 0, {performance})"
    if performance > 0.25:
        return f"rgba(150, 50, 0, {1-performance})"
    return "rgba(150, 0, 0, 0.5)"
# One tuple per displayed problem, in display (shuffled) order:
#   (display index, {model: {"correctness", "correctness_color"}}, difficulty)
# `idx` is the file-order index used to look up scores; `trueidx` is the
# position in the already-shuffled `problems` list.
all_evaluations_by_problem_colored = [
    (
        trueidx,
        {
            model: {
                "correctness": f"{all_correctness_by_problem[idx][model]*100:.1f}",
                "correctness_color": calculate_color(
                    all_correctness_by_problem[idx][model]
                ),
            }
            for model in all_models
        },
        # `problems` has already been reordered by random_idxs, so the entry
        # matching these scores lives at the display position. Indexing it
        # with the file-order `idx` would pick another problem's difficulty.
        problems[trueidx]["difficulty"],
    )
    for trueidx, idx in enumerate(random_idxs)
]

# For each model, a list in display order; each element is the list of
# {"code", "pass1"} pairs built from that problem's "code_list" and
# "pass1_list".
all_data_for_view_formatted = {
    model: [
        [
            {"code": code, "pass1": passed}
            for code, passed in zip(resp[idx]["code_list"], resp[idx]["pass1_list"])
        ]
        for idx in random_idxs
    ]
    for model, resp in all_outputs.items()
}
@app.route("/")
def home():
    """Render the overview table listing every model against every problem."""
    print(all_models)
    context = {
        "models": all_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index.html", **context)
@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render the detail page for one problem across all models.

    ``problem_idx`` is the display position of the problem (the first
    element of each tuple in ``all_evaluations_by_problem_colored``).
    Responds with 404 when the index does not refer to a loaded problem.
    """
    # The index comes straight from the URL; reject unknown values with a
    # 404 instead of letting an IndexError surface as a 500.
    if not 0 <= problem_idx < len(problems):
        abort(404)

    data = {
        model: all_data_for_view_formatted[model][problem_idx] for model in all_models
    }
    evaluation = all_evaluations_by_problem_colored[problem_idx][1]
    question = problems[problem_idx]
    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=all_models,
        question=question,
        data=data,
    )
# Models shown by the "/mini" and "/problem_mini/<idx>" views.
# The commented-out names are not part of the list.
mini_models = [
    # "DeepSeek-V2",
    "DeepSeekCoder-V2",
    # "DSCoder-33b-Ins",
    "LLama3.1-70b-Ins",
    "LLama3.1-405b-Ins-FP8",
    # "GPT-4-Turbo-2024-04-09",
    "GPT-4O-2024-05-13",
    "Claude-3-Opus",
    # "Claude-3-Sonnet",
    "Gemini-Pro-1.5-August",
    "O1-Mini (N=1)",
    "O1-Preview (N=1)",
]
@app.route("/mini")
def mini():
    """Render the overview table restricted to the models in ``mini_models``."""
    context = {
        "models": mini_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index_mini.html", **context)
@app.route("/problem_mini/<int:problem_idx>")
def problem_mini(problem_idx):
    """Render the detail page for one problem, limited to ``mini_models``.

    ``problem_idx`` is the display position of the problem (the first
    element of each tuple in ``all_evaluations_by_problem_colored``).
    Responds with 404 when the index does not refer to a loaded problem.
    """
    # The index comes straight from the URL; reject unknown values with a
    # 404 instead of letting an IndexError surface as a 500.
    if not 0 <= problem_idx < len(problems):
        abort(404)

    # NOTE(review): every name in mini_models must be a key of the loaded
    # outputs, otherwise this lookup raises KeyError -- confirm the data file.
    data = {
        model: all_data_for_view_formatted[model][problem_idx] for model in mini_models
    }
    evaluation = all_evaluations_by_problem_colored[problem_idx][1]
    question = problems[problem_idx]
    return render_template(
        "problem_mini.html",
        problem_idx=problem_idx,
        evaluation=evaluation,
        models=mini_models,
        question=question,
        data=data,
    )
if __name__ == "__main__":
    # Start the Flask server when run as a script, listening on every
    # network interface at port 7860.
    app.run(host="0.0.0.0", port=7860)