Spaces:

cot-leaderboard
/

open-cot-dashboard

Running

Gregor Betz

ui descriptions

5e531ec unverified about 2 months ago

5.27 kB

	import gradio as gr # type: ignore
	import plotly.express as px # type: ignore

	from backend.data import load_cot_data
	from backend.envs import API, REPO_ID, TOKEN

	# Logo images are served straight from the cot-eval GitHub repository.
	logo1_url = "https://raw.githubusercontent.com/logikon-ai/cot-eval/main/assets/AI2_Logo_Square.png"
	logo2_url = "https://raw.githubusercontent.com/logikon-ai/cot-eval/main/assets/logo_logikon_notext_withborder.png"

	# Centered flex row holding the two linked logos; the adjacent literals
	# below concatenate into one HTML string.
	LOGOS = (
	    '<div style="display: flex; justify-content: center;">'
	    f'<a href="https://allenai.org/"><img src="{logo1_url}" alt="AI2" style="width: 30vw; min-width: 20px; max-width: 60px;"></a>'
	    ' '
	    f'<a href="https://logikon.ai"><img src="{logo2_url}" alt="Logikon AI" style="width: 30vw; min-width: 20px; max-width: 60px; margin-left: 10px;"></a>'
	    '</div>'
	)

	# Page heading followed by the logo row.
	TITLE = f'<h1 align="center" id="space-title"> Open CoT Dashboard</h1> {LOGOS}'

	# Markdown blurb linking to the Open CoT Leaderboard.
	INTRODUCTION_TEXT = """
	Baseline accuracies and marginal accuracy gains for specific models and CoT regimes from the [Open CoT Leaderboard](https://huggingface.co/spaces/logikon/open_cot_leaderboard).
	"""

	def restart_space():
	    """Request a restart of the Space identified by ``REPO_ID``.

	    Delegates to ``API.restart_space`` and authenticates with ``TOKEN``.
	    """
	    API.restart_space(
	        repo_id=REPO_ID,
	        token=TOKEN,
	    )

	# Load the evaluation data once at start-up. Both frames are module-level
	# globals read by the plotting/table callbacks and the UI definition below.
	try:
	    df_cot_err, df_cot_regimes = load_cot_data()
	except Exception as err:
	    print(err)
	    # sleep for 10 seconds before restarting the space
	    import time
	    time.sleep(10)
	    restart_space()
	    # If the restart request returns, the data frames are still undefined and
	    # the rest of the script would die with an unrelated NameError. Surface
	    # the original loading error instead.
	    raise


	def plot_evals_init(model_id, regex_model_filter, plotly_mode, request: gr.Request):
	    """Build the evaluation plot, honouring a ``model`` URL query parameter.

	    If ``request`` carries a ``model`` query parameter whose value is one of
	    the models in ``df_cot_err``, that value replaces ``model_id``; otherwise
	    ``model_id`` is used unchanged.

	    Returns:
	        Whatever ``plot_evals`` returns for the resolved model.
	    """
	    # No request, or no "model" parameter: nothing to override.
	    if not request or "model" not in request.query_params:
	        return plot_evals(model_id, regex_model_filter, plotly_mode)

	    candidate = request.query_params["model"]
	    known_models = df_cot_err.model.to_list()
	    chosen = candidate if candidate in known_models else model_id
	    return plot_evals(chosen, regex_model_filter, plotly_mode)


	def plot_evals(model_id, regex_model_filter, plotly_mode):
	    """Scatter-plot base accuracy against marginal CoT accuracy gain per task.

	    Args:
	        model_id: Model to highlight; its points are always shown and are
	            labelled "selected".
	        regex_model_filter: Regular expression matched against the ``model``
	            column to choose which other models are shown.
	        plotly_mode: ``"dark"`` selects the ``plotly_dark`` template; any
	            other value selects the default ``plotly`` template.

	    Returns:
	        A ``(figure, model_id)`` tuple.
	    """
	    df = df_cot_err.copy()
	    df["selected"] = df_cot_err.model.apply(lambda x: "selected" if x == model_id else "-")

	    try:
	        df_filter = df.model.str.contains(regex_model_filter)
	    except Exception as err:
	        # Best effort: an unusable pattern must not break the plot, so warn
	        # the user and fall back to a pattern that matches every model.
	        gr.Warning("Failed to apply regex filter", duration=4)
	        # Format the exception instead of concatenating it to a str, which
	        # raised TypeError inside this handler and defeated the fallback.
	        print(f"Failed to apply regex filter: {err}")
	        df_filter = df.model.str.contains(".*")
	    # Keep rows matching the filter plus the rows of the selected model.
	    df = df[df_filter | df.selected.eq("selected")]

	    #df.sort_values(["selected", "model"], inplace=True, ascending=True) # has currently no effect with px.scatter
	    template = "plotly_dark" if plotly_mode == "dark" else "plotly"
	    fig = px.scatter(
	        df,
	        x="base accuracy",
	        y="marginal acc. gain",
	        color="selected",
	        symbol="model",
	        facet_col="task",
	        facet_col_wrap=3,
	        category_orders={"selected": ["selected", "-"]},
	        color_discrete_sequence=["Orange", "Gray"],
	        template=template,
	        error_y="acc_gain-err",
	        hover_data=["model", "cot accuracy"],
	        width=1200,
	        height=700,
	    )

	    fig.update_layout(
	        title={"automargin": True},
	    )
	    return fig, model_id


	def styled_model_table_init(model_id, request: gr.Request):
	    """Build the styled model table, honouring a ``model`` URL query parameter.

	    If ``request`` carries a ``model`` query parameter whose value is one of
	    the models in ``df_cot_regimes``, that value replaces ``model_id``;
	    otherwise ``model_id`` is used unchanged.

	    Returns:
	        Whatever ``styled_model_table`` returns for the resolved model.
	    """
	    # No request, or no "model" parameter: nothing to override.
	    if not request or "model" not in request.query_params:
	        return styled_model_table(model_id)

	    candidate = request.query_params["model"]
	    known_models = df_cot_regimes.model.to_list()
	    chosen = candidate if candidate in known_models else model_id
	    return styled_model_table(chosen)


	def styled_model_table(model_id):
	    """Return a styled table of the CoT regimes recorded for ``model_id``.

	    Selects the rows of ``df_cot_regimes`` whose ``model`` equals
	    ``model_id``, keeps the regime and accuracy columns, renames
	    ``temperature`` to ``temp``, shortens the ``ReflectBeforeRun`` chain name
	    to ``Reflect`` and sorts by task and chain.

	    Returns:
	        The ``Styler`` obtained from the resulting frame after formatting.
	    """

	    def make_pretty(styler):
	        """Apply number formatting, colour gradients and table styles."""
	        styler.hide(axis="index")
	        styler.format(precision=1)
	        # Absolute accuracies share one colour scale ...
	        styler.background_gradient(
	            axis=None,
	            subset=["acc_base", "acc_cot"],
	            vmin=20, vmax=100, cmap="YlGnBu"
	        )
	        # ... while gains get a diverging scale centred on zero.
	        styler.background_gradient(
	            axis=None,
	            subset=["acc_gain"],
	            vmin=-20, vmax=20, cmap="coolwarm"
	        )
	        # NOTE(review): none of the selected columns is named 'B'; that entry
	        # looks like a leftover from an example - confirm before removing.
	        styler.set_table_styles({
	            'task': [{'selector': '',
	                      'props': [('font-weight', 'bold')]}],
	            'B': [{'selector': 'td',
	                   'props': 'color: blue;'}]
	        }, overwrite=False)
	        return styler

	    columns = ['task', 'cot_chain', 'best_of',
	               'temperature', 'top_k', 'top_p', 'acc_base', 'acc_cot', 'acc_gain']
	    # Filter rows and pick columns in a single .loc selection.
	    df_cot_model = df_cot_regimes.loc[df_cot_regimes.model.eq(model_id), columns]

	    df_cot_model = (
	        df_cot_model
	        .rename(columns={"temperature": "temp"})
	        .replace({'cot_chain': 'ReflectBeforeRun'}, "Reflect")
	        .sort_values(["task", "cot_chain"])
	        .reset_index(drop=True)
	    )

	    return df_cot_model.style.pipe(make_pretty)


	# Build the Gradio UI: title, intro text, input controls, a table and a plot.
	# NOTE(review): nesting indentation is missing in this copy of the file; the
	# statements below must be re-indented under `with demo:` / `with gr.Row():`
	# before this runs. Presumably the four input widgets (the ones given a
	# `scale`) belong to the row and `demo.launch()` is at top level - confirm
	# against the original file.
	demo = gr.Blocks()

	with demo:

	gr.HTML(TITLE)
	gr.Markdown(INTRODUCTION_TEXT)
	with gr.Row():
	# Input controls: model dropdown, regex filter, plot theme and update button.
	selected_model = gr.Dropdown(list(df_cot_err.model.unique()), value="allenai/tulu-2-70b", label="Model", info="with performance details below", scale=2)
	regex_model_filter = gr.Textbox(".*", label="Regex", info="to filter models shown in plots", scale=2)
	plotly_mode = gr.Radio(["dark","light"], value="light", label="Theme", info="of plots", scale=1)
	submit = gr.Button("Update", scale=1)
	# Output components filled by the callbacks registered below.
	table = gr.DataFrame()
	plot = gr.Plot(label="evals")


	# Clicking "Update" redraws the plot (also writing back the model selection)
	# and rebuilds the table for the selected model.
	submit.click(plot_evals, [selected_model, regex_model_filter, plotly_mode], [plot, selected_model])
	submit.click(styled_model_table, selected_model, table)

	# On load, the *_init variants are used; they additionally check the request
	# for a "model" query parameter.
	demo.load(plot_evals_init, [selected_model, regex_model_filter, plotly_mode], [plot, selected_model])
	demo.load(styled_model_table_init, selected_model, table)

	demo.launch()