Spaces:

ahmedheakl
/

CAMEL-Bench-leaderboard

Running

App Files Files Community

CAMEL-Bench-leaderboard / app.py

ahmedheakl

Update app.py

6e6ac6c verified 5 days ago

raw

history blame

2.99 kB

	import pandas as pd
	import gradio as gr

	# Raw benchmark scores. Every score list is positionally aligned with the
	# "Method" list: entry i of each task column belongs to model i.
	data = {
	    "Method": [
	        "GPT-4o", "GPT-4o-mini", "Qwen2-VL-7B", "Gemini-1.5-Pro", "Gemini-1.5-Flash",
	        "LLaVa-OneVision-7B", "Pangea-7B-Instruct", "Qwen2-VL-2B", "InternVL2-8B", "LLaVa-NeXt-7B",
	    ],
	    "MM Understanding & Reasoning": [
	        57.90, 48.82, 51.35, 46.67, 45.58, 42.90, 40.09, 40.59, 30.41, 26.33,
	    ],
	    "OCR & Document Understanding": [
	        59.11, 42.89, 49.06, 36.59, 33.59, 31.35, 17.75, 25.68, 15.91, 19.12,
	    ],
	    "Charts & Diagram Understanding": [
	        73.57, 64.98, 55.39, 47.06, 48.25, 40.86, 38.75, 27.83, 30.27, 27.56,
	    ],
	    "Video Understanding": [
	        74.27, 68.11, 62.64, 42.94, 53.31, 29.41, 49.01, 38.90, 51.42, 44.90,
	    ],
	    "Cultural Specific Understanding": [
	        80.86, 65.92, 75.64, 56.24, 46.54, 66.02, 20.34, 34.27, 20.88, 28.30,
	    ],
	    "Medical Imaging": [
	        49.90, 47.37, 39.42, 33.77, 42.86, 27.29, 31.99, 29.12, 29.48, 22.54,
	    ],
	    "Agro Specific": [
	        80.75, 79.58, 79.84, 72.12, 76.06, 75.03, 74.51, 52.02, 44.47, 42.00,
	    ],
	    "Remote Sensing Understanding": [
	        22.85, 16.93, 22.28, 17.07, 14.95, 10.72, 6.67, 12.56, 5.36, 8.33,
	    ],
	}

	df = pd.DataFrame(data)

	# Row-wise mean over every column after "Method" (column 0), rounded to two
	# decimals. Computed while the frame still holds only the task columns, so
	# the average never feeds into itself.
	average_scores = df.iloc[:, 1:].mean(axis=1).round(2)

	# Put the average right after the model name, ahead of the per-task columns.
	df.insert(1, "Average Score", average_scores)

	def display_data() -> pd.DataFrame:
	    """Return the module-level leaderboard table ``df``.

	    The same DataFrame object is handed back on every call (no copy is made).
	    NOTE(review): nothing in the visible source calls this helper.
	    """
	    return df

	# Markdown payloads for the page, kept apart from the layout code below.
	ICON_MARKDOWN = "![camel icon](https://cdn-uploads.huggingface.co/production/uploads/656864e12d73834278a8dea7/n-XfVKd1xVywH_vgPyJyQ.png)"
	TITLE_MARKDOWN = "# CAMEL-Bench: Model Performance Across Vision Understanding Tasks"
	INTRO_MARKDOWN = """
	This table shows the performance of different models across various tasks including OCR, chart understanding, video, medical imaging, and more.
	"""
	# NOTE(review): the contact address in the text below reads "[email protected]",
	# which looks like a redacted placeholder -- confirm the real address.
	SUBMISSION_MARKDOWN = """
	## Submission Instructions

	To contribute your model's results to the CAMEL-Bench leaderboard:

	- Via GitHub Pull Request:
	- Use [this evaluation script](https://github.com/mbzuai-oryx/Camel-Bench/blob/main/scripts/eval_qwen.py) to test your model and generate results.
	- Create a pull request in the CAMEL-Bench GitHub repository with your results.

	- Via Email:
	- Send your results to [email protected], and we’ll add them to the leaderboard for you.

	We look forward to seeing your contributions!
	"""

	with gr.Blocks() as demo:
	    # Page header: icon, title, then a one-paragraph description.
	    gr.Markdown(ICON_MARKDOWN, elem_id="camel-icon")
	    gr.Markdown(TITLE_MARKDOWN)
	    gr.Markdown(INTRO_MARKDOWN)

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:
	        # Tab 0: the leaderboard table, rendered non-interactive.
	        with gr.TabItem("🏅 LLM Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
	            gr.Dataframe(value=df, label="CAMEL-Bench Model Performance", interactive=False)

	        # Tab 1: instructions for submitting new results.
	        with gr.TabItem("📤 How to Submit", elem_id="submission-tab", id=1):
	            gr.Markdown(SUBMISSION_MARKDOWN)

	# Start the app as soon as the script is executed.
	demo.launch()