Spaces:
Running
Running
File size: 2,422 Bytes
b650828 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
\begin{table}[h]
\centering
\caption{The detailed evaluation result of all multimodal judges on \textbf{alignment} perspective. The feedback are provided in numerical scale of range [0, 10]. Specifically, we study their individual performance over five alignment objectives: object (existence), attribute, action, location, and count. The best performance across all models is bolded.}
\resizebox{0.9\linewidth}{!}{%
\begin{tabular}{c|cccccc}
\toprule
& Object & Attribute & Action & Location & Count & \cellcolor{skyblue}Avg \\
\midrule
LLaVA-1.5-7b$^\heartsuit$ & $20.7$ & $25.2$ & $23.1$ & $18.2$ & $17.9$ & \cellcolor{skyblue} $22.0$ \\
LLaVA-1.5-13b$^\heartsuit$ & $17.7$ & $13.5$ & $11.8$ & $16.5$ & $8.9$ & \cellcolor{skyblue} $10.3$ \\
LLaVA-NeXT-mistral-7b$^\heartsuit$ & $25.9$ & $30.0$ & $41.9$ & $33.8$ & $35.7$ & \cellcolor{skyblue} $31.3$ \\
LLaVA-NeXT-vicuna-13b$^\heartsuit$ & $25.9$ & $27.4$ & $31.6$ & $38.9$ & $32.1$ & \cellcolor{skyblue} $29.1$ \\
Instructblip-7b$^\heartsuit$ & $17.1$ & $17.4$ & $16.2$ & $13.1$ & $21.4$ & \cellcolor{skyblue} $17.1$ \\
MiniGPT4-v2$^\heartsuit$ & $37.5$ & $30.9$ & $30.8$ & $32.5$ & $39.3$ & \cellcolor{skyblue} $32.8$ \\
Prometheus-Vision-7b$^\heartsuit$ & $19.5$ & $15.2$ & $16.2$ & $22.1$ & $26.8$ & \cellcolor{skyblue} $18.8$ \\
Prometheus-Vision-13b$^\heartsuit$ & $14.3$ & $10.9$ & $9.4$ & $11.7$ & $16.1$ & \cellcolor{skyblue} $11.8$ \\
Qwen-VL-Chat$^\spadesuit$ & $30.7$ & $29.1$ & $35.9$ & $29.9$ & $32.1$ & \cellcolor{skyblue} $31.1$ \\
Internvl-chat-v1-5$^\spadesuit$ & $\bf 73.3$ & $\bf 74.8$ & $\bf 78.6$ & $\bf 80.5$ & $\bf 78.6$ & \cellcolor{skyblue} $\bf 75.8$ \\
Idefics2-8b$^\spadesuit$ & $35.5$ & $31.7$ & $30.8$ & $29.9$ & $30.4$ & \cellcolor{skyblue} $32.6$ \\
\midrule
GPT-4-vision$^\clubsuit$ & $68.1$ & $62.9$ & $64.1$ & $67.1$ & $73.2$ & \cellcolor{skyblue} $66.1$ \\
GPT-4o$^\clubsuit$ & $62.2$ & $57.2$ & $64.1$ & $63.2$ & $67.9$ & \cellcolor{skyblue} $61.5$ \\
Gemini Ultra$^\clubsuit$ & $71.7$ & $65.1$ & $63.2$ & $64.5$ & $67.8$ & \cellcolor{skyblue} $67.2$ \\
Claude 3 Opus$^\clubsuit$ & $64.9$ & $38.9$ & $44.4$ & $55.3$ & $55.4$ & \cellcolor{skyblue} $57.1$ \\
\bottomrule
\end{tabular}}
\label{exp:alignment_number_10}
\end{table} |