jieyuz2 commited on Sep 16

Commit

d28afbb

•

1 Parent(s): 4585a5d

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
BLINK.xlsx +0 -0
BLINK_acc.csv +2 -0
BLINK_openai_result.pkl +3 -0
BLINK_openai_result.xlsx +0 -0
ChartQA_TEST.xlsx +0 -0
ChartQA_TEST_acc.csv +2 -0
DocVQA_VAL.xlsx +0 -0
DocVQA_VAL_acc.csv +2 -0
HallusionBench.xlsx +0 -0
HallusionBench_auxmatch.xlsx +0 -0
HallusionBench_score.csv +13 -0
HallusionBench_tmp.pkl +3 -0
LLaVABench.xlsx +0 -0
LLaVABench_openai_result.xlsx +0 -0
LLaVABench_score.csv +5 -0
MMBench_DEV_EN.xlsx +0 -0
MMBench_DEV_EN_acc.csv +2 -0
MMBench_DEV_EN_openai_result.pkl +3 -0
MMBench_DEV_EN_openai_result.xlsx +0 -0
MME.xlsx +0 -0
MME_PREV.pkl +3 -0
MME_auxmatch.xlsx +0 -0
MME_score.csv +2 -0
MME_tmp.pkl +3 -0
MMMU_DEV_VAL.xlsx +0 -0
MMMU_DEV_VAL_acc.csv +3 -0
MMMU_DEV_VAL_openai_result.pkl +3 -0
MMMU_DEV_VAL_openai_result.xlsx +0 -0
MMVet.xlsx +0 -0
MMVet_gpt-4-0613.pkl +3 -0
MMVet_gpt-4-0613.xlsx +0 -0
MMVet_gpt-4-0613_score.csv +8 -0
MMVet_gpt-4-0613_score_fine.csv +18 -0
MathVista_MINI.xlsx +0 -0
MathVista_MINI_gpt-4-turbo.pkl +3 -0
MathVista_MINI_gpt-4-turbo.xlsx +0 -0
MathVista_MINI_gpt-4-turbo_score.csv +14 -0
POPE.xlsx +0 -0
POPE_auxmatch.xlsx +0 -0
POPE_score.csv +5 -0
POPE_tmp.pkl +3 -0
QBench2.xlsx +0 -0
QBench2_acc.csv +2 -0
QBench2_openai_result.pkl +3 -0
QBench2_openai_result.xlsx +0 -0
RealWorldQA.xlsx +0 -0
RealWorldQA_acc.csv +2 -0
RealWorldQA_openai_result.pkl +3 -0
RealWorldQA_openai_result.xlsx +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text

BLINK.xlsx ADDED Viewed

Binary file (93.8 kB). View file

BLINK_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "split","Overall","Art_Style","Counting","Forensic_Detection","Functional_Correspondence","IQ_Test","Jigsaw","Multi-view_Reasoning","Object_Localization","Relative_Depth","Relative_Reflectance","Semantic_Correspondence","Spatial_Relation","Visual_Correspondence","Visual_Similarity"
2	+ "none","0.020515518148342977","0.0","0.016666666666666666","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.26119402985074625","0.0","0.013986013986013986","0.0","0.0"

BLINK_openai_result.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78e3008fb80578d93a10d2cfb7e9b4106ddeb0171194c949843e645eb4771f09
+size 243380

BLINK_openai_result.xlsx ADDED Viewed

Binary file (106 kB). View file

ChartQA_TEST.xlsx ADDED Viewed

Binary file (125 kB). View file

ChartQA_TEST_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "test_augmented","test_human","Overall"
2	+ "0.0","0.24","0.12"

DocVQA_VAL.xlsx ADDED Viewed

Binary file (400 kB). View file

DocVQA_VAL_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "val","Overall"
2	+ "0.09721443260422508","0.09721443260422508"

HallusionBench.xlsx ADDED Viewed

Binary file (56.8 kB). View file

HallusionBench_auxmatch.xlsx ADDED Viewed

Binary file (61.6 kB). View file

HallusionBench_score.csv ADDED Viewed

	@@ -0,0 +1,13 @@

+"split","aAcc","fAcc","qAcc"
+"Overall","4.6267087276550996","0.8670520231213872","0.6593406593406593"
+"VD","4.230118443316413","1.3043478260869565","0.7220216606498195"
+"VS","5.277777777777778","0.0","0.5617977528089888"
+"VS_map","1.5625","0.0","0.0"
+"VD_video","2.941176470588235","0.0","0.0"
+"VS_table","5.357142857142857","0.0","0.0"
+"VD_illusion","6.944444444444445","3.225806451612903","1.3888888888888888"
+"VS_chart","9.230769230769232","0.0","1.3157894736842104"
+"VD_figure","3.75","0.0","2.564102564102564"
+"VS_ocr","0.0","0.0","0.0"
+"VD_ocr","6.741573033707865","2.3255813953488373","0.0"
+"VD_math","0.9259259259259258","0.0","0.0"

HallusionBench_tmp.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6a3b98ef300c1acd5bcb96aac8b99cd798d5650b60c3fc469ff34f715c996e
+size 27605

LLaVABench.xlsx ADDED Viewed

Binary file (23.4 kB). View file

LLaVABench_openai_result.xlsx ADDED Viewed

Binary file (23.8 kB). View file

LLaVABench_score.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+"split","Relative Score (main)","VLM Score","GPT4 Score"
+"overall","12.7","11.5","90.3"
+"detail","11.9","10.7","90.0"
+"conv","13.2","11.8","89.4"
+"complex","12.9","11.8","91.1"

MMBench_DEV_EN.xlsx ADDED Viewed

Binary file (499 kB). View file

MMBench_DEV_EN_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "split","Overall","AR","CP","FP-C","FP-S","LR","RR","action_recognition","attribute_comparison","attribute_recognition","celebrity_recognition","function_reasoning","future_prediction","identity_reasoning","image_emotion","image_quality","image_scene","image_style","image_topic","nature_relation","object_localization","ocr","physical_property_reasoning","physical_relation","social_relation","spatial_relationship","structuralized_imagetext_understanding"
2	+ "dev","0.020618556701030927","0.035175879396984924","0.02702702702702703","0.013986013986013986","0.020477815699658702","0.00847457627118644","0.0","0.018518518518518517","0.022727272727272728","0.08108108108108109","0.0","0.02531645569620253","0.0","0.0","0.0","0.0","0.038461538461538464","0.0","0.1111111111111111","0.0","0.0","0.0","0.06666666666666667","0.0","0.0","0.0","0.01282051282051282"

MMBench_DEV_EN_openai_result.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88ad3ecc2c7ea2f54af3a378c66aa6c627af3cf22b7c1826cef7d711101b36f8
+size 153361

MMBench_DEV_EN_openai_result.xlsx ADDED Viewed

Binary file (239 kB). View file

MME.xlsx ADDED Viewed

Binary file (103 kB). View file

MME_PREV.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a155f9b7237f095ce7ef71be655ca373be0394a1e191252d91ede1eb0f8f2c03
+size 11896

MME_auxmatch.xlsx ADDED Viewed

Binary file (119 kB). View file

MME_score.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation"
2	+ "19.13915566226491","3.2142857142857144","0.0","1.0","0.8823529411764706","0.0","15.0","0.7142857142857143","0.0","0.0","0.0","0.0","1.6666666666666667","0.3401360544217687","0.25","2.5"

MME_tmp.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30242ba2712848c1f11dcc0164cbc7ebbf579373d92b7e234aaf861801dd67fa
+size 30462

MMMU_DEV_VAL.xlsx ADDED Viewed

Binary file (260 kB). View file

MMMU_DEV_VAL_acc.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+"split","Overall","Accounting","Agriculture","Architecture_and_Engineering","Art","Art_Theory","Basic_Medical_Science","Biology","Chemistry","Clinical_Medicine","Computer_Science","Design","Diagnostics_and_Laboratory_Medicine","Economics","Electronics","Energy_and_Power","Finance","Geography","History","Literature","Manage","Marketing","Materials","Math","Mechanical_Engineering","Music","Pharmacy","Physics","Psychology","Public_Health","Sociology","Art & Design","Business","Health & Medicine","Humanities & Social Science","Science","Tech & Engineering"
+"dev","0.013333333333333334","0.0","0.2","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.2","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.05","0.0","0.0","0.0","0.0","0.02857142857142857"
+"validation","0.012222222222222223","0.0","0.06666666666666667","0.0","0.0","0.06666666666666667","0.06666666666666667","0.0","0.0","0.03333333333333333","0.0","0.03333333333333333","0.0","0.0","0.0","0.0","0.0","0.03333333333333333","0.03333333333333333","0.0","0.03333333333333333","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.025","0.006666666666666667","0.02","0.008333333333333333","0.006666666666666667","0.009523809523809525"

MMMU_DEV_VAL_openai_result.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d498b27893c88c5e5be830260c23597cf2f3be0ec1b7c075bc3c91ccc72b073
+size 135033

MMMU_DEV_VAL_openai_result.xlsx ADDED Viewed

Binary file (269 kB). View file

MMVet.xlsx ADDED Viewed

Binary file (44.1 kB). View file

MMVet_gpt-4-0613.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c623151288d47924f67b58fd914098f7e22b009a9864cd24009b6d0cc9f0a08f
+size 12309

MMVet_gpt-4-0613.xlsx ADDED Viewed

Binary file (45.6 kB). View file

MMVet_gpt-4-0613_score.csv ADDED Viewed

	@@ -0,0 +1,8 @@

+"Category","tot","acc"
+"rec","187","4.27807486631016"
+"ocr","108","0.0"
+"know","84","0.0"
+"gen","80","0.0"
+"spat","75","4.0"
+"math","26","0.0"
+"Overall","218","2.522935779816514"

MMVet_gpt-4-0613_score_fine.csv ADDED Viewed

	@@ -0,0 +1,18 @@

+"Category","tot","acc"
+"ocr_math","11","0.0"
+"ocr_spat_math","14","0.0"
+"rec_ocr_spat_math","1","0.0"
+"rec_spat","12","25.0"
+"ocr_spat","26","0.0"
+"rec_ocr_spat","7","0.0"
+"ocr_know_spat","3","0.0"
+"rec_ocr","4","0.0"
+"rec_know_spat","2","0.0"
+"ocr","108","0.0"
+"rec","187","4.27807486631016"
+"rec_know","9","0.0"
+"rec_know_gen","62","0.0"
+"rec_ocr_know_gen","8","0.0"
+"rec_ocr_gen_spat","8","0.0"
+"ocr_gen_spat","2","0.0"
+"Overall","218","2.522935779816514"

MathVista_MINI.xlsx ADDED Viewed

Binary file (107 kB). View file

MathVista_MINI_gpt-4-turbo.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad77656d0bb03f24fb0b5338aaab28ea7b7c318607dd697bee6842f6a8b69cec
+size 168077

MathVista_MINI_gpt-4-turbo.xlsx ADDED Viewed

Binary file (124 kB). View file

MathVista_MINI_gpt-4-turbo_score.csv ADDED Viewed

	@@ -0,0 +1,14 @@

+"Task&Skill","tot","prefetch","hit","prefetch_rate","acc"
+"Overall","1000","15","112","1.5","11.200000000000001"
+"scientific reasoning","122","0","21","0.0","17.21311475409836"
+"textbook question answering","158","1","30","0.6329113924050633","18.9873417721519"
+"numeric commonsense","144","0","10","0.0","6.944444444444445"
+"arithmetic reasoning","353","0","42","0.0","11.89801699716714"
+"visual question answering","179","0","39","0.0","21.787709497206702"
+"geometry reasoning","239","2","5","0.8368200836820083","2.092050209205021"
+"algebraic reasoning","281","2","15","0.7117437722419928","5.338078291814947"
+"geometry problem solving","208","1","2","0.4807692307692308","0.9615384615384616"
+"math word problem","186","0","8","0.0","4.301075268817205"
+"logical reasoning","37","13","1","35.13513513513514","2.7027027027027026"
+"figure question answering","269","13","33","4.83271375464684","12.267657992565056"
+"statistical reasoning","301","0","35","0.0","11.627906976744185"

POPE.xlsx ADDED Viewed

Binary file (122 kB). View file

POPE_auxmatch.xlsx ADDED Viewed

Binary file (156 kB). View file

POPE_score.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+"split","Overall","acc","precision","recall"
+"Overall","0","0.1111111111111111","0.0","0.0"
+"random","0","0.23333333333333336","0.0","0.0"
+"popular","0","0.06666666666666667","0.0","0.0"
+"adversarial","0","0.03333333333333333","0.0","0.0"

POPE_tmp.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:984de2a424a9642939bee53e8001e96f8d81642d786399d799573b6dbfb5247c
+size 66357

QBench2.xlsx ADDED Viewed

Binary file (64.4 kB). View file

QBench2_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "split","Overall"
2	+ "none","0.042"

QBench2_openai_result.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb3a3cd7ba4b546698eb88d4faede577926c01f2f009438d77b27d4ab1b14485
+size 127664

QBench2_openai_result.xlsx ADDED Viewed

Binary file (72.7 kB). View file

RealWorldQA.xlsx ADDED Viewed

Binary file (46.7 kB). View file

RealWorldQA_acc.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ "split","Overall"
2	+ "none","0.06143790849673202"

RealWorldQA_openai_result.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:999c290dbdb2931cfabd27be105806cbed3da2ec4d937f14c023a1de92a3d6eb
+size 95946

RealWorldQA_openai_result.xlsx ADDED Viewed

Binary file (53.1 kB). View file