File size: 16,884 Bytes
1b9e8e9 |
1 |
{"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.616, "ndcg_at_3": 0.6829, "ndcg_at_5": 0.69866, "ndcg_at_10": 0.71897, "ndcg_at_20": 0.72995, "ndcg_at_100": 0.74773, "ndcg_at_1000": 0.75186, "map_at_1": 0.616, "map_at_3": 0.666, "map_at_5": 0.6748, "map_at_10": 0.68334, "map_at_20": 0.68628, "map_at_100": 0.68887, "map_at_1000": 0.68908, "recall_at_1": 0.616, "recall_at_3": 0.732, "recall_at_5": 0.77, "recall_at_10": 0.832, "recall_at_20": 0.876, "recall_at_100": 0.97, "recall_at_1000": 1.0, "precision_at_1": 0.616, "precision_at_3": 0.244, "precision_at_5": 0.154, "precision_at_10": 0.0832, "precision_at_20": 0.0438, "precision_at_100": 0.0097, "precision_at_1000": 0.001, "mrr_at_1": 0.616, "mrr_at_3": 0.6659999999999999, "mrr_at_5": 0.6747999999999997, "mrr_at_10": 0.6833436507936507, "mrr_at_20": 0.6862771038558961, "mrr_at_100": 0.6888743153033208, "mrr_at_1000": 0.6890756489948757, "naucs_at_1_max": 0.5664347411760398, "naucs_at_1_std": -0.04030799868052915, "naucs_at_1_diff1": 0.8690669631403406, "naucs_at_3_max": 0.5848701334662456, "naucs_at_3_std": -0.00020965355738568925, "naucs_at_3_diff1": 0.7713510391696136, "naucs_at_5_max": 0.5576329983661964, "naucs_at_5_std": 0.02427877621360306, "naucs_at_5_diff1": 0.7304312795147817, "naucs_at_10_max": 0.6634899769963856, "naucs_at_10_std": 0.10158560630956358, "naucs_at_10_diff1": 0.7193441622459034, "naucs_at_20_max": 0.6846281017273889, "naucs_at_20_std": 0.1800624208151784, "naucs_at_20_diff1": 0.6991208553505763, "naucs_at_100_max": 0.7393713040771861, "naucs_at_100_std": 0.29707438530968483, "naucs_at_100_diff1": 0.560037348272644, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.42794, "ndcg_at_3": 0.49226, "ndcg_at_5": 0.51068, "ndcg_at_10": 0.52706, "ndcg_at_20": 0.54028, "ndcg_at_100": 0.56573, "ndcg_at_1000": 0.58653, "map_at_1": 0.42794, "map_at_3": 0.47487, "map_at_5": 0.48518, "map_at_10": 0.49186, "map_at_20": 0.49537, "map_at_100": 0.49843, "map_at_1000": 0.49917, "recall_at_1": 0.42794, "recall_at_3": 0.54324, "recall_at_5": 0.58758, "recall_at_10": 0.63858, "recall_at_20": 0.6918, "recall_at_100": 0.83592, "recall_at_1000": 1.0, "precision_at_1": 0.42794, "precision_at_3": 0.18108, "precision_at_5": 0.11752, "precision_at_10": 0.06386, "precision_at_20": 0.03459, "precision_at_100": 0.00836, "precision_at_1000": 0.001, "mrr_at_1": 0.4279379157427938, "mrr_at_3": 0.47487065779748744, "mrr_at_5": 0.48518107908351843, "mrr_at_10": 0.4918611199099007, "mrr_at_20": 0.49536559175982775, "mrr_at_100": 0.4984327625677607, "mrr_at_1000": 0.4991666478010711, "naucs_at_1_max": 0.32751074383924944, "naucs_at_1_std": 0.11372478807673965, "naucs_at_1_diff1": 0.7309001382945034, "naucs_at_3_max": 0.323364668970982, "naucs_at_3_std": 0.15046527236345536, "naucs_at_3_diff1": 0.6685882833272869, "naucs_at_5_max": 0.28615200457331036, "naucs_at_5_std": 0.1800145271401355, "naucs_at_5_diff1": 0.6582645936332455, "naucs_at_10_max": 0.2654395801642593, "naucs_at_10_std": 0.2618416815749919, "naucs_at_10_diff1": 0.6361013610861579, "naucs_at_20_max": 0.20809192826047268, "naucs_at_20_std": 0.2837837445662298, "naucs_at_20_diff1": 0.616856279203243, "naucs_at_100_max": 0.14658755092658263, "naucs_at_100_std": 0.47396509539640147, "naucs_at_100_diff1": 0.5489469352463915, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.56478, "ndcg_at_3": 0.64245, "ndcg_at_5": 0.6549, "ndcg_at_10": 0.66627, "ndcg_at_20": 0.67763, "ndcg_at_100": 0.69881, "ndcg_at_1000": 0.70935, "map_at_1": 0.56478, "map_at_3": 0.62314, "map_at_5": 0.63003, "map_at_10": 0.63485, "map_at_20": 0.63802, "map_at_100": 0.641, "map_at_1000": 0.64146, "recall_at_1": 0.56478, "recall_at_3": 0.69838, "recall_at_5": 0.72874, "recall_at_10": 0.76316, "recall_at_20": 0.80769, "recall_at_100": 0.92105, "recall_at_1000": 1.0, "precision_at_1": 0.56478, "precision_at_3": 0.23279, "precision_at_5": 0.14575, "precision_at_10": 0.07632, "precision_at_20": 0.04038, "precision_at_100": 0.00921, "precision_at_1000": 0.001, "mrr_at_1": 0.5647773279352226, "mrr_at_3": 0.623144399460189, "mrr_at_5": 0.6300269905533062, "mrr_at_10": 0.6348483387957071, "mrr_at_20": 0.6380237011473616, "mrr_at_100": 0.6409966691916469, "mrr_at_1000": 0.6414591403727847, "naucs_at_1_max": 0.4954096697999347, "naucs_at_1_std": -0.09529615564222386, "naucs_at_1_diff1": 0.7929993892344983, "naucs_at_3_max": 0.45916619524176466, "naucs_at_3_std": -0.10217167988275037, "naucs_at_3_diff1": 0.6710978766450731, "naucs_at_5_max": 0.458005161549138, "naucs_at_5_std": -0.12647374104352355, "naucs_at_5_diff1": 0.6612876073475845, "naucs_at_10_max": 0.456943826688784, "naucs_at_10_std": -0.11221973652245797, "naucs_at_10_diff1": 0.6323769846480376, "naucs_at_20_max": 0.5108554593745827, "naucs_at_20_std": -0.049255464787786675, "naucs_at_20_diff1": 0.622013845193991, "naucs_at_100_max": 0.5740503836133115, "naucs_at_100_std": 0.07922057867841056, "naucs_at_100_diff1": 0.6220390320682018, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.33929, "ndcg_at_3": 0.40888, "ndcg_at_5": 0.43485, "ndcg_at_10": 0.47185, "ndcg_at_20": 0.49989, "ndcg_at_100": 0.54774, "ndcg_at_1000": 0.54774, "map_at_1": 0.33929, "map_at_3": 0.39107, "map_at_5": 0.40518, "map_at_10": 0.42115, "map_at_20": 0.42888, "map_at_100": 0.43571, "map_at_1000": 0.43571, "recall_at_1": 0.33929, "recall_at_3": 0.46071, "recall_at_5": 0.525, "recall_at_10": 0.63571, "recall_at_20": 0.74643, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.33929, "precision_at_3": 0.15357, "precision_at_5": 0.105, "precision_at_10": 0.06357, "precision_at_20": 0.03732, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.3392857142857143, "mrr_at_3": 0.39107142857142846, "mrr_at_5": 0.4051785714285714, "mrr_at_10": 0.4211493764172336, "mrr_at_20": 0.42888106385051883, "mrr_at_100": 0.43570884542956795, "mrr_at_1000": 0.43570884542956795, "naucs_at_1_max": -0.0910941234941897, "naucs_at_1_std": -0.18577142451423567, "naucs_at_1_diff1": 0.4466712422786546, "naucs_at_3_max": -0.06120647974726933, "naucs_at_3_std": -0.0978158313500726, "naucs_at_3_diff1": 0.376904047729694, "naucs_at_5_max": -0.08838653910954154, "naucs_at_5_std": -0.06773420064364596, "naucs_at_5_diff1": 0.328567033417818, "naucs_at_10_max": -0.072943651933889, "naucs_at_10_std": -0.053553853637566755, "naucs_at_10_diff1": 0.2796870934533982, "naucs_at_20_max": -0.07176481890671475, "naucs_at_20_std": -0.09066118817295662, "naucs_at_20_diff1": 0.2846501178713686, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.52612, "ndcg_at_3": 0.63176, "ndcg_at_5": 0.66227, "ndcg_at_10": 0.68881, "ndcg_at_20": 0.7011, "ndcg_at_100": 0.7145, "ndcg_at_1000": 0.71685, "map_at_1": 0.52612, "map_at_3": 0.60571, "map_at_5": 0.62278, "map_at_10": 0.63374, "map_at_20": 0.63712, "map_at_100": 0.63901, "map_at_1000": 0.63912, "recall_at_1": 0.52612, "recall_at_3": 0.70717, "recall_at_5": 0.78068, "recall_at_10": 0.8627, "recall_at_20": 0.9113, "recall_at_100": 0.98299, "recall_at_1000": 1.0, "precision_at_1": 0.52612, "precision_at_3": 0.23572, "precision_at_5": 0.15614, "precision_at_10": 0.08627, "precision_at_20": 0.04557, "precision_at_100": 0.00983, "precision_at_1000": 0.001, "mrr_at_1": 0.523086269744836, "mrr_at_3": 0.6043944916970443, "mrr_at_5": 0.621375050627786, "mrr_at_10": 0.6323644139713411, "mrr_at_20": 0.6357366790886096, "mrr_at_100": 0.6376256697893141, "mrr_at_1000": 0.6377411868121536, "naucs_at_1_max": 0.33811955835159613, "naucs_at_1_std": -0.2381167142437989, "naucs_at_1_diff1": 0.6323152677093454, "naucs_at_3_max": 0.3946300000869103, "naucs_at_3_std": -0.20840802207079867, "naucs_at_3_diff1": 0.5274274523562704, "naucs_at_5_max": 0.42657653603792894, "naucs_at_5_std": -0.1326409039026798, "naucs_at_5_diff1": 0.4885846495154553, "naucs_at_10_max": 0.5137484446091465, "naucs_at_10_std": 0.019346581681626673, "naucs_at_10_diff1": 0.42169752139824945, "naucs_at_20_max": 0.6072014485839812, "naucs_at_20_std": 0.1761512267675784, "naucs_at_20_diff1": 0.43977115184885635, "naucs_at_100_max": 0.6586502486298156, "naucs_at_100_std": 0.41114829373671014, "naucs_at_100_diff1": 0.328920319124671, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.17, "ndcg_at_3": 0.23309, "ndcg_at_5": 0.25375, "ndcg_at_10": 0.29065, "ndcg_at_20": 0.32128, "ndcg_at_100": 0.35523, "ndcg_at_1000": 0.38706, "map_at_1": 0.17, "map_at_3": 0.22, "map_at_5": 0.2315, "map_at_10": 0.24749, "map_at_20": 0.25603, "map_at_100": 0.25962, "map_at_1000": 0.26085, "recall_at_1": 0.17, "recall_at_3": 0.27, "recall_at_5": 0.32, "recall_at_10": 0.43, "recall_at_20": 0.55, "recall_at_100": 0.75, "recall_at_1000": 1.0, "precision_at_1": 0.17, "precision_at_3": 0.09, "precision_at_5": 0.064, "precision_at_10": 0.043, "precision_at_20": 0.0275, "precision_at_100": 0.0075, "precision_at_1000": 0.001, "mrr_at_1": 0.17, "mrr_at_3": 0.22, "mrr_at_5": 0.23149999999999998, "mrr_at_10": 0.2474920634920635, "mrr_at_20": 0.25602712401977107, "mrr_at_100": 0.25962018885755245, "mrr_at_1000": 0.26084856763570835, "naucs_at_1_max": -0.12369130347091092, "naucs_at_1_std": -0.26117090522378494, "naucs_at_1_diff1": 0.18399677295146594, "naucs_at_3_max": -0.08141897171433048, "naucs_at_3_std": -0.2319822699569536, "naucs_at_3_diff1": 0.21450794868516393, "naucs_at_5_max": -0.06649363159584717, "naucs_at_5_std": -0.22092743230225845, "naucs_at_5_diff1": 0.22779085946698055, "naucs_at_10_max": -0.005371239806999001, "naucs_at_10_std": -0.21803592097904778, "naucs_at_10_diff1": 0.23080724662184188, "naucs_at_20_max": 0.04087621989749442, "naucs_at_20_std": -0.11500386154602169, "naucs_at_20_diff1": 0.14043389735308645, "naucs_at_100_max": 0.1548927263212983, "naucs_at_100_std": 0.10553636839351159, "naucs_at_100_diff1": 0.027901622187337105, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.90155, "ndcg_at_5": 0.90972, "ndcg_at_10": 0.91662, "ndcg_at_20": 0.92177, "ndcg_at_100": 0.92382, "ndcg_at_1000": 0.92509, "map_at_1": 0.87, "map_at_3": 0.895, "map_at_5": 0.8995, "map_at_10": 0.9026, "map_at_20": 0.90405, "map_at_100": 0.90441, "map_at_1000": 0.90445, "recall_at_1": 0.87, "recall_at_3": 0.92, "recall_at_5": 0.94, "recall_at_10": 0.96, "recall_at_20": 0.98, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.30667, "precision_at_5": 0.188, "precision_at_10": 0.096, "precision_at_20": 0.049, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.895, "mrr_at_5": 0.8995000000000001, "mrr_at_10": 0.9025952380952381, "mrr_at_20": 0.9040535714285715, "mrr_at_100": 0.9044107142857143, "mrr_at_1000": 0.9044523809523811, "naucs_at_1_max": 0.46004591572243314, "naucs_at_1_std": -0.07631637413907967, "naucs_at_1_diff1": 0.7817892320225138, "naucs_at_3_max": 0.7529178338001865, "naucs_at_3_std": -0.0445845004668537, "naucs_at_3_diff1": 0.7514589169000948, "naucs_at_5_max": 0.6923436041083116, "naucs_at_5_std": -0.17577030812325037, "naucs_at_5_diff1": 0.7149081854964241, "naucs_at_10_max": 0.68265639589169, "naucs_at_10_std": -0.19304388422035199, "naucs_at_10_diff1": 0.6418067226890778, "naucs_at_20_max": 0.4960317460317504, "naucs_at_20_std": -0.661531279178339, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_100_max": 0.12278244631185525, "naucs_at_100_std": -1.1517273576096694, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.81, "ndcg_at_3": 0.88047, "ndcg_at_5": 0.88434, "ndcg_at_10": 0.89661, "ndcg_at_20": 0.89661, "ndcg_at_100": 0.89853, "ndcg_at_1000": 0.8999, "map_at_1": 0.81, "map_at_3": 0.86333, "map_at_5": 0.86533, "map_at_10": 0.87001, "map_at_20": 0.87001, "map_at_100": 0.87029, "map_at_1000": 0.87035, "recall_at_1": 0.81, "recall_at_3": 0.93, "recall_at_5": 0.94, "recall_at_10": 0.98, "recall_at_20": 0.98, "recall_at_100": 0.99, "recall_at_1000": 1.0, "precision_at_1": 0.81, "precision_at_3": 0.31, "precision_at_5": 0.188, "precision_at_10": 0.098, "precision_at_20": 0.049, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.81, "mrr_at_3": 0.8633333333333334, "mrr_at_5": 0.8653333333333333, "mrr_at_10": 0.8700119047619048, "mrr_at_20": 0.8700119047619048, "mrr_at_100": 0.8702896825396826, "mrr_at_1000": 0.8703521825396825, "naucs_at_1_max": 0.6153281994866158, "naucs_at_1_std": -0.18463513017968597, "naucs_at_1_diff1": 0.9252187123474257, "naucs_at_3_max": 0.840602907829797, "naucs_at_3_std": -0.4598506069094306, "naucs_at_3_diff1": 0.9206349206349186, "naucs_at_5_max": 0.8358232181761603, "naucs_at_5_std": -0.6288515406162465, "naucs_at_5_diff1": 0.9074074074074102, "naucs_at_10_max": 0.7117180205415541, "naucs_at_10_std": -0.8085901027077421, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7117180205415541, "naucs_at_20_std": -0.8085901027077421, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": 0.8692810457516374, "naucs_at_100_std": 0.12278244631185525, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.84, "ndcg_at_3": 0.90047, "ndcg_at_5": 0.91296, "ndcg_at_10": 0.91944, "ndcg_at_20": 0.91944, "ndcg_at_100": 0.92111, "ndcg_at_1000": 0.92111, "map_at_1": 0.84, "map_at_3": 0.88667, "map_at_5": 0.89367, "map_at_10": 0.89635, "map_at_20": 0.89635, "map_at_100": 0.8965, "map_at_1000": 0.8965, "recall_at_1": 0.84, "recall_at_3": 0.94, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.84, "precision_at_3": 0.31333, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.84, "mrr_at_3": 0.8866666666666666, "mrr_at_5": 0.8936666666666666, "mrr_at_10": 0.896345238095238, "mrr_at_20": 0.896345238095238, "mrr_at_100": 0.8965039682539682, "mrr_at_1000": 0.8965039682539682, "naucs_at_1_max": 0.5586340206185558, "naucs_at_1_std": 0.12067378497790919, "naucs_at_1_diff1": 0.8251104565537545, "naucs_at_3_max": 0.5624805477746656, "naucs_at_3_std": 0.05213196389666916, "naucs_at_3_diff1": 0.7506224712107028, "naucs_at_5_max": 0.8513849984438217, "naucs_at_5_std": 0.1901649548708344, "naucs_at_5_diff1": 0.59383753501401, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.35807656395891135, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.35807656395891135, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.95524, "ndcg_at_5": 0.95954, "ndcg_at_10": 0.95954, "ndcg_at_20": 0.95954, "ndcg_at_100": 0.9617, "ndcg_at_1000": 0.9617, "map_at_1": 0.92, "map_at_3": 0.94667, "map_at_5": 0.94917, "map_at_10": 0.94917, "map_at_20": 0.94917, "map_at_100": 0.94958, "map_at_1000": 0.94958, "recall_at_1": 0.92, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9466666666666665, "mrr_at_5": 0.9491666666666666, "mrr_at_10": 0.9491666666666666, "mrr_at_20": 0.9491666666666666, "mrr_at_100": 0.9495833333333333, "mrr_at_1000": 0.9495833333333333, "naucs_at_1_max": 0.3300070028011205, "naucs_at_1_std": -0.07376283846871944, "naucs_at_1_diff1": 0.8074229691876756, "naucs_at_3_max": 0.7117180205415458, "naucs_at_3_std": -0.17133520074696315, "naucs_at_3_diff1": 0.7957516339869218, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -0.1713352007469681, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": 0.5541549953314738, "naucs_at_10_std": -0.1713352007469681, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": 0.5541549953314738, "naucs_at_20_std": -0.1713352007469681, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}} |