{"GEM/web_nlg_en": {"PALM_prompt": {"bleu": 0.44983162806527605, "bleu_stderr": 0.038154018591984334, "rouge1_fmeasure": 0.11147137683487435, "rouge1_fmeasure_stderr": 0.0019659665903934338, "rouge1_precision": 0.071413617880418, "rouge1_precision_stderr": 0.0014314988577837512, "rouge1_recall": 0.36121845616052844, "rouge1_recall_stderr": 0.005456116499217837, "rouge2_fmeasure": 0.05042435840541935, "rouge2_fmeasure_stderr": 0.0012067757893367797, "rouge2_precision": 0.03213823237467542, "rouge2_precision_stderr": 0.0008563458625515534, "rouge2_recall": 0.17097921105516134, "rouge2_recall_stderr": 0.0036300270018532117, "rougeL_fmeasure": 0.10420727855856317, "rougeL_fmeasure_stderr": 0.0017848841642653212, "rougeL_precision": 0.06670334417280534, "rougeL_precision_stderr": 0.0012884965791976942, "rougeL_recall": 0.3363081320406107, "rougeL_recall_stderr": 0.004917871710898438, "rougeLsum_fmeasure": 0.10544393773714214, "rougeLsum_fmeasure_stderr": 0.001845516360617543, "rougeLsum_precision": 0.06755791243184023, "rougeLsum_precision_stderr": 0.001343070108019487, "rougeLsum_recall": 0.34079824561716204, "rougeLsum_recall_stderr": 0.005016173768502028}}, "GEM/wiki_lingua_en": {"tldr_en": {"bleu": 2.275559653426858, "bleu_stderr": 0.06399269196052909, "rouge1_fmeasure": 0.20054602824156798, "rouge1_fmeasure_stderr": 0.002000312312143333, "rouge1_precision": 0.17892096147640557, "rouge1_precision_stderr": 0.0022648525570164767, "rouge1_recall": 0.28394021777601, "rouge1_recall_stderr": 0.0028653211542179397, "rouge2_fmeasure": 0.04655649352957454, "rouge2_fmeasure_stderr": 0.0009914762488450282, "rouge2_precision": 0.042047063434399935, "rouge2_precision_stderr": 0.0010066804392018039, "rouge2_recall": 0.06791589976567478, "rouge2_recall_stderr": 0.0015879262638163016, "rougeL_fmeasure": 0.14794102711054255, "rougeL_fmeasure_stderr": 0.0013617604986620765, "rougeL_precision": 0.13109089164647547, "rougeL_precision_stderr": 0.0015859197320449397, "rougeL_recall": 0.2143739250636573, "rougeL_recall_stderr": 0.0022488504088131875, "rougeLsum_fmeasure": 0.18664417732685562, "rougeLsum_fmeasure_stderr": 0.0018598279590220587, "rougeLsum_precision": 0.16637070185395486, "rougeLsum_precision_stderr": 0.0021074798010892954, "rougeLsum_recall": 0.26496660044445103, "rougeLsum_recall_stderr": 0.0027004470852608512}}, "e2e_nlg_cleaned": {"generate_text_restaurant": {"bleu": 4.60857341288916, "bleu_stderr": 0.07091478097604244, "rouge1_fmeasure": 0.28307413792452757, "rouge1_fmeasure_stderr": 0.0017345666507319096, "rouge1_precision": 0.22003605992748757, "rouge1_precision_stderr": 0.0016634925664108194, "rouge1_recall": 0.4349109426362562, "rouge1_recall_stderr": 0.0026795975680511614, "rouge2_fmeasure": 0.10538118293432128, "rouge2_fmeasure_stderr": 0.0011922460727243065, "rouge2_precision": 0.08133706936294005, "rouge2_precision_stderr": 0.001011706981693919, "rouge2_recall": 0.16609374412846958, "rouge2_recall_stderr": 0.001955786555953094, "rougeL_fmeasure": 0.2320488807632003, "rougeL_fmeasure_stderr": 0.001266631291357149, "rougeL_precision": 0.17905562119769688, "rougeL_precision_stderr": 0.0011815805464116809, "rougeL_recall": 0.36101365402052843, "rougeL_recall_stderr": 0.002253209383791391, "rougeLsum_fmeasure": 0.22885806406725848, "rougeLsum_fmeasure_stderr": 0.0016097640905790775, "rougeLsum_precision": 0.17784834223848153, "rougeLsum_precision_stderr": 0.001488841924808455, "rougeLsum_recall": 0.3521512097724569, "rougeLsum_recall_stderr": 0.0025235785084938953}}, "gem_xsum": 
{"article_DOC_summary": {"bleu": 1.2470641961768807, "bleu_stderr": 0.09745406322184069, "rouge1_fmeasure": 0.16214203494333726, "rouge1_fmeasure_stderr": 0.0024112737892753133, "rouge1_precision": 0.11527813018783109, "rouge1_precision_stderr": 0.0017932590108456488, "rouge1_recall": 0.2851427490566371, "rouge1_recall_stderr": 0.004149472686464088, "rouge2_fmeasure": 0.030418541159472234, "rouge2_fmeasure_stderr": 0.0012717097325942262, "rouge2_precision": 0.021424469955042002, "rouge2_precision_stderr": 0.0009015566861994638, "rouge2_recall": 0.055056684725856495, "rouge2_recall_stderr": 0.0023739933653977095, "rougeL_fmeasure": 0.12824160933859685, "rougeL_fmeasure_stderr": 0.0017962920547828305, "rougeL_precision": 0.0909829458725424, "rougeL_precision_stderr": 0.0013238250644292398, "rougeL_recall": 0.2270115690543438, "rougeL_recall_stderr": 0.0032363302876825557, "rougeLsum_fmeasure": 0.12983804475653796, "rougeLsum_fmeasure_stderr": 0.001974954778554704, "rougeLsum_precision": 0.09207403605608483, "rougeLsum_precision_stderr": 0.001448778184518186, "rougeLsum_recall": 0.22997778938614646, "rougeLsum_recall_stderr": 0.0035243817191569385}}}