yuchenlin committed on
Commit
57a9cf2
1 Parent(s): 1757118

zebra logic bench

Browse files
ZeroEval-main/result_dirs/zebra-grid.summary.json CHANGED
@@ -285,6 +285,17 @@
285
  "Total Puzzles": 1000,
286
  "Reason Lens": "1216.40"
287
  },
 
 
 
 
 
 
 
 
 
 
 
288
  {
289
  "Model": "gpt-3.5-turbo-0125",
290
  "Mode": "greedy",
 
285
  "Total Puzzles": 1000,
286
  "Reason Lens": "1216.40"
287
  },
288
+ {
289
+ "Model": "Meta-Llama-3-8B-Instruct",
290
+ "Mode": "sampling",
291
+ "Puzzle Acc": "11.00",
292
+ "Cell Acc": "26.11",
293
+ "No answer": "22.30",
294
+ "Easy Puzzle Acc": "36.79",
295
+ "Hard Puzzle Acc": "0.97",
296
+ "Total Puzzles": 1000,
297
+ "Reason Lens": "1282.40"
298
+ },
299
  {
300
  "Model": "gpt-3.5-turbo-0125",
301
  "Mode": "greedy",
zebra_banner.png ADDED