eduagarcia committed on
Commit
33e7caf
1 Parent(s): e2369db

External models: Sabiá-3 and Llama-3.1-405B

Browse files
Files changed (1) hide show
  1. external_models_results.json +45 -0
external_models_results.json CHANGED
@@ -241,5 +241,50 @@
241
  },
242
  "result_metrics_average": 0.7777870380406591,
243
  "result_metrics_npm": 0.6740728488043128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  }
245
  ]
 
241
  },
242
  "result_metrics_average": 0.7777870380406591,
243
  "result_metrics_npm": 0.6740728488043128
244
+ },
245
+ {
246
+ "model": "llama_405b_instruct",
247
+ "name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
248
+ "link": "https://cloud.google.com/vertex-ai",
249
+ "date": "2024-08-20",
250
+ "status": "full",
251
+ "main_language": "English",
252
+ "model_type": "chat",
253
+ "params": 406.0,
254
+ "result_metrics": {
255
+ "enem_challenge": 0.8523442967109867,
256
+ "bluex": 0.8011126564673157,
257
+ "oab_exams": 0.7640091116173121,
258
+ "assin2_sts": 0.7888441732870783,
259
+ "assin2_rte": 0.6317630318610981,
260
+ "faquad_nli": 0.825063276593557,
261
+ "hatebr_offensive": 0.9073940659389119,
262
+ "portuguese_hate_speech": 0.7191480935512969,
263
+ "tweetsentbr": 0.7821434639106575
264
+ },
265
+ "result_metrics_average": 0.7857580188820238,
266
+ "result_metrics_npm": 0.6584973442501938
267
+ },
268
+ {
269
+ "model": "sabia-3",
270
+ "name": "Sabiá-3",
271
+ "link": "https://www.maritaca.ai/",
272
+ "date": "2024-08-20",
273
+ "status": "full",
274
+ "main_language": "Portuguese",
275
+ "model_type": "proprietary",
276
+ "result_metrics": {
277
+ "enem_challenge": 0.8789363191042687,
278
+ "bluex": 0.7899860917941586,
279
+ "oab_exams": 0.8391799544419134,
280
+ "assin2_sts": 0.8253863689009022,
281
+ "assin2_rte": 0.9477034821619312,
282
+ "faquad_nli": 0.8243848812618203,
283
+ "hatebr_offensive": 0.5519158516393349,
284
+ "portuguese_hate_speech": 0.48273809523809524,
285
+ "tweetsentbr": 0.5632959814986498
286
+ },
287
+ "result_metrics_average": 0.744836336226786,
288
+ "result_metrics_npm": 0.5802643096708316
289
  }
290
  ]