Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Commit
•
33e7caf
1
Parent(s):
e2369db
External models: Sabia-3 e Llama-3.1-405b
Browse files
- external_models_results.json +45 -0
external_models_results.json
CHANGED
@@ -241,5 +241,50 @@
|
|
241 |
},
|
242 |
"result_metrics_average": 0.7777870380406591,
|
243 |
"result_metrics_npm": 0.6740728488043128
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
}
|
245 |
]
|
|
|
241 |
},
|
242 |
"result_metrics_average": 0.7777870380406591,
|
243 |
"result_metrics_npm": 0.6740728488043128
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"model": "llama_405b_instruct",
|
247 |
+
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
|
248 |
+
"link": "https://cloud.google.com/vertex-ai",
|
249 |
+
"date": "2024-08-20",
|
250 |
+
"status": "full",
|
251 |
+
"main_language": "English",
|
252 |
+
"model_type": "chat",
|
253 |
+
"params": 406.0,
|
254 |
+
"result_metrics": {
|
255 |
+
"enem_challenge": 0.8523442967109867,
|
256 |
+
"bluex": 0.8011126564673157,
|
257 |
+
"oab_exams": 0.7640091116173121,
|
258 |
+
"assin2_sts": 0.7888441732870783,
|
259 |
+
"assin2_rte": 0.6317630318610981,
|
260 |
+
"faquad_nli": 0.825063276593557,
|
261 |
+
"hatebr_offensive": 0.9073940659389119,
|
262 |
+
"portuguese_hate_speech": 0.7191480935512969,
|
263 |
+
"tweetsentbr": 0.7821434639106575
|
264 |
+
},
|
265 |
+
"result_metrics_average": 0.7857580188820238,
|
266 |
+
"result_metrics_npm": 0.6584973442501938
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"model": "sabia-3",
|
270 |
+
"name": "Sabiá-3",
|
271 |
+
"link": "https://www.maritaca.ai/",
|
272 |
+
"date": "2024-08-20",
|
273 |
+
"status": "full",
|
274 |
+
"main_language": "Portuguese",
|
275 |
+
"model_type": "proprietary",
|
276 |
+
"result_metrics": {
|
277 |
+
"enem_challenge": 0.8789363191042687,
|
278 |
+
"bluex": 0.7899860917941586,
|
279 |
+
"oab_exams": 0.8391799544419134,
|
280 |
+
"assin2_sts": 0.8253863689009022,
|
281 |
+
"assin2_rte": 0.9477034821619312,
|
282 |
+
"faquad_nli": 0.8243848812618203,
|
283 |
+
"hatebr_offensive": 0.5519158516393349,
|
284 |
+
"portuguese_hate_speech": 0.48273809523809524,
|
285 |
+
"tweetsentbr": 0.5632959814986498
|
286 |
+
},
|
287 |
+
"result_metrics_average": 0.744836336226786,
|
288 |
+
"result_metrics_npm": 0.5802643096708316
|
289 |
}
|
290 |
]
|