Add verifyToken field to verify that evaluation results are produced by Hugging Face's automatic model evaluator

Beep boop, I am a bot from Hugging Face's automatic model evaluator 👋! We've added a new `verifyToken` field to each of your evaluation results to verify that they were produced by the model evaluator. Accept this PR to ensure that your results remain listed as **verified** on the [Hub leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).

Files changed (1) hide show

README.md +19 -15

README.md CHANGED Viewed

@@ -1,4 +1,11 @@
 ---
 widget:
 - context: While deep and large pre-trained models are the state-of-the-art for various
     natural language processing tasks, their huge size poses significant challenges
@@ -28,13 +35,6 @@ widget:
     question answering dataset and a massive multi-lingual NER dataset with 41 languages.
   example_title: xtremedistil q2
   text: On what is the model validated?
-datasets:
-- squad_v2
-metrics:
-- f1
-- exact
-tags:
-- question-answering
 model-index:
 - name: nbroad/xdistil-l12-h384-squad2
   results:
@@ -47,14 +47,16 @@ model-index:
       config: squad_v2
       split: validation
     metrics:
-    - name: Exact Match
-      type: exact_match
       value: 75.4591
       verified: true
-    - name: F1
-      type: f1
       value: 79.3321
       verified: true
   - task:
       type: question-answering
       name: Question Answering
@@ -64,14 +66,16 @@ model-index:
       config: plain_text
       split: validation
     metrics:
-    - name: Exact Match
-      type: exact_match
       value: 81.8604
       verified: true
-    - name: F1
-      type: f1
       value: 89.6654
       verified: true
 ---
 xtremedistil-l12-h384 trained on SQuAD 2.0

 ---
+tags:
+- question-answering
+datasets:
+- squad_v2
+metrics:
+- f1
+- exact
 widget:
 - context: While deep and large pre-trained models are the state-of-the-art for various
     natural language processing tasks, their huge size poses significant challenges
     question answering dataset and a massive multi-lingual NER dataset with 41 languages.
   example_title: xtremedistil q2
   text: On what is the model validated?
 model-index:
 - name: nbroad/xdistil-l12-h384-squad2
   results:
       config: squad_v2
       split: validation
     metrics:
+    - type: exact_match
       value: 75.4591
+      name: Exact Match
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2QzODE0YTE5ZjMyMWY3NzdjNjcwZDJjY2YyMjBkMWJjMTg3ZDAwYmUwNzU3ZTlkODhmM2VhMWFkY2I2ZjgzMyIsInZlcnNpb24iOjF9.IEjMS4U3uuSP6PfRcD87VFHBIdhoDsIfXkAYV7sz_bveSqhTE16VKJzHaDilCkUCBHYGTjoZ7pDqlYDcF6NKCQ
+    - type: f1
       value: 79.3321
+      name: F1
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMjAxMDdkNzcxNjAzNzQ4N2MwN2Y3ZDZhOGM5MmU0MzYyOGFjNDM3NjJkNGUzYTkyYmY3MDk1ZGIxYzQ0ZDllMyIsInZlcnNpb24iOjF9.N0jPenoMpxbTzKeJciDfoXiLronfXx3uM-A9NEJCMQ9tiApF-EyNmh4F-G9GBXdbVsq1IZ3MbPto0mn0P9hADQ
   - task:
       type: question-answering
       name: Question Answering
       config: plain_text
       split: validation
     metrics:
+    - type: exact_match
       value: 81.8604
+      name: Exact Match
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzRiYjBkYTU0MGRjZDZhNzY2MDZhMGYzZDY2NDU2MTMyMjk0M2YwNTcxZjkyMDNkYTE0YTA5ODVlY2EwOWIxYyIsInZlcnNpb24iOjF9.3jco8t0D7YkHtWHWRttV3y3L0ylQZj3y534HtIW7NuUX34nvVSGMzHVJ32BgaFDomOtnJkaSQFXmumO10FL2BA
+    - type: f1
       value: 89.6654
+      name: F1
       verified: true
+      verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZjg5YzNmODRlMTM1ZWQ1MjYwYzVkZmJhMzAwMDMzZGQyYzE1MzFlZGFlYmI4Y2JlMTQyNTBkZDRhMWQxYWQ2MCIsInZlcnNpb24iOjF9.Ld2IHVoqmZ-YFx71FgpuoVDEmAAboxRvhke1DhJYLbdIefM-AH60-58OlZcfZGxgUv6fywGjoPCE9g7CxbSzAQ
 ---
 xtremedistil-l12-h384 trained on SQuAD 2.0