OpenLLM-Ro
/

RoLlama2-7b-Instruct-2024-10-09

Safetensors

Romanian

llama

Eval Results

Model card Files Files and versions Community

mihaimasala commited on Oct 9

Commit

d0aef16

•

1 Parent(s): d1243da

Update README.md

Browse files

Files changed (1) hide show

README.md +474 -474

README.md CHANGED Viewed

@@ -15,480 +15,480 @@ datasets:
 - OpenLLM-Ro/ro_sft_oasst
 - OpenLLM-Ro/ro_sft_ultrachat
 model-index:
-    - name: OpenLLM-Ro/RoLlama2-7b-Instruct-v2
-      results:
-        - task:
-            type: text-generation
-          dataset:
-            name: RoMT-Bench
-            type: RoMT-Bench
-          metrics:
-            - name: Score
-              type: Score
-              value: 4.43
-        - task:
-            type: text-generation
-          dataset:
-            name: RoCulturaBench
-            type: RoCulturaBench
-          metrics:
-            - name: Score
-              type: Score
-              value: 4.08
-        - task:
-            type: text-generation
-          dataset:
-            name: Romanian_Academic_Benchmarks
-            type: Romanian_Academic_Benchmarks
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 44.50
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_arc_challenge
-            type: OpenLLM-Ro/ro_arc_challenge
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 44.73
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_mmlu
-            type: OpenLLM-Ro/ro_mmlu
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 40.39
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_winogrande
-            type: OpenLLM-Ro/ro_winogrande
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 63.67
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_hellaswag
-            type: OpenLLM-Ro/ro_hellaswag
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 59.12
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_gsm8k
-            type: OpenLLM-Ro/ro_gsm8k
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 13.29
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_truthfulqa
-            type: OpenLLM-Ro/ro_truthfulqa
-          metrics:
-            - name: Average accuracy
-              type: accuracy
-              value: 45.78
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary
-            type: LaRoSeDa_binary
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 97.66
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass
-            type: LaRoSeDa_multiclass
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 62.41
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary_finetuned
-            type: LaRoSeDa_binary_finetuned
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 97.97
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass_finetuned
-            type: LaRoSeDa_multiclass_finetuned
-          metrics:
-            - name: Average macro-f1
-              type: macro-f1
-              value: 60.89
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO
-            type: WMT_EN-RO
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 27.13
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN
-            type: WMT_RO-EN
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 19.39
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO_finetuned
-            type: WMT_EN-RO_finetuned
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 27.63
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN_finetuned
-            type: WMT_RO-EN_finetuned
-          metrics:
-            - name: Average bleu
-              type: bleu
-              value: 39.75
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD
-            type: XQuAD
-          metrics:
-            - name: Average exact_match
-              type: exact_match
-              value: 45.71
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD
-            type: XQuAD
-          metrics:
-            - name: Average f1
-              type: f1
-              value: 65.08
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_finetuned
-            type: XQuAD_finetuned
-          metrics:
-            - name: Average exact_match
-              type: exact_match
-              value: 59.24
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_finetuned
-            type: XQuAD_finetuned
-          metrics:
-            - name: Average f1
-              type: f1
-              value: 74.25
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: Average spearman
-              type: spearman
-              value: 59.69
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: Average pearson
-              type: pearson
-              value: 57.16
-        - task:
-            type: text-generation
-          dataset:
-            name: STS_finetuned
-            type: STS_finetuned
-          metrics:
-            - name: Average spearman
-              type: spearman
-              value: 84.66
-        - task:
-            type: text-generation
-          dataset:
-            name: STS_finetuned
-            type: STS_finetuned
-          metrics:
-            - name: Average pearson
-              type: pearson
-              value: 85.07
-        - task:
-            type: text-generation
-          dataset:
-            name: RoMT-Bench
-            type: RoMT-Bench
-          metrics:
-            - name: First turn
-              type: Score
-              value: 4.92
-            - name: Second turn
-              type: Score
-              value: 3.94
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_arc_challenge
-            type: OpenLLM-Ro/ro_arc_challenge
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 42.67
-            - name: 1-shot
-              type: accuracy
-              value: 44.64
-            - name: 3-shot
-              type: accuracy
-              value: 44.90
-            - name: 5-shot
-              type: accuracy
-              value: 45.16
-            - name: 10-shot
-              type: accuracy
-              value: 45.67
-            - name: 25-shot
-              type: accuracy
-              value: 45.33
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_mmlu
-            type: OpenLLM-Ro/ro_mmlu
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 39.89
-            - name: 1-shot
-              type: accuracy
-              value: 40.08
-            - name: 3-shot
-              type: accuracy
-              value: 40.60
-            - name: 5-shot
-              type: accuracy
-              value: 40.99
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_winogrande
-            type: OpenLLM-Ro/ro_winogrande
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 63.06
-            - name: 1-shot
-              type: accuracy
-              value: 62.98
-            - name: 3-shot
-              type: accuracy
-              value: 65.19
-            - name: 5-shot
-              type: accuracy
-              value: 63.46
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_hellaswag
-            type: OpenLLM-Ro/ro_hellaswag
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 58.82
-            - name: 1-shot
-              type: accuracy
-              value: 58.44
-            - name: 3-shot
-              type: accuracy
-              value: 59.28
-            - name: 5-shot
-              type: accuracy
-              value: 59.29
-            - name: 10-shot
-              type: accuracy
-              value: 59.77
-        - task:
-            type: text-generation
-          dataset:
-            name: OpenLLM-Ro/ro_gsm8k
-            type: OpenLLM-Ro/ro_gsm8k
-          metrics:
-            - name: 0-shot
-              type: accuracy
-              value: 6.14
-            - name: 1-shot
-              type: accuracy
-              value: 15.01
-            - name: 3-shot
-              type: accuracy
-              value: 18.72
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_binary
-            type: LaRoSeDa_binary
-          metrics:
-            - name: 0-shot
-              type: macro-f1
-              value: 98.20
-            - name: 1-shot
-              type: macro-f1
-              value: 96.63
-            - name: 3-shot
-              type: macro-f1
-              value: 97.67
-            - name: 5-shot
-              type: macro-f1
-              value: 98.13
-        - task:
-            type: text-generation
-          dataset:
-            name: LaRoSeDa_multiclass
-            type: LaRoSeDa_multiclass
-          metrics:
-            - name: 0-shot
-              type: macro-f1
-              value: 63.43
-            - name: 1-shot
-              type: macro-f1
-              value: 53.58
-            - name: 3-shot
-              type: macro-f1
-              value: 63.78
-            - name: 5-shot
-              type: macro-f1
-              value: 68.85
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_EN-RO
-            type: WMT_EN-RO
-          metrics:
-            - name: 0-shot
-              type: bleu
-              value: 20.57
-            - name: 1-shot
-              type: bleu
-              value: 29.59
-            - name: 3-shot
-              type: bleu
-              value: 29.50
-            - name: 5-shot
-              type: bleu
-              value: 28.88
-        - task:
-            type: text-generation
-          dataset:
-            name: WMT_RO-EN
-            type: WMT_RO-EN
-          metrics:
-            - name: 0-shot
-              type: bleu
-              value: 2.19
-            - name: 1-shot
-              type: bleu
-              value: 9.97
-            - name: 3-shot
-              type: bleu
-              value: 31.19
-            - name: 5-shot
-              type: bleu
-              value: 34.23
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_EM
-            type: XQuAD_EM
-          metrics:
-            - name: 0-shot
-              type: exact_match
-              value: 40.25
-            - name: 1-shot
-              type: exact_match
-              value: 46.47
-            - name: 3-shot
-              type: exact_match
-              value: 47.56
-            - name: 5-shot
-              type: exact_match
-              value: 48.57
-        - task:
-            type: text-generation
-          dataset:
-            name: XQuAD_F1
-            type: XQuAD_F1
-          metrics:
-            - name: 0-shot
-              type: f1
-              value: 62.24
-            - name: 1-shot
-              type: f1
-              value: 65.33
-            - name: 3-shot
-              type: f1
-              value: 65.89
-            - name: 5-shot
-              type: f1
-              value: 66.86
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: 0-shot
-              type: spearman
-              value: 55.44
-            - name: 1-shot
-              type: spearman
-              value: 61.98
-            - name: 3-shot
-              type: spearman
-              value: 61.65
-        - task:
-            type: text-generation
-          dataset:
-            name: STS
-            type: STS
-          metrics:
-            - name: 0-shot
-              type: pearson
-              value: 56.18
-            - name: 1-shot
-              type: pearson
-              value: 58.37
-            - name: 3-shot
-              type: pearson
-              value: 56.94
 ---
 # Model Card for Model ID

 - OpenLLM-Ro/ro_sft_oasst
 - OpenLLM-Ro/ro_sft_ultrachat
 model-index:
+- name: OpenLLM-Ro/RoLlama2-7b-Instruct-v2
+  results:
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: Score
+      type: Score
+      value: 4.43
+  - task:
+      type: text-generation
+    dataset:
+      name: RoCulturaBench
+      type: RoCulturaBench
+    metrics:
+    - name: Score
+      type: Score
+      value: 4.08
+  - task:
+      type: text-generation
+    dataset:
+      name: Romanian_Academic_Benchmarks
+      type: Romanian_Academic_Benchmarks
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 44.5
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 44.73
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 40.39
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 63.67
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 59.12
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 13.29
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_truthfulqa
+      type: OpenLLM-Ro/ro_truthfulqa
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 45.78
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 97.66
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 62.41
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary_finetuned
+      type: LaRoSeDa_binary_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 97.97
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass_finetuned
+      type: LaRoSeDa_multiclass_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 60.89
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 27.13
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 19.39
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO_finetuned
+      type: WMT_EN-RO_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 27.63
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN_finetuned
+      type: WMT_RO-EN_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 39.75
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 45.71
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 65.08
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 59.24
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 74.25
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 59.69
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 57.16
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 84.66
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 85.07
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: First turn
+      type: Score
+      value: 4.92
+    - name: Second turn
+      type: Score
+      value: 3.94
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 42.67
+    - name: 1-shot
+      type: accuracy
+      value: 44.64
+    - name: 3-shot
+      type: accuracy
+      value: 44.9
+    - name: 5-shot
+      type: accuracy
+      value: 45.16
+    - name: 10-shot
+      type: accuracy
+      value: 45.67
+    - name: 25-shot
+      type: accuracy
+      value: 45.33
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 39.89
+    - name: 1-shot
+      type: accuracy
+      value: 40.08
+    - name: 3-shot
+      type: accuracy
+      value: 40.6
+    - name: 5-shot
+      type: accuracy
+      value: 40.99
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 63.06
+    - name: 1-shot
+      type: accuracy
+      value: 62.98
+    - name: 3-shot
+      type: accuracy
+      value: 65.19
+    - name: 5-shot
+      type: accuracy
+      value: 63.46
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 58.82
+    - name: 1-shot
+      type: accuracy
+      value: 58.44
+    - name: 3-shot
+      type: accuracy
+      value: 59.28
+    - name: 5-shot
+      type: accuracy
+      value: 59.29
+    - name: 10-shot
+      type: accuracy
+      value: 59.77
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 6.14
+    - name: 1-shot
+      type: accuracy
+      value: 15.01
+    - name: 3-shot
+      type: accuracy
+      value: 18.72
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 98.2
+    - name: 1-shot
+      type: macro-f1
+      value: 96.63
+    - name: 3-shot
+      type: macro-f1
+      value: 97.67
+    - name: 5-shot
+      type: macro-f1
+      value: 98.13
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 63.43
+    - name: 1-shot
+      type: macro-f1
+      value: 53.58
+    - name: 3-shot
+      type: macro-f1
+      value: 63.78
+    - name: 5-shot
+      type: macro-f1
+      value: 68.85
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 20.57
+    - name: 1-shot
+      type: bleu
+      value: 29.59
+    - name: 3-shot
+      type: bleu
+      value: 29.5
+    - name: 5-shot
+      type: bleu
+      value: 28.88
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 2.19
+    - name: 1-shot
+      type: bleu
+      value: 9.97
+    - name: 3-shot
+      type: bleu
+      value: 31.19
+    - name: 5-shot
+      type: bleu
+      value: 34.23
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_EM
+      type: XQuAD_EM
+    metrics:
+    - name: 0-shot
+      type: exact_match
+      value: 40.25
+    - name: 1-shot
+      type: exact_match
+      value: 46.47
+    - name: 3-shot
+      type: exact_match
+      value: 47.56
+    - name: 5-shot
+      type: exact_match
+      value: 48.57
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_F1
+      type: XQuAD_F1
+    metrics:
+    - name: 0-shot
+      type: f1
+      value: 62.24
+    - name: 1-shot
+      type: f1
+      value: 65.33
+    - name: 3-shot
+      type: f1
+      value: 65.89
+    - name: 5-shot
+      type: f1
+      value: 66.86
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: spearman
+      value: 55.44
+    - name: 1-shot
+      type: spearman
+      value: 61.98
+    - name: 3-shot
+      type: spearman
+      value: 61.65
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: pearson
+      value: 56.18
+    - name: 1-shot
+      type: pearson
+      value: 58.37
+    - name: 3-shot
+      type: pearson
+      value: 56.94
+library_name: transformers
 ---
 # Model Card for Model ID