Update README.md

README.md

Refer to https://github.com/FlagOpen/FlagEmbedding
## Evaluation
### Metrics
nDCG, MRR, and MAP are ranking-aware metrics, while accuracy, precision, and recall ignore ranking. (Example: for top-10 retrieval, a ranking-aware metric scores the correct document differently at rank 1 than at rank 10, whereas accuracy, precision, and recall give the same score as long as it appears anywhere in the top 10.)
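
The difference is easy to check numerically. Here is a minimal sketch (an illustration for this section, not this repository's evaluation code), assuming binary relevance and a single relevant document per query:

```python
# Minimal illustration: ranking-aware vs. ranking-agnostic metrics for one
# query with exactly one relevant document, retrieved at a given rank.
import math

def precision_recall_at_k(rank: int, k: int = 10) -> tuple[float, float]:
    # A hit anywhere in the top-k scores the same, regardless of position.
    hit = 1.0 if rank <= k else 0.0
    return hit / k, hit  # precision@k, recall@k (one relevant doc in total)

def mrr(rank: int) -> float:
    # Reciprocal rank: rank 1 -> 1.0, rank 10 -> 0.1.
    return 1.0 / rank

def ndcg_at_k(rank: int, k: int = 10) -> float:
    # Binary relevance; the ideal ranking puts the hit at rank 1 (IDCG = 1).
    return 1.0 / math.log2(rank + 1) if rank <= k else 0.0

for rank in (1, 10):
    p, r = precision_recall_at_k(rank)
    print(f"rank {rank:2d}: P@10={p:.2f} R@10={r:.2f} "
          f"MRR={mrr(rank):.2f} nDCG@10={ndcg_at_k(rank):.3f}")
# rank  1: P@10=0.10 R@10=1.00 MRR=1.00 nDCG@10=1.000
# rank 10: P@10=0.10 R@10=1.00 MRR=0.10 nDCG@10=0.289
```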
### Bi-encoder and Cross-encoder
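
A bi-encoder embeds the query and each document independently, so document vectors can be precomputed and searched at scale; a cross-encoder reads a (query, document) pair jointly and scores it, which is more accurate but too slow to run over a full corpus. The usual pipeline retrieves top-k candidates with a bi-encoder and reranks them with a cross-encoder. Here is a minimal sketch using sentence-transformers (the model IDs are illustrative picks from the tables below; this is not the benchmark's own harness):

```python
# Illustrative retrieve-then-rerank sketch; not the benchmark harness.
# The model IDs below are example picks from the tables in this section.
from sentence_transformers import CrossEncoder, SentenceTransformer, util

docs = [
    "The central bank raised its policy rate by 25 basis points.",
    "Bond prices move inversely to interest rates.",
    "The fund lowered its expense ratio to 0.15%.",
]
query = "How do rate hikes affect bond prices?"

# Bi-encoder: embed query and documents independently, rank by cosine.
bi = SentenceTransformer("BAAI/bge-m3")
scores = util.cos_sim(bi.encode(query), bi.encode(docs))[0]
candidates = scores.topk(k=2).indices.tolist()

# Cross-encoder: jointly score each (query, candidate) pair and rerank.
# Slower per pair, so it only sees the bi-encoder's top-k candidates.
ce = CrossEncoder("BAAI/bge-reranker-v2-m3")
pair_scores = ce.predict([(query, docs[i]) for i in candidates])
reranked = [i for _, i in sorted(zip(pair_scores, candidates), reverse=True)]
print([docs[i] for i in reranked])
```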

This is a Korean embedding benchmark for the financial sector.

**Top-k 1**

Bi-Encoder (Sentence Transformer)

| Model name                            | F1         | Recall     | Precision  |
|---------------------------------------|------------|------------|------------|
| paraphrase-multilingual-mpnet-base-v2 | 0.3596     | 0.3596     | 0.3596     |
| KoSimCSE-roberta                      | 0.4298     | 0.4298     | 0.4298     |
| Cohere embed-multilingual-v3.0        | 0.3596     | 0.3596     | 0.3596     |
| openai ada 002                        | 0.4737     | 0.4737     | 0.4737     |
| multilingual-e5-large-instruct        | 0.4649     | 0.4649     | 0.4649     |
| Upstage Embedding                     | 0.6579     | 0.6579     | 0.6579     |
| paraphrase-multilingual-MiniLM-L12-v2 | 0.2982     | 0.2982     | 0.2982     |
| openai_embed_3_small                  | 0.5439     | 0.5439     | 0.5439     |
| ko-sroberta-multitask                 | 0.4211     | 0.4211     | 0.4211     |
| openai_embed_3_large                  | 0.6053     | 0.6053     | 0.6053     |
| KU-HIAI-ONTHEIT-large-v1              | 0.7105     | 0.7105     | 0.7105     |
| KU-HIAI-ONTHEIT-large-v1.1            | 0.7193     | 0.7193     | 0.7193     |
| kf-deberta-multitask                  | 0.4561     | 0.4561     | 0.4561     |
| gte-multilingual-base                 | 0.5877     | 0.5877     | 0.5877     |
| BGE-m3                                | 0.6578     | 0.6578     | 0.6578     |
| bge-m3-korean                         | 0.5351     | 0.5351     | 0.5351     |
| **BGE-m3-ko**                         | **0.7456** | **0.7456** | **0.7456** |

Cross-Encoder (Reranker)

| Model name                         | F1         | Recall     | Precision  |
|------------------------------------|------------|------------|------------|
| gte-multilingual-reranker-base     | 0.7281     | 0.7281     | 0.7281     |
| jina-reranker-v2-base-multilingual | 0.8070     | 0.8070     | 0.8070     |
| bge-reranker-v2-m3                 | 0.8772     | 0.8772     | 0.8772     |
| **bge-reranker-v2-m3-ko**          | **0.9123** | **0.9123** | **0.9123** |
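
A note on reading the tables: the scores are consistent with one relevant document per query, so at top-k 1 precision, recall, and F1 necessarily coincide. As k grows, recall can only rise while precision is diluted by the fixed denominator k (for hit rate h, precision@k = h/k and F1@k = 2h/(k+1)), which is why the Top-k 3 and Top-k 5 tables below show higher recall but lower precision and F1.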

**Top-k 3**

Bi-Encoder (Sentence Transformer)

| Model name                            | F1         | Recall     | Precision  |
|---------------------------------------|------------|------------|------------|
| paraphrase-multilingual-mpnet-base-v2 | 0.2368     | 0.4737     | 0.1579     |
| KoSimCSE-roberta                      | 0.3026     | 0.6053     | 0.2018     |
| Cohere embed-multilingual-v3.0        | 0.2851     | 0.5702     | 0.1901     |
| openai ada 002                        | 0.3553     | 0.7105     | 0.2368     |
| multilingual-e5-large-instruct        | 0.3333     | 0.6667     | 0.2222     |
| Upstage Embedding                     | 0.4211     | 0.8421     | 0.2807     |
| paraphrase-multilingual-MiniLM-L12-v2 | 0.2061     | 0.4123     | 0.1374     |
| openai_embed_3_small                  | 0.3640     | 0.7281     | 0.2427     |
| ko-sroberta-multitask                 | 0.2939     | 0.5877     | 0.1959     |
| openai_embed_3_large                  | 0.3947     | 0.7895     | 0.2632     |
| KU-HIAI-ONTHEIT-large-v1              | 0.4386     | 0.8772     | 0.2924     |
| KU-HIAI-ONTHEIT-large-v1.1            | 0.4430     | 0.8860     | 0.2953     |
| kf-deberta-multitask                  | 0.3158     | 0.6316     | 0.2105     |
| gte-multilingual-base                 | 0.4035     | 0.8070     | 0.2690     |
| BGE-m3                                | 0.4254     | 0.8508     | 0.2836     |
| bge-m3-korean                         | 0.3684     | 0.7368     | 0.2456     |
| **BGE-m3-ko**                         | **0.4517** | **0.9035** | **0.3011** |

Cross-Encoder (Reranker)

| Model name                         | F1         | Recall     | Precision  |
|------------------------------------|------------|------------|------------|
| gte-multilingual-reranker-base     | 0.4605     | 0.9211     | 0.3070     |
| jina-reranker-v2-base-multilingual | 0.4649     | 0.9298     | 0.3099     |
| bge-reranker-v2-m3                 | 0.4781     | 0.9561     | 0.3187     |
| **bge-reranker-v2-m3-ko**          | **0.4825** | **0.9649** | **0.3216** |

**Top-k 5**

Bi-Encoder (Sentence Transformer)

| Model name                            | F1         | Recall     | Precision  |
|---------------------------------------|------------|------------|------------|
| paraphrase-multilingual-mpnet-base-v2 | 0.1813     | 0.5439     | 0.1088     |
| KoSimCSE-roberta                      | 0.2164     | 0.6491     | 0.1298     |
| Cohere embed-multilingual-v3.0        | 0.2076     | 0.6228     | 0.1246     |
| openai ada 002                        | 0.2602     | 0.7807     | 0.1561     |
| multilingual-e5-large-instruct        | 0.2544     | 0.7632     | 0.1526     |
| Upstage Embedding                     | 0.2982     | 0.8947     | 0.1789     |
| paraphrase-multilingual-MiniLM-L12-v2 | 0.1637     | 0.4912     | 0.0982     |
| openai_embed_3_small                  | 0.2690     | 0.8070     | 0.1614     |
| ko-sroberta-multitask                 | 0.2164     | 0.6491     | 0.1298     |
| openai_embed_3_large                  | 0.2807     | 0.8421     | 0.1684     |
| KU-HIAI-ONTHEIT-large-v1              | 0.3041     | 0.9123     | 0.1825     |
| KU-HIAI-ONTHEIT-large-v1.1            | **0.3099** | **0.9298** | **0.1860** |
| kf-deberta-multitask                  | 0.2281     | 0.6842     | 0.1368     |
| gte-multilingual-base                 | 0.2865     | 0.8596     | 0.1719     |
| BGE-m3                                | 0.3041     | 0.9123     | 0.1825     |
| bge-m3-korean                         | 0.2661     | 0.7982     | 0.1596     |
| **BGE-m3-ko**                         | **0.3099** | **0.9298** | **0.1860** |

Cross-Encoder (Reranker)

| Model name                         | F1         | Recall     | Precision  |
|------------------------------------|------------|------------|------------|
| gte-multilingual-reranker-base     | 0.3158     | 0.9474     | 0.1895     |
| jina-reranker-v2-base-multilingual | 0.3129     | 0.9386     | 0.1877     |
| bge-reranker-v2-m3                 | **0.3216** | **0.9649** | **0.1930** |
| **bge-reranker-v2-m3-ko**          | **0.3216** | **0.9649** | **0.1930** |