Update README.md
Browse files
README.md
CHANGED
@@ -158,8 +158,8 @@ The training corpus consists of 26B tokens of several corpora gathered from web
|
|
158 |
| Gutenberg | es | 53.18M | 0.7140722425 |
|
159 |
| C4_ca | ca | 2826.00M | 2.142216727 |
|
160 |
| Biomedical | ca | 11.80M | 1.428144485 |
|
161 |
-
|
|
162 |
-
|
|
163 |
| CaWaC | ca | 57.79M | 2.142216727 |
|
164 |
| Wikipedia | ca | 228.01M | 3.570361212 |
|
165 |
| Vilaweb | ca | 50.34M | 2.142216727 |
|
|
|
158 |
| Gutenberg | es | 53.18M | 0.7140722425 |
|
159 |
| C4_ca | ca | 2826.00M | 2.142216727 |
|
160 |
| Biomedical | ca | 11.80M | 1.428144485 |
|
161 |
+
| RacoCatalà Noticias | ca | 17.16M | 2.142216727 |
|
162 |
+
| RacoCatalà Forums | ca | 333.73M | 2.142216727 |
|
163 |
| CaWaC | ca | 57.79M | 2.142216727 |
|
164 |
| Wikipedia | ca | 228.01M | 3.570361212 |
|
165 |
| Vilaweb | ca | 50.34M | 2.142216727 |
|