victormiller
committed on
Commit
•
64c513d
1
Parent(s):
cdae785
Update web.py
Browse files
web.py
CHANGED
@@ -442,11 +442,11 @@ def web_data():
|
|
442 |
After text extraction, the non-English texts are then filtered out by fastText language identifier with a threshold of 0.65.
|
443 |
This step removes over 60% of the whole data.
|
444 |
"""),
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
|
451 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
452 |
|
|
|
442 |
After text extraction, the non-English texts are then filtered out by fastText language identifier with a threshold of 0.65.
|
443 |
This step removes over 60% of the whole data.
|
444 |
"""),
|
445 |
+
|
446 |
+
Details(
|
447 |
+
Summary("Sample documents that are classified as non-English"),
|
448 |
+
DV("data/sample_non_en.json", 3),
|
449 |
+
),
|
450 |
|
451 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
452 |
|