diff --git a/datasets/allenai_c4_en/none/avg_line_length/metric.json b/datasets/allenai_c4_en/none/avg_line_length/metric.json deleted file mode 100644 index 26592f7bb8542c822379083d58fb32c6940bc50f..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe9f5f5bbe46291624a9701ada2ffc7e4e60fbf3bc3e0419d42575741c5d8d9f -size 189 diff --git a/datasets/allenai_c4_en/none/avg_word_length/metric.json b/datasets/allenai_c4_en/none/avg_word_length/metric.json deleted file mode 100644 index f3a6b4fdad1a6d7ce542b73bf6ca027e73cf6b8a..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:33f7f9d1067c47f911a4a108b983496f6c2db3b5996b8576eb13dc2625325bb5 -size 202 diff --git a/datasets/allenai_c4_en/none/avg_words_per_line/metric.json b/datasets/allenai_c4_en/none/avg_words_per_line/metric.json deleted file mode 100644 index 773497afa70ca10fbcb35d65bef951d472f0568d..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:43343d6a9662b61d6126d3cd74459d678e6141d856708adedb8682b83896b3f4 -size 190 diff --git a/datasets/allenai_c4_en/none/digit_ratio/metric.json b/datasets/allenai_c4_en/none/digit_ratio/metric.json deleted file mode 100644 index 800e5d2cb7c5169b1c76254c1a7dc8e0d46c32a3..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65871937d6e3dbfdece9c44bb6782e9a629fda563d4eb07819f6d4d5b5af31df -size 210 diff --git a/datasets/allenai_c4_en/none/fasttext_en/metric.json b/datasets/allenai_c4_en/none/fasttext_en/metric.json deleted file mode 100644 index d8df7acb6be07213e67bcc39ce5194c0ea203594..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/fasttext_en/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:780c9e30031a45049e0eeb34aa44da497891200792a0670ae7702d7596968297 -size 208 diff --git a/datasets/allenai_c4_en/none/length/metric.json b/datasets/allenai_c4_en/none/length/metric.json deleted file mode 100644 index 15aabf387bc6dcd5b783acae6d4692b4239d1010..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:37f408bdd40810a16f94be7a8326e02b9d2e3b452c2fdc07193aa9a3cc6171f9 -size 185 diff --git a/datasets/allenai_c4_en/none/line_char_duplicates/metric.json b/datasets/allenai_c4_en/none/line_char_duplicates/metric.json deleted file mode 100644 index 1ea0e0d75aebca9088e8f2419632dcd45e44abab..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/line_char_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a6f01c247cd19807f8b10d02d1a3d68e814635d04dd59513b07607284c24c5c -size 14 diff --git a/datasets/allenai_c4_en/none/line_duplicates/metric.json b/datasets/allenai_c4_en/none/line_duplicates/metric.json deleted file mode 100644 index 1ea0e0d75aebca9088e8f2419632dcd45e44abab..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/line_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a6f01c247cd19807f8b10d02d1a3d68e814635d04dd59513b07607284c24c5c -size 14 diff --git a/datasets/allenai_c4_en/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/allenai_c4_en/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index 8609885b049f0ccc693c78e0e5f27a17d1694bdc..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41a5c94e51b095f28ef4212cb105e199f0ad05cb3e3714ed065a46923fe11a9f -size 51 diff --git a/datasets/allenai_c4_en/none/long_line_ratio_chars_10000/metric.json b/datasets/allenai_c4_en/none/long_line_ratio_chars_10000/metric.json deleted file mode 100644 index 06f3375fde2520d50f91a683e6ed8326daa7724d..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/long_line_ratio_chars_10000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06713828378b7783fb3180b5cedfa87a12490d99c5359b1baab7d1174fdfaab2 -size 197 diff --git a/datasets/allenai_c4_en/none/long_line_ratio_chars_2000/metric.json b/datasets/allenai_c4_en/none/long_line_ratio_chars_2000/metric.json deleted file mode 100644 index 556db30bbd7c90460ebabc6c6610f245a031b00c..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/long_line_ratio_chars_2000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e82802c7d5dc2b934bafc082d9a9d91b72488c86a611e8b7a1040f9585721255 -size 195 diff --git a/datasets/allenai_c4_en/none/long_word_ratio_5/metric.json b/datasets/allenai_c4_en/none/long_word_ratio_5/metric.json deleted file mode 100644 index e6543a6449fda3d8741bd4e26161ebefb79c4fcb..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/long_word_ratio_5/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0da2001f73ec3a43228f1549d2029642a77ae71c6309e20ad24d8ab8477c02b -size 207 diff --git a/datasets/allenai_c4_en/none/long_word_ratio_7/metric.json b/datasets/allenai_c4_en/none/long_word_ratio_7/metric.json deleted file mode 100644 index 1a21c2aa6a451e37bab08f374fd83ee4511655fe..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3bb91a30a897c1df1cf4b7a1013218e4b15f1374a70da8223983da9e42d68b05 -size 205 diff --git a/datasets/allenai_c4_en/none/n_lines/metric.json b/datasets/allenai_c4_en/none/n_lines/metric.json deleted file mode 100644 index 9af5b243294220621094ebf474921165c001b66c..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac18b6d77562467950b66d17f68e3ec0e5187dcef88e027023274688b62a05ef -size 179 diff --git a/datasets/allenai_c4_en/none/n_words/metric.json b/datasets/allenai_c4_en/none/n_words/metric.json deleted file mode 100644 index f39d1a968d6b6ec1fed43888fd29101b07e0e9e5..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4de7f5473e2a1347fa332f59d768bc1396d94704fb4fe432f976b65dd13eb388 -size 181 diff --git a/datasets/allenai_c4_en/none/non_alpha_digit_ratio/metric.json b/datasets/allenai_c4_en/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index e3a426159a59e4f1ec0d49b8b7aadb3128120691..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e849f5e5aa5d9fab521ebe9a592d97a886b2ca4226701a2094235518097c10cc -size 224 diff --git a/datasets/allenai_c4_en/none/short_line_ratio_chars_10/metric.json b/datasets/allenai_c4_en/none/short_line_ratio_chars_10/metric.json deleted file mode 100644 index 4dbbaf9d5d389b700bb983928c4b8e8d4c36853c..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/short_line_ratio_chars_10/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9010d74b31a2f6f66fecdaeaba07abc0efe070842394c6406ad5603807150104 -size 199 diff --git a/datasets/allenai_c4_en/none/short_line_ratio_chars_30/metric.json b/datasets/allenai_c4_en/none/short_line_ratio_chars_30/metric.json deleted file mode 100644 index 49d08c35befc7a7c56d4d26fac5951e79eee738d..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/short_line_ratio_chars_30/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e51f9b3bf39a0cef85d15f2fe60f5a9f492168ab65a90b4ac8491658ba46207b -size 195 diff --git a/datasets/allenai_c4_en/none/short_word_ratio_3/metric.json b/datasets/allenai_c4_en/none/short_word_ratio_3/metric.json deleted file mode 100644 index a43ef8c1c380ec17fa861268a30c2fc9ddd7b172..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e83b469275d0e450c059222754551fa3a41d19374ffb5b559c86160ba899dc71 -size 207 diff --git a/datasets/allenai_c4_en/none/white_space_ratio/metric.json b/datasets/allenai_c4_en/none/white_space_ratio/metric.json deleted file mode 100644 index c4ad43c68f6ca9e5fbeed7c5336d613e56c3d561..0000000000000000000000000000000000000000 --- a/datasets/allenai_c4_en/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75ccf0093f94be65ac6e14059926d66f0497dae631955077a4ef901ad0d08b80 -size 226 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_line_length/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/avg_line_length/metric.json deleted file mode 100644 index 4ccbb705b7d46ded4ececa39952ebb147514a591..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:450c5f61b9838efb08ccfa7e217310075c7bf08df8c4713e075586be0dcbecf7 -size 214 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_word_length/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/avg_word_length/metric.json deleted file mode 100644 index c5994261674bbe9a01c2c801911ee0d2d3b8abb2..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1fabde06f81853e33bb18b14562e91c9ed4b13779c9e552c400bd459a2237a5f -size 211 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_words_per_line/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/avg_words_per_line/metric.json deleted file mode 100644 index 502bc5d3dbef13031df2a283d8cb759e7d413adc..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89ea5111d80945958457e3b507f5a93a4dd91f2575d23c0a78f7110d6c48c663 -size 200 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/digit_ratio/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/digit_ratio/metric.json deleted file mode 100644 index 09082ffd56ef7501015bb56107d117649ce8bc8f..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf7c6c2fbe176cd1a626652c7b865a6fda8f83808fb2d66cc12f744b81f32d57 -size 217 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/fasttext_en/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/fasttext_en/metric.json deleted file mode 100644 index 005d87ff86a4c0bc43d52985e53d807ccc600eb4..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/fasttext_en/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d32e89bcced69be77d95ba8a8c9cbf054054b298cf865950e72a2e16c75df9fe -size 221 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/length/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/length/metric.json deleted file mode 100644 index f692f829313ddac1443cf2e1098e7b28fab6a72f..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa103d682f1319373583e7cd29957379f5b5bf2c1265285dbdeb5ae55df99e2d -size 186 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/line_char_duplicates/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/line_char_duplicates/metric.json deleted file mode 100644 index 822931b32b61c0a25eec4a39a14e9b1c52915b3a..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/line_char_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d44b549f2905ababe83d4dc8c965e2ae988c99bd6876869bf900982accb533b -size 194 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/line_duplicates/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/line_duplicates/metric.json deleted file mode 100644 index 9e225f6f4c86696704501ca3ab0627551cce3d72..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/line_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c53752761a1c99a2cf65f5b1b9e553ff37ac96041e2effa1519f24877b6c6c7 -size 192 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index e7edd14b47737167403aeb2a96189b893b8bd10b..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ee606206061325d5c803cca5935dfbfd74de01c01cf99480e3780db6b5e41e42 -size 197 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json deleted file mode 100644 index ae2e07164881b43a3cb490d6d465d011ca8a26f6..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7e135f8cdafdee33e6cae93f82d35ba2db2fa421bbe4b27b999d2d5638d078f -size 215 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json deleted file mode 100644 index ae2e07164881b43a3cb490d6d465d011ca8a26f6..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7e135f8cdafdee33e6cae93f82d35ba2db2fa421bbe4b27b999d2d5638d078f -size 215 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/n_lines/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/n_lines/metric.json deleted file mode 100644 index 4ba753e221aea9372fffb395d3bab5800ee2bb1f..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fddd7ed1fb2a741b6c06bc079abf76ba4d8ccb48c733f57ee35a0befb5a8d6b8 -size 179 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/n_words/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/n_words/metric.json deleted file mode 100644 index 78e3542f99ad69839959cd18c8682352d41a3ba1..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ceb8224de5f5c37a137b48caf55d49fc40a5a1d83a87397ad2f2d188f42cffbd -size 183 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index f6ac13a6c0b13f9ec3b25a043810eca3e7b28d2d..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a13dfea96ce6d9032f261752f03c20cd70190016745c98e4268bfaa14835339f -size 231 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json deleted file mode 100644 index e09bdee1c50566eb644a3d8a485bec5adc633d7a..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39294ca3521b685b21bb88c95d14c656bcd8e38a92d2929d81d892d83a683113 -size 233 diff --git a/datasets/dedup_global_CC-MAIN-2013-48/none/white_space_ratio/metric.json b/datasets/dedup_global_CC-MAIN-2013-48/none/white_space_ratio/metric.json deleted file mode 100644 index a7ffe4cb19d42e6597e0b2558fef7b05b2a41802..0000000000000000000000000000000000000000 --- a/datasets/dedup_global_CC-MAIN-2013-48/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53847f9de52893988813383ce6497e5104aad9f3cd31799917474bc5d4384775 -size 235 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_line_length/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_line_length/metric.json deleted file mode 100644 index 2053670a0b48c44a72d7e6dd5475e32d14abf5ff..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5ab9d11295d4903544a9562446c2bba82ef88ceff52621d37d908b0f30403608 -size 193 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_word_length/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_word_length/metric.json deleted file mode 100644 index 3287a6942a82b1e1fc2db8e9621d4a0e99f2bb24..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8120df4bf6e235172e38fb470b7421b78cd1cd1d0201a1665e1d3d0b2cb45f5e -size 204 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_words_per_line/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_words_per_line/metric.json deleted file mode 100644 index de2c3430a63da4add79fb29bdbb010770c9fdd82..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b829e8f6d21d0f313f96202b9f7a2bc0a4d72e62755fe350d868a8ca261b7903 -size 192 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/digit_ratio/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/digit_ratio/metric.json deleted file mode 100644 index 0044141f6d4ec081fb5ad3bf2381d9ecd89ffee9..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e10dc2247db7c49f8803bee84275318236faa77f95797ece72a4e4fd06adee0 -size 212 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/fasttext_en/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/fasttext_en/metric.json deleted file mode 100644 index 627b3fb840571087fbbe88ebbbea4e0a4e432e09..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/fasttext_en/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d769e56c6e5b9b97f35083ae704b8eccf98fed88f232c7b62c14254890fb2cce -size 222 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/length/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/length/metric.json deleted file mode 100644 index 78c1ead6efa300010aa8b5b2ce438be3a1342faf..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4efe9c94d45d5141b7568b4ab2256ed689178186ffa02be51b66c0328809367 -size 185 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/line_char_duplicates/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/line_char_duplicates/metric.json deleted file mode 100644 index 85627005bee1adcccd44288046b676476c73f883..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/line_char_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:497bdec038462b72eab364c7f6e43200699ba419646e476c76d3cd4d27b8147c -size 195 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/line_duplicates/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/line_duplicates/metric.json deleted file mode 100644 index ccd0b8bdfa2d2112ed220cd4801f30bd0ea15665..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/line_duplicates/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d0b22e4cfdcec2cf09d80b81019e5e9536e36a7c5fb5ab99a42ddae61c6ede1 -size 192 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index 5dfa50eb55917f9c5314fa1cd5b5d7ef33fe6ee0..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:911d308df972d1a7f6363447d059b27ecc0a2558e4664346726152668039f74a -size 189 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_10000/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_10000/metric.json deleted file mode 100644 index f4b7ce1b8c561784990357755e4b70d408351c55..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_10000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f4fc3741f56881cf4ac7c45dc4e437817467c2650af34c549d92bd4b3e91c23 -size 195 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_2000/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_2000/metric.json deleted file mode 100644 index b936cafb8eee1aa38fc882733af126a30ea15442..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_line_ratio_chars_2000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5911c0c489e243480569793b6f95ac6e9043e0597fa188dc3f4611c06a782521 -size 194 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json deleted file mode 100644 index 5226484d9d0d35c5333a1abb12558ae6898c4c76..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_5/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38973f39e8a2fb654309b5589c72b8ba429eb5e6a2f775215f98e2df05bc293d -size 216 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json deleted file mode 100644 index b0c21c104f7015157f914f7ce69c29f865beadf1..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce8d44e54ce32688edc5e13576006e9f71dcd282d593a5e086f38018fa0a7213 -size 205 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/n_lines/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/n_lines/metric.json deleted file mode 100644 index c538a70475f45d51aeedadafb23f68507b42ef49..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:658e305b1fe00438575fcbb8128457cac9d0f81b770db5da08fd407877e21f68 -size 180 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/n_words/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/n_words/metric.json deleted file mode 100644 index e4d585d8ab66334003b66a2a1bbdbc8420c8eafb..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b69dd2b89b5584282d519a1ee2fc4d0f9990394afe9b6b814900ec7320c11816 -size 181 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index 1e12ff3e9e9b0f2d2f436940e26da217bee6f391..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3b91c3c8b66106dc81f94b66f4c81278ce9451503d2cd6935be5d763d3fc28e -size 223 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_10/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_10/metric.json deleted file mode 100644 index 8d0ce76d82b85f9241ba2682ca83e06f10f45eb6..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_10/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94e6b94550fd4a1ff9a93ad5edcc1049dc942cad614bb0252b7a8d67da612aa9 -size 206 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_30/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_30/metric.json deleted file mode 100644 index ab5f9f3de262d87b8ac44ac370f7965a9f8695c4..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_line_ratio_chars_30/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a08e2b60d02279492138dd72eee37d4d6ea51a864c1850461e62b58755ece7b -size 191 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json deleted file mode 100644 index 6808da1ce1628aaf4713a18ab04c10881625bc45..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:966172bb6397af992f456a08550537df4692ddb2af9a61aaf82c14aa7e92fd77 -size 225 diff --git a/datasets/dedup_independant_CC-MAIN-2013-48/none/white_space_ratio/metric.json b/datasets/dedup_independant_CC-MAIN-2013-48/none/white_space_ratio/metric.json deleted file mode 100644 index e6409ce2b5f053c03c1b0384ff1f90f3eb846508..0000000000000000000000000000000000000000 --- a/datasets/dedup_independant_CC-MAIN-2013-48/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e60fcb87073d7385ee0e9e0d08bf4c9fc059b939c0d5cf28b566a680032b455 -size 227 diff --git a/datasets/dolma-v1.7-cc/none/avg_line_length/metric.json b/datasets/dolma-v1.7-cc/none/avg_line_length/metric.json deleted file mode 100644 index dabdb0c1a23c5619638aa1900922edae2c93ca9d..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ee4aa2e2960085e8460aecb814d24365374f61054d2a6b606782aba29cd24c6 -size 204 diff --git a/datasets/dolma-v1.7-cc/none/avg_word_length/metric.json b/datasets/dolma-v1.7-cc/none/avg_word_length/metric.json deleted file mode 100644 index f5a05daf30b6323752a26f0e6026d60f5d3fc4b6..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0864872e233d8e2d22c400efd4115e1a9d6b34b1e34d55d744db777b12dda2e4 -size 204 diff --git a/datasets/dolma-v1.7-cc/none/avg_words_per_line/metric.json b/datasets/dolma-v1.7-cc/none/avg_words_per_line/metric.json deleted file mode 100644 index ee85ddee9437e25b3d236bf377b51fd6f551ea47..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39788119e779a6231f5d3c11563fee201b7c5e4788b192d2c7324d5169061951 -size 205 diff --git a/datasets/dolma-v1.7-cc/none/digit_ratio/metric.json b/datasets/dolma-v1.7-cc/none/digit_ratio/metric.json deleted file mode 100644 index d91a8730834d70b97737a9964e55a7693645f2d3..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7ecae7ab485f7bdd11a11708e8bcd51543f50b1e2168e8dff234969e13c61a9 -size 209 diff --git a/datasets/dolma-v1.7-cc/none/length/metric.json b/datasets/dolma-v1.7-cc/none/length/metric.json deleted file mode 100644 index 334f8ea7bd64666659d15dfd99f486447153a875..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17b28fc703bbed815ef20656d19dd5399148a33675d8da159afef493e8ad9d1d -size 180 diff --git a/datasets/dolma-v1.7-cc/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/dolma-v1.7-cc/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index dd34a1fc6485f3dd1817d719d419a008f2b943bf..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:673456a7b9b1d46a4e632a5fd189944871fb6c53cc6fdda37ed1b5fa9af5419f -size 188 diff --git a/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_10000/metric.json b/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_10000/metric.json deleted file mode 100644 index 519628a619bbb06a55c8efccf39100530fdea42d..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_10000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad0976364b4018345dd3c40b2247aa3778ece15a15a4168084366959f62377c5 -size 194 diff --git a/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_2000/metric.json b/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_2000/metric.json deleted file mode 100644 index e26a02115cf1f0b7469ba8ffa65b4ebb8f981cc9..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/long_line_ratio_chars_2000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c71656788c12c392c64046bb959a6d49b19024768f7cc5959f96bce49a70273b -size 194 diff --git a/datasets/dolma-v1.7-cc/none/long_word_ratio_7/metric.json b/datasets/dolma-v1.7-cc/none/long_word_ratio_7/metric.json deleted file mode 100644 index d02bec71451d0a0e2da42f749d802cbee67c5f78..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2082c8d7d0129aed260e024d00581c8cdc1e28b7434ab8aba3f77903b79c9204 -size 207 diff --git a/datasets/dolma-v1.7-cc/none/n_lines/metric.json b/datasets/dolma-v1.7-cc/none/n_lines/metric.json deleted file mode 100644 index a53f9a1dd2289c8672166e87247cd1d87567892c..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2184a3e2aa32e51d6b70478deb43aae1450be42bdfcfc31ccc4bba8ce850425d -size 176 diff --git a/datasets/dolma-v1.7-cc/none/n_words/metric.json b/datasets/dolma-v1.7-cc/none/n_words/metric.json deleted file mode 100644 index e58be62d1945172da4eb7ea18f985634f96a102c..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1e8b44818a61f8528bda44574663b2c72559d8b76df3ae82075f827308e3238 -size 180 diff --git a/datasets/dolma-v1.7-cc/none/non_alpha_digit_ratio/metric.json b/datasets/dolma-v1.7-cc/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index bae417628c3be91395fe1ce6347471711940dbfa..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b992adb9acfad0d4a10801db490404b73badb6d70d81804da3a23a260e105292 -size 222 diff --git a/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_10/metric.json b/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_10/metric.json deleted file mode 100644 index 04b537eb5e24b9324ab70e275c138d23f7d28cbb..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_10/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89886f5fa7741a2d95f386d30c59dc135b8912decafe064a75c0781cf10e2130 -size 189 diff --git a/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_30/metric.json b/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_30/metric.json deleted file mode 100644 index 7cc8ecb1075b8020708b4bb23362b23db53bc4be..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/short_line_ratio_chars_30/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b4bd4041bf82e41c0e554406af175ca172127273a9d977d73823c3e7cac5860 -size 192 diff --git a/datasets/dolma-v1.7-cc/none/short_word_ratio_3/metric.json b/datasets/dolma-v1.7-cc/none/short_word_ratio_3/metric.json deleted file mode 100644 index f422a49f265b6ead4358700f37e0e079f936277b..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1f1804c3fb433426aa83845249454c51a5722fea8f29e8797f6e281dbb255b88 -size 193 diff --git a/datasets/dolma-v1.7-cc/none/white_space_ratio/metric.json b/datasets/dolma-v1.7-cc/none/white_space_ratio/metric.json deleted file mode 100644 index cf8cfbdb63d4fbe2f0a20ac23ed9da8f68075052..0000000000000000000000000000000000000000 --- a/datasets/dolma-v1.7-cc/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5b6146686c7fb088fef11e402b4fd08763dd5e75df947de77cba1424ffdcb42 -size 226 diff --git a/datasets/fineweb/none/avg_line_length/metric.json b/datasets/fineweb/none/avg_line_length/metric.json deleted file mode 100644 index 6266b44829885706722e99b75b2d48e086091b0f..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29c63cc1185710c8e80db27f65133aaec5516dcbea6c07606303664d6e89e4e7 -size 208 diff --git a/datasets/fineweb/none/avg_word_length/metric.json b/datasets/fineweb/none/avg_word_length/metric.json deleted file mode 100644 index 94d9a64f60851b83baaa9c49dfa8cd6eb975e70f..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:259cafd4f1952508cab6b851eb10c0c0ac00155eadc465edbcc08c57027469ef -size 218 diff --git a/datasets/fineweb/none/avg_words_per_line/metric.json b/datasets/fineweb/none/avg_words_per_line/metric.json deleted file mode 100644 index b3c2fd7e5e009e8db2d3d4b74f4c69a87d39f4f9..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a923850f99b655095d64216aafab16079daf577eec6d76f8e063c2d96e6e21b6 -size 208 diff --git a/datasets/fineweb/none/digit_ratio/metric.json b/datasets/fineweb/none/digit_ratio/metric.json deleted file mode 100644 index 80cce4153fade5043102216dc9e13c84cedfc22a..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2884d6169089cf4a105eec660835f0df11003f8245da71036d869648b46c91a0 -size 209 diff --git a/datasets/fineweb/none/fasttext_en/metric.json b/datasets/fineweb/none/fasttext_en/metric.json deleted file mode 100644 index 5038457ea483aa6e621a4d4a09430f2349172310..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/fasttext_en/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8825943428cb03c9fd30cb1d75fb1b12ad5faffeb3281d73bf30b4a1c3f8f3f -size 220 diff --git a/datasets/fineweb/none/length/metric.json b/datasets/fineweb/none/length/metric.json deleted file mode 100644 index 25571190255339abb3d1a29ead791e72d9d05c54..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:508bd7b66685e22e11d34a81fd33b30800f4e7e77f66bc0093fb6c2a9fb256c8 -size 183 diff --git a/datasets/fineweb/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/fineweb/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index 80bd31dee248fe358387542a17cf4793d7839cd9..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3e1ecbed0f3c3ca250c41f7a59df6d1c319453bc0ef64115dfb9a67428c4315 -size 207 diff --git a/datasets/fineweb/none/long_line_ratio_chars_10000/metric.json b/datasets/fineweb/none/long_line_ratio_chars_10000/metric.json deleted file mode 100644 index ef8cf175656fcf90a49c169ef8cd6a7273547966..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/long_line_ratio_chars_10000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6edaaafd67503b7266593c9e4d0a111fe370740ee2dfc69d9ec08eac00e3f634 -size 196 diff --git a/datasets/fineweb/none/long_line_ratio_chars_2000/metric.json b/datasets/fineweb/none/long_line_ratio_chars_2000/metric.json deleted file mode 100644 index 4003524de9d30883f4acbc6b877f1e7053107c9d..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/long_line_ratio_chars_2000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dfe1a748a75adde3003019216a0a681f82ceba098cae19ebbdee4a327e954b41 -size 192 diff --git a/datasets/fineweb/none/long_word_ratio_7/metric.json b/datasets/fineweb/none/long_word_ratio_7/metric.json deleted file mode 100644 index da732cfc47a0e9914be4144a21f26497a92bbbb3..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:219cdfe8fc7c158d556e6395270bbb3ad66dfac92090bc5df819b32e2ab72bf9 -size 205 diff --git a/datasets/fineweb/none/n_lines/metric.json b/datasets/fineweb/none/n_lines/metric.json deleted file mode 100644 index d16eff7e1bb7b6b9ad3a05b2241de82cc8c95e1e..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6dd7e5178f0933f1b9af76b538b10034b996685943c82e1a02300cebbf3cebd7 -size 180 diff --git a/datasets/fineweb/none/n_words/metric.json b/datasets/fineweb/none/n_words/metric.json deleted file mode 100644 index d5776989c19ac9e82c3e9bc2cc9b5c1c64c3bbcf..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1afeee7262798667d3bdfb902efde91bca4e3e6abe3d73b03a3e03e3d2dc2e17 -size 184 diff --git a/datasets/fineweb/none/non_alpha_digit_ratio/metric.json b/datasets/fineweb/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index 803575b69f9a8e2f90a4b9a0520c246c7b90fef8..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e70cff4d1395cbc7c7173ea4bbb2f586e58c232a1c475d0980a69e71476d4074 -size 224 diff --git a/datasets/fineweb/none/short_line_ratio_chars_10/metric.json b/datasets/fineweb/none/short_line_ratio_chars_10/metric.json deleted file mode 100644 index 66f78e6273780a8ae203a7a4212ca9e31f2f0e75..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/short_line_ratio_chars_10/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c95f9d2c6e06b4618e8a203b5fa8fc6865ce560474355a88a65fa9360963b30e -size 211 diff --git a/datasets/fineweb/none/short_line_ratio_chars_30/metric.json b/datasets/fineweb/none/short_line_ratio_chars_30/metric.json deleted file mode 100644 index 30af41068a2d9a1de7cf1aeb31ef7b329078398c..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/short_line_ratio_chars_30/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c92f8fe6e4b802431c0337e37891984ae7f9ff98c43e3bf0ac67f42b57607fc7 -size 206 diff --git a/datasets/fineweb/none/short_word_ratio_3/metric.json b/datasets/fineweb/none/short_word_ratio_3/metric.json deleted file mode 100644 index 0e671bf8f4b9c5b68225e7156926caddc2d76d5c..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:883d298d764708328a33c5f3016d53d5a77d34f3a5bf9dc0996a9970d168b26d -size 226 diff --git a/datasets/fineweb/none/white_space_ratio/metric.json b/datasets/fineweb/none/white_space_ratio/metric.json deleted file mode 100644 index 115679c2ab6d091bb494ec1373f483cd1d596210..0000000000000000000000000000000000000000 --- a/datasets/fineweb/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66e8be471033706e67279fc15c29eac34a4bcab2ff8fc44a9df4b6b61a36aee5 -size 227 diff --git a/datasets/red_pajama_v2/none/avg_line_length/metric.json b/datasets/red_pajama_v2/none/avg_line_length/metric.json deleted file mode 100644 index 0ad2c9e3169b947c7b4a7ccc2758ca3c4bc5a97a..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/avg_line_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99bf408fd168a234f69c66472e84bcfd9100d25da4c5bde1c6fedb51d690abdd -size 192 diff --git a/datasets/red_pajama_v2/none/avg_word_length/metric.json b/datasets/red_pajama_v2/none/avg_word_length/metric.json deleted file mode 100644 index 5c59972e735cc14490e2d9b8dc84cf2a729beeb8..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/avg_word_length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e1c7d1c9c63504f7dc687931e4b052a5834e73a967e18810e6e4d83b2495024 -size 188 diff --git a/datasets/red_pajama_v2/none/avg_words_per_line/metric.json b/datasets/red_pajama_v2/none/avg_words_per_line/metric.json deleted file mode 100644 index e94f4655d3406a8a7d31fe3db2a5583be2d48a45..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/avg_words_per_line/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6be67ab0cbde6963fa843b0a7754f4bd03cd970f8cb141c8b3bf118a7144511 -size 188 diff --git a/datasets/red_pajama_v2/none/digit_ratio/metric.json b/datasets/red_pajama_v2/none/digit_ratio/metric.json deleted file mode 100644 index 699fd2a55fc2f443f871b4736afe7762109b5fbb..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:59671076d094d070decbe26d866f5f8cf87625dcb6c4743ea3bfb5037d247190 -size 206 diff --git a/datasets/red_pajama_v2/none/length/metric.json b/datasets/red_pajama_v2/none/length/metric.json deleted file mode 100644 index 6b9ca28a23d0e7c9f886dab4403884cf4912cb6c..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/length/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46039db109b384e7042e3c55978c9c3cd2eed3df0d2c876d38ec1b2303b24c11 -size 181 diff --git a/datasets/red_pajama_v2/none/lines_ending_with_terminal_mark_ratio/metric.json b/datasets/red_pajama_v2/none/lines_ending_with_terminal_mark_ratio/metric.json deleted file mode 100644 index 1c84eeb5b8aad3c8e31f641e0f5746e1c8e9ac2d..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/lines_ending_with_terminal_mark_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2aa0b151dddea7a76e2f97acbd7adafa638f43945d21e6a8d301102460ad0b12 -size 188 diff --git a/datasets/red_pajama_v2/none/long_line_ratio_chars_10000/metric.json b/datasets/red_pajama_v2/none/long_line_ratio_chars_10000/metric.json deleted file mode 100644 index e0d053c407519006a75dca1791405fd1f32d5f03..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/long_line_ratio_chars_10000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:959aece966c5b9241a7d46b70c8e019fdbd4800f89d9213785b3fbd4e8bb7d7d -size 194 diff --git a/datasets/red_pajama_v2/none/long_line_ratio_chars_2000/metric.json b/datasets/red_pajama_v2/none/long_line_ratio_chars_2000/metric.json deleted file mode 100644 index 06c74649ea96659ed707e412978c27b1cafab25c..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/long_line_ratio_chars_2000/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:322f555d51fa22597bd65b1df1f4e460883cfb947c6bb3fa1eb39b9ad026cc82 -size 191 diff --git a/datasets/red_pajama_v2/none/long_word_ratio_7/metric.json b/datasets/red_pajama_v2/none/long_word_ratio_7/metric.json deleted file mode 100644 index 57254841d2d93aa14d0d79d717d95cb5c93750cf..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/long_word_ratio_7/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fb30104ef22a341e4e3fd3eea705db96783e5758ea5630f0138ec89ac2101d76 -size 189 diff --git a/datasets/red_pajama_v2/none/n_lines/metric.json b/datasets/red_pajama_v2/none/n_lines/metric.json deleted file mode 100644 index 7aa815c2baf4735fa61a7d49bb1d8375401c3c49..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/n_lines/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b57a9cb8d3db184ae00007bdad45871222a6f6ccefd9d9534c87175ad68fec8 -size 174 diff --git a/datasets/red_pajama_v2/none/n_words/metric.json b/datasets/red_pajama_v2/none/n_words/metric.json deleted file mode 100644 index 60d9bfb7268dfdc12ecafa91c46077b19175ead0..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/n_words/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9655978b2e83b8dfcabdb0e76e7036938cb65b3f0bb68f6e0ef0654babd345c6 -size 179 diff --git a/datasets/red_pajama_v2/none/non_alpha_digit_ratio/metric.json b/datasets/red_pajama_v2/none/non_alpha_digit_ratio/metric.json deleted file mode 100644 index d3ea712ea0bcfc6fed45fec8ef4b230f05f68453..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/non_alpha_digit_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8388dbbcbb1175ae975a49011d132b7728b9c65dcb5885dfc1dd48a3c6ee8083 -size 190 diff --git a/datasets/red_pajama_v2/none/short_line_ratio_chars_10/metric.json b/datasets/red_pajama_v2/none/short_line_ratio_chars_10/metric.json deleted file mode 100644 index bda599f917e6916431883a7c86bede386785f58a..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/short_line_ratio_chars_10/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed3999291d917bda550fa81a824406efb50e44985dde5df95a55f0c0e4c64c5f -size 192 diff --git a/datasets/red_pajama_v2/none/short_line_ratio_chars_30/metric.json b/datasets/red_pajama_v2/none/short_line_ratio_chars_30/metric.json deleted file mode 100644 index 4b09a6a9591e880e1cca0b1ebfc9f414a3615237..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/short_line_ratio_chars_30/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fde9302f7fa82f571a6766dc57eebd9ffc1c85c1622277c14a38662d302fa9fa -size 190 diff --git a/datasets/red_pajama_v2/none/short_word_ratio_3/metric.json b/datasets/red_pajama_v2/none/short_word_ratio_3/metric.json deleted file mode 100644 index a7f5c5dba0cfd5c36fd811ca2c96daca1158afcb..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/short_word_ratio_3/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:16706ccd2b7f6a9517ffa803c8f8bc34a6c9d4d5f826626fa51990cc98809d62 -size 191 diff --git a/datasets/red_pajama_v2/none/white_space_ratio/metric.json b/datasets/red_pajama_v2/none/white_space_ratio/metric.json deleted file mode 100644 index 0c0443e6e96ce7fa6a53a93a0aee8cfd5281a304..0000000000000000000000000000000000000000 --- a/datasets/red_pajama_v2/none/white_space_ratio/metric.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a588963db9bce8f31b4fd439eedd67a91f37ae47d97c2962f4033f597a69e96 -size 207