Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
•
4f4c0c4
1
Parent(s):
9f6cc2b
tokenized df bug
Browse files
data_measurements/dataset_statistics.py
CHANGED
@@ -455,7 +455,7 @@ class DatasetStatisticsCacheClass:
|
|
455 |
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
456 |
else:
|
457 |
logs.info("Calculating vocab afresh")
|
458 |
-
if
|
459 |
self.tokenized_df = self.do_tokenization()
|
460 |
if save:
|
461 |
logs.info("Writing out.")
|
|
|
455 |
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
456 |
else:
|
457 |
logs.info("Calculating vocab afresh")
|
458 |
+
if self.tokenized_df is None:
|
459 |
self.tokenized_df = self.do_tokenization()
|
460 |
if save:
|
461 |
logs.info("Writing out.")
|