meg-huggingface committed on
Commit
4f4c0c4
1 Parent(s): 9f6cc2b

tokenized df bug

Browse files
data_measurements/dataset_statistics.py CHANGED
@@ -455,7 +455,7 @@ class DatasetStatisticsCacheClass:
455
  self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
456
  else:
457
  logs.info("Calculating vocab afresh")
458
- if len(self.tokenized_df) == 0:
459
  self.tokenized_df = self.do_tokenization()
460
  if save:
461
  logs.info("Writing out.")
 
455
  self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
456
  else:
457
  logs.info("Calculating vocab afresh")
458
+ if self.tokenized_df is None:
459
  self.tokenized_df = self.do_tokenization()
460
  if save:
461
  logs.info("Writing out.")