mtasic85 committed on
Commit
9255616
1 Parent(s): 41211a9

pretrain dataset

Browse files
scripts/prepare_pretrain_dataset.py CHANGED
@@ -74,6 +74,9 @@ def batch_iterator(name=None):
74
  dataset = load_dataset(name, split='train')
75
 
76
  for row in dataset:
 
 
 
77
  yield row['Question'] + ' ' + row['Answer']
78
 
79
  del dataset
 
74
  dataset = load_dataset(name, split='train')
75
 
76
  for row in dataset:
77
+ if not row['Question'] or not row['Answer']:
78
+ continue
79
+
80
  yield row['Question'] + ' ' + row['Answer']
81
 
82
  del dataset