pretrain dataset
Browse files
scripts/prepare_pretrain_dataset.py
CHANGED
@@ -74,6 +74,9 @@ def batch_iterator(name=None):
|
|
74 |
dataset = load_dataset(name, split='train')
|
75 |
|
76 |
for row in dataset:
|
|
|
|
|
|
|
77 |
yield row['Question'] + ' ' + row['Answer']
|
78 |
|
79 |
del dataset
|
|
|
74 |
dataset = load_dataset(name, split='train')
|
75 |
|
76 |
for row in dataset:
|
77 |
+
if not row['Question'] or not row['Answer']:
|
78 |
+
continue
|
79 |
+
|
80 |
yield row['Question'] + ' ' + row['Answer']
|
81 |
|
82 |
del dataset
|