abhishek HF staff committed on
Commit
285aab9
1 Parent(s): 5e37512

update script

Browse files
Files changed (1) hide show
  1. script.py +5 -1
script.py CHANGED
@@ -32,7 +32,11 @@ def process_dataset(dataset_dir: str):
32
 
33
  # check if metadata.jsonl exists
34
  if os.path.exists(os.path.join(dataset_dir, "metadata.jsonl")):
35
- metadata = json.load(open(os.path.join(dataset_dir, "metadata.jsonl")))
 
 
 
 
36
  for item in metadata:
37
  txt_path = os.path.join(dataset_dir, item["file_name"])
38
  txt_path = txt_path.rsplit(".", 1)[0] + ".txt"
 
32
 
33
  # check if metadata.jsonl exists
34
  if os.path.exists(os.path.join(dataset_dir, "metadata.jsonl")):
35
+ metadata = []
36
+ with open(os.path.join(dataset_dir, "metadata.jsonl"), "r") as f:
37
+ for line in f:
38
+ if len(line.strip()) > 0:
39
+ metadata.append(json.loads(line))
40
  for item in metadata:
41
  txt_path = os.path.join(dataset_dir, item["file_name"])
42
  txt_path = txt_path.rsplit(".", 1)[0] + ".txt"