Spaces: Running on CPU Upgrade
update script
Browse files
script.py
CHANGED
@@ -32,7 +32,11 @@ def process_dataset(dataset_dir: str):
32 |
33 |       # check if metadata.jsonl exists
34 |       if os.path.exists(os.path.join(dataset_dir, "metadata.jsonl")):
35 | -         metadata =
36 |           for item in metadata:
37 |               txt_path = os.path.join(dataset_dir, item["file_name"])
38 |               txt_path = txt_path.rsplit(".", 1)[0] + ".txt"
32 |
33 |       # check if metadata.jsonl exists
34 |       if os.path.exists(os.path.join(dataset_dir, "metadata.jsonl")):
35 | +         metadata = []
36 | +         with open(os.path.join(dataset_dir, "metadata.jsonl"), "r") as f:
37 | +             for line in f:
38 | +                 if len(line.strip()) > 0:
39 | +                     metadata.append(json.loads(line))
40 |           for item in metadata:
41 |               txt_path = os.path.join(dataset_dir, item["file_name"])
42 |               txt_path = txt_path.rsplit(".", 1)[0] + ".txt"