Spaces:

flax-community
/

dalle-mini

Running

App Files Community

boris committed on Feb 22, 2022

Commit

1c4e839

•

1 Parent(s): 50498e6

feat: load from bucket

Browse files

Files changed (2) hide show

src/dalle_mini/model/utils.py +26 -3
tools/train/train.py +22 -9

src/dalle_mini/model/utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import tempfile
 import wandb
@@ -8,11 +9,13 @@ class PretrainedFromWandbMixin:
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """
-        Initializes from a wandb artifact, or delegates loading to the superclass.
         """
         with tempfile.TemporaryDirectory() as tmp_dir:  # avoid multiple artifact copies
-            if ":" in pretrained_model_name_or_path and not os.path.isdir(
-                pretrained_model_name_or_path
             ):
                 # wandb artifact
                 if wandb.run is not None:
@@ -20,7 +23,27 @@ class PretrainedFromWandbMixin:
                 else:
                     artifact = wandb.Api().artifact(pretrained_model_name_or_path)
                 pretrained_model_name_or_path = artifact.download(tmp_dir)
             return super(PretrainedFromWandbMixin, cls).from_pretrained(
                 pretrained_model_name_or_path, *model_args, **kwargs
             )

 import os
 import tempfile
+from pathlib import Path
 import wandb
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """
+        Initializes from a wandb artifact, google bucket path or delegates loading to the superclass.
         """
         with tempfile.TemporaryDirectory() as tmp_dir:  # avoid multiple artifact copies
+            if (
+                ":" in pretrained_model_name_or_path
+                and not os.path.isdir(pretrained_model_name_or_path)
+                and not pretrained_model_name_or_path.startswith("gs")
             ):
                 # wandb artifact
                 if wandb.run is not None:
                 else:
                     artifact = wandb.Api().artifact(pretrained_model_name_or_path)
                 pretrained_model_name_or_path = artifact.download(tmp_dir)
+                if artifact.metadata.get("bucket_path"):
+                    pretrained_model_name_or_path = artifact.metadata["bucket_path"]
+            if pretrained_model_name_or_path.startswith("gs://"):
+                copy_blobs(pretrained_model_name_or_path, tmp_dir)
+                pretrained_model_name_or_path = tmp_dir
             return super(PretrainedFromWandbMixin, cls).from_pretrained(
                 pretrained_model_name_or_path, *model_args, **kwargs
             )
+def copy_blobs(source_path, dest_path):
+    assert source_path.startswith("gs://")
+    from google.cloud import storage
+    bucket_path = Path(source_path[5:])
+    bucket, dir_path = str(bucket_path).split("/", 1)
+    client = storage.Client()
+    bucket = client.bucket(bucket)
+    blobs = client.list_blobs(bucket, prefix=f"{dir_path}/")
+    for blob in blobs:
+        dest_name = str(Path(dest_path) / Path(blob.name).name)
+        blob.download_to_filename(dest_name)

tools/train/train.py CHANGED Viewed

@@ -135,8 +135,21 @@ class ModelArguments:
                 else:
                     artifact = wandb.Api().artifact(state_artifact)
                 artifact_dir = artifact.download(tmp_dir)
-                self.restore_state = Path(artifact_dir) / "opt_state.msgpack"
-            return Path(self.restore_state).open("rb")
 @dataclass
@@ -788,9 +801,7 @@ def main():
         else:
             # load opt_state
-            opt_state_file = model_args.get_opt_state()
-            opt_state = from_bytes(opt_state_shape, opt_state_file.read())
-            opt_state_file.close()
             # restore other attributes
             attr_state = {
@@ -1060,7 +1071,7 @@ def main():
                 client = storage.Client()
                 bucket = client.bucket(bucket)
                 for filename in Path(output_dir).glob("*"):
-                    blob_name = str(Path(dir_path) / filename.name)
                     blob = bucket.blob(blob_name)
                     blob.upload_from_filename(str(filename))
                 tmp_dir.cleanup()
@@ -1068,7 +1079,7 @@ def main():
             # save state
             opt_state = jax.device_get(state.opt_state)
             if use_bucket:
-                blob_name = str(Path(dir_path) / "opt_state.msgpack")
                 blob = bucket.blob(blob_name)
                 blob.upload_from_file(io.BytesIO(to_bytes(opt_state)))
             else:
@@ -1088,10 +1099,10 @@ def main():
                 metadata["num_params"] = num_params
                 if eval_metrics is not None:
                     metadata["eval"] = eval_metrics
-                if use_bucket:
-                    metadata["bucket_path"] = bucket_path
                 # create model artifact
                 artifact = wandb.Artifact(
                     name=f"model-{wandb.run.id}",
                     type="DalleBart_model",
@@ -1113,6 +1124,8 @@ def main():
                 wandb.run.log_artifact(artifact)
                 # create state artifact
                 artifact_state = wandb.Artifact(
                     name=f"state-{wandb.run.id}",
                     type="DalleBart_state",

                 else:
                     artifact = wandb.Api().artifact(state_artifact)
                 artifact_dir = artifact.download(tmp_dir)
+                if artifact.metadata.get("bucket_path"):
+                    self.restore_state = artifact.metadata["bucket_path"]
+                else:
+                    self.restore_state = Path(artifact_dir) / "opt_state.msgpack"
+            if self.restore_state.startswith("gs://"):
+                bucket_path = Path(self.restore_state[5:]) / "opt_state.msgpack"
+                bucket, blob_name = str(bucket_path).split("/", 1)
+                client = storage.Client()
+                bucket = client.bucket(bucket)
+                blob = bucket.blob(blob_name)
+                return blob.download_as_bytes()
+            with Path(self.restore_state).open("rb") as f:
+                return f.read()
 @dataclass
         else:
             # load opt_state
+            opt_state = from_bytes(opt_state_shape, model_args.get_opt_state())
             # restore other attributes
             attr_state = {
                 client = storage.Client()
                 bucket = client.bucket(bucket)
                 for filename in Path(output_dir).glob("*"):
+                    blob_name = str(Path(dir_path) / "model" / filename.name)
                     blob = bucket.blob(blob_name)
                     blob.upload_from_filename(str(filename))
                 tmp_dir.cleanup()
             # save state
             opt_state = jax.device_get(state.opt_state)
             if use_bucket:
+                blob_name = str(Path(dir_path) / "state" / "opt_state.msgpack")
                 blob = bucket.blob(blob_name)
                 blob.upload_from_file(io.BytesIO(to_bytes(opt_state)))
             else:
                 metadata["num_params"] = num_params
                 if eval_metrics is not None:
                     metadata["eval"] = eval_metrics
                 # create model artifact
+                if use_bucket:
+                    metadata["bucket_path"] = f"gs://{bucket_path}/model"
                 artifact = wandb.Artifact(
                     name=f"model-{wandb.run.id}",
                     type="DalleBart_model",
                 wandb.run.log_artifact(artifact)
                 # create state artifact
+                if use_bucket:
+                    metadata["bucket_path"] = f"gs://{bucket_path}/state"
                 artifact_state = wandb.Artifact(
                     name=f"state-{wandb.run.id}",
                     type="DalleBart_state",

feat: load from bucket

feat: load from bucket