frankleeeee committed on
Commit
96995a1
1 Parent(s): df479cc

Upload STDiT

Browse files
Files changed (2) hide show
  1. config.json +1 -0
  2. modeling_stdit.py +6 -1
config.json CHANGED
@@ -11,6 +11,7 @@
11
  "depth": 28,
12
  "drop_path": 0.0,
13
  "enable_flash_attn": false,
 
14
  "enable_layernorm_kernel": false,
15
  "enable_sequence_parallelism": false,
16
  "freeze": null,
 
11
  "depth": 28,
12
  "drop_path": 0.0,
13
  "enable_flash_attn": false,
14
+ "enable_flashattn": false,
15
  "enable_layernorm_kernel": false,
16
  "enable_sequence_parallelism": false,
17
  "freeze": null,
modeling_stdit.py CHANGED
@@ -109,6 +109,10 @@ class STDiT(PreTrainedModel):
109
  Returns:
110
  x (torch.Tensor): output latent representation; of shape [B, C, T, H, W]
111
  """
 
 
 
 
112
  # embedding
113
  x = self.x_embedder(x) # [B, N, C]
114
  x = rearrange(x, "B (T S) C -> B T S C", T=self.num_temporal, S=self.num_spatial)
@@ -144,7 +148,8 @@ class STDiT(PreTrainedModel):
144
  tpe = self.pos_embed_temporal
145
  else:
146
  tpe = None
147
- x = auto_grad_checkpoint(block, x, y, t0, y_lens, tpe)
 
148
 
149
  if self.enable_sequence_parallelism:
150
  x = gather_forward_split_backward(x, get_sequence_parallel_group(), dim=1, grad_scale="up")
 
109
  Returns:
110
  x (torch.Tensor): output latent representation; of shape [B, C, T, H, W]
111
  """
112
+ x = x.to(self.final_layer.linear.weight.dtype)
113
+ timestep = timestep.to(self.final_layer.linear.weight.dtype)
114
+ y = y.to(self.final_layer.linear.weight.dtype)
115
+
116
  # embedding
117
  x = self.x_embedder(x) # [B, N, C]
118
  x = rearrange(x, "B (T S) C -> B T S C", T=self.num_temporal, S=self.num_spatial)
 
148
  tpe = self.pos_embed_temporal
149
  else:
150
  tpe = None
151
+ x = block(x, y, t0, y_lens, tpe)
152
+ # x = auto_grad_checkpoint(block, x, y, t0, y_lens, tpe)
153
 
154
  if self.enable_sequence_parallelism:
155
  x = gather_forward_split_backward(x, get_sequence_parallel_group(), dim=1, grad_scale="up")