kyleleey committed
Commit 2b1cca8
Parent: 3bf37d0

remove unused pkgs

Files changed (2):
  1. requirements.txt (+0, -1)
  2. video3d/model_ddp.py (+16, -15)
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-clip==1.0
 ConfigArgParse==1.5.3
 core==1.0.1
 diffusers==0.20.0
video3d/model_ddp.py CHANGED
@@ -24,7 +24,7 @@ from .utils.skinning_v4 import estimate_bones, skinning
 import lpips
 from einops import rearrange, repeat
 
-import clip
+# import clip
 import torchvision.transforms.functional as tvf
 from . import discriminator_architecture
 
@@ -1195,20 +1195,21 @@ class Unsup3DDDP:
         self.enable_mask_distribution = False
 
         self.enable_clip = cfgs.get('enable_clip', False)
-        if self.enable_clip:
-            self.clip_model, _ = clip.load('ViT-B/32', self.device)
-            self.clip_model = self.clip_model.eval().requires_grad_(False)
-            self.clip_mean = [0.48145466, 0.4578275, 0.40821073]
-            self.clip_std = [0.26862954, 0.26130258, 0.27577711]
-            self.clip_reso = 224
-            self.clip_render_size = 64
-            self.enable_clip_text = cfgs.get('enable_clip_text', False)
-            if self.enable_clip_text:
-                self.clip_text_feature = {}
-                for category_name in ['bear', 'elephant', 'horse', 'sheep', 'cow', 'zebra', 'giraffe']:
-                    text_input = clip.tokenize(['A photo of ' + category_name]).to(self.device)
-                    text_feature = self.clip_model.encode_text(text_input).detach()  # [1, 512]
-                    self.clip_text_feature.update({category_name: text_feature})
+        self.enable_clip = False
+        # if self.enable_clip:
+        #     self.clip_model, _ = clip.load('ViT-B/32', self.device)
+        #     self.clip_model = self.clip_model.eval().requires_grad_(False)
+        #     self.clip_mean = [0.48145466, 0.4578275, 0.40821073]
+        #     self.clip_std = [0.26862954, 0.26130258, 0.27577711]
+        #     self.clip_reso = 224
+        #     self.clip_render_size = 64
+        #     self.enable_clip_text = cfgs.get('enable_clip_text', False)
+        #     if self.enable_clip_text:
+        #         self.clip_text_feature = {}
+        #         for category_name in ['bear', 'elephant', 'horse', 'sheep', 'cow', 'zebra', 'giraffe']:
+        #             text_input = clip.tokenize(['A photo of ' + category_name]).to(self.device)
+        #             text_feature = self.clip_model.encode_text(text_input).detach()  # [1, 512]
+        #             self.clip_text_feature.update({category_name: text_feature})
 
         self.enable_disc = cfgs.get('enable_disc', False)
         if self.enable_disc:
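
Note: the added `self.enable_clip = False` line hard-disables the CLIP path regardless of the `enable_clip` config flag, which is what makes it safe to drop `clip==1.0` from requirements.txt. For reference, the commented-out block precomputes CLIP text embeddings for a fixed list of animal categories; a minimal standalone sketch of that pattern (assuming the OpenAI `clip` package and torch are installed, which the repo no longer requires) looks like this:

# Standalone sketch of the CLIP text-feature precomputation disabled by this commit.
import torch
import clip

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ViT-B/32 produces 512-dimensional text embeddings.
clip_model, _ = clip.load('ViT-B/32', device)
clip_model = clip_model.eval().requires_grad_(False)

clip_text_feature = {}
for category_name in ['bear', 'elephant', 'horse', 'sheep', 'cow', 'zebra', 'giraffe']:
    # Tokenize a single prompt and encode it; the result has shape [1, 512].
    text_input = clip.tokenize(['A photo of ' + category_name]).to(device)
    with torch.no_grad():
        text_feature = clip_model.encode_text(text_input)
    clip_text_feature[category_name] = text_feature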