dn6 HF staff committed on
Commit
b3654c7
1 Parent(s): 621fc2d

update diffusers weights

Browse files
image_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "StableCascade-prior/image_encoder",
3
  "architectures": [
4
  "CLIPVisionModelWithProjection"
5
  ],
@@ -18,6 +18,6 @@
18
  "num_hidden_layers": 24,
19
  "patch_size": 14,
20
  "projection_dim": 768,
21
- "torch_dtype": "bfloat16",
22
- "transformers_version": "4.38.0.dev0"
23
  }
 
1
  {
2
+ "_name_or_path": "openai/clip-vit-large-patch14",
3
  "architectures": [
4
  "CLIPVisionModelWithProjection"
5
  ],
 
18
  "num_hidden_layers": 24,
19
  "patch_size": 14,
20
  "projection_dim": 768,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.38.2"
23
  }
image_encoder/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4b33d864f89a793357a768cb07d0dc18d6a14e6664f4110a0d535ca9ba78da8
3
- size 607980488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77b33d2a3a643650857672e880ccf73adbaf114fbbadec36d142ee9d48af7e20
3
+ size 1215912728
model_index.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_class_name": "StableCascadePriorPipeline",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "_name_or_path": "StableCascade-prior/",
5
  "feature_extractor": [
6
  "transformers",
7
  "CLIPImageProcessor"
@@ -11,8 +10,8 @@
11
  "CLIPVisionModelWithProjection"
12
  ],
13
  "prior": [
14
- "stable_cascade",
15
- "StableCascadeUnet"
16
  ],
17
  "resolution_multiple": 42.67,
18
  "scheduler": [
 
1
  {
2
  "_class_name": "StableCascadePriorPipeline",
3
+ "_diffusers_version": "0.27.0.dev0",
 
4
  "feature_extractor": [
5
  "transformers",
6
  "CLIPImageProcessor"
 
10
  "CLIPVisionModelWithProjection"
11
  ],
12
  "prior": [
13
+ "diffusers",
14
+ "StableCascadeUNet"
15
  ],
16
  "resolution_multiple": 42.67,
17
  "scheduler": [
prior/config.json CHANGED
@@ -1,61 +1,64 @@
1
  {
2
- "_class_name": "StableCascadeUnet",
3
- "_diffusers_version": "0.26.0.dev0",
4
- "_name_or_path": "StableCascade-prior/prior",
5
- "block_repeat": [
6
- [
7
- 1,
8
- 1
9
- ],
10
- [
11
- 1,
12
- 1
13
- ]
14
  ],
15
- "blocks": [
16
  [
17
- 8,
18
- 24
 
19
  ],
20
  [
21
- 24,
22
- 8
 
23
  ]
24
  ],
25
- "c_clip_img": 768,
26
- "c_clip_seq": 4,
27
- "c_clip_text": 1280,
28
- "c_clip_text_pooled": 1280,
29
- "c_cond": 2048,
30
- "c_effnet": null,
31
- "c_hidden": [
32
- 2048,
33
- 2048
 
 
 
34
  ],
35
- "c_in": 16,
36
- "c_out": 16,
37
- "c_pixels": null,
38
- "c_r": 64,
39
  "dropout": [
40
  0.1,
41
  0.1
42
  ],
 
 
43
  "kernel_size": 3,
44
- "level_config": [
45
- "CTA",
46
- "CTA"
47
- ],
48
- "nhead": [
49
  32,
50
  32
51
  ],
 
52
  "patch_size": 1,
 
53
  "self_attn": true,
54
  "switch_level": [
55
  false
56
  ],
57
- "t_conds": [
58
  "sca",
59
  "crp"
 
 
 
 
 
 
 
 
 
60
  ]
61
  }
 
1
  {
2
+ "_class_name": "StableCascadeUNet",
3
+ "_diffusers_version": "0.27.0.dev0",
4
+ "block_out_channels": [
5
+ 2048,
6
+ 2048
 
 
 
 
 
 
 
7
  ],
8
+ "block_types_per_layer": [
9
  [
10
+ "SDCascadeResBlock",
11
+ "SDCascadeTimestepBlock",
12
+ "SDCascadeAttnBlock"
13
  ],
14
  [
15
+ "SDCascadeResBlock",
16
+ "SDCascadeTimestepBlock",
17
+ "SDCascadeAttnBlock"
18
  ]
19
  ],
20
+ "clip_image_in_channels": 768,
21
+ "clip_seq": 4,
22
+ "clip_text_in_channels": 1280,
23
+ "clip_text_pooled_in_channels": 1280,
24
+ "conditioning_dim": 2048,
25
+ "down_blocks_repeat_mappers": [
26
+ 1,
27
+ 1
28
+ ],
29
+ "down_num_layers_per_block": [
30
+ 8,
31
+ 24
32
  ],
 
 
 
 
33
  "dropout": [
34
  0.1,
35
  0.1
36
  ],
37
+ "effnet_in_channels": null,
38
+ "in_channels": 16,
39
  "kernel_size": 3,
40
+ "num_attention_heads": [
 
 
 
 
41
  32,
42
  32
43
  ],
44
+ "out_channels": 16,
45
  "patch_size": 1,
46
+ "pixel_mapper_in_channels": null,
47
  "self_attn": true,
48
  "switch_level": [
49
  false
50
  ],
51
+ "timestep_conditioning_type": [
52
  "sca",
53
  "crp"
54
+ ],
55
+ "timestep_ratio_embedding_dim": 64,
56
+ "up_blocks_repeat_mappers": [
57
+ 1,
58
+ 1
59
+ ],
60
+ "up_num_layers_per_block": [
61
+ 24,
62
+ 8
63
  ]
64
  }
prior/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a4cd9540f327f2fb4ac09179e4e87912a01cdb1b3b86c79f0f853976fb4c98
3
- size 7178377816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2c7aa62c503780b85f74fd513b1b99c12ea4f83422bdbad5ac264aa68efb4b
3
+ size 14356584672
scheduler/scheduler_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_class_name": "DDPMWuerstchenScheduler",
3
- "_diffusers_version": "0.26.0.dev0",
4
  "s": 0.008,
5
  "scaler": 1.0
6
  }
 
1
  {
2
  "_class_name": "DDPMWuerstchenScheduler",
3
+ "_diffusers_version": "0.27.0.dev0",
4
  "s": 0.008,
5
  "scaler": 1.0
6
  }
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "StableCascade-prior/text_encoder",
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
@@ -19,7 +19,7 @@
19
  "num_hidden_layers": 32,
20
  "pad_token_id": 1,
21
  "projection_dim": 1280,
22
- "torch_dtype": "bfloat16",
23
- "transformers_version": "4.38.0.dev0",
24
  "vocab_size": 49408
25
  }
 
1
  {
2
+ "_name_or_path": "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
 
19
  "num_hidden_layers": 32,
20
  "pad_token_id": 1,
21
  "projection_dim": 1280,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 49408
25
  }
text_encoder/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:260e0127aca3c89db813637ae659ebb822cb07af71fedc16cbd980e9518dfdcd
3
- size 1389382688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa5b2e6f4c2efc2d82e4b8312faec1a5540eabfc6415126c9a05c8436a530ef4
3
+ size 2778702264
tokenizer/tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 77,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 77
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 49407,
16
- "pad_type_id": 0,
17
- "pad_token": "<|endoftext|>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 49406,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 49406,