kmpartner committed on
Commit
8fc92d2
1 Parent(s): 858c800

Upload folder using huggingface_hub

Browse files
log_loss.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ epoch,step,global_step,loss_total,loss_sd,loss_kd_output,loss_kd_feat,lr,lamb_sd,lamb_kd_output,lamb_kd_feat
logs/text2image-fine-tune/1717745111.570705/events.out.tfevents.1717745111.9748aac584ae.7759.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74373b7c8749a20bc236c19ab2dedbe9cc3219d5b4671ee77f3ea48aaa9c00ba
3
+ size 2374
logs/text2image-fine-tune/1717745111.57284/hparams.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ allow_tf32: false
6
+ cache_dir: null
7
+ center_crop: true
8
+ checkpointing_steps: 5
9
+ checkpoints_total_limit: null
10
+ dataloader_num_workers: 0
11
+ dataset_config_name: null
12
+ enable_xformers_memory_efficient_attention: false
13
+ gradient_accumulation_steps: 4
14
+ gradient_checkpointing: true
15
+ lambda_kd_feat: 1.0
16
+ lambda_kd_output: 1.0
17
+ lambda_sd: 1.0
18
+ learning_rate: 5.0e-05
19
+ local_rank: -1
20
+ logging_dir: logs
21
+ lr_scheduler: constant
22
+ lr_warmup_steps: 0
23
+ max_grad_norm: 1.0
24
+ max_train_samples: null
25
+ max_train_steps: 0
26
+ mixed_precision: null
27
+ non_ema_revision: null
28
+ num_train_epochs: 0
29
+ num_valid_images: 2
30
+ output_dir: ./results/toy_bk_tiny
31
+ pretrained_model_name_or_path: kmpartner/bk-test
32
+ random_flip: true
33
+ report_to: all
34
+ resolution: 512
35
+ resume_from_checkpoint: null
36
+ revision: null
37
+ scale_lr: false
38
+ seed: 1234
39
+ train_batch_size: 2
40
+ train_data_dir: ./data/laion_aes/preprocessed_11k
41
+ unet_config_name: bk_tiny
42
+ unet_config_path: ./src/unet_config
43
+ use_8bit_adam: false
44
+ use_copy_weight_from_teacher: true
45
+ use_ema: true
46
+ valid_prompt: a golden vase with different flowers
47
+ valid_steps: 5
logs/text2image-fine-tune/events.out.tfevents.1717745105.9748aac584ae.7759.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06470ca76d2530d61d2e9cc63077b60e91f5cce062a36a2ae2ff46e23982a8a9
3
+ size 88
model_index.json CHANGED
@@ -1,11 +1,7 @@
1
  {
2
- "_class_name": "StableDiffusionControlNetPipeline",
3
  "_diffusers_version": "0.28.2",
4
- "_name_or_path": "runwayml/stable-diffusion-v1-5",
5
- "controlnet": [
6
- "diffusers",
7
- "ControlNetModel"
8
- ],
9
  "feature_extractor": [
10
  "transformers",
11
  "CLIPImageProcessor"
@@ -37,6 +33,6 @@
37
  ],
38
  "vae": [
39
  "diffusers",
40
- "AutoencoderKL"
41
  ]
42
  }
 
1
  {
2
+ "_class_name": "StableDiffusionPipeline",
3
  "_diffusers_version": "0.28.2",
4
+ "_name_or_path": "kmpartner/bk-test",
 
 
 
 
5
  "feature_extractor": [
6
  "transformers",
7
  "CLIPImageProcessor"
 
33
  ],
34
  "vae": [
35
  "diffusers",
36
+ "AutoencoderTiny"
37
  ]
38
  }
safety_checker/config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
- "_name_or_path": "/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9/safety_checker",
 
3
  "architectures": [
4
  "StableDiffusionSafetyChecker"
5
  ],
@@ -8,21 +9,160 @@
8
  "model_type": "clip",
9
  "projection_dim": 768,
10
  "text_config": {
 
 
 
 
 
 
 
 
 
 
 
 
11
  "dropout": 0.0,
 
 
 
 
 
 
 
 
12
  "hidden_size": 768,
 
 
 
 
 
 
13
  "intermediate_size": 3072,
 
 
 
 
 
 
 
 
 
 
 
14
  "model_type": "clip_text_model",
15
- "num_attention_heads": 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  },
17
  "torch_dtype": "float32",
18
  "transformers_version": "4.41.2",
19
  "vision_config": {
 
 
 
 
 
 
 
 
 
 
 
 
20
  "dropout": 0.0,
 
 
 
 
 
 
 
 
21
  "hidden_size": 1024,
 
 
 
 
 
 
 
22
  "intermediate_size": 4096,
 
 
 
 
 
 
 
 
 
 
23
  "model_type": "clip_vision_model",
 
24
  "num_attention_heads": 16,
 
 
 
25
  "num_hidden_layers": 24,
26
- "patch_size": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  }
 
1
  {
2
+ "_commit_hash": "858c800d0667613dc2f87f859e4843a95f8120cc",
3
+ "_name_or_path": "/root/.cache/huggingface/hub/models--kmpartner--bk-test/snapshots/858c800d0667613dc2f87f859e4843a95f8120cc/safety_checker",
4
  "architectures": [
5
  "StableDiffusionSafetyChecker"
6
  ],
 
9
  "model_type": "clip",
10
  "projection_dim": 768,
11
  "text_config": {
12
+ "_name_or_path": "",
13
+ "add_cross_attention": false,
14
+ "architectures": null,
15
+ "attention_dropout": 0.0,
16
+ "bad_words_ids": null,
17
+ "begin_suppress_tokens": null,
18
+ "bos_token_id": 0,
19
+ "chunk_size_feed_forward": 0,
20
+ "cross_attention_hidden_size": null,
21
+ "decoder_start_token_id": null,
22
+ "diversity_penalty": 0.0,
23
+ "do_sample": false,
24
  "dropout": 0.0,
25
+ "early_stopping": false,
26
+ "encoder_no_repeat_ngram_size": 0,
27
+ "eos_token_id": 2,
28
+ "exponential_decay_length_penalty": null,
29
+ "finetuning_task": null,
30
+ "forced_bos_token_id": null,
31
+ "forced_eos_token_id": null,
32
+ "hidden_act": "quick_gelu",
33
  "hidden_size": 768,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1"
37
+ },
38
+ "initializer_factor": 1.0,
39
+ "initializer_range": 0.02,
40
  "intermediate_size": 3072,
41
+ "is_decoder": false,
42
+ "is_encoder_decoder": false,
43
+ "label2id": {
44
+ "LABEL_0": 0,
45
+ "LABEL_1": 1
46
+ },
47
+ "layer_norm_eps": 1e-05,
48
+ "length_penalty": 1.0,
49
+ "max_length": 20,
50
+ "max_position_embeddings": 77,
51
+ "min_length": 0,
52
  "model_type": "clip_text_model",
53
+ "no_repeat_ngram_size": 0,
54
+ "num_attention_heads": 12,
55
+ "num_beam_groups": 1,
56
+ "num_beams": 1,
57
+ "num_hidden_layers": 12,
58
+ "num_return_sequences": 1,
59
+ "output_attentions": false,
60
+ "output_hidden_states": false,
61
+ "output_scores": false,
62
+ "pad_token_id": 1,
63
+ "prefix": null,
64
+ "problem_type": null,
65
+ "projection_dim": 512,
66
+ "pruned_heads": {},
67
+ "remove_invalid_values": false,
68
+ "repetition_penalty": 1.0,
69
+ "return_dict": true,
70
+ "return_dict_in_generate": false,
71
+ "sep_token_id": null,
72
+ "suppress_tokens": null,
73
+ "task_specific_params": null,
74
+ "temperature": 1.0,
75
+ "tf_legacy_loss": false,
76
+ "tie_encoder_decoder": false,
77
+ "tie_word_embeddings": true,
78
+ "tokenizer_class": null,
79
+ "top_k": 50,
80
+ "top_p": 1.0,
81
+ "torch_dtype": null,
82
+ "torchscript": false,
83
+ "transformers_version": "4.27.4",
84
+ "typical_p": 1.0,
85
+ "use_bfloat16": false,
86
+ "vocab_size": 49408
87
  },
88
  "torch_dtype": "float32",
89
  "transformers_version": "4.41.2",
90
  "vision_config": {
91
+ "_name_or_path": "",
92
+ "add_cross_attention": false,
93
+ "architectures": null,
94
+ "attention_dropout": 0.0,
95
+ "bad_words_ids": null,
96
+ "begin_suppress_tokens": null,
97
+ "bos_token_id": null,
98
+ "chunk_size_feed_forward": 0,
99
+ "cross_attention_hidden_size": null,
100
+ "decoder_start_token_id": null,
101
+ "diversity_penalty": 0.0,
102
+ "do_sample": false,
103
  "dropout": 0.0,
104
+ "early_stopping": false,
105
+ "encoder_no_repeat_ngram_size": 0,
106
+ "eos_token_id": null,
107
+ "exponential_decay_length_penalty": null,
108
+ "finetuning_task": null,
109
+ "forced_bos_token_id": null,
110
+ "forced_eos_token_id": null,
111
+ "hidden_act": "quick_gelu",
112
  "hidden_size": 1024,
113
+ "id2label": {
114
+ "0": "LABEL_0",
115
+ "1": "LABEL_1"
116
+ },
117
+ "image_size": 224,
118
+ "initializer_factor": 1.0,
119
+ "initializer_range": 0.02,
120
  "intermediate_size": 4096,
121
+ "is_decoder": false,
122
+ "is_encoder_decoder": false,
123
+ "label2id": {
124
+ "LABEL_0": 0,
125
+ "LABEL_1": 1
126
+ },
127
+ "layer_norm_eps": 1e-05,
128
+ "length_penalty": 1.0,
129
+ "max_length": 20,
130
+ "min_length": 0,
131
  "model_type": "clip_vision_model",
132
+ "no_repeat_ngram_size": 0,
133
  "num_attention_heads": 16,
134
+ "num_beam_groups": 1,
135
+ "num_beams": 1,
136
+ "num_channels": 3,
137
  "num_hidden_layers": 24,
138
+ "num_return_sequences": 1,
139
+ "output_attentions": false,
140
+ "output_hidden_states": false,
141
+ "output_scores": false,
142
+ "pad_token_id": null,
143
+ "patch_size": 14,
144
+ "prefix": null,
145
+ "problem_type": null,
146
+ "projection_dim": 512,
147
+ "pruned_heads": {},
148
+ "remove_invalid_values": false,
149
+ "repetition_penalty": 1.0,
150
+ "return_dict": true,
151
+ "return_dict_in_generate": false,
152
+ "sep_token_id": null,
153
+ "suppress_tokens": null,
154
+ "task_specific_params": null,
155
+ "temperature": 1.0,
156
+ "tf_legacy_loss": false,
157
+ "tie_encoder_decoder": false,
158
+ "tie_word_embeddings": true,
159
+ "tokenizer_class": null,
160
+ "top_k": 50,
161
+ "top_p": 1.0,
162
+ "torch_dtype": null,
163
+ "torchscript": false,
164
+ "transformers_version": "4.27.4",
165
+ "typical_p": 1.0,
166
+ "use_bfloat16": false
167
  }
168
  }
safety_checker/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
3
- size 1215979664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cfe53105625af8c00faac32a430626641cce686454f3c39d837f14397d858b
3
+ size 1215981832
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9/text_encoder",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
@@ -20,6 +20,6 @@
20
  "pad_token_id": 1,
21
  "projection_dim": 768,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.41.2",
24
  "vocab_size": 49408
25
  }
 
1
  {
2
+ "_name_or_path": "kmpartner/bk-test",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
 
20
  "pad_token_id": 1,
21
  "projection_dim": 768,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.27.4",
24
  "vocab_size": 49408
25
  }
text_encoder/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
3
- size 492265168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c10601ece1342fede0300fb88db71c09ece8be491b71875d18b9799a5e6c15
3
+ size 492265880
tokenizer/tokenizer_config.json CHANGED
@@ -18,13 +18,35 @@
18
  "special": true
19
  }
20
  },
21
- "bos_token": "<|startoftext|>",
 
 
 
 
 
 
 
22
  "clean_up_tokenization_spaces": true,
23
  "do_lower_case": true,
24
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
25
  "errors": "replace",
26
  "model_max_length": 77,
27
  "pad_token": "<|endoftext|>",
 
28
  "tokenizer_class": "CLIPTokenizer",
29
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
30
  }
 
18
  "special": true
19
  }
20
  },
21
+ "bos_token": {
22
+ "__type": "AddedToken",
23
+ "content": "<|startoftext|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false
28
+ },
29
  "clean_up_tokenization_spaces": true,
30
  "do_lower_case": true,
31
+ "eos_token": {
32
+ "__type": "AddedToken",
33
+ "content": "<|endoftext|>",
34
+ "lstrip": false,
35
+ "normalized": true,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ },
39
  "errors": "replace",
40
  "model_max_length": 77,
41
  "pad_token": "<|endoftext|>",
42
+ "special_tokens_map_file": "/root/.cache/huggingface/hub/models--kmpartner--bk-test/snapshots/858c800d0667613dc2f87f859e4843a95f8120cc/tokenizer/special_tokens_map.json",
43
  "tokenizer_class": "CLIPTokenizer",
44
+ "unk_token": {
45
+ "__type": "AddedToken",
46
+ "content": "<|endoftext|>",
47
+ "lstrip": false,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ }
52
  }
unet/config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
  "_diffusers_version": "0.28.2",
4
- "_name_or_path": "/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9/unet",
5
  "act_fn": "silu",
6
  "addition_embed_type": null,
7
  "addition_embed_type_num_heads": 64,
@@ -11,7 +10,6 @@
11
  "block_out_channels": [
12
  320,
13
  640,
14
- 1280,
15
  1280
16
  ],
17
  "center_input_sample": false,
@@ -24,8 +22,7 @@
24
  "down_block_types": [
25
  "CrossAttnDownBlock2D",
26
  "CrossAttnDownBlock2D",
27
- "CrossAttnDownBlock2D",
28
- "DownBlock2D"
29
  ],
30
  "downsample_padding": 1,
31
  "dropout": 0.0,
@@ -35,10 +32,10 @@
35
  "flip_sin_to_cos": true,
36
  "freq_shift": 0,
37
  "in_channels": 4,
38
- "layers_per_block": 2,
39
  "mid_block_only_cross_attention": null,
40
  "mid_block_scale_factor": 1,
41
- "mid_block_type": "UNetMidBlock2DCrossAttn",
42
  "norm_eps": 1e-05,
43
  "norm_num_groups": 32,
44
  "num_attention_heads": null,
@@ -58,7 +55,6 @@
58
  "timestep_post_act": null,
59
  "transformer_layers_per_block": 1,
60
  "up_block_types": [
61
- "UpBlock2D",
62
  "CrossAttnUpBlock2D",
63
  "CrossAttnUpBlock2D",
64
  "CrossAttnUpBlock2D"
 
1
  {
2
  "_class_name": "UNet2DConditionModel",
3
  "_diffusers_version": "0.28.2",
 
4
  "act_fn": "silu",
5
  "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
 
10
  "block_out_channels": [
11
  320,
12
  640,
 
13
  1280
14
  ],
15
  "center_input_sample": false,
 
22
  "down_block_types": [
23
  "CrossAttnDownBlock2D",
24
  "CrossAttnDownBlock2D",
25
+ "CrossAttnDownBlock2D"
 
26
  ],
27
  "downsample_padding": 1,
28
  "dropout": 0.0,
 
32
  "flip_sin_to_cos": true,
33
  "freq_shift": 0,
34
  "in_channels": 4,
35
+ "layers_per_block": 1,
36
  "mid_block_only_cross_attention": null,
37
  "mid_block_scale_factor": 1,
38
+ "mid_block_type": null,
39
  "norm_eps": 1e-05,
40
  "norm_num_groups": 32,
41
  "num_attention_heads": null,
 
55
  "timestep_post_act": null,
56
  "transformer_layers_per_block": 1,
57
  "up_block_types": [
 
58
  "CrossAttnUpBlock2D",
59
  "CrossAttnUpBlock2D",
60
  "CrossAttnUpBlock2D"
unet/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d27cd69d4a0aa32105087a619f32a51bc087e133be93fe23da92f3c0bcc07d79
3
- size 3438167536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e7506c2f819d897010902927b8f204679374d7610e0cb45cbc37489fb007ee9
3
+ size 1293583616
vae/config.json CHANGED
@@ -1,34 +1,45 @@
1
  {
2
- "_class_name": "AutoencoderKL",
3
  "_diffusers_version": "0.28.2",
4
- "_name_or_path": "/root/.cache/huggingface/hub/models--runwayml--stable-diffusion-v1-5/snapshots/1d0c4ebf6ff58a5caecab40fa1406526bca4b5b9/vae",
5
- "act_fn": "silu",
6
  "block_out_channels": [
7
- 128,
8
- 256,
9
- 512,
10
- 512
11
  ],
12
- "down_block_types": [
13
- "DownEncoderBlock2D",
14
- "DownEncoderBlock2D",
15
- "DownEncoderBlock2D",
16
- "DownEncoderBlock2D"
17
  ],
18
- "force_upcast": true,
 
 
 
 
 
 
19
  "in_channels": 3,
20
  "latent_channels": 4,
21
- "latents_mean": null,
22
- "latents_std": null,
23
- "layers_per_block": 2,
24
- "norm_num_groups": 32,
 
 
 
 
 
 
 
 
 
 
25
  "out_channels": 3,
26
- "sample_size": 512,
27
- "scaling_factor": 0.18215,
28
- "up_block_types": [
29
- "UpDecoderBlock2D",
30
- "UpDecoderBlock2D",
31
- "UpDecoderBlock2D",
32
- "UpDecoderBlock2D"
33
- ]
34
  }
 
1
  {
2
+ "_class_name": "AutoencoderTiny",
3
  "_diffusers_version": "0.28.2",
4
+ "_name_or_path": "madebyollin/taesd",
5
+ "act_fn": "relu",
6
  "block_out_channels": [
7
+ 64,
8
+ 64,
9
+ 64,
10
+ 64
11
  ],
12
+ "decoder_block_out_channels": [
13
+ 64,
14
+ 64,
15
+ 64,
16
+ 64
17
  ],
18
+ "encoder_block_out_channels": [
19
+ 64,
20
+ 64,
21
+ 64,
22
+ 64
23
+ ],
24
+ "force_upcast": false,
25
  "in_channels": 3,
26
  "latent_channels": 4,
27
+ "latent_magnitude": 3,
28
+ "latent_shift": 0.5,
29
+ "num_decoder_blocks": [
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 1
34
+ ],
35
+ "num_encoder_blocks": [
36
+ 1,
37
+ 3,
38
+ 3,
39
+ 3
40
+ ],
41
  "out_channels": 3,
42
+ "scaling_factor": 1.0,
43
+ "upsample_fn": "nearest",
44
+ "upsampling_scaling_factor": 2
 
 
 
 
 
45
  }
vae/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4d2b5932bb4151e54e694fd31ccf51fca908223c9485bd56cd0e1d83ad94c49
3
- size 334643268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7956d561b1efbd861ad9b03fd8f01510f9e87eddc07bdfd20837009433f6ee5
3
+ size 9793292