jxie committed on
Commit
6660448
1 Parent(s): 5560897

Upload SMAForSSL

Browse files
Files changed (2) hide show
  1. config.json +103 -0
  2. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/iris/u/jwxie/workspace/domain-agnostic-pretraining/examples/research_projects/domain-agnostic-pretraining/saved_models/image_pretrained/imagenet100_guided_rand_select_masking_recon_tiny_144_0.75_noise0.25-adamw_torch-lr1e-4-wd0.01-ws10000/checkpoint-390000",
3
+ "architectures": [
4
+ "SMAForSSL"
5
+ ],
6
+ "attention_dropout_prob": 0.0,
7
+ "cross_attention_widening_factor": 1,
8
+ "cross_eval_noising_args": null,
9
+ "cross_train_noising_args": [
10
+ [
11
+ "RandomlySelectedCrossAttentionMasking",
12
+ {
13
+ "exclude_seen_reconstruction": true,
14
+ "masking_ratio": 0.75,
15
+ "num_per_query": 144,
16
+ "select_initial_ratio": 1.0,
17
+ "varying_length": false
18
+ }
19
+ ]
20
+ ],
21
+ "decoder_attention_channels": 768,
22
+ "decoder_heads": 1,
23
+ "decoder_latent_channels": 768,
24
+ "decoder_type": "cross_attention",
25
+ "dense_use_bias": true,
26
+ "drop_path_rate": 0.0,
27
+ "embedded_channels": 196,
28
+ "encoder_cross_attention_channels": 196,
29
+ "encoder_type": "cross_attention",
30
+ "final_project": true,
31
+ "hidden_act": "gelu",
32
+ "hidden_dropout_prob": 0.0,
33
+ "initializer_range": 0.02,
34
+ "input_channels": 3,
35
+ "input_type": "continuous",
36
+ "latent_channels": 768,
37
+ "layer_norm_eps": 1e-12,
38
+ "layernorm_eps": 1e-12,
39
+ "loss_fn": "mse",
40
+ "max_position_embeddings": 36864,
41
+ "model_type": "perceiver_sma",
42
+ "num_blocks": 1,
43
+ "num_cross_attention_heads": 1,
44
+ "num_discrete_tokens": 262,
45
+ "num_latents": 512,
46
+ "num_outputs": 1,
47
+ "num_self_attends_per_block": 8,
48
+ "num_self_attention_heads": 8,
49
+ "output_channels": 262,
50
+ "pe_initializer_range": 0.02,
51
+ "post_decoder_layers": null,
52
+ "project_after_concat": true,
53
+ "qk_channels": null,
54
+ "self_attention_widening_factor": 4,
55
+ "share_decoder_queries": true,
56
+ "share_embedding_weights": true,
57
+ "teacher_args": {
58
+ "auxiliary_loss_fn": "mse",
59
+ "auxiliary_loss_weight": 1.0,
60
+ "ema_args": {
61
+ "ema_decay_end": 0.0,
62
+ "ema_decay_start": 0.0
63
+ },
64
+ "eval_transform_args": [
65
+ [
66
+ "RandomlySelectedCrossAttentionMasking",
67
+ {
68
+ "exclude_seen_reconstruction": true,
69
+ "masking_ratio": 0.75,
70
+ "num_per_query": 144,
71
+ "select_initial_ratio": 1.0,
72
+ "varying_length": false
73
+ }
74
+ ]
75
+ ],
76
+ "mask_replace": null,
77
+ "norm_targets": true,
78
+ "num_layer_target_avg": null,
79
+ "reconstruction_decoder_args": {
80
+ "num_heads": 1,
81
+ "num_outputs": 36864,
82
+ "output_channels": 3,
83
+ "qk_channels": 196,
84
+ "query_num_channels": 196,
85
+ "share_decoder_queries": true,
86
+ "share_embedding_weights": true,
87
+ "use_query_residual": true,
88
+ "v_channels": 196
89
+ },
90
+ "reconstruction_loss_fn": "mse",
91
+ "reconstruction_loss_weight": 1.0,
92
+ "reconstruction_weighted_loss": false,
93
+ "target_normalization_fn": "layernorm",
94
+ "train_transform_args": null
95
+ },
96
+ "teacher_name": "ReconstructionTeacher",
97
+ "torch_dtype": "float32",
98
+ "transformers_version": "4.26.0.dev0",
99
+ "use_decoder": false,
100
+ "use_position_embeddings": true,
101
+ "use_query_residual": true,
102
+ "v_channels": 768
103
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50a57499bba6615f50b0cabcf4b033c6211cf647817da895fd27e2173666fbb
3
+ size 267671229