x3d / config.json
zhong-al's picture
Upload model
6ba4e1b verified
{
"architectures": [
"X3DModel"
],
"auto_map": {
"AutoConfig": "configuration_x3d.X3DConfig",
"AutoModel": "modeling_x3d.X3DModel"
},
"cfg": {
"AUG": {
"AA_TYPE": "rand-m9-mstd0.5-inc1",
"COLOR_JITTER": 0.4,
"ENABLE": false,
"GEN_MASK_LOADER": false,
"INTERPOLATION": "bicubic",
"MASK_FRAMES": false,
"MASK_RATIO": 0.0,
"MASK_TUBE": false,
"MASK_WINDOW_SIZE": [
8,
7,
7
],
"MAX_MASK_PATCHES_PER_BLOCK": null,
"NUM_SAMPLE": 1,
"RE_COUNT": 1,
"RE_MODE": "pixel",
"RE_PROB": 0.25,
"RE_SPLIT": false
},
"AVA": {
"ANNOTATION_DIR": "/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/",
"BGR": false,
"DETECTION_SCORE_THRESH": 0.9,
"EXCLUSION_FILE": "ava_val_excluded_timestamps_v2.2.csv",
"FRAME_DIR": "/mnt/fair-flash3-east/ava_trainval_frames.img/",
"FRAME_LIST_DIR": "/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/",
"FULL_TEST_ON_VAL": false,
"GROUNDTRUTH_FILE": "ava_val_v2.2.csv",
"IMG_PROC_BACKEND": "cv2",
"LABEL_MAP_FILE": "ava_action_list_v2.2_for_activitynet_2019.pbtxt",
"TEST_FORCE_FLIP": false,
"TEST_LISTS": [
"val.csv"
],
"TEST_PREDICT_BOX_LISTS": [
"ava_val_predicted_boxes.csv"
],
"TRAIN_GT_BOX_LISTS": [
"ava_train_v2.2.csv"
],
"TRAIN_LISTS": [
"train.csv"
],
"TRAIN_PCA_JITTER_ONLY": true,
"TRAIN_PREDICT_BOX_LISTS": [],
"TRAIN_USE_COLOR_AUGMENTATION": false
},
"BENCHMARK": {
"LOG_PERIOD": 100,
"NUM_EPOCHS": 5,
"SHUFFLE": true
},
"BN": {
"GLOBAL_SYNC": false,
"NORM_TYPE": "sync_batchnorm",
"NUM_BATCHES_PRECISE": 200,
"NUM_SPLITS": 1,
"NUM_SYNC_DEVICES": 1,
"USE_PRECISE_STATS": true,
"WEIGHT_DECAY": 0.0
},
"CONTRASTIVE": {
"BN_MLP": false,
"BN_SYNC_MLP": false,
"DELTA_CLIPS_MAX": 1.7976931348623157e+308,
"DELTA_CLIPS_MIN": -1.7976931348623157e+308,
"DIM": 128,
"INTERP_MEMORY": false,
"KNN_ON": true,
"LENGTH": 239975,
"LOCAL_SHUFFLE_BN": true,
"MEM_TYPE": "1d",
"MLP_DIM": 2048,
"MOCO_MULTI_VIEW_QUEUE": false,
"MOMENTUM": 0.5,
"MOMENTUM_ANNEALING": false,
"NUM_CLASSES_DOWNSTREAM": 400,
"NUM_MLP_LAYERS": 1,
"PREDICTOR_DEPTHS": [],
"QUEUE_LEN": 65536,
"SEQUENTIAL": false,
"SIMCLR_DIST_ON": true,
"SWAV_QEUE_LEN": 0,
"T": 0.07,
"TYPE": "mem"
},
"DATA": {
"COLOR_RND_GRAYSCALE": 0.0,
"DECODING_BACKEND": "torchvision",
"DECODING_SHORT_SIZE": 256,
"DUMMY_LOAD": false,
"ENSEMBLE_METHOD": "max",
"IN22K_TRAINVAL": false,
"IN22k_VAL_IN1K": "",
"INPUT_CHANNEL_NUM": [
3
],
"INV_UNIFORM_SAMPLE": true,
"IN_VAL_CROP_RATIO": 0.875,
"LOADER_CHUNK_OVERALL_SIZE": 0,
"LOADER_CHUNK_SIZE": 0,
"MEAN": [
0.45,
0.45,
0.45
],
"MULTI_LABEL": true,
"NUM_FRAMES": 16,
"PATH_LABEL_SEPARATOR": " ",
"PATH_PREFIX": "kabr/KABR/dataset/image",
"PATH_TO_DATA_DIR": "kabr/KABR/annotation",
"PATH_TO_PRELOAD_IMDB": "",
"RANDOM_FLIP": true,
"REVERSE_INPUT_CHANNEL": true,
"SAMPLING_RATE": 5,
"SKIP_ROWS": 0,
"SSL_BLUR_SIGMA_MAX": [
0.0,
2.0
],
"SSL_BLUR_SIGMA_MIN": [
0.0,
0.1
],
"SSL_COLOR_BRI_CON_SAT": [
0.2,
0.2,
0.2
],
"SSL_COLOR_HUE": 0.1,
"SSL_COLOR_JITTER": true,
"SSL_MOCOV2_AUG": false,
"STD": [
0.225,
0.225,
0.225
],
"TARGET_FPS": 30,
"TEST_CROP_SIZE": 300,
"TIME_DIFF_PROB": 0.0,
"TRAIN_CROP_NUM_SPATIAL": 1,
"TRAIN_CROP_NUM_TEMPORAL": 1,
"TRAIN_CROP_SIZE": 300,
"TRAIN_JITTER_ASPECT_RELATIVE": [],
"TRAIN_JITTER_FPS": 0.0,
"TRAIN_JITTER_MOTION_SHIFT": false,
"TRAIN_JITTER_SCALES": [
300,
400
],
"TRAIN_JITTER_SCALES_RELATIVE": [],
"TRAIN_PCA_EIGVAL": [
0.225,
0.224,
0.229
],
"TRAIN_PCA_EIGVEC": [
[
-0.5675,
0.7192,
0.4009
],
[
-0.5808,
-0.0045,
-0.814
],
[
-0.5836,
-0.6948,
0.4203
]
],
"USE_OFFSET_SAMPLING": false
},
"DATA_LOADER": {
"ENABLE_MULTI_THREAD_DECODE": false,
"NUM_WORKERS": 8,
"PIN_MEMORY": true
},
"DEMO": {
"BUFFER_SIZE": 0,
"CLIP_VIS_SIZE": 10,
"COMMON_CLASS_NAMES": [
"watch (a person)",
"talk to (e.g., self, a person, a group)",
"listen to (a person)",
"touch (an object)",
"carry/hold (an object)",
"walk",
"sit",
"lie/sleep",
"bend/bow (at the waist)"
],
"COMMON_CLASS_THRES": 0.7,
"DETECTRON2_CFG": "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml",
"DETECTRON2_THRESH": 0.9,
"DETECTRON2_WEIGHTS": "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl",
"DISPLAY_HEIGHT": 0,
"DISPLAY_WIDTH": 0,
"ENABLE": false,
"FPS": 30,
"GT_BOXES": "",
"INPUT_FORMAT": "BGR",
"INPUT_VIDEO": "kabr/KABR/dataset/video/G0103.mp4",
"LABEL_FILE_PATH": "kabr/KABR/annotation/classes.json",
"NUM_CLIPS_SKIP": 1,
"NUM_VIS_INSTANCES": 1,
"OUTPUT_FILE": "kabr/KABR/dataset/predict/G0103.mp4",
"OUTPUT_FPS": -1,
"PREDS_BOXES": "",
"SLOWMO": 1,
"STARTING_SECOND": 900,
"THREAD_ENABLE": false,
"UNCOMMON_CLASS_THRES": 0.3,
"VIS_MODE": "thres",
"WEBCAM": -1
},
"DETECTION": {
"ALIGNED": true,
"ENABLE": false,
"ROI_XFORM_RESOLUTION": 7,
"SPATIAL_SCALE_FACTOR": 16
},
"DIST_BACKEND": "nccl",
"LOG_MODEL_INFO": true,
"LOG_PERIOD": 10,
"MASK": {
"DECODER_DEPTH": 0,
"DECODER_EMBED_DIM": 512,
"DECODER_SEP_POS_EMBED": false,
"DEC_KV_KERNEL": [],
"DEC_KV_STRIDE": [],
"ENABLE": false,
"HEAD_TYPE": "separate",
"MAE_ON": false,
"MAE_RND_MASK": false,
"NORM_PRED_PIXEL": true,
"PER_FRAME_MASKING": false,
"PRED_HOG": false,
"PRETRAIN_DEPTH": [
15
],
"SCALE_INIT_BY_DEPTH": false,
"TIME_STRIDE_LOSS": true
},
"MIXUP": {
"ALPHA": 0.8,
"CUTMIX_ALPHA": 1.0,
"ENABLE": false,
"LABEL_SMOOTH_VALUE": 0.1,
"PROB": 1.0,
"SWITCH_PROB": 0.5
},
"MODEL": {
"ACT_CHECKPOINT": false,
"ARCH": "x3d",
"DETACH_FINAL_FC": false,
"DROPCONNECT_RATE": 0.0,
"DROPOUT_RATE": 0.5,
"FC_INIT_STD": 0.01,
"FP16_ALLREDUCE": false,
"FROZEN_BN": false,
"HEAD_ACT": "sigmoid",
"LOSS_FUNC": "EQL",
"MODEL_NAME": "X3D",
"MULTI_PATHWAY_ARCH": [
"slowfast"
],
"NUM_CLASSES": 8,
"SINGLE_PATHWAY_ARCH": [
"2d",
"c2d",
"i3d",
"slow",
"x3d",
"mvit",
"maskmvit"
]
},
"MULTIGRID": {
"BN_BASE_SIZE": 8,
"DEFAULT_B": 0,
"DEFAULT_S": 0,
"DEFAULT_T": 0,
"EPOCH_FACTOR": 1.5,
"EVAL_FREQ": 3,
"LONG_CYCLE": false,
"LONG_CYCLE_FACTORS": [
[
0.25,
0.7071067811865476
],
[
0.5,
0.7071067811865476
],
[
0.5,
1
],
[
1,
1
]
],
"LONG_CYCLE_SAMPLING_RATE": 0,
"SHORT_CYCLE": false,
"SHORT_CYCLE_FACTORS": [
0.5,
0.7071067811865476
]
},
"MVIT": {
"CLS_EMBED_ON": true,
"DEPTH": 16,
"DIM_MUL": [],
"DIM_MUL_IN_ATT": false,
"DROPOUT_RATE": 0.0,
"DROPPATH_RATE": 0.1,
"EMBED_DIM": 96,
"HEAD_INIT_SCALE": 1.0,
"HEAD_MUL": [],
"LAYER_SCALE_INIT_VALUE": 0.0,
"MLP_RATIO": 4.0,
"MODE": "conv",
"NORM": "layernorm",
"NORM_STEM": false,
"NUM_HEADS": 1,
"PATCH_2D": false,
"PATCH_KERNEL": [
3,
7,
7
],
"PATCH_PADDING": [
2,
4,
4
],
"PATCH_STRIDE": [
2,
4,
4
],
"POOL_FIRST": false,
"POOL_KVQ_KERNEL": null,
"POOL_KV_STRIDE": [],
"POOL_KV_STRIDE_ADAPTIVE": null,
"POOL_Q_STRIDE": [],
"QKV_BIAS": true,
"REL_POS_SPATIAL": false,
"REL_POS_TEMPORAL": false,
"REL_POS_ZERO_INIT": false,
"RESIDUAL_POOLING": false,
"REV": {
"BUFFER_LAYERS": [],
"ENABLE": false,
"PRE_Q_FUSION": "avg",
"RESPATH_FUSE": "concat",
"RES_PATH": "conv"
},
"SEPARATE_QKV": false,
"SEP_POS_EMBED": false,
"USE_ABS_POS": true,
"USE_FIXED_SINCOS_POS": false,
"USE_MEAN_POOLING": false,
"ZERO_DECAY_POS_CLS": true
},
"NONLOCAL": {
"GROUP": [
[
1
],
[
1
],
[
1
],
[
1
]
],
"INSTANTIATION": "dot_product",
"LOCATION": [
[
[]
],
[
[]
],
[
[]
],
[
[]
]
],
"POOL": [
[
[
1,
2,
2
],
[
1,
2,
2
]
],
[
[
1,
2,
2
],
[
1,
2,
2
]
],
[
[
1,
2,
2
],
[
1,
2,
2
]
],
[
[
1,
2,
2
],
[
1,
2,
2
]
]
]
},
"NUM_GPUS": 0,
"NUM_SHARDS": 1,
"OUTPUT_DIR": "kabr/KABR/logs/x3d-l-kabr",
"RESNET": {
"DEPTH": 50,
"INPLACE_RELU": true,
"NUM_BLOCK_TEMP_KERNEL": [
[
3
],
[
4
],
[
6
],
[
3
]
],
"NUM_GROUPS": 1,
"SPATIAL_DILATIONS": [
[
1
],
[
1
],
[
1
],
[
1
]
],
"SPATIAL_STRIDES": [
[
1
],
[
2
],
[
2
],
[
2
]
],
"STRIDE_1X1": false,
"TRANS_FUNC": "x3d_transform",
"WIDTH_PER_GROUP": 64,
"ZERO_INIT_FINAL_BN": true,
"ZERO_INIT_FINAL_CONV": false
},
"RNG_SEED": 0,
"SHARD_ID": 0,
"SLOWFAST": {
"ALPHA": 8,
"BETA_INV": 8,
"FUSION_CONV_CHANNEL_RATIO": 2,
"FUSION_KERNEL_SZ": 5
},
"SOLVER": {
"BASE_LR": 0.05,
"BASE_LR_SCALE_NUM_SHARDS": true,
"BETAS": [
0.9,
0.999
],
"CLIP_GRAD_L2NORM": null,
"CLIP_GRAD_VAL": null,
"COSINE_AFTER_WARMUP": false,
"COSINE_END_LR": 0.0,
"DAMPENING": 0.0,
"GAMMA": 0.1,
"LARS_ON": false,
"LAYER_DECAY": 1.0,
"LRS": [],
"LR_POLICY": "cosine",
"MAX_EPOCH": 120,
"MOMENTUM": 0.9,
"NESTEROV": true,
"OPTIMIZING_METHOD": "sgd",
"STEPS": [],
"STEP_SIZE": 1,
"WARMUP_EPOCHS": 35.0,
"WARMUP_FACTOR": 0.1,
"WARMUP_START_LR": 0.01,
"WEIGHT_DECAY": 5e-05,
"ZERO_WD_1D_PARAM": false
},
"TASK": "",
"TENSORBOARD": {
"CATEGORIES_PATH": "",
"CLASS_NAMES_PATH": "kabr/KABR/annotation/classes.json",
"CONFUSION_MATRIX": {
"ENABLE": true,
"FIGSIZE": [
8,
8
],
"SUBSET_PATH": "kabr/KABR/annotation/classes.txt"
},
"ENABLE": true,
"HISTOGRAM": {
"ENABLE": true,
"FIGSIZE": [
8,
8
],
"SUBSET_PATH": "kabr/KABR/annotation/classes.txt",
"TOPK": 3
},
"LOG_DIR": "",
"MODEL_VIS": {
"ACTIVATIONS": true,
"COLORMAP": "Pastel2",
"ENABLE": true,
"GRAD_CAM": {
"COLORMAP": "viridis",
"ENABLE": true,
"LAYER_LIST": [
"s5/pathway0_res14"
],
"USE_TRUE_LABEL": false
},
"INPUT_VIDEO": true,
"LAYER_LIST": [
"s5/pathway0_res14"
],
"MODEL_WEIGHTS": true,
"TOPK_PREDS": 1
},
"PREDICTIONS_PATH": "",
"WRONG_PRED_VIS": {
"ENABLE": false,
"SUBSET_PATH": "",
"TAG": "Incorrectly classified videos."
}
},
"TEST": {
"BATCH_SIZE": 64,
"CHECKPOINT_FILE_PATH": "",
"CHECKPOINT_TYPE": "pytorch",
"DATASET": "charades",
"ENABLE": false,
"NUM_ENSEMBLE_VIEWS": 2,
"NUM_SPATIAL_CROPS": 1,
"NUM_TEMPORAL_CLIPS": [],
"SAVE_RESULTS_PATH": "kabr/KABR/logs/x3d-l-kabr/results.txt"
},
"TRAIN": {
"AUTO_RESUME": true,
"BATCH_SIZE": 64,
"CHECKPOINT_CLEAR_NAME_PATTERN": [],
"CHECKPOINT_EPOCH_RESET": true,
"CHECKPOINT_FILE_PATH": "slowfast/projects/x3d/x3d_l.pyth",
"CHECKPOINT_INFLATE": false,
"CHECKPOINT_IN_INIT": false,
"CHECKPOINT_PERIOD": 5,
"CHECKPOINT_TYPE": "pytorch",
"DATASET": "charades",
"ENABLE": true,
"EVAL_PERIOD": 5,
"KILL_LOSS_EXPLOSION_FACTOR": 0.0,
"MIXED_PRECISION": false
},
"VIS_MASK": {
"ENABLE": false
},
"X3D": {
"BN_LIN5": false,
"BOTTLENECK_FACTOR": 2.25,
"CHANNELWISE_3x3x3": true,
"DEPTH_FACTOR": 5.0,
"DIM_C1": 12,
"DIM_C5": 2048,
"SCALE_RES2": false,
"WIDTH_FACTOR": 2.0
}
},
"model_type": "x3d",
"torch_dtype": "float32",
"transformers_version": "4.46.0"
}