|
{ |
|
"architectures": [ |
|
"X3DModel" |
|
], |
|
"auto_map": { |
|
"AutoConfig": "configuration_x3d.X3DConfig", |
|
"AutoModel": "modeling_x3d.X3DModel" |
|
}, |
|
"cfg": { |
|
"AUG": { |
|
"AA_TYPE": "rand-m9-mstd0.5-inc1", |
|
"COLOR_JITTER": 0.4, |
|
"ENABLE": false, |
|
"GEN_MASK_LOADER": false, |
|
"INTERPOLATION": "bicubic", |
|
"MASK_FRAMES": false, |
|
"MASK_RATIO": 0.0, |
|
"MASK_TUBE": false, |
|
"MASK_WINDOW_SIZE": [ |
|
8, |
|
7, |
|
7 |
|
], |
|
"MAX_MASK_PATCHES_PER_BLOCK": null, |
|
"NUM_SAMPLE": 1, |
|
"RE_COUNT": 1, |
|
"RE_MODE": "pixel", |
|
"RE_PROB": 0.25, |
|
"RE_SPLIT": false |
|
}, |
|
"AVA": { |
|
"ANNOTATION_DIR": "/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/", |
|
"BGR": false, |
|
"DETECTION_SCORE_THRESH": 0.9, |
|
"EXCLUSION_FILE": "ava_val_excluded_timestamps_v2.2.csv", |
|
"FRAME_DIR": "/mnt/fair-flash3-east/ava_trainval_frames.img/", |
|
"FRAME_LIST_DIR": "/mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/", |
|
"FULL_TEST_ON_VAL": false, |
|
"GROUNDTRUTH_FILE": "ava_val_v2.2.csv", |
|
"IMG_PROC_BACKEND": "cv2", |
|
"LABEL_MAP_FILE": "ava_action_list_v2.2_for_activitynet_2019.pbtxt", |
|
"TEST_FORCE_FLIP": false, |
|
"TEST_LISTS": [ |
|
"val.csv" |
|
], |
|
"TEST_PREDICT_BOX_LISTS": [ |
|
"ava_val_predicted_boxes.csv" |
|
], |
|
"TRAIN_GT_BOX_LISTS": [ |
|
"ava_train_v2.2.csv" |
|
], |
|
"TRAIN_LISTS": [ |
|
"train.csv" |
|
], |
|
"TRAIN_PCA_JITTER_ONLY": true, |
|
"TRAIN_PREDICT_BOX_LISTS": [], |
|
"TRAIN_USE_COLOR_AUGMENTATION": false |
|
}, |
|
"BENCHMARK": { |
|
"LOG_PERIOD": 100, |
|
"NUM_EPOCHS": 5, |
|
"SHUFFLE": true |
|
}, |
|
"BN": { |
|
"GLOBAL_SYNC": false, |
|
"NORM_TYPE": "sync_batchnorm", |
|
"NUM_BATCHES_PRECISE": 200, |
|
"NUM_SPLITS": 1, |
|
"NUM_SYNC_DEVICES": 1, |
|
"USE_PRECISE_STATS": true, |
|
"WEIGHT_DECAY": 0.0 |
|
}, |
|
"CONTRASTIVE": { |
|
"BN_MLP": false, |
|
"BN_SYNC_MLP": false, |
|
"DELTA_CLIPS_MAX": Infinity, |
|
"DELTA_CLIPS_MIN": -Infinity, |
|
"DIM": 128, |
|
"INTERP_MEMORY": false, |
|
"KNN_ON": true, |
|
"LENGTH": 239975, |
|
"LOCAL_SHUFFLE_BN": true, |
|
"MEM_TYPE": "1d", |
|
"MLP_DIM": 2048, |
|
"MOCO_MULTI_VIEW_QUEUE": false, |
|
"MOMENTUM": 0.5, |
|
"MOMENTUM_ANNEALING": false, |
|
"NUM_CLASSES_DOWNSTREAM": 400, |
|
"NUM_MLP_LAYERS": 1, |
|
"PREDICTOR_DEPTHS": [], |
|
"QUEUE_LEN": 65536, |
|
"SEQUENTIAL": false, |
|
"SIMCLR_DIST_ON": true, |
|
"SWAV_QEUE_LEN": 0, |
|
"T": 0.07, |
|
"TYPE": "mem" |
|
}, |
|
"DATA": { |
|
"COLOR_RND_GRAYSCALE": 0.0, |
|
"DECODING_BACKEND": "torchvision", |
|
"DECODING_SHORT_SIZE": 256, |
|
"DUMMY_LOAD": false, |
|
"ENSEMBLE_METHOD": "max", |
|
"IN22K_TRAINVAL": false, |
|
"IN22k_VAL_IN1K": "", |
|
"INPUT_CHANNEL_NUM": [ |
|
3 |
|
], |
|
"INV_UNIFORM_SAMPLE": true, |
|
"IN_VAL_CROP_RATIO": 0.875, |
|
"LOADER_CHUNK_OVERALL_SIZE": 0, |
|
"LOADER_CHUNK_SIZE": 0, |
|
"MEAN": [ |
|
0.45, |
|
0.45, |
|
0.45 |
|
], |
|
"MULTI_LABEL": true, |
|
"NUM_FRAMES": 16, |
|
"PATH_LABEL_SEPARATOR": " ", |
|
"PATH_PREFIX": "kabr/KABR/dataset/image", |
|
"PATH_TO_DATA_DIR": "kabr/KABR/annotation", |
|
"PATH_TO_PRELOAD_IMDB": "", |
|
"RANDOM_FLIP": true, |
|
"REVERSE_INPUT_CHANNEL": true, |
|
"SAMPLING_RATE": 5, |
|
"SKIP_ROWS": 0, |
|
"SSL_BLUR_SIGMA_MAX": [ |
|
0.0, |
|
2.0 |
|
], |
|
"SSL_BLUR_SIGMA_MIN": [ |
|
0.0, |
|
0.1 |
|
], |
|
"SSL_COLOR_BRI_CON_SAT": [ |
|
0.2, |
|
0.2, |
|
0.2 |
|
], |
|
"SSL_COLOR_HUE": 0.1, |
|
"SSL_COLOR_JITTER": true, |
|
"SSL_MOCOV2_AUG": false, |
|
"STD": [ |
|
0.225, |
|
0.225, |
|
0.225 |
|
], |
|
"TARGET_FPS": 30, |
|
"TEST_CROP_SIZE": 300, |
|
"TIME_DIFF_PROB": 0.0, |
|
"TRAIN_CROP_NUM_SPATIAL": 1, |
|
"TRAIN_CROP_NUM_TEMPORAL": 1, |
|
"TRAIN_CROP_SIZE": 300, |
|
"TRAIN_JITTER_ASPECT_RELATIVE": [], |
|
"TRAIN_JITTER_FPS": 0.0, |
|
"TRAIN_JITTER_MOTION_SHIFT": false, |
|
"TRAIN_JITTER_SCALES": [ |
|
300, |
|
400 |
|
], |
|
"TRAIN_JITTER_SCALES_RELATIVE": [], |
|
"TRAIN_PCA_EIGVAL": [ |
|
0.225, |
|
0.224, |
|
0.229 |
|
], |
|
"TRAIN_PCA_EIGVEC": [ |
|
[ |
|
-0.5675, |
|
0.7192, |
|
0.4009 |
|
], |
|
[ |
|
-0.5808, |
|
-0.0045, |
|
-0.814 |
|
], |
|
[ |
|
-0.5836, |
|
-0.6948, |
|
0.4203 |
|
] |
|
], |
|
"USE_OFFSET_SAMPLING": false |
|
}, |
|
"DATA_LOADER": { |
|
"ENABLE_MULTI_THREAD_DECODE": false, |
|
"NUM_WORKERS": 8, |
|
"PIN_MEMORY": true |
|
}, |
|
"DEMO": { |
|
"BUFFER_SIZE": 0, |
|
"CLIP_VIS_SIZE": 10, |
|
"COMMON_CLASS_NAMES": [ |
|
"watch (a person)", |
|
"talk to (e.g., self, a person, a group)", |
|
"listen to (a person)", |
|
"touch (an object)", |
|
"carry/hold (an object)", |
|
"walk", |
|
"sit", |
|
"lie/sleep", |
|
"bend/bow (at the waist)" |
|
], |
|
"COMMON_CLASS_THRES": 0.7, |
|
"DETECTRON2_CFG": "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml", |
|
"DETECTRON2_THRESH": 0.9, |
|
"DETECTRON2_WEIGHTS": "detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl", |
|
"DISPLAY_HEIGHT": 0, |
|
"DISPLAY_WIDTH": 0, |
|
"ENABLE": false, |
|
"FPS": 30, |
|
"GT_BOXES": "", |
|
"INPUT_FORMAT": "BGR", |
|
"INPUT_VIDEO": "kabr/KABR/dataset/video/G0103.mp4", |
|
"LABEL_FILE_PATH": "kabr/KABR/annotation/classes.json", |
|
"NUM_CLIPS_SKIP": 1, |
|
"NUM_VIS_INSTANCES": 1, |
|
"OUTPUT_FILE": "kabr/KABR/dataset/predict/G0103.mp4", |
|
"OUTPUT_FPS": -1, |
|
"PREDS_BOXES": "", |
|
"SLOWMO": 1, |
|
"STARTING_SECOND": 900, |
|
"THREAD_ENABLE": false, |
|
"UNCOMMON_CLASS_THRES": 0.3, |
|
"VIS_MODE": "thres", |
|
"WEBCAM": -1 |
|
}, |
|
"DETECTION": { |
|
"ALIGNED": true, |
|
"ENABLE": false, |
|
"ROI_XFORM_RESOLUTION": 7, |
|
"SPATIAL_SCALE_FACTOR": 16 |
|
}, |
|
"DIST_BACKEND": "nccl", |
|
"LOG_MODEL_INFO": true, |
|
"LOG_PERIOD": 10, |
|
"MASK": { |
|
"DECODER_DEPTH": 0, |
|
"DECODER_EMBED_DIM": 512, |
|
"DECODER_SEP_POS_EMBED": false, |
|
"DEC_KV_KERNEL": [], |
|
"DEC_KV_STRIDE": [], |
|
"ENABLE": false, |
|
"HEAD_TYPE": "separate", |
|
"MAE_ON": false, |
|
"MAE_RND_MASK": false, |
|
"NORM_PRED_PIXEL": true, |
|
"PER_FRAME_MASKING": false, |
|
"PRED_HOG": false, |
|
"PRETRAIN_DEPTH": [ |
|
15 |
|
], |
|
"SCALE_INIT_BY_DEPTH": false, |
|
"TIME_STRIDE_LOSS": true |
|
}, |
|
"MIXUP": { |
|
"ALPHA": 0.8, |
|
"CUTMIX_ALPHA": 1.0, |
|
"ENABLE": false, |
|
"LABEL_SMOOTH_VALUE": 0.1, |
|
"PROB": 1.0, |
|
"SWITCH_PROB": 0.5 |
|
}, |
|
"MODEL": { |
|
"ACT_CHECKPOINT": false, |
|
"ARCH": "x3d", |
|
"DETACH_FINAL_FC": false, |
|
"DROPCONNECT_RATE": 0.0, |
|
"DROPOUT_RATE": 0.5, |
|
"FC_INIT_STD": 0.01, |
|
"FP16_ALLREDUCE": false, |
|
"FROZEN_BN": false, |
|
"HEAD_ACT": "sigmoid", |
|
"LOSS_FUNC": "EQL", |
|
"MODEL_NAME": "X3D", |
|
"MULTI_PATHWAY_ARCH": [ |
|
"slowfast" |
|
], |
|
"NUM_CLASSES": 8, |
|
"SINGLE_PATHWAY_ARCH": [ |
|
"2d", |
|
"c2d", |
|
"i3d", |
|
"slow", |
|
"x3d", |
|
"mvit", |
|
"maskmvit" |
|
] |
|
}, |
|
"MULTIGRID": { |
|
"BN_BASE_SIZE": 8, |
|
"DEFAULT_B": 0, |
|
"DEFAULT_S": 0, |
|
"DEFAULT_T": 0, |
|
"EPOCH_FACTOR": 1.5, |
|
"EVAL_FREQ": 3, |
|
"LONG_CYCLE": false, |
|
"LONG_CYCLE_FACTORS": [ |
|
[ |
|
0.25, |
|
0.7071067811865476 |
|
], |
|
[ |
|
0.5, |
|
0.7071067811865476 |
|
], |
|
[ |
|
0.5, |
|
1 |
|
], |
|
[ |
|
1, |
|
1 |
|
] |
|
], |
|
"LONG_CYCLE_SAMPLING_RATE": 0, |
|
"SHORT_CYCLE": false, |
|
"SHORT_CYCLE_FACTORS": [ |
|
0.5, |
|
0.7071067811865476 |
|
] |
|
}, |
|
"MVIT": { |
|
"CLS_EMBED_ON": true, |
|
"DEPTH": 16, |
|
"DIM_MUL": [], |
|
"DIM_MUL_IN_ATT": false, |
|
"DROPOUT_RATE": 0.0, |
|
"DROPPATH_RATE": 0.1, |
|
"EMBED_DIM": 96, |
|
"HEAD_INIT_SCALE": 1.0, |
|
"HEAD_MUL": [], |
|
"LAYER_SCALE_INIT_VALUE": 0.0, |
|
"MLP_RATIO": 4.0, |
|
"MODE": "conv", |
|
"NORM": "layernorm", |
|
"NORM_STEM": false, |
|
"NUM_HEADS": 1, |
|
"PATCH_2D": false, |
|
"PATCH_KERNEL": [ |
|
3, |
|
7, |
|
7 |
|
], |
|
"PATCH_PADDING": [ |
|
2, |
|
4, |
|
4 |
|
], |
|
"PATCH_STRIDE": [ |
|
2, |
|
4, |
|
4 |
|
], |
|
"POOL_FIRST": false, |
|
"POOL_KVQ_KERNEL": null, |
|
"POOL_KV_STRIDE": [], |
|
"POOL_KV_STRIDE_ADAPTIVE": null, |
|
"POOL_Q_STRIDE": [], |
|
"QKV_BIAS": true, |
|
"REL_POS_SPATIAL": false, |
|
"REL_POS_TEMPORAL": false, |
|
"REL_POS_ZERO_INIT": false, |
|
"RESIDUAL_POOLING": false, |
|
"REV": { |
|
"BUFFER_LAYERS": [], |
|
"ENABLE": false, |
|
"PRE_Q_FUSION": "avg", |
|
"RESPATH_FUSE": "concat", |
|
"RES_PATH": "conv" |
|
}, |
|
"SEPARATE_QKV": false, |
|
"SEP_POS_EMBED": false, |
|
"USE_ABS_POS": true, |
|
"USE_FIXED_SINCOS_POS": false, |
|
"USE_MEAN_POOLING": false, |
|
"ZERO_DECAY_POS_CLS": true |
|
}, |
|
"NONLOCAL": { |
|
"GROUP": [ |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
] |
|
], |
|
"INSTANTIATION": "dot_product", |
|
"LOCATION": [ |
|
[ |
|
[] |
|
], |
|
[ |
|
[] |
|
], |
|
[ |
|
[] |
|
], |
|
[ |
|
[] |
|
] |
|
], |
|
"POOL": [ |
|
[ |
|
[ |
|
1, |
|
2, |
|
2 |
|
], |
|
[ |
|
1, |
|
2, |
|
2 |
|
] |
|
], |
|
[ |
|
[ |
|
1, |
|
2, |
|
2 |
|
], |
|
[ |
|
1, |
|
2, |
|
2 |
|
] |
|
], |
|
[ |
|
[ |
|
1, |
|
2, |
|
2 |
|
], |
|
[ |
|
1, |
|
2, |
|
2 |
|
] |
|
], |
|
[ |
|
[ |
|
1, |
|
2, |
|
2 |
|
], |
|
[ |
|
1, |
|
2, |
|
2 |
|
] |
|
] |
|
] |
|
}, |
|
"NUM_GPUS": 0, |
|
"NUM_SHARDS": 1, |
|
"OUTPUT_DIR": "kabr/KABR/logs/x3d-l-kabr", |
|
"RESNET": { |
|
"DEPTH": 50, |
|
"INPLACE_RELU": true, |
|
"NUM_BLOCK_TEMP_KERNEL": [ |
|
[ |
|
3 |
|
], |
|
[ |
|
4 |
|
], |
|
[ |
|
6 |
|
], |
|
[ |
|
3 |
|
] |
|
], |
|
"NUM_GROUPS": 1, |
|
"SPATIAL_DILATIONS": [ |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
], |
|
[ |
|
1 |
|
] |
|
], |
|
"SPATIAL_STRIDES": [ |
|
[ |
|
1 |
|
], |
|
[ |
|
2 |
|
], |
|
[ |
|
2 |
|
], |
|
[ |
|
2 |
|
] |
|
], |
|
"STRIDE_1X1": false, |
|
"TRANS_FUNC": "x3d_transform", |
|
"WIDTH_PER_GROUP": 64, |
|
"ZERO_INIT_FINAL_BN": true, |
|
"ZERO_INIT_FINAL_CONV": false |
|
}, |
|
"RNG_SEED": 0, |
|
"SHARD_ID": 0, |
|
"SLOWFAST": { |
|
"ALPHA": 8, |
|
"BETA_INV": 8, |
|
"FUSION_CONV_CHANNEL_RATIO": 2, |
|
"FUSION_KERNEL_SZ": 5 |
|
}, |
|
"SOLVER": { |
|
"BASE_LR": 0.05, |
|
"BASE_LR_SCALE_NUM_SHARDS": true, |
|
"BETAS": [ |
|
0.9, |
|
0.999 |
|
], |
|
"CLIP_GRAD_L2NORM": null, |
|
"CLIP_GRAD_VAL": null, |
|
"COSINE_AFTER_WARMUP": false, |
|
"COSINE_END_LR": 0.0, |
|
"DAMPENING": 0.0, |
|
"GAMMA": 0.1, |
|
"LARS_ON": false, |
|
"LAYER_DECAY": 1.0, |
|
"LRS": [], |
|
"LR_POLICY": "cosine", |
|
"MAX_EPOCH": 120, |
|
"MOMENTUM": 0.9, |
|
"NESTEROV": true, |
|
"OPTIMIZING_METHOD": "sgd", |
|
"STEPS": [], |
|
"STEP_SIZE": 1, |
|
"WARMUP_EPOCHS": 35.0, |
|
"WARMUP_FACTOR": 0.1, |
|
"WARMUP_START_LR": 0.01, |
|
"WEIGHT_DECAY": 5e-05, |
|
"ZERO_WD_1D_PARAM": false |
|
}, |
|
"TASK": "", |
|
"TENSORBOARD": { |
|
"CATEGORIES_PATH": "", |
|
"CLASS_NAMES_PATH": "kabr/KABR/annotation/classes.json", |
|
"CONFUSION_MATRIX": { |
|
"ENABLE": true, |
|
"FIGSIZE": [ |
|
8, |
|
8 |
|
], |
|
"SUBSET_PATH": "kabr/KABR/annotation/classes.txt" |
|
}, |
|
"ENABLE": true, |
|
"HISTOGRAM": { |
|
"ENABLE": true, |
|
"FIGSIZE": [ |
|
8, |
|
8 |
|
], |
|
"SUBSET_PATH": "kabr/KABR/annotation/classes.txt", |
|
"TOPK": 3 |
|
}, |
|
"LOG_DIR": "", |
|
"MODEL_VIS": { |
|
"ACTIVATIONS": true, |
|
"COLORMAP": "Pastel2", |
|
"ENABLE": true, |
|
"GRAD_CAM": { |
|
"COLORMAP": "viridis", |
|
"ENABLE": true, |
|
"LAYER_LIST": [ |
|
"s5/pathway0_res14" |
|
], |
|
"USE_TRUE_LABEL": false |
|
}, |
|
"INPUT_VIDEO": true, |
|
"LAYER_LIST": [ |
|
"s5/pathway0_res14" |
|
], |
|
"MODEL_WEIGHTS": true, |
|
"TOPK_PREDS": 1 |
|
}, |
|
"PREDICTIONS_PATH": "", |
|
"WRONG_PRED_VIS": { |
|
"ENABLE": false, |
|
"SUBSET_PATH": "", |
|
"TAG": "Incorrectly classified videos." |
|
} |
|
}, |
|
"TEST": { |
|
"BATCH_SIZE": 64, |
|
"CHECKPOINT_FILE_PATH": "", |
|
"CHECKPOINT_TYPE": "pytorch", |
|
"DATASET": "charades", |
|
"ENABLE": false, |
|
"NUM_ENSEMBLE_VIEWS": 2, |
|
"NUM_SPATIAL_CROPS": 1, |
|
"NUM_TEMPORAL_CLIPS": [], |
|
"SAVE_RESULTS_PATH": "kabr/KABR/logs/x3d-l-kabr/results.txt" |
|
}, |
|
"TRAIN": { |
|
"AUTO_RESUME": true, |
|
"BATCH_SIZE": 64, |
|
"CHECKPOINT_CLEAR_NAME_PATTERN": [], |
|
"CHECKPOINT_EPOCH_RESET": true, |
|
"CHECKPOINT_FILE_PATH": "slowfast/projects/x3d/x3d_l.pyth", |
|
"CHECKPOINT_INFLATE": false, |
|
"CHECKPOINT_IN_INIT": false, |
|
"CHECKPOINT_PERIOD": 5, |
|
"CHECKPOINT_TYPE": "pytorch", |
|
"DATASET": "charades", |
|
"ENABLE": true, |
|
"EVAL_PERIOD": 5, |
|
"KILL_LOSS_EXPLOSION_FACTOR": 0.0, |
|
"MIXED_PRECISION": false |
|
}, |
|
"VIS_MASK": { |
|
"ENABLE": false |
|
}, |
|
"X3D": { |
|
"BN_LIN5": false, |
|
"BOTTLENECK_FACTOR": 2.25, |
|
"CHANNELWISE_3x3x3": true, |
|
"DEPTH_FACTOR": 5.0, |
|
"DIM_C1": 12, |
|
"DIM_C5": 2048, |
|
"SCALE_RES2": false, |
|
"WIDTH_FACTOR": 2.0 |
|
} |
|
}, |
|
"model_type": "x3d", |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.46.0" |
|
} |
|
|