---
# Experiment configuration, restored to block-style YAML.
#
# The previous revision had every line break and all indentation collapsed,
# which made nested mappings such as "AUG: AA_TYPE: ..." unparseable.
# Keys, values and key order are unchanged; only layout, comments and the
# defensive quoting of '1d' / '2d' differ.
#
# NOTE(review): key names (including the spellings SWAV_QEUE_LEN,
# IN22k_VAL_IN1K and CHANNELWISE_3x3x3) are kept verbatim -- presumably they
# must match the schema of the consuming framework; confirm before renaming.

# Augmentation options; ENABLE is false here.
AUG:
  AA_TYPE: rand-m9-mstd0.5-inc1
  COLOR_JITTER: 0.4
  ENABLE: false
  GEN_MASK_LOADER: false
  INTERPOLATION: bicubic
  MASK_FRAMES: false
  MASK_RATIO: 0.0
  MASK_TUBE: false
  MASK_WINDOW_SIZE: [8, 7, 7]
  MAX_MASK_PATCHES_PER_BLOCK: null
  NUM_SAMPLE: 1
  RE_COUNT: 1
  RE_MODE: pixel
  RE_PROB: 0.25
  RE_SPLIT: false

# NOTE(review): the directories below are absolute /mnt/... paths, unlike the
# relative kabr/... paths used elsewhere in this file. DETECTION.ENABLE is
# false, so this section is presumably unused for this run -- confirm.
AVA:
  ANNOTATION_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
  BGR: false
  DETECTION_SCORE_THRESH: 0.9
  EXCLUSION_FILE: ava_val_excluded_timestamps_v2.2.csv
  FRAME_DIR: /mnt/fair-flash3-east/ava_trainval_frames.img/
  FRAME_LIST_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
  FULL_TEST_ON_VAL: false
  GROUNDTRUTH_FILE: ava_val_v2.2.csv
  IMG_PROC_BACKEND: cv2
  LABEL_MAP_FILE: ava_action_list_v2.2_for_activitynet_2019.pbtxt
  TEST_FORCE_FLIP: false
  TEST_LISTS:
    - val.csv
  TEST_PREDICT_BOX_LISTS:
    - ava_val_predicted_boxes.csv
  TRAIN_GT_BOX_LISTS:
    - ava_train_v2.2.csv
  TRAIN_LISTS:
    - train.csv
  TRAIN_PCA_JITTER_ONLY: true
  TRAIN_PREDICT_BOX_LISTS: []
  TRAIN_USE_COLOR_AUGMENTATION: false

BENCHMARK:
  LOG_PERIOD: 100
  NUM_EPOCHS: 5
  SHUFFLE: true

BN:
  GLOBAL_SYNC: false
  NORM_TYPE: sync_batchnorm
  NUM_BATCHES_PRECISE: 200
  NUM_SPLITS: 1
  NUM_SYNC_DEVICES: 1
  USE_PRECISE_STATS: true
  WEIGHT_DECAY: 0.0

CONTRASTIVE:
  BN_MLP: false
  BN_SYNC_MLP: false
  # .inf / -.inf are YAML float infinities and are intentionally unquoted.
  DELTA_CLIPS_MAX: .inf
  DELTA_CLIPS_MIN: -.inf
  DIM: 128
  INTERP_MEMORY: false
  KNN_ON: true
  LENGTH: 239975
  LOCAL_SHUFFLE_BN: true
  # Quoted so the value is unambiguously a string.
  MEM_TYPE: '1d'
  MLP_DIM: 2048
  MOCO_MULTI_VIEW_QUEUE: false
  MOMENTUM: 0.5
  MOMENTUM_ANNEALING: false
  NUM_CLASSES_DOWNSTREAM: 400
  NUM_MLP_LAYERS: 1
  PREDICTOR_DEPTHS: []
  QUEUE_LEN: 65536
  SEQUENTIAL: false
  SIMCLR_DIST_ON: true
  SWAV_QEUE_LEN: 0
  T: 0.07
  TYPE: mem

# Input data settings; dataset paths point into kabr/KABR/.
DATA:
  COLOR_RND_GRAYSCALE: 0.0
  DECODING_BACKEND: torchvision
  DECODING_SHORT_SIZE: 256
  DUMMY_LOAD: false
  ENSEMBLE_METHOD: max
  IN22K_TRAINVAL: false
  IN22k_VAL_IN1K: ''
  INPUT_CHANNEL_NUM: [3]
  INV_UNIFORM_SAMPLE: true
  IN_VAL_CROP_RATIO: 0.875
  LOADER_CHUNK_OVERALL_SIZE: 0
  LOADER_CHUNK_SIZE: 0
  MEAN: [0.45, 0.45, 0.45]
  MULTI_LABEL: true
  NUM_FRAMES: 16
  # The separator is a single space character; the quotes are required.
  PATH_LABEL_SEPARATOR: ' '
  PATH_PREFIX: kabr/KABR/dataset/image
  PATH_TO_DATA_DIR: kabr/KABR/annotation
  PATH_TO_PRELOAD_IMDB: ''
  RANDOM_FLIP: true
  REVERSE_INPUT_CHANNEL: true
  SAMPLING_RATE: 5
  SKIP_ROWS: 0
  SSL_BLUR_SIGMA_MAX: [0.0, 2.0]
  SSL_BLUR_SIGMA_MIN: [0.0, 0.1]
  SSL_COLOR_BRI_CON_SAT: [0.2, 0.2, 0.2]
  SSL_COLOR_HUE: 0.1
  SSL_COLOR_JITTER: true
  SSL_MOCOV2_AUG: false
  STD: [0.225, 0.225, 0.225]
  TARGET_FPS: 30
  TEST_CROP_SIZE: 300
  TIME_DIFF_PROB: 0.0
  TRAIN_CROP_NUM_SPATIAL: 1
  TRAIN_CROP_NUM_TEMPORAL: 1
  TRAIN_CROP_SIZE: 300
  TRAIN_JITTER_ASPECT_RELATIVE: []
  TRAIN_JITTER_FPS: 0.0
  TRAIN_JITTER_MOTION_SHIFT: false
  TRAIN_JITTER_SCALES: [300, 400]
  TRAIN_JITTER_SCALES_RELATIVE: []
  TRAIN_PCA_EIGVAL: [0.225, 0.224, 0.229]
  # Three rows of three values each.
  TRAIN_PCA_EIGVEC:
    - [-0.5675, 0.7192, 0.4009]
    - [-0.5808, -0.0045, -0.814]
    - [-0.5836, -0.6948, 0.4203]
  USE_OFFSET_SAMPLING: false

DATA_LOADER:
  ENABLE_MULTI_THREAD_DECODE: false
  NUM_WORKERS: 8
  PIN_MEMORY: true

# Demo options; ENABLE is false here.
DEMO:
  BUFFER_SIZE: 0
  CLIP_VIS_SIZE: 10
  COMMON_CLASS_NAMES:
    - watch (a person)
    - talk to (e.g., self, a person, a group)
    - listen to (a person)
    - touch (an object)
    - carry/hold (an object)
    - walk
    - sit
    - lie/sleep
    - bend/bow (at the waist)
  COMMON_CLASS_THRES: 0.7
  DETECTRON2_CFG: COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
  DETECTRON2_THRESH: 0.9
  DETECTRON2_WEIGHTS: detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl
  DISPLAY_HEIGHT: 0
  DISPLAY_WIDTH: 0
  ENABLE: false
  FPS: 30
  GT_BOXES: ''
  INPUT_FORMAT: BGR
  INPUT_VIDEO: kabr/KABR/dataset/video/G0103.mp4
  LABEL_FILE_PATH: kabr/KABR/annotation/classes.json
  NUM_CLIPS_SKIP: 1
  NUM_VIS_INSTANCES: 1
  OUTPUT_FILE: kabr/KABR/dataset/predict/G0103.mp4
  OUTPUT_FPS: -1
  PREDS_BOXES: ''
  SLOWMO: 1
  STARTING_SECOND: 900
  THREAD_ENABLE: false
  UNCOMMON_CLASS_THRES: 0.3
  VIS_MODE: thres
  WEBCAM: -1

DETECTION:
  ALIGNED: true
  ENABLE: false
  ROI_XFORM_RESOLUTION: 7
  SPATIAL_SCALE_FACTOR: 16

DIST_BACKEND: nccl
LOG_MODEL_INFO: true
LOG_PERIOD: 10

# Masking options; ENABLE is false here.
MASK:
  DECODER_DEPTH: 0
  DECODER_EMBED_DIM: 512
  DECODER_SEP_POS_EMBED: false
  DEC_KV_KERNEL: []
  DEC_KV_STRIDE: []
  ENABLE: false
  HEAD_TYPE: separate
  MAE_ON: false
  MAE_RND_MASK: false
  NORM_PRED_PIXEL: true
  PER_FRAME_MASKING: false
  PRED_HOG: false
  PRETRAIN_DEPTH: [15]
  SCALE_INIT_BY_DEPTH: false
  TIME_STRIDE_LOSS: true

# Mixup options; ENABLE is false here.
MIXUP:
  ALPHA: 0.8
  CUTMIX_ALPHA: 1.0
  ENABLE: false
  LABEL_SMOOTH_VALUE: 0.1
  PROB: 1.0
  SWITCH_PROB: 0.5

# Model selection: ARCH x3d / MODEL_NAME X3D with 8 output classes.
MODEL:
  ACT_CHECKPOINT: false
  ARCH: x3d
  DETACH_FINAL_FC: false
  DROPCONNECT_RATE: 0.0
  DROPOUT_RATE: 0.5
  FC_INIT_STD: 0.01
  FP16_ALLREDUCE: false
  FROZEN_BN: false
  HEAD_ACT: sigmoid
  LOSS_FUNC: EQL
  MODEL_NAME: X3D
  MULTI_PATHWAY_ARCH:
    - slowfast
  NUM_CLASSES: 8
  SINGLE_PATHWAY_ARCH:
    # Quoted so the value is unambiguously a string.
    - '2d'
    - c2d
    - i3d
    - slow
    - x3d
    - mvit
    - maskmvit

MULTIGRID:
  BN_BASE_SIZE: 8
  DEFAULT_B: 0
  DEFAULT_S: 0
  DEFAULT_T: 0
  EPOCH_FACTOR: 1.5
  EVAL_FREQ: 3
  LONG_CYCLE: false
  # Four pairs of factors.
  LONG_CYCLE_FACTORS:
    - [0.25, 0.7071067811865476]
    - [0.5, 0.7071067811865476]
    - [0.5, 1]
    - [1, 1]
  LONG_CYCLE_SAMPLING_RATE: 0
  SHORT_CYCLE: false
  SHORT_CYCLE_FACTORS: [0.5, 0.7071067811865476]

# NOTE(review): MODEL.ARCH is x3d, so this section is presumably unused for
# this run -- confirm.
MVIT:
  CLS_EMBED_ON: true
  DEPTH: 16
  DIM_MUL: []
  DIM_MUL_IN_ATT: false
  DROPOUT_RATE: 0.0
  DROPPATH_RATE: 0.1
  EMBED_DIM: 96
  HEAD_INIT_SCALE: 1.0
  HEAD_MUL: []
  LAYER_SCALE_INIT_VALUE: 0.0
  MLP_RATIO: 4.0
  MODE: conv
  NORM: layernorm
  NORM_STEM: false
  NUM_HEADS: 1
  PATCH_2D: false
  PATCH_KERNEL: [3, 7, 7]
  PATCH_PADDING: [2, 4, 4]
  PATCH_STRIDE: [2, 4, 4]
  POOL_FIRST: false
  POOL_KVQ_KERNEL: null
  POOL_KV_STRIDE: []
  POOL_KV_STRIDE_ADAPTIVE: null
  POOL_Q_STRIDE: []
  QKV_BIAS: true
  REL_POS_SPATIAL: false
  REL_POS_TEMPORAL: false
  REL_POS_ZERO_INIT: false
  RESIDUAL_POOLING: false
  REV:
    BUFFER_LAYERS: []
    ENABLE: false
    PRE_Q_FUSION: avg
    RESPATH_FUSE: concat
    RES_PATH: conv
  SEPARATE_QKV: false
  SEP_POS_EMBED: false
  USE_ABS_POS: true
  USE_FIXED_SINCOS_POS: false
  USE_MEAN_POOLING: false
  ZERO_DECAY_POS_CLS: true

NONLOCAL:
  # Four single-element lists.
  GROUP:
    - [1]
    - [1]
    - [1]
    - [1]
  INSTANTIATION: dot_product
  # Four entries, each a list holding one empty list.
  LOCATION:
    - [[]]
    - [[]]
    - [[]]
    - [[]]
  # Four entries, each holding two [1, 2, 2] triples.
  POOL:
    - - [1, 2, 2]
      - [1, 2, 2]
    - - [1, 2, 2]
      - [1, 2, 2]
    - - [1, 2, 2]
      - [1, 2, 2]
    - - [1, 2, 2]
      - [1, 2, 2]

NUM_GPUS: 8
NUM_SHARDS: 1
OUTPUT_DIR: kabr/KABR/logs/x3d-l-kabr

RESNET:
  DEPTH: 50
  INPLACE_RELU: true
  NUM_BLOCK_TEMP_KERNEL:
    - [3]
    - [4]
    - [6]
    - [3]
  NUM_GROUPS: 1
  SPATIAL_DILATIONS:
    - [1]
    - [1]
    - [1]
    - [1]
  SPATIAL_STRIDES:
    - [1]
    - [2]
    - [2]
    - [2]
  STRIDE_1X1: false
  TRANS_FUNC: x3d_transform
  WIDTH_PER_GROUP: 64
  ZERO_INIT_FINAL_BN: true
  ZERO_INIT_FINAL_CONV: false

RNG_SEED: 0
SHARD_ID: 0

SLOWFAST:
  ALPHA: 8
  BETA_INV: 8
  FUSION_CONV_CHANNEL_RATIO: 2
  FUSION_KERNEL_SZ: 5

# Optimizer and schedule: sgd with a cosine LR policy over 120 epochs.
SOLVER:
  BASE_LR: 0.05
  BASE_LR_SCALE_NUM_SHARDS: true
  BETAS: [0.9, 0.999]
  CLIP_GRAD_L2NORM: null
  CLIP_GRAD_VAL: null
  COSINE_AFTER_WARMUP: false
  COSINE_END_LR: 0.0
  DAMPENING: 0.0
  GAMMA: 0.1
  LARS_ON: false
  LAYER_DECAY: 1.0
  LRS: []
  LR_POLICY: cosine
  MAX_EPOCH: 120
  MOMENTUM: 0.9
  NESTEROV: true
  OPTIMIZING_METHOD: sgd
  STEPS: []
  STEP_SIZE: 1
  WARMUP_EPOCHS: 35.0
  WARMUP_FACTOR: 0.1
  WARMUP_START_LR: 0.01
  WEIGHT_DECAY: 5.0e-05
  ZERO_WD_1D_PARAM: false

TASK: ''

TENSORBOARD:
  CATEGORIES_PATH: ''
  CLASS_NAMES_PATH: kabr/KABR/annotation/classes.json
  CONFUSION_MATRIX:
    ENABLE: true
    FIGSIZE: [8, 8]
    SUBSET_PATH: kabr/KABR/annotation/classes.txt
  ENABLE: true
  HISTOGRAM:
    ENABLE: true
    FIGSIZE: [8, 8]
    SUBSET_PATH: kabr/KABR/annotation/classes.txt
    TOPK: 3
  LOG_DIR: ''
  MODEL_VIS:
    ACTIVATIONS: true
    COLORMAP: Pastel2
    ENABLE: true
    GRAD_CAM:
      COLORMAP: viridis
      ENABLE: true
      LAYER_LIST:
        - s5/pathway0_res14
      USE_TRUE_LABEL: false
    INPUT_VIDEO: true
    LAYER_LIST:
      - s5/pathway0_res14
    MODEL_WEIGHTS: true
    TOPK_PREDS: 1
  PREDICTIONS_PATH: ''
  WRONG_PRED_VIS:
    ENABLE: false
    SUBSET_PATH: ''
    TAG: Incorrectly classified videos.

# Testing is disabled in this configuration (ENABLE is false).
TEST:
  BATCH_SIZE: 64
  CHECKPOINT_FILE_PATH: ''
  CHECKPOINT_TYPE: pytorch
  DATASET: charades
  ENABLE: false
  NUM_ENSEMBLE_VIEWS: 2
  NUM_SPATIAL_CROPS: 1
  NUM_TEMPORAL_CLIPS: []
  SAVE_RESULTS_PATH: kabr/KABR/logs/x3d-l-kabr/results.txt

# Training is enabled; CHECKPOINT_FILE_PATH names the checkpoint file
# slowfast/projects/x3d/x3d_l.pyth.
TRAIN:
  AUTO_RESUME: true
  BATCH_SIZE: 64
  CHECKPOINT_CLEAR_NAME_PATTERN: []
  CHECKPOINT_EPOCH_RESET: true
  CHECKPOINT_FILE_PATH: slowfast/projects/x3d/x3d_l.pyth
  CHECKPOINT_INFLATE: false
  CHECKPOINT_IN_INIT: false
  CHECKPOINT_PERIOD: 5
  CHECKPOINT_TYPE: pytorch
  DATASET: charades
  ENABLE: true
  EVAL_PERIOD: 5
  KILL_LOSS_EXPLOSION_FACTOR: 0.0
  MIXED_PRECISION: false

VIS_MASK:
  ENABLE: false

X3D:
  BN_LIN5: false
  BOTTLENECK_FACTOR: 2.25
  CHANNELWISE_3x3x3: true
  DEPTH_FACTOR: 5.0
  DIM_C1: 12
  DIM_C5: 2048
  SCALE_RES2: false
  WIDTH_FACTOR: 2.0