{
    "query_token_id": "[unused0]",
    "doc_token_id": "[unused1]",
    "query_token": "[Q]",
    "doc_token": "[D]",
    "ncells": null,
    "centroid_score_threshold": null,
    "ndocs": null,
    "load_index_with_mmap": false,
    "index_path": null,
    "nbits": 1,
    "kmeans_niters": 20,
    "resume": false,
    "similarity": "cosine",
    "bsize": 4,
    "accumsteps": 1,
    "lr": 3e-6,
    "maxsteps": 400000,
    "save_every": null,
    "warmup": 20000,
    "warmup_bert": null,
    "relu": false,
    "nway": 64,
    "use_ib_negatives": false,
    "reranker": false,
    "distillation_alpha": 1.0,
    "ignore_scores": false,
    "model_name": null,
    "query_maxlen": 512,
    "attend_to_mask_tokens": false,
    "interaction": "colbert",
    "dim": 128,
    "doc_maxlen": 128,
    "mask_punctuation": true,
    "checkpoint": "\/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir",
    "triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json",
    "collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv",
    "queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv",
    "index_name": null,
    "overwrite": false,
    "root": "\/future\/u\/okhattab\/root\/unit\/experiments",
    "experiment": "2021.10",
    "index_root": null,
    "name": "kldR2.nway64.ib",
    "rank": 0,
    "nranks": 4,
    "amp": true,
    "gpus": 8,
    "meta": {
        "hostname": "gamma",
        "git_branch": "main",
        "git_hash": "58087227c8ead73499b761c7f7a569844d01d248",
        "git_commit_datetime": "2023-10-16 12:32:12+08:00",
        "current_datetime": "Apr 21, 2024 ; 12:29AM UTC (+0000)",
        "cmd": "supervised-fine-tune2.py --model_name_or_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-reason-hf --colbert_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir --bf16 True --data_path error_20.json --output_dir \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/ --cache_dir \/home\/jwang\/.cache --model_max_length 32768 --use_flash_attn True --low_rank_training True --num_train_epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 2 --gradient_accumulation_steps 8 --save_strategy epoch --save_total_limit 5 --learning_rate 1e-5 --weight_decay 0.0 --warmup_steps 20 --lr_scheduler_type constant_with_warmup --logging_steps 1 --deepspeed ds_configs\/stage2.json --tf32 True",
        "version": "colbert-v0.4"
    }
}