mskov commited on
Commit
00e8b72
1 Parent(s): 85a3d0d

Upload 19 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
37
+ wandb/debug-internal.log filter=lfs diff=lfs merge=lfs -text
38
+ wandb/latest-run/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
39
+ wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
wandb/debug-internal.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/debug.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
wandb/latest-run/files/conda-environment.yaml ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: base
2
+ channels:
3
+ - fastai
4
+ - dglteam/label/cu118
5
+ - nvidia/label/cuda-11.8.0
6
+ - https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
7
+ - conda-forge
8
+ dependencies:
9
+ - _libgcc_mutex=0.1=conda_forge
10
+ - _openmp_mutex=4.5=2_kmp_llvm
11
+ - alsa-lib=1.2.8=h166bdaf_0
12
+ - aom=3.5.0=h27087fc_0
13
+ - asttokens=2.2.1=pyhd8ed1ab_0
14
+ - attr=2.5.1=h166bdaf_1
15
+ - aws-ofi-nccl-dlc=1.5.0=aws_0
16
+ - awscli=1.27.132=py310hff52083_0
17
+ - backcall=0.2.0=pyh9f0ad1d_0
18
+ - backports=1.0=pyhd8ed1ab_3
19
+ - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
20
+ - blas=1.0=mkl
21
+ - bokeh=3.1.1=pyhd8ed1ab_0
22
+ - boto3=1.26.132=pyhd8ed1ab_0
23
+ - botocore=1.29.132=pyhd8ed1ab_0
24
+ - brotli=1.0.9=h166bdaf_8
25
+ - brotli-bin=1.0.9=h166bdaf_8
26
+ - brotlipy=0.7.0=py310h5764c6d_1005
27
+ - bzip2=1.0.8=h7f98852_4
28
+ - c-ares=1.18.1=h7f98852_0
29
+ - ca-certificates=2023.5.7=hbcca054_0
30
+ - cached-property=1.5.2=hd8ed1ab_1
31
+ - cached_property=1.5.2=pyha770c72_1
32
+ - cairo=1.16.0=ha61ee94_1014
33
+ - catalogue=2.0.8=py310hff52083_1
34
+ - certifi=2023.5.7=pyhd8ed1ab_0
35
+ - cffi=1.15.1=py310h255011f_3
36
+ - charset-normalizer=3.1.0=pyhd8ed1ab_0
37
+ - click=8.1.3=unix_pyhd8ed1ab_2
38
+ - cloudpickle=2.2.1=pyhd8ed1ab_0
39
+ - colorama=0.4.4=pyh9f0ad1d_0
40
+ - comm=0.1.3=pyhd8ed1ab_0
41
+ - commonmark=0.9.1=py_0
42
+ - conda=23.1.0=py310hff52083_0
43
+ - conda-content-trust=0.1.3=pyhd8ed1ab_0
44
+ - conda-package-handling=2.0.2=pyh38be061_0
45
+ - conda-package-streaming=0.7.0=pyhd8ed1ab_1
46
+ - confection=0.0.4=py310hfdc917e_1
47
+ - contourpy=1.0.7=py310hdf3cbec_0
48
+ - cryptography=40.0.1=py310h34c0648_0
49
+ - cuda-cccl=11.8.89=0
50
+ - cuda-command-line-tools=11.8.0=0
51
+ - cuda-compiler=11.8.0=0
52
+ - cuda-cudart=11.8.89=0
53
+ - cuda-cudart-dev=11.8.89=0
54
+ - cuda-cuobjdump=11.8.86=0
55
+ - cuda-cupti=11.8.87=0
56
+ - cuda-cuxxfilt=11.8.86=0
57
+ - cuda-documentation=11.8.86=0
58
+ - cuda-driver-dev=11.8.89=0
59
+ - cuda-gdb=11.8.86=0
60
+ - cuda-libraries=11.8.0=0
61
+ - cuda-libraries-dev=11.8.0=0
62
+ - cuda-memcheck=11.8.86=0
63
+ - cuda-nsight=11.8.86=0
64
+ - cuda-nsight-compute=11.8.0=0
65
+ - cuda-nvcc=11.8.89=0
66
+ - cuda-nvdisasm=11.8.86=0
67
+ - cuda-nvml-dev=11.8.86=0
68
+ - cuda-nvprof=11.8.87=0
69
+ - cuda-nvprune=11.8.86=0
70
+ - cuda-nvrtc=11.8.89=0
71
+ - cuda-nvrtc-dev=11.8.89=0
72
+ - cuda-nvtx=11.8.86=0
73
+ - cuda-nvvp=11.8.87=0
74
+ - cuda-profiler-api=11.8.86=0
75
+ - cuda-runtime=11.8.0=0
76
+ - cuda-sanitizer-api=11.8.86=0
77
+ - cuda-toolkit=11.8.0=0
78
+ - cuda-tools=11.8.0=0
79
+ - cuda-visual-tools=11.8.0=0
80
+ - cycler=0.11.0=pyhd8ed1ab_0
81
+ - cymem=2.0.7=py310hd8f1fbe_1
82
+ - cython=0.29.34=py310heca2aa9_0
83
+ - cython-blis=0.7.9=py310hde88566_1
84
+ - dbus=1.13.6=h5008d03_3
85
+ - debugpy=1.6.7=py310heca2aa9_0
86
+ - decorator=5.1.1=pyhd8ed1ab_0
87
+ - dgl=1.1.0.cu118=py310_0
88
+ - docutils=0.15.2=py310hff52083_6
89
+ - executing=1.2.0=pyhd8ed1ab_0
90
+ - expat=2.5.0=hcb278e6_1
91
+ - fastai=2.7.12=py_0
92
+ - fastcore=1.5.29=py_0
93
+ - fastdownload=0.0.7=py_0
94
+ - fastprogress=1.0.3=py_0
95
+ - ffmpeg=5.1.2=gpl_h8dda1f0_106
96
+ - fftw=3.3.10=nompi_hc118613_107
97
+ - filelock=3.12.0=pyhd8ed1ab_0
98
+ - fmt=9.1.0=h924138e_0
99
+ - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
100
+ - font-ttf-inconsolata=3.000=h77eed37_0
101
+ - font-ttf-source-code-pro=2.038=h77eed37_0
102
+ - font-ttf-ubuntu=0.83=hab24e00_0
103
+ - fontconfig=2.14.2=h14ed4e7_0
104
+ - fonts-conda-ecosystem=1=0
105
+ - fonts-conda-forge=1=0
106
+ - fonttools=4.39.4=py310h2372a71_0
107
+ - freeglut=3.2.2=h9c3ff4c_1
108
+ - freetype=2.12.1=hca18f0e_1
109
+ - future=0.18.3=pyhd8ed1ab_0
110
+ - gds-tools=1.4.0.31=0
111
+ - gettext=0.21.1=h27087fc_0
112
+ - glib=2.76.2=hfc55251_0
113
+ - glib-tools=2.76.2=hfc55251_0
114
+ - gmp=6.2.1=h58526e2_0
115
+ - gmpy2=2.1.2=py310h3ec546c_1
116
+ - gnutls=3.7.8=hf3e180e_0
117
+ - graphite2=1.3.13=h58526e2_1001
118
+ - gst-plugins-base=1.22.0=h4243ec0_2
119
+ - gstreamer=1.22.0=h25f0c4b_2
120
+ - gstreamer-orc=0.4.33=h166bdaf_0
121
+ - h5py=3.8.0=nompi_py310ha66b2ad_101
122
+ - harfbuzz=6.0.0=h8e241bc_0
123
+ - hdf5=1.14.0=nompi_hb72d44e_103
124
+ - icu=70.1=h27087fc_0
125
+ - idna=3.4=pyhd8ed1ab_0
126
+ - imageio=2.28.1=pyh24c5eb1_0
127
+ - importlib_metadata=6.6.0=hd8ed1ab_0
128
+ - ipykernel=6.23.0=pyh210e3f2_0
129
+ - ipython=8.13.2=pyh41d4057_0
130
+ - jack=1.9.22=h11f4161_0
131
+ - jasper=2.0.33=h0ff4b12_1
132
+ - jedi=0.18.2=pyhd8ed1ab_0
133
+ - jinja2=3.1.2=pyhd8ed1ab_1
134
+ - jmespath=1.0.1=pyhd8ed1ab_0
135
+ - joblib=1.2.0=pyhd8ed1ab_0
136
+ - jpeg=9e=h166bdaf_2
137
+ - jupyter_client=8.2.0=pyhd8ed1ab_0
138
+ - jupyter_core=5.3.0=py310hff52083_0
139
+ - keyutils=1.6.1=h166bdaf_0
140
+ - kiwisolver=1.4.4=py310hbf28c38_1
141
+ - krb5=1.20.1=h81ceb04_0
142
+ - lame=3.100=h166bdaf_1003
143
+ - langcodes=3.3.0=pyhd8ed1ab_0
144
+ - lcms2=2.15=hfd0df8a_0
145
+ - ld_impl_linux-64=2.40=h41732ed_0
146
+ - lerc=4.0.0=h27087fc_0
147
+ - libaec=1.0.6=hcb278e6_1
148
+ - libarchive=3.6.2=h3d51595_0
149
+ - libblas=3.9.0=1_h86c2bf4_netlib
150
+ - libbrotlicommon=1.0.9=h166bdaf_8
151
+ - libbrotlidec=1.0.9=h166bdaf_8
152
+ - libbrotlienc=1.0.9=h166bdaf_8
153
+ - libcap=2.67=he9d0100_0
154
+ - libcblas=3.9.0=5_h92ddd45_netlib
155
+ - libclang=15.0.7=default_had23c3d_1
156
+ - libclang13=15.0.7=default_h3e3d535_1
157
+ - libcublas=11.11.3.6=0
158
+ - libcublas-dev=11.11.3.6=0
159
+ - libcufft=10.9.0.58=0
160
+ - libcufft-dev=10.9.0.58=0
161
+ - libcufile=1.4.0.31=0
162
+ - libcufile-dev=1.4.0.31=0
163
+ - libcups=2.3.3=h36d4200_3
164
+ - libcurand=10.3.0.86=0
165
+ - libcurand-dev=10.3.0.86=0
166
+ - libcurl=7.88.1=hdc1c0ab_1
167
+ - libcusolver=11.4.1.48=0
168
+ - libcusolver-dev=11.4.1.48=0
169
+ - libcusparse=11.7.5.86=0
170
+ - libcusparse-dev=11.7.5.86=0
171
+ - libdb=6.2.32=h9c3ff4c_0
172
+ - libdeflate=1.17=h0b41bf4_0
173
+ - libdrm=2.4.114=h166bdaf_0
174
+ - libedit=3.1.20191231=he28a2e2_2
175
+ - libev=4.33=h516909a_1
176
+ - libevent=2.1.10=h28343ad_4
177
+ - libexpat=2.5.0=hcb278e6_1
178
+ - libffi=3.4.2=h7f98852_5
179
+ - libflac=1.4.2=h27087fc_0
180
+ - libgcc=7.2.0=h69d50b8_2
181
+ - libgcc-ng=12.2.0=h65d4601_19
182
+ - libgcrypt=1.10.1=h166bdaf_0
183
+ - libgfortran-ng=12.2.0=h69a702a_19
184
+ - libgfortran5=12.2.0=h337968e_19
185
+ - libglib=2.76.2=hebfc3b9_0
186
+ - libglu=9.0.0=he1b5a44_1001
187
+ - libgomp=12.2.0=h65d4601_19
188
+ - libgpg-error=1.46=h620e276_0
189
+ - libhwloc=2.9.1=hd6dc26d_0
190
+ - libiconv=1.17=h166bdaf_0
191
+ - libidn2=2.3.4=h166bdaf_0
192
+ - libjpeg-turbo=2.1.4=h166bdaf_0
193
+ - liblapack=3.9.0=5_h92ddd45_netlib
194
+ - liblapacke=3.9.0=5_h92ddd45_netlib
195
+ - libllvm11=11.1.0=he0ac6c6_5
196
+ - libllvm15=15.0.7=hadd5161_1
197
+ - libllvm16=16.0.1=hadd5161_0
198
+ - libmamba=1.4.1=hcea66bb_0
199
+ - libmambapy=1.4.1=py310h1428755_0
200
+ - libnghttp2=1.52.0=h61bc06f_0
201
+ - libnpp=11.8.0.86=0
202
+ - libnpp-dev=11.8.0.86=0
203
+ - libnsl=2.0.0=h7f98852_0
204
+ - libnvjpeg=11.9.0.86=0
205
+ - libnvjpeg-dev=11.9.0.86=0
206
+ - libogg=1.3.4=h7f98852_1
207
+ - libopenblas=0.3.21=pthreads_h78a6416_3
208
+ - libopencv=4.7.0=py310hb48cf42_1
209
+ - libopus=1.3.1=h7f98852_1
210
+ - libpciaccess=0.17=h166bdaf_0
211
+ - libpng=1.6.39=h753d276_0
212
+ - libpq=15.3=hbcd7760_0
213
+ - libprotobuf=3.21.12=h3eb15da_0
214
+ - libsndfile=1.2.0=hb75c966_0
215
+ - libsodium=1.0.18=h36c2ea0_1
216
+ - libsolv=0.7.23=h3eb15da_0
217
+ - libsqlite=3.40.0=h753d276_0
218
+ - libssh2=1.10.0=hf14f497_3
219
+ - libstdcxx-ng=12.2.0=h46fd767_19
220
+ - libsystemd0=253=h8c4010b_1
221
+ - libtasn1=4.19.0=h166bdaf_0
222
+ - libtiff=4.5.0=h6adf6a1_2
223
+ - libtool=2.4.7=h27087fc_0
224
+ - libudev1=253=h0b41bf4_1
225
+ - libunistring=0.9.10=h7f98852_0
226
+ - libuuid=2.38.1=h0b41bf4_0
227
+ - libuv=1.44.2=h166bdaf_0
228
+ - libva=2.18.0=h0b41bf4_0
229
+ - libvorbis=1.3.7=h9c3ff4c_0
230
+ - libvpx=1.11.0=h9c3ff4c_3
231
+ - libwebp-base=1.3.0=h0b41bf4_0
232
+ - libxcb=1.13=h7f98852_1004
233
+ - libxkbcommon=1.5.0=h79f4944_1
234
+ - libxml2=2.10.3=hca2bb57_4
235
+ - libzlib=1.2.13=h166bdaf_4
236
+ - llvm-openmp=16.0.3=h4dfa4b3_0
237
+ - llvmlite=0.39.1=py310h58363a5_1
238
+ - lz4-c=1.9.4=hcb278e6_0
239
+ - lzo=2.10=h516909a_1000
240
+ - mamba=1.4.1=py310h51d5547_0
241
+ - markupsafe=2.1.2=py310h1fa729e_0
242
+ - matplotlib=3.7.1=py310hff52083_0
243
+ - matplotlib-base=3.7.1=py310he60537e_0
244
+ - matplotlib-inline=0.1.6=pyhd8ed1ab_0
245
+ - mkl=2023.1.0=h84fe81f_48680
246
+ - mkl-include=2023.1.0=h84fe81f_48680
247
+ - mpc=1.3.1=hfe3b2da_0
248
+ - mpfr=4.2.0=hb012696_0
249
+ - mpg123=1.31.3=hcb278e6_0
250
+ - mpi=1.0=openmpi
251
+ - mpi4py=3.1.4=py310h6075a6b_0
252
+ - mpmath=1.3.0=pyhd8ed1ab_0
253
+ - munkres=1.1.4=pyh9f0ad1d_0
254
+ - murmurhash=1.0.9=py310hd8f1fbe_1
255
+ - mysql-common=8.0.32=hf1915f5_2
256
+ - mysql-libs=8.0.32=hca2cd23_2
257
+ - ncurses=6.3=h27087fc_1
258
+ - nest-asyncio=1.5.6=pyhd8ed1ab_0
259
+ - nettle=3.8.1=hc379101_1
260
+ - networkx=3.1=pyhd8ed1ab_0
261
+ - nsight-compute=2022.3.0.22=0
262
+ - nspr=4.35=h27087fc_0
263
+ - nss=3.89=he45b914_0
264
+ - numba=0.56.4=py310h0e39c9b_1
265
+ - numpy=1.23.5=py310h53a5b5f_0
266
+ - opencv=4.7.0=py310hff52083_1
267
+ - openh264=2.3.1=hcb278e6_2
268
+ - openjpeg=2.5.0=hfec8fc6_2
269
+ - openmpi=4.1.5=h414af15_101
270
+ - openssl=3.1.0=hd590300_3
271
+ - p11-kit=0.24.1=hc5aa10d_0
272
+ - packaging=23.1=pyhd8ed1ab_0
273
+ - pandas=2.0.1=py310h7cbd5c2_1
274
+ - parso=0.8.3=pyhd8ed1ab_0
275
+ - pathy=0.10.1=pyhd8ed1ab_0
276
+ - patsy=0.5.3=pyhd8ed1ab_0
277
+ - pcre2=10.40=hc3806b6_0
278
+ - pexpect=4.8.0=pyh1a96a4e_2
279
+ - pickleshare=0.7.5=py_1003
280
+ - pillow=9.4.0=py310h023d228_1
281
+ - pixman=0.40.0=h36c2ea0_0
282
+ - platformdirs=3.5.0=pyhd8ed1ab_0
283
+ - plotly=5.14.1=pyhd8ed1ab_0
284
+ - pluggy=1.0.0=pyhd8ed1ab_5
285
+ - ply=3.11=py_1
286
+ - pooch=1.7.0=pyha770c72_3
287
+ - preshed=3.0.8=py310hd8f1fbe_1
288
+ - prompt-toolkit=3.0.38=pyha770c72_0
289
+ - prompt_toolkit=3.0.38=hd8ed1ab_0
290
+ - psutil=5.9.5=py310h1fa729e_0
291
+ - pthread-stubs=0.4=h36c2ea0_1001
292
+ - ptyprocess=0.7.0=pyhd3deb0d_0
293
+ - pulseaudio=16.1=hcb278e6_3
294
+ - pulseaudio-client=16.1=h5195f5e_3
295
+ - pulseaudio-daemon=16.1=ha8d29e2_3
296
+ - pure_eval=0.2.2=pyhd8ed1ab_0
297
+ - py-opencv=4.7.0=py310hfdc917e_1
298
+ - pyasn1=0.4.8=py_0
299
+ - pybind11=2.10.4=py310hdf3cbec_0
300
+ - pybind11-abi=4=hd8ed1ab_3
301
+ - pybind11-global=2.10.4=py310hdf3cbec_0
302
+ - pycosat=0.6.4=py310h5764c6d_1
303
+ - pycparser=2.21=pyhd8ed1ab_0
304
+ - pydantic=1.10.7=py310h1fa729e_0
305
+ - pygments=2.15.1=pyhd8ed1ab_0
306
+ - pyopenssl=23.1.1=pyhd8ed1ab_0
307
+ - pyparsing=3.0.9=pyhd8ed1ab_0
308
+ - pyqt=5.15.7=py310hab646b1_3
309
+ - pyqt5-sip=12.11.0=py310heca2aa9_3
310
+ - pysocks=1.7.1=pyha2e5f31_6
311
+ - python=3.10.8=h4a9ceb5_0_cpython
312
+ - python-dateutil=2.8.2=pyhd8ed1ab_0
313
+ - python-tzdata=2023.3=pyhd8ed1ab_0
314
+ - python_abi=3.10=3_cp310
315
+ - pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
316
+ - pytorch-cuda=11.8=h7e8668a_3
317
+ - pytorch-mutex=1.0=cuda
318
+ - pytz=2023.3=pyhd8ed1ab_0
319
+ - pyyaml=5.4.1=py310h5764c6d_4
320
+ - pyzmq=25.0.2=py310h059b190_0
321
+ - qt-main=5.15.8=h5d23da1_6
322
+ - readline=8.2=h8228510_1
323
+ - reproc=14.2.4=h0b41bf4_0
324
+ - reproc-cpp=14.2.4=hcb278e6_0
325
+ - requests=2.28.2=pyhd8ed1ab_1
326
+ - rhash=1.4.3=h166bdaf_0
327
+ - rich=12.6.0=pyhd8ed1ab_0
328
+ - rsa=4.7.2=pyh44b312d_0
329
+ - ruamel.yaml=0.17.21=py310h1fa729e_3
330
+ - ruamel.yaml.clib=0.2.7=py310h1fa729e_1
331
+ - s3transfer=0.6.1=pyhd8ed1ab_0
332
+ - scikit-learn=1.2.2=py310h41b6a48_1
333
+ - scipy=1.10.1=py310h8deb116_2
334
+ - seaborn=0.12.2=hd8ed1ab_0
335
+ - seaborn-base=0.12.2=pyhd8ed1ab_0
336
+ - setuptools=65.6.3=pyhd8ed1ab_0
337
+ - shap=0.41.0=py310h769672d_0
338
+ - shellingham=1.5.1=pyhd8ed1ab_0
339
+ - sip=6.7.9=py310hc6cd4ac_0
340
+ - six=1.16.0=pyh6c4a22f_0
341
+ - slicer=0.0.7=pyhd8ed1ab_0
342
+ - smart_open=5.2.1=pyhd8ed1ab_0
343
+ - spacy=3.5.2=py310h5a539fb_0
344
+ - spacy-legacy=3.0.12=pyhd8ed1ab_0
345
+ - spacy-loggers=1.0.4=pyhd8ed1ab_0
346
+ - srsly=2.4.6=py310heca2aa9_0
347
+ - stack_data=0.6.2=pyhd8ed1ab_0
348
+ - statsmodels=0.14.0=py310h278f3c1_1
349
+ - svt-av1=1.4.1=hcb278e6_0
350
+ - sympy=1.11.1=pypyh9d50eac_103
351
+ - tbb=2021.9.0=hf52228f_0
352
+ - tenacity=8.2.2=pyhd8ed1ab_0
353
+ - thinc=8.1.10=py310hfb6f7a9_0
354
+ - threadpoolctl=3.1.0=pyh8a188c0_0
355
+ - tk=8.6.12=h27826a3_0
356
+ - toml=0.10.2=pyhd8ed1ab_0
357
+ - tomli=2.0.1=pyhd8ed1ab_0
358
+ - toolz=0.12.0=pyhd8ed1ab_0
359
+ - torchaudio=2.0.1=py310_cu118
360
+ - torchdata=0.6.0=py310
361
+ - torchtext=0.15.1=py310
362
+ - torchvision=0.15.1=py310_cu118
363
+ - tornado=6.3=py310h1fa729e_0
364
+ - tqdm=4.65.0=pyhd8ed1ab_1
365
+ - traitlets=5.9.0=pyhd8ed1ab_0
366
+ - typer=0.7.0=pyhd8ed1ab_0
367
+ - typing=3.10.0.0=pyhd8ed1ab_0
368
+ - typing-extensions=4.5.0=hd8ed1ab_0
369
+ - typing_extensions=4.5.0=pyha770c72_0
370
+ - tzdata=2023c=h71feb2d_0
371
+ - unicodedata2=15.0.0=py310h5764c6d_0
372
+ - urllib3=1.26.15=pyhd8ed1ab_0
373
+ - wasabi=1.1.1=py310hff52083_1
374
+ - wcwidth=0.2.6=pyhd8ed1ab_0
375
+ - wheel=0.40.0=pyhd8ed1ab_0
376
+ - x264=1!164.3095=h166bdaf_2
377
+ - x265=3.5=h924138e_3
378
+ - xcb-util=0.4.0=h516909a_0
379
+ - xcb-util-image=0.4.0=h166bdaf_0
380
+ - xcb-util-keysyms=0.4.0=h516909a_0
381
+ - xcb-util-renderutil=0.3.9=h166bdaf_0
382
+ - xcb-util-wm=0.4.1=h516909a_0
383
+ - xkeyboard-config=2.38=h0b41bf4_0
384
+ - xorg-fixesproto=5.0=h7f98852_1002
385
+ - xorg-inputproto=2.3.2=h7f98852_1002
386
+ - xorg-kbproto=1.0.7=h7f98852_1002
387
+ - xorg-libice=1.0.10=h7f98852_0
388
+ - xorg-libsm=1.2.3=hd9c2040_1000
389
+ - xorg-libx11=1.8.4=h0b41bf4_0
390
+ - xorg-libxau=1.0.9=h7f98852_0
391
+ - xorg-libxdmcp=1.1.3=h7f98852_0
392
+ - xorg-libxext=1.3.4=h0b41bf4_2
393
+ - xorg-libxfixes=5.0.3=h7f98852_1004
394
+ - xorg-libxi=1.7.10=h7f98852_0
395
+ - xorg-libxrender=0.9.10=h7f98852_1003
396
+ - xorg-renderproto=0.11.1=h7f98852_1002
397
+ - xorg-xextproto=7.3.0=h0b41bf4_1003
398
+ - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
399
+ - xorg-xproto=7.0.31=h7f98852_1007
400
+ - xyzservices=2023.2.0=pyhd8ed1ab_0
401
+ - xz=5.2.6=h166bdaf_0
402
+ - yaml=0.2.5=h7f98852_2
403
+ - yaml-cpp=0.7.0=h27087fc_2
404
+ - zeromq=4.3.4=h9c3ff4c_1
405
+ - zipp=3.15.0=pyhd8ed1ab_0
406
+ - zlib=1.2.13=h166bdaf_4
407
+ - zstandard=0.19.0=py310hdeb6495_1
408
+ - zstd=1.5.2=h3eb15da_6
409
+ - pip:
410
+ - accelerate==0.21.0
411
+ - aiohttp==3.8.5
412
+ - aiosignal==1.3.1
413
+ - apex==0.1
414
+ - appdirs==1.4.4
415
+ - argparse==1.4.0
416
+ - async-timeout==4.0.2
417
+ - attrs==22.2.0
418
+ - bcrypt==4.0.1
419
+ - bitsandbytes==0.41.0
420
+ - cmake==3.26.3
421
+ - contextlib2==21.6.0
422
+ - datasets==2.14.0
423
+ - deepspeed==0.6.1+1ea3d4b
424
+ - dill==0.3.6
425
+ - docker-pycreds==0.4.0
426
+ - einops==0.6.1
427
+ - flash-attn==0.2.8
428
+ - frozenlist==1.4.0
429
+ - fsspec==2023.5.0
430
+ - gevent==22.10.2
431
+ - gitdb==4.0.10
432
+ - gitpython==3.1.32
433
+ - google-pasta==0.2.0
434
+ - greenlet==2.0.2
435
+ - hjson==3.1.0
436
+ - horovod==0.26.1
437
+ - huggingface-hub==0.16.4
438
+ - importlib-metadata==4.13.0
439
+ - inotify-simple==1.2.1
440
+ - ipywidgets==8.0.7
441
+ - jsonpatch==1.32
442
+ - jsonpointer==2.3
443
+ - jsonschema==4.17.3
444
+ - jupyterlab-widgets==3.0.8
445
+ - lit==16.0.3
446
+ - multidict==6.0.4
447
+ - multiprocess==0.70.14
448
+ - ninja==1.11.1
449
+ - paramiko==3.1.0
450
+ - pathos==0.3.0
451
+ - pathtools==0.1.2
452
+ - peft==0.5.0.dev0
453
+ - pip==23.1.2
454
+ - pox==0.3.2
455
+ - ppft==1.7.6.6
456
+ - protobuf==3.20.3
457
+ - protobuf3-to-dict==0.1.5
458
+ - py-cpuinfo==9.0.0
459
+ - pyarrow==12.0.0
460
+ - pyfunctional==1.4.3
461
+ - pyinstrument==3.4.2
462
+ - pyinstrument-cext==0.2.4
463
+ - pynacl==1.5.0
464
+ - pyrsistent==0.19.3
465
+ - regex==2023.6.3
466
+ - retrying==1.3.4
467
+ - s3fs==0.4.2
468
+ - safetensors==0.3.1
469
+ - sagemaker==2.154.0
470
+ - sagemaker-experiments==0.1.43
471
+ - sagemaker-pytorch-training==2.8.0
472
+ - sagemaker-training==4.5.0
473
+ - schema==0.7.5
474
+ - sentry-sdk==1.28.1
475
+ - setproctitle==1.3.2
476
+ - smclarify==0.5
477
+ - smdebug==1.0.34
478
+ - smdebug-rulesconfig==1.0.1
479
+ - smdistributed-dataparallel==1.8.0
480
+ - smdistributed-modelparallel==1.15.0
481
+ - smmap==5.0.0
482
+ - tabulate==0.9.0
483
+ - tblib==1.7.0
484
+ - tokenizers==0.13.3
485
+ - torchnet==0.0.4
486
+ - transformers==4.31.0
487
+ - triton==2.0.0.dev20221202
488
+ - trl==0.4.7
489
+ - visdom==0.2.4
490
+ - wandb==0.15.7
491
+ - websocket-client==1.5.1
492
+ - werkzeug==2.3.4
493
+ - widgetsnbextension==4.0.8
494
+ - xxhash==3.2.0
495
+ - yarl==1.9.2
496
+ - zope-event==4.6
497
+ - zope-interface==6.0
498
+ prefix: /opt/conda
wandb/latest-run/files/config.yaml ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.8
7
+ cli_version: 0.15.7
8
+ framework: huggingface
9
+ huggingface_version: 4.31.0
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1690472976.418337
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 84
24
+ - 98
25
+ 2:
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 84
35
+ - 98
36
+ 3:
37
+ - 7
38
+ - 23
39
+ 4: 3.10.8
40
+ 5: 0.15.7
41
+ 6: 4.31.0
42
+ 8:
43
+ - 1
44
+ - 5
45
+ m:
46
+ - 1: train/global_step
47
+ 6:
48
+ - 3
49
+ - 1: train/loss
50
+ 5: 1
51
+ 6:
52
+ - 1
53
+ - 1: train/learning_rate
54
+ 5: 1
55
+ 6:
56
+ - 1
57
+ - 1: train/epoch
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: train/train_runtime
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: train/train_samples_per_second
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: train/train_steps_per_second
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: train/total_flos
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: train/train_loss
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ vocab_size:
82
+ desc: null
83
+ value: 65024
84
+ hidden_size:
85
+ desc: null
86
+ value: 4544
87
+ n_layer:
88
+ desc: null
89
+ value: 32
90
+ n_head:
91
+ desc: null
92
+ value: 71
93
+ layer_norm_epsilon:
94
+ desc: null
95
+ value: 1.0e-05
96
+ initializer_range:
97
+ desc: null
98
+ value: 0.02
99
+ use_cache:
100
+ desc: null
101
+ value: false
102
+ apply_residual_connection_post_layernorm:
103
+ desc: null
104
+ value: false
105
+ hidden_dropout:
106
+ desc: null
107
+ value: 0.0
108
+ attention_dropout:
109
+ desc: null
110
+ value: 0.0
111
+ bos_token_id:
112
+ desc: null
113
+ value: 11
114
+ eos_token_id:
115
+ desc: null
116
+ value: 11
117
+ multi_query:
118
+ desc: null
119
+ value: true
120
+ alibi:
121
+ desc: null
122
+ value: false
123
+ bias:
124
+ desc: null
125
+ value: false
126
+ parallel_attn:
127
+ desc: null
128
+ value: true
129
+ return_dict:
130
+ desc: null
131
+ value: true
132
+ output_hidden_states:
133
+ desc: null
134
+ value: false
135
+ output_attentions:
136
+ desc: null
137
+ value: false
138
+ torchscript:
139
+ desc: null
140
+ value: false
141
+ torch_dtype:
142
+ desc: null
143
+ value: bfloat16
144
+ use_bfloat16:
145
+ desc: null
146
+ value: false
147
+ tf_legacy_loss:
148
+ desc: null
149
+ value: false
150
+ pruned_heads:
151
+ desc: null
152
+ value: {}
153
+ tie_word_embeddings:
154
+ desc: null
155
+ value: true
156
+ is_encoder_decoder:
157
+ desc: null
158
+ value: false
159
+ is_decoder:
160
+ desc: null
161
+ value: false
162
+ cross_attention_hidden_size:
163
+ desc: null
164
+ value: null
165
+ add_cross_attention:
166
+ desc: null
167
+ value: false
168
+ tie_encoder_decoder:
169
+ desc: null
170
+ value: false
171
+ max_length:
172
+ desc: null
173
+ value: 20
174
+ min_length:
175
+ desc: null
176
+ value: 0
177
+ do_sample:
178
+ desc: null
179
+ value: false
180
+ early_stopping:
181
+ desc: null
182
+ value: false
183
+ num_beams:
184
+ desc: null
185
+ value: 1
186
+ num_beam_groups:
187
+ desc: null
188
+ value: 1
189
+ diversity_penalty:
190
+ desc: null
191
+ value: 0.0
192
+ temperature:
193
+ desc: null
194
+ value: 1.0
195
+ top_k:
196
+ desc: null
197
+ value: 50
198
+ top_p:
199
+ desc: null
200
+ value: 1.0
201
+ typical_p:
202
+ desc: null
203
+ value: 1.0
204
+ repetition_penalty:
205
+ desc: null
206
+ value: 1.0
207
+ length_penalty:
208
+ desc: null
209
+ value: 1.0
210
+ no_repeat_ngram_size:
211
+ desc: null
212
+ value: 0
213
+ encoder_no_repeat_ngram_size:
214
+ desc: null
215
+ value: 0
216
+ bad_words_ids:
217
+ desc: null
218
+ value: null
219
+ num_return_sequences:
220
+ desc: null
221
+ value: 1
222
+ chunk_size_feed_forward:
223
+ desc: null
224
+ value: 0
225
+ output_scores:
226
+ desc: null
227
+ value: false
228
+ return_dict_in_generate:
229
+ desc: null
230
+ value: false
231
+ forced_bos_token_id:
232
+ desc: null
233
+ value: null
234
+ forced_eos_token_id:
235
+ desc: null
236
+ value: null
237
+ remove_invalid_values:
238
+ desc: null
239
+ value: false
240
+ exponential_decay_length_penalty:
241
+ desc: null
242
+ value: null
243
+ suppress_tokens:
244
+ desc: null
245
+ value: null
246
+ begin_suppress_tokens:
247
+ desc: null
248
+ value: null
249
+ architectures:
250
+ desc: null
251
+ value:
252
+ - RWForCausalLM
253
+ finetuning_task:
254
+ desc: null
255
+ value: null
256
+ id2label:
257
+ desc: null
258
+ value:
259
+ '0': LABEL_0
260
+ '1': LABEL_1
261
+ label2id:
262
+ desc: null
263
+ value:
264
+ LABEL_0: 0
265
+ LABEL_1: 1
266
+ tokenizer_class:
267
+ desc: null
268
+ value: null
269
+ prefix:
270
+ desc: null
271
+ value: null
272
+ pad_token_id:
273
+ desc: null
274
+ value: null
275
+ sep_token_id:
276
+ desc: null
277
+ value: null
278
+ decoder_start_token_id:
279
+ desc: null
280
+ value: null
281
+ task_specific_params:
282
+ desc: null
283
+ value: null
284
+ problem_type:
285
+ desc: null
286
+ value: null
287
+ _name_or_path:
288
+ desc: null
289
+ value: ybelkada/falcon-7b-sharded-bf16
290
+ transformers_version:
291
+ desc: null
292
+ value: 4.31.0
293
+ auto_map:
294
+ desc: null
295
+ value:
296
+ AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
297
+ AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
298
+ AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
299
+ AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
300
+ AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
301
+ AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
302
+ model_type:
303
+ desc: null
304
+ value: RefinedWebModel
305
+ quantization_config:
306
+ desc: null
307
+ value:
308
+ load_in_8bit: false
309
+ load_in_4bit: true
310
+ llm_int8_threshold: 6.0
311
+ llm_int8_skip_modules: null
312
+ llm_int8_enable_fp32_cpu_offload: false
313
+ llm_int8_has_fp16_weight: false
314
+ bnb_4bit_quant_type: nf4
315
+ bnb_4bit_use_double_quant: false
316
+ bnb_4bit_compute_dtype: float16
317
+ output_dir:
318
+ desc: null
319
+ value: ./results
320
+ overwrite_output_dir:
321
+ desc: null
322
+ value: false
323
+ do_train:
324
+ desc: null
325
+ value: false
326
+ do_eval:
327
+ desc: null
328
+ value: false
329
+ do_predict:
330
+ desc: null
331
+ value: false
332
+ evaluation_strategy:
333
+ desc: null
334
+ value: 'no'
335
+ prediction_loss_only:
336
+ desc: null
337
+ value: false
338
+ per_device_train_batch_size:
339
+ desc: null
340
+ value: 4
341
+ per_device_eval_batch_size:
342
+ desc: null
343
+ value: 8
344
+ per_gpu_train_batch_size:
345
+ desc: null
346
+ value: None
347
+ per_gpu_eval_batch_size:
348
+ desc: null
349
+ value: None
350
+ gradient_accumulation_steps:
351
+ desc: null
352
+ value: 4
353
+ eval_accumulation_steps:
354
+ desc: null
355
+ value: None
356
+ eval_delay:
357
+ desc: null
358
+ value: 0
359
+ learning_rate:
360
+ desc: null
361
+ value: 0.0002
362
+ weight_decay:
363
+ desc: null
364
+ value: 0.0
365
+ adam_beta1:
366
+ desc: null
367
+ value: 0.9
368
+ adam_beta2:
369
+ desc: null
370
+ value: 0.999
371
+ adam_epsilon:
372
+ desc: null
373
+ value: 1.0e-08
374
+ max_grad_norm:
375
+ desc: null
376
+ value: 0.3
377
+ num_train_epochs:
378
+ desc: null
379
+ value: 3.0
380
+ max_steps:
381
+ desc: null
382
+ value: 500
383
+ lr_scheduler_type:
384
+ desc: null
385
+ value: constant
386
+ warmup_ratio:
387
+ desc: null
388
+ value: 0.03
389
+ warmup_steps:
390
+ desc: null
391
+ value: 0
392
+ log_level:
393
+ desc: null
394
+ value: passive
395
+ log_level_replica:
396
+ desc: null
397
+ value: warning
398
+ log_on_each_node:
399
+ desc: null
400
+ value: true
401
+ logging_dir:
402
+ desc: null
403
+ value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
404
+ logging_strategy:
405
+ desc: null
406
+ value: steps
407
+ logging_first_step:
408
+ desc: null
409
+ value: false
410
+ logging_steps:
411
+ desc: null
412
+ value: 10
413
+ logging_nan_inf_filter:
414
+ desc: null
415
+ value: true
416
+ save_strategy:
417
+ desc: null
418
+ value: steps
419
+ save_steps:
420
+ desc: null
421
+ value: 10
422
+ save_total_limit:
423
+ desc: null
424
+ value: None
425
+ save_safetensors:
426
+ desc: null
427
+ value: false
428
+ save_on_each_node:
429
+ desc: null
430
+ value: false
431
+ no_cuda:
432
+ desc: null
433
+ value: false
434
+ use_mps_device:
435
+ desc: null
436
+ value: false
437
+ seed:
438
+ desc: null
439
+ value: 42
440
+ data_seed:
441
+ desc: null
442
+ value: None
443
+ jit_mode_eval:
444
+ desc: null
445
+ value: false
446
+ use_ipex:
447
+ desc: null
448
+ value: false
449
+ bf16:
450
+ desc: null
451
+ value: false
452
+ fp16:
453
+ desc: null
454
+ value: true
455
+ fp16_opt_level:
456
+ desc: null
457
+ value: O1
458
+ half_precision_backend:
459
+ desc: null
460
+ value: auto
461
+ bf16_full_eval:
462
+ desc: null
463
+ value: false
464
+ fp16_full_eval:
465
+ desc: null
466
+ value: false
467
+ tf32:
468
+ desc: null
469
+ value: None
470
+ local_rank:
471
+ desc: null
472
+ value: 0
473
+ ddp_backend:
474
+ desc: null
475
+ value: None
476
+ tpu_num_cores:
477
+ desc: null
478
+ value: None
479
+ tpu_metrics_debug:
480
+ desc: null
481
+ value: false
482
+ debug:
483
+ desc: null
484
+ value: '[]'
485
+ dataloader_drop_last:
486
+ desc: null
487
+ value: false
488
+ eval_steps:
489
+ desc: null
490
+ value: None
491
+ dataloader_num_workers:
492
+ desc: null
493
+ value: 0
494
+ past_index:
495
+ desc: null
496
+ value: -1
497
+ run_name:
498
+ desc: null
499
+ value: ./results
500
+ disable_tqdm:
501
+ desc: null
502
+ value: false
503
+ remove_unused_columns:
504
+ desc: null
505
+ value: true
506
+ label_names:
507
+ desc: null
508
+ value: None
509
+ load_best_model_at_end:
510
+ desc: null
511
+ value: false
512
+ metric_for_best_model:
513
+ desc: null
514
+ value: None
515
+ greater_is_better:
516
+ desc: null
517
+ value: None
518
+ ignore_data_skip:
519
+ desc: null
520
+ value: false
521
+ sharded_ddp:
522
+ desc: null
523
+ value: '[]'
524
+ fsdp:
525
+ desc: null
526
+ value: '[]'
527
+ fsdp_min_num_params:
528
+ desc: null
529
+ value: 0
530
+ fsdp_config:
531
+ desc: null
532
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
533
+ fsdp_transformer_layer_cls_to_wrap:
534
+ desc: null
535
+ value: None
536
+ deepspeed:
537
+ desc: null
538
+ value: None
539
+ label_smoothing_factor:
540
+ desc: null
541
+ value: 0.0
542
+ optim:
543
+ desc: null
544
+ value: paged_adamw_32bit
545
+ optim_args:
546
+ desc: null
547
+ value: None
548
+ adafactor:
549
+ desc: null
550
+ value: false
551
+ group_by_length:
552
+ desc: null
553
+ value: true
554
+ length_column_name:
555
+ desc: null
556
+ value: length
557
+ report_to:
558
+ desc: null
559
+ value: '[''wandb'']'
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: None
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: None
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: None
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ skip_memory_metrics:
573
+ desc: null
574
+ value: true
575
+ use_legacy_prediction_loop:
576
+ desc: null
577
+ value: false
578
+ push_to_hub:
579
+ desc: null
580
+ value: false
581
+ resume_from_checkpoint:
582
+ desc: null
583
+ value: None
584
+ hub_model_id:
585
+ desc: null
586
+ value: None
587
+ hub_strategy:
588
+ desc: null
589
+ value: every_save
590
+ hub_token:
591
+ desc: null
592
+ value: <HUB_TOKEN>
593
+ hub_private_repo:
594
+ desc: null
595
+ value: false
596
+ gradient_checkpointing:
597
+ desc: null
598
+ value: false
599
+ include_inputs_for_metrics:
600
+ desc: null
601
+ value: false
602
+ fp16_backend:
603
+ desc: null
604
+ value: auto
605
+ push_to_hub_model_id:
606
+ desc: null
607
+ value: None
608
+ push_to_hub_organization:
609
+ desc: null
610
+ value: None
611
+ push_to_hub_token:
612
+ desc: null
613
+ value: <PUSH_TO_HUB_TOKEN>
614
+ mp_parameters:
615
+ desc: null
616
+ value: ''
617
+ auto_find_batch_size:
618
+ desc: null
619
+ value: false
620
+ full_determinism:
621
+ desc: null
622
+ value: false
623
+ torchdynamo:
624
+ desc: null
625
+ value: None
626
+ ray_scope:
627
+ desc: null
628
+ value: last
629
+ ddp_timeout:
630
+ desc: null
631
+ value: 1800
632
+ torch_compile:
633
+ desc: null
634
+ value: false
635
+ torch_compile_backend:
636
+ desc: null
637
+ value: None
638
+ torch_compile_mode:
639
+ desc: null
640
+ value: None
641
+ xpu_backend:
642
+ desc: null
643
+ value: None
644
+ train_batch_size:
645
+ desc: null
646
+ value: 4
647
+ eval_batch_size:
648
+ desc: null
649
+ value: 8
wandb/latest-run/files/output.log ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
3
+ {}
4
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
5
+ To disable this warning, you can either:
6
+ - Avoid using `tokenizers` before the fork if possible
7
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
8
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
9
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
10
+ [notice] To update, run: pip install --upgrade pip
11
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
12
+ To disable this warning, you can either:
13
+ - Avoid using `tokenizers` before the fork if possible
14
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
15
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
16
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
17
+ [notice] To update, run: pip install --upgrade pip
18
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
19
+ To disable this warning, you can either:
20
+ - Avoid using `tokenizers` before the fork if possible
21
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
22
+ Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
23
+ Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
24
+ Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
25
+ Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
26
+ Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
27
+ Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
28
+ Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
29
+ Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
30
+ Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
31
+ Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
32
+ Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
33
+ Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
34
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
35
+ Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
36
+ Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
37
+ Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
38
+ Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
39
+ Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
40
+ Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
41
+ Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
42
+ Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
43
+ Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
44
+ Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
45
+ Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
46
+ Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
47
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
48
+ Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
49
+ Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
50
+ Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
51
+ Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
52
+ Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
53
+ Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
54
+ Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
55
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
56
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
57
+ [notice] To update, run: pip install --upgrade pip
58
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
59
+ To disable this warning, you can either:
60
+ - Avoid using `tokenizers` before the fork if possible
61
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
62
+ Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
63
+ Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
64
+ Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
65
+ Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
66
+ Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
67
+ Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
68
+ Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
69
+ Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
70
+ Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
71
+ Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
72
+ Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
73
+ Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
74
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
75
+ Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
76
+ Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
77
+ Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
78
+ Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
79
+ Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
80
+ Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
81
+ Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
82
+ Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
83
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
84
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
85
+ Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
86
+ Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
87
+ Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
88
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
89
+ Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
90
+ Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
91
+ Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
92
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
93
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
94
+ [notice] To update, run: pip install --upgrade pip
95
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
96
+ To disable this warning, you can either:
97
+ - Avoid using `tokenizers` before the fork if possible
98
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
99
+ Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
100
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
101
+ Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
102
+ Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
103
+ Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
104
+ Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
105
+ Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
106
+ Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
107
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
108
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
109
+ [notice] To update, run: pip install --upgrade pip
110
+ True
111
+ /opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
112
+ warnings.warn(
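Aside (not part of the uploaded files): the two recurring warnings in this output log each name their own remedy. A minimal hedged sketch of how they are usually addressed in the training script, assuming the script owns process startup:

# Hedged sketch: resolving the warnings logged above.
import os

# 1) The huggingface/tokenizers fork warning: set the variable before any
#    worker processes are forked (e.g. before creating DataLoaders).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# 2) The PEFT FutureWarning: call the renamed helper instead of the
#    deprecated prepare_model_for_int8_training.
from peft import prepare_model_for_kbit_training

# model = prepare_model_for_kbit_training(model)  # `model` is the quantized base model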
wandb/latest-run/files/requirements.txt ADDED
@@ -0,0 +1,240 @@
1
+ accelerate==0.21.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ apex==0.1
5
+ appdirs==1.4.4
6
+ argparse==1.4.0
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==22.2.0
10
+ awscli==1.27.132
11
+ backcall==0.2.0
12
+ backports.functools-lru-cache==1.6.4
13
+ bcrypt==4.0.1
14
+ bitsandbytes==0.41.0
15
+ blis==0.7.9
16
+ bokeh==3.1.1
17
+ boto3==1.26.132
18
+ botocore==1.29.132
19
+ brotlipy==0.7.0
20
+ cached-property==1.5.2
21
+ catalogue==2.0.8
22
+ certifi==2023.5.7
23
+ cffi==1.15.1
24
+ charset-normalizer==3.1.0
25
+ click==8.1.3
26
+ cloudpickle==2.2.1
27
+ cmake==3.26.3
28
+ colorama==0.4.4
29
+ comm==0.1.3
30
+ commonmark==0.9.1
31
+ conda-content-trust==0.1.3
32
+ conda-package-handling==2.0.2
33
+ conda-package-streaming==0.7.0
34
+ conda==23.1.0
35
+ confection==0.0.4
36
+ contextlib2==21.6.0
37
+ contourpy==1.0.7
38
+ cryptography==40.0.1
39
+ cycler==0.11.0
40
+ cymem==2.0.7
41
+ cython==0.29.34
42
+ datasets==2.14.0
43
+ debugpy==1.6.7
44
+ decorator==5.1.1
45
+ deepspeed==0.6.1+1ea3d4b
46
+ dgl==1.1.0+cu118
47
+ dill==0.3.6
48
+ docker-pycreds==0.4.0
49
+ docutils==0.15.2
50
+ einops==0.6.1
51
+ executing==1.2.0
52
+ fastai==2.7.12
53
+ fastcore==1.5.29
54
+ fastdownload==0.0.7
55
+ fastprogress==1.0.3
56
+ filelock==3.12.0
57
+ flash-attn==0.2.8
58
+ fonttools==4.39.4
59
+ frozenlist==1.4.0
60
+ fsspec==2023.5.0
61
+ future==0.18.3
62
+ gevent==22.10.2
63
+ gitdb==4.0.10
64
+ gitpython==3.1.32
65
+ gmpy2==2.1.2
66
+ google-pasta==0.2.0
67
+ greenlet==2.0.2
68
+ h5py==3.8.0
69
+ hjson==3.1.0
70
+ horovod==0.26.1
71
+ huggingface-hub==0.16.4
72
+ idna==3.4
73
+ imageio==2.28.1
74
+ importlib-metadata==4.13.0
75
+ inotify-simple==1.2.1
76
+ ipykernel==6.23.0
77
+ ipython==8.13.2
78
+ ipywidgets==8.0.7
79
+ jedi==0.18.2
80
+ jinja2==3.1.2
81
+ jmespath==1.0.1
82
+ joblib==1.2.0
83
+ jsonpatch==1.32
84
+ jsonpointer==2.3
85
+ jsonschema==4.17.3
86
+ jupyter-client==8.2.0
87
+ jupyter-core==5.3.0
88
+ jupyterlab-widgets==3.0.8
89
+ kiwisolver==1.4.4
90
+ langcodes==3.3.0
91
+ libmambapy==1.4.1
92
+ lit==16.0.3
93
+ llvmlite==0.39.1
94
+ mamba==1.4.1
95
+ markupsafe==2.1.2
96
+ matplotlib-inline==0.1.6
97
+ matplotlib==3.7.1
98
+ mpi4py==3.1.4
99
+ mpmath==1.3.0
100
+ multidict==6.0.4
101
+ multiprocess==0.70.14
102
+ munkres==1.1.4
103
+ murmurhash==1.0.9
104
+ nest-asyncio==1.5.6
105
+ networkx==3.1
106
+ ninja==1.11.1
107
+ numba==0.56.4
108
+ numpy==1.23.5
109
+ opencv-python==4.7.0
110
+ packaging==23.1
111
+ pandas==2.0.1
112
+ paramiko==3.1.0
113
+ parso==0.8.3
114
+ pathos==0.3.0
115
+ pathtools==0.1.2
116
+ pathy==0.10.1
117
+ patsy==0.5.3
118
+ peft==0.5.0.dev0
119
+ pexpect==4.8.0
120
+ pickleshare==0.7.5
121
+ pillow==9.4.0
122
+ pip==23.1.2
123
+ platformdirs==3.5.0
124
+ plotly==5.14.1
125
+ pluggy==1.0.0
126
+ ply==3.11
127
+ pooch==1.7.0
128
+ pox==0.3.2
129
+ ppft==1.7.6.6
130
+ preshed==3.0.8
131
+ prompt-toolkit==3.0.38
132
+ protobuf3-to-dict==0.1.5
133
+ protobuf==3.20.3
134
+ psutil==5.9.5
135
+ ptyprocess==0.7.0
136
+ pure-eval==0.2.2
137
+ py-cpuinfo==9.0.0
138
+ pyarrow==12.0.0
139
+ pyasn1==0.4.8
140
+ pybind11-global==2.10.4
141
+ pybind11==2.10.4
142
+ pycosat==0.6.4
143
+ pycparser==2.21
144
+ pydantic==1.10.7
145
+ pyfunctional==1.4.3
146
+ pygments==2.15.1
147
+ pyinstrument-cext==0.2.4
148
+ pyinstrument==3.4.2
149
+ pynacl==1.5.0
150
+ pyopenssl==23.1.1
151
+ pyparsing==3.0.9
152
+ pyqt5-sip==12.11.0
153
+ pyqt5==5.15.7
154
+ pyrsistent==0.19.3
155
+ pysocks==1.7.1
156
+ python-dateutil==2.8.2
157
+ pytz==2023.3
158
+ pyyaml==5.4.1
159
+ pyzmq==25.0.2
160
+ regex==2023.6.3
161
+ requests==2.28.2
162
+ retrying==1.3.4
163
+ rich==12.6.0
164
+ rsa==4.7.2
165
+ ruamel.yaml.clib==0.2.7
166
+ ruamel.yaml==0.17.21
167
+ s3fs==0.4.2
168
+ s3transfer==0.6.1
169
+ safetensors==0.3.1
170
+ sagemaker-experiments==0.1.43
171
+ sagemaker-pytorch-training==2.8.0
172
+ sagemaker-training==4.5.0
173
+ sagemaker==2.154.0
174
+ schema==0.7.5
175
+ scikit-learn==1.2.2
176
+ scipy==1.10.1
177
+ seaborn==0.12.2
178
+ sentry-sdk==1.28.1
179
+ setproctitle==1.3.2
180
+ setuptools==65.6.3
181
+ shap==0.41.0
182
+ shellingham==1.5.1
183
+ sip==6.7.9
184
+ six==1.16.0
185
+ slicer==0.0.7
186
+ smart-open==5.2.1
187
+ smclarify==0.5
188
+ smdebug-rulesconfig==1.0.1
189
+ smdebug==1.0.34
190
+ smdistributed-dataparallel==1.8.0
191
+ smdistributed-modelparallel==1.15.0
192
+ smmap==5.0.0
193
+ spacy-legacy==3.0.12
194
+ spacy-loggers==1.0.4
195
+ spacy==3.5.2
196
+ srsly==2.4.6
197
+ stack-data==0.6.2
198
+ statsmodels==0.14.0
199
+ sympy==1.11.1
200
+ tabulate==0.9.0
201
+ tblib==1.7.0
202
+ tenacity==8.2.2
203
+ thinc==8.1.10
204
+ threadpoolctl==3.1.0
205
+ tokenizers==0.13.3
206
+ toml==0.10.2
207
+ tomli==2.0.1
208
+ toolz==0.12.0
209
+ torch==2.0.0
210
+ torchaudio==2.0.1
211
+ torchdata==0.6.0
212
+ torchnet==0.0.4
213
+ torchtext==0.15.1
214
+ torchvision==0.15.1
215
+ tornado==6.3
216
+ tqdm==4.65.0
217
+ traitlets==5.9.0
218
+ transformers==4.31.0
219
+ triton==2.0.0.dev20221202
220
+ trl==0.4.7
221
+ typer==0.7.0
222
+ typing-extensions==4.5.0
223
+ tzdata==2023.3
224
+ unicodedata2==15.0.0
225
+ urllib3==1.26.15
226
+ visdom==0.2.4
227
+ wandb==0.15.7
228
+ wasabi==1.1.1
229
+ wcwidth==0.2.6
230
+ websocket-client==1.5.1
231
+ werkzeug==2.3.4
232
+ wheel==0.40.0
233
+ widgetsnbextension==4.0.8
234
+ xxhash==3.2.0
235
+ xyzservices==2023.2.0
236
+ yarl==1.9.2
237
+ zipp==3.15.0
238
+ zope.event==4.6
239
+ zope.interface==6.0
240
+ zstandard==0.19.0
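Aside (not part of the uploaded files): this requirements.txt pins the full pip environment of the run. A hedged sketch, assuming the file has been downloaded locally as requirements.txt, for comparing the pinned versions of the key training packages against the current environment:

# Hedged sketch: compare a local environment against the pinned snapshot above.
from importlib.metadata import version, PackageNotFoundError

KEY_PACKAGES = ["transformers", "peft", "bitsandbytes", "accelerate", "trl", "datasets"]

pinned = {}
with open("requirements.txt") as fh:
    for line in fh:
        line = line.strip()
        if "==" in line:
            name, _, ver = line.partition("==")
            pinned[name.lower()] = ver

for pkg in KEY_PACKAGES:
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        installed = "not installed"
    print(f"{pkg}: pinned {pinned.get(pkg, '?')} / installed {installed}")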
wandb/latest-run/files/wandb-metadata.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
3
+ "python": "3.10.8",
4
+ "heartbeatAt": "2023-07-27T15:49:36.888553",
5
+ "startedAt": "2023-07-27T15:49:36.344100",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
12
+ "username": "root",
13
+ "executable": "/opt/conda/bin/python",
14
+ "cpu_count": 4,
15
+ "cpu_count_logical": 8,
16
+ "cpu_freq": {
17
+ "current": 3100.120625,
18
+ "min": 0.0,
19
+ "max": 0.0
20
+ },
21
+ "cpu_freq_per_core": [
22
+ {
23
+ "current": 3107.574,
24
+ "min": 0.0,
25
+ "max": 0.0
26
+ },
27
+ {
28
+ "current": 3102.47,
29
+ "min": 0.0,
30
+ "max": 0.0
31
+ },
32
+ {
33
+ "current": 3099.63,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 3099.058,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 3100.716,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 3099.393,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 3099.988,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 3092.136,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ }
62
+ ],
63
+ "disk": {
64
+ "total": 32.0,
65
+ "used": 0.414398193359375
66
+ },
67
+ "gpu": "Tesla T4",
68
+ "gpu_count": 1,
69
+ "gpu_devices": [
70
+ {
71
+ "name": "Tesla T4",
72
+ "memory_total": 15843721216
73
+ }
74
+ ],
75
+ "memory": {
76
+ "total": 30.947834014892578
77
+ }
78
+ }
wandb/latest-run/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
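Aside (not part of the uploaded files): the throughput figures in this summary follow from the batch settings recorded in debug.log below (per_device_train_batch_size=4, gradient_accumulation_steps=4, max_steps=500). A small sanity-check sketch:

# Sanity check of the summary above: throughput from the logged batch settings.
train_runtime = 7012.9274          # seconds, from train/train_runtime
steps = 500                        # train/global_step
effective_batch = 4 * 4            # per_device_train_batch_size * gradient_accumulation_steps

print(round(steps / train_runtime, 3))                     # ~0.071 -> train/train_steps_per_second
print(round(steps * effective_batch / train_runtime, 3))   # ~1.141 -> train/train_samples_per_second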
wandb/latest-run/logs/debug-internal.log ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/latest-run/logs/debug.log ADDED
@@ -0,0 +1,76 @@
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
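Aside (not part of the uploaded files): the config_cb entries above record the full quantization and Trainer setup of this run. A hedged reconstruction of those logged values; this is a sketch, not the original training script, and the dataset, tokenizer, and LoRA adapter configuration are not captured in this log, so they are omitted:

# Hedged reconstruction of the settings logged by config_cb above.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

bnb_config = BitsAndBytesConfig(            # matches the logged quantization_config
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/falcon-7b-sharded-bf16",      # _name_or_path in the logged config
    quantization_config=bnb_config,
    trust_remote_code=True,                 # Falcon's custom RWForCausalLM code (see auto_map above)
)

training_args = TrainingArguments(          # values taken from the logged TrainingArguments
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_steps=500,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    fp16=True,
    optim="paged_adamw_32bit",
    logging_steps=10,
    save_steps=10,
    group_by_length=True,
    report_to=["wandb"],
)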
wandb/run-20230727_154936-a41qiywg/files/conda-environment.yaml ADDED
@@ -0,0 +1,498 @@
1
+ name: base
2
+ channels:
3
+ - fastai
4
+ - dglteam/label/cu118
5
+ - nvidia/label/cuda-11.8.0
6
+ - https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
7
+ - conda-forge
8
+ dependencies:
9
+ - _libgcc_mutex=0.1=conda_forge
10
+ - _openmp_mutex=4.5=2_kmp_llvm
11
+ - alsa-lib=1.2.8=h166bdaf_0
12
+ - aom=3.5.0=h27087fc_0
13
+ - asttokens=2.2.1=pyhd8ed1ab_0
14
+ - attr=2.5.1=h166bdaf_1
15
+ - aws-ofi-nccl-dlc=1.5.0=aws_0
16
+ - awscli=1.27.132=py310hff52083_0
17
+ - backcall=0.2.0=pyh9f0ad1d_0
18
+ - backports=1.0=pyhd8ed1ab_3
19
+ - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
20
+ - blas=1.0=mkl
21
+ - bokeh=3.1.1=pyhd8ed1ab_0
22
+ - boto3=1.26.132=pyhd8ed1ab_0
23
+ - botocore=1.29.132=pyhd8ed1ab_0
24
+ - brotli=1.0.9=h166bdaf_8
25
+ - brotli-bin=1.0.9=h166bdaf_8
26
+ - brotlipy=0.7.0=py310h5764c6d_1005
27
+ - bzip2=1.0.8=h7f98852_4
28
+ - c-ares=1.18.1=h7f98852_0
29
+ - ca-certificates=2023.5.7=hbcca054_0
30
+ - cached-property=1.5.2=hd8ed1ab_1
31
+ - cached_property=1.5.2=pyha770c72_1
32
+ - cairo=1.16.0=ha61ee94_1014
33
+ - catalogue=2.0.8=py310hff52083_1
34
+ - certifi=2023.5.7=pyhd8ed1ab_0
35
+ - cffi=1.15.1=py310h255011f_3
36
+ - charset-normalizer=3.1.0=pyhd8ed1ab_0
37
+ - click=8.1.3=unix_pyhd8ed1ab_2
38
+ - cloudpickle=2.2.1=pyhd8ed1ab_0
39
+ - colorama=0.4.4=pyh9f0ad1d_0
40
+ - comm=0.1.3=pyhd8ed1ab_0
41
+ - commonmark=0.9.1=py_0
42
+ - conda=23.1.0=py310hff52083_0
43
+ - conda-content-trust=0.1.3=pyhd8ed1ab_0
44
+ - conda-package-handling=2.0.2=pyh38be061_0
45
+ - conda-package-streaming=0.7.0=pyhd8ed1ab_1
46
+ - confection=0.0.4=py310hfdc917e_1
47
+ - contourpy=1.0.7=py310hdf3cbec_0
48
+ - cryptography=40.0.1=py310h34c0648_0
49
+ - cuda-cccl=11.8.89=0
50
+ - cuda-command-line-tools=11.8.0=0
51
+ - cuda-compiler=11.8.0=0
52
+ - cuda-cudart=11.8.89=0
53
+ - cuda-cudart-dev=11.8.89=0
54
+ - cuda-cuobjdump=11.8.86=0
55
+ - cuda-cupti=11.8.87=0
56
+ - cuda-cuxxfilt=11.8.86=0
57
+ - cuda-documentation=11.8.86=0
58
+ - cuda-driver-dev=11.8.89=0
59
+ - cuda-gdb=11.8.86=0
60
+ - cuda-libraries=11.8.0=0
61
+ - cuda-libraries-dev=11.8.0=0
62
+ - cuda-memcheck=11.8.86=0
63
+ - cuda-nsight=11.8.86=0
64
+ - cuda-nsight-compute=11.8.0=0
65
+ - cuda-nvcc=11.8.89=0
66
+ - cuda-nvdisasm=11.8.86=0
67
+ - cuda-nvml-dev=11.8.86=0
68
+ - cuda-nvprof=11.8.87=0
69
+ - cuda-nvprune=11.8.86=0
70
+ - cuda-nvrtc=11.8.89=0
71
+ - cuda-nvrtc-dev=11.8.89=0
72
+ - cuda-nvtx=11.8.86=0
73
+ - cuda-nvvp=11.8.87=0
74
+ - cuda-profiler-api=11.8.86=0
75
+ - cuda-runtime=11.8.0=0
76
+ - cuda-sanitizer-api=11.8.86=0
77
+ - cuda-toolkit=11.8.0=0
78
+ - cuda-tools=11.8.0=0
79
+ - cuda-visual-tools=11.8.0=0
80
+ - cycler=0.11.0=pyhd8ed1ab_0
81
+ - cymem=2.0.7=py310hd8f1fbe_1
82
+ - cython=0.29.34=py310heca2aa9_0
83
+ - cython-blis=0.7.9=py310hde88566_1
84
+ - dbus=1.13.6=h5008d03_3
85
+ - debugpy=1.6.7=py310heca2aa9_0
86
+ - decorator=5.1.1=pyhd8ed1ab_0
87
+ - dgl=1.1.0.cu118=py310_0
88
+ - docutils=0.15.2=py310hff52083_6
89
+ - executing=1.2.0=pyhd8ed1ab_0
90
+ - expat=2.5.0=hcb278e6_1
91
+ - fastai=2.7.12=py_0
92
+ - fastcore=1.5.29=py_0
93
+ - fastdownload=0.0.7=py_0
94
+ - fastprogress=1.0.3=py_0
95
+ - ffmpeg=5.1.2=gpl_h8dda1f0_106
96
+ - fftw=3.3.10=nompi_hc118613_107
97
+ - filelock=3.12.0=pyhd8ed1ab_0
98
+ - fmt=9.1.0=h924138e_0
99
+ - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
100
+ - font-ttf-inconsolata=3.000=h77eed37_0
101
+ - font-ttf-source-code-pro=2.038=h77eed37_0
102
+ - font-ttf-ubuntu=0.83=hab24e00_0
103
+ - fontconfig=2.14.2=h14ed4e7_0
104
+ - fonts-conda-ecosystem=1=0
105
+ - fonts-conda-forge=1=0
106
+ - fonttools=4.39.4=py310h2372a71_0
107
+ - freeglut=3.2.2=h9c3ff4c_1
108
+ - freetype=2.12.1=hca18f0e_1
109
+ - future=0.18.3=pyhd8ed1ab_0
110
+ - gds-tools=1.4.0.31=0
111
+ - gettext=0.21.1=h27087fc_0
112
+ - glib=2.76.2=hfc55251_0
113
+ - glib-tools=2.76.2=hfc55251_0
114
+ - gmp=6.2.1=h58526e2_0
115
+ - gmpy2=2.1.2=py310h3ec546c_1
116
+ - gnutls=3.7.8=hf3e180e_0
117
+ - graphite2=1.3.13=h58526e2_1001
118
+ - gst-plugins-base=1.22.0=h4243ec0_2
119
+ - gstreamer=1.22.0=h25f0c4b_2
120
+ - gstreamer-orc=0.4.33=h166bdaf_0
121
+ - h5py=3.8.0=nompi_py310ha66b2ad_101
122
+ - harfbuzz=6.0.0=h8e241bc_0
123
+ - hdf5=1.14.0=nompi_hb72d44e_103
124
+ - icu=70.1=h27087fc_0
125
+ - idna=3.4=pyhd8ed1ab_0
126
+ - imageio=2.28.1=pyh24c5eb1_0
127
+ - importlib_metadata=6.6.0=hd8ed1ab_0
128
+ - ipykernel=6.23.0=pyh210e3f2_0
129
+ - ipython=8.13.2=pyh41d4057_0
130
+ - jack=1.9.22=h11f4161_0
131
+ - jasper=2.0.33=h0ff4b12_1
132
+ - jedi=0.18.2=pyhd8ed1ab_0
133
+ - jinja2=3.1.2=pyhd8ed1ab_1
134
+ - jmespath=1.0.1=pyhd8ed1ab_0
135
+ - joblib=1.2.0=pyhd8ed1ab_0
136
+ - jpeg=9e=h166bdaf_2
137
+ - jupyter_client=8.2.0=pyhd8ed1ab_0
138
+ - jupyter_core=5.3.0=py310hff52083_0
139
+ - keyutils=1.6.1=h166bdaf_0
140
+ - kiwisolver=1.4.4=py310hbf28c38_1
141
+ - krb5=1.20.1=h81ceb04_0
142
+ - lame=3.100=h166bdaf_1003
143
+ - langcodes=3.3.0=pyhd8ed1ab_0
144
+ - lcms2=2.15=hfd0df8a_0
145
+ - ld_impl_linux-64=2.40=h41732ed_0
146
+ - lerc=4.0.0=h27087fc_0
147
+ - libaec=1.0.6=hcb278e6_1
148
+ - libarchive=3.6.2=h3d51595_0
149
+ - libblas=3.9.0=1_h86c2bf4_netlib
150
+ - libbrotlicommon=1.0.9=h166bdaf_8
151
+ - libbrotlidec=1.0.9=h166bdaf_8
152
+ - libbrotlienc=1.0.9=h166bdaf_8
153
+ - libcap=2.67=he9d0100_0
154
+ - libcblas=3.9.0=5_h92ddd45_netlib
155
+ - libclang=15.0.7=default_had23c3d_1
156
+ - libclang13=15.0.7=default_h3e3d535_1
157
+ - libcublas=11.11.3.6=0
158
+ - libcublas-dev=11.11.3.6=0
159
+ - libcufft=10.9.0.58=0
160
+ - libcufft-dev=10.9.0.58=0
161
+ - libcufile=1.4.0.31=0
162
+ - libcufile-dev=1.4.0.31=0
163
+ - libcups=2.3.3=h36d4200_3
164
+ - libcurand=10.3.0.86=0
165
+ - libcurand-dev=10.3.0.86=0
166
+ - libcurl=7.88.1=hdc1c0ab_1
167
+ - libcusolver=11.4.1.48=0
168
+ - libcusolver-dev=11.4.1.48=0
169
+ - libcusparse=11.7.5.86=0
170
+ - libcusparse-dev=11.7.5.86=0
171
+ - libdb=6.2.32=h9c3ff4c_0
172
+ - libdeflate=1.17=h0b41bf4_0
173
+ - libdrm=2.4.114=h166bdaf_0
174
+ - libedit=3.1.20191231=he28a2e2_2
175
+ - libev=4.33=h516909a_1
176
+ - libevent=2.1.10=h28343ad_4
177
+ - libexpat=2.5.0=hcb278e6_1
178
+ - libffi=3.4.2=h7f98852_5
179
+ - libflac=1.4.2=h27087fc_0
180
+ - libgcc=7.2.0=h69d50b8_2
181
+ - libgcc-ng=12.2.0=h65d4601_19
182
+ - libgcrypt=1.10.1=h166bdaf_0
183
+ - libgfortran-ng=12.2.0=h69a702a_19
184
+ - libgfortran5=12.2.0=h337968e_19
185
+ - libglib=2.76.2=hebfc3b9_0
186
+ - libglu=9.0.0=he1b5a44_1001
187
+ - libgomp=12.2.0=h65d4601_19
188
+ - libgpg-error=1.46=h620e276_0
189
+ - libhwloc=2.9.1=hd6dc26d_0
190
+ - libiconv=1.17=h166bdaf_0
191
+ - libidn2=2.3.4=h166bdaf_0
192
+ - libjpeg-turbo=2.1.4=h166bdaf_0
193
+ - liblapack=3.9.0=5_h92ddd45_netlib
194
+ - liblapacke=3.9.0=5_h92ddd45_netlib
195
+ - libllvm11=11.1.0=he0ac6c6_5
196
+ - libllvm15=15.0.7=hadd5161_1
197
+ - libllvm16=16.0.1=hadd5161_0
198
+ - libmamba=1.4.1=hcea66bb_0
199
+ - libmambapy=1.4.1=py310h1428755_0
200
+ - libnghttp2=1.52.0=h61bc06f_0
201
+ - libnpp=11.8.0.86=0
202
+ - libnpp-dev=11.8.0.86=0
203
+ - libnsl=2.0.0=h7f98852_0
204
+ - libnvjpeg=11.9.0.86=0
205
+ - libnvjpeg-dev=11.9.0.86=0
206
+ - libogg=1.3.4=h7f98852_1
207
+ - libopenblas=0.3.21=pthreads_h78a6416_3
208
+ - libopencv=4.7.0=py310hb48cf42_1
209
+ - libopus=1.3.1=h7f98852_1
210
+ - libpciaccess=0.17=h166bdaf_0
211
+ - libpng=1.6.39=h753d276_0
212
+ - libpq=15.3=hbcd7760_0
213
+ - libprotobuf=3.21.12=h3eb15da_0
214
+ - libsndfile=1.2.0=hb75c966_0
215
+ - libsodium=1.0.18=h36c2ea0_1
216
+ - libsolv=0.7.23=h3eb15da_0
217
+ - libsqlite=3.40.0=h753d276_0
218
+ - libssh2=1.10.0=hf14f497_3
219
+ - libstdcxx-ng=12.2.0=h46fd767_19
220
+ - libsystemd0=253=h8c4010b_1
221
+ - libtasn1=4.19.0=h166bdaf_0
222
+ - libtiff=4.5.0=h6adf6a1_2
223
+ - libtool=2.4.7=h27087fc_0
224
+ - libudev1=253=h0b41bf4_1
225
+ - libunistring=0.9.10=h7f98852_0
226
+ - libuuid=2.38.1=h0b41bf4_0
227
+ - libuv=1.44.2=h166bdaf_0
228
+ - libva=2.18.0=h0b41bf4_0
229
+ - libvorbis=1.3.7=h9c3ff4c_0
230
+ - libvpx=1.11.0=h9c3ff4c_3
231
+ - libwebp-base=1.3.0=h0b41bf4_0
232
+ - libxcb=1.13=h7f98852_1004
233
+ - libxkbcommon=1.5.0=h79f4944_1
234
+ - libxml2=2.10.3=hca2bb57_4
235
+ - libzlib=1.2.13=h166bdaf_4
236
+ - llvm-openmp=16.0.3=h4dfa4b3_0
237
+ - llvmlite=0.39.1=py310h58363a5_1
238
+ - lz4-c=1.9.4=hcb278e6_0
239
+ - lzo=2.10=h516909a_1000
240
+ - mamba=1.4.1=py310h51d5547_0
241
+ - markupsafe=2.1.2=py310h1fa729e_0
242
+ - matplotlib=3.7.1=py310hff52083_0
243
+ - matplotlib-base=3.7.1=py310he60537e_0
244
+ - matplotlib-inline=0.1.6=pyhd8ed1ab_0
245
+ - mkl=2023.1.0=h84fe81f_48680
246
+ - mkl-include=2023.1.0=h84fe81f_48680
247
+ - mpc=1.3.1=hfe3b2da_0
248
+ - mpfr=4.2.0=hb012696_0
249
+ - mpg123=1.31.3=hcb278e6_0
250
+ - mpi=1.0=openmpi
251
+ - mpi4py=3.1.4=py310h6075a6b_0
252
+ - mpmath=1.3.0=pyhd8ed1ab_0
253
+ - munkres=1.1.4=pyh9f0ad1d_0
254
+ - murmurhash=1.0.9=py310hd8f1fbe_1
255
+ - mysql-common=8.0.32=hf1915f5_2
256
+ - mysql-libs=8.0.32=hca2cd23_2
257
+ - ncurses=6.3=h27087fc_1
258
+ - nest-asyncio=1.5.6=pyhd8ed1ab_0
259
+ - nettle=3.8.1=hc379101_1
260
+ - networkx=3.1=pyhd8ed1ab_0
261
+ - nsight-compute=2022.3.0.22=0
262
+ - nspr=4.35=h27087fc_0
263
+ - nss=3.89=he45b914_0
264
+ - numba=0.56.4=py310h0e39c9b_1
265
+ - numpy=1.23.5=py310h53a5b5f_0
266
+ - opencv=4.7.0=py310hff52083_1
267
+ - openh264=2.3.1=hcb278e6_2
268
+ - openjpeg=2.5.0=hfec8fc6_2
269
+ - openmpi=4.1.5=h414af15_101
270
+ - openssl=3.1.0=hd590300_3
271
+ - p11-kit=0.24.1=hc5aa10d_0
272
+ - packaging=23.1=pyhd8ed1ab_0
273
+ - pandas=2.0.1=py310h7cbd5c2_1
274
+ - parso=0.8.3=pyhd8ed1ab_0
275
+ - pathy=0.10.1=pyhd8ed1ab_0
276
+ - patsy=0.5.3=pyhd8ed1ab_0
277
+ - pcre2=10.40=hc3806b6_0
278
+ - pexpect=4.8.0=pyh1a96a4e_2
279
+ - pickleshare=0.7.5=py_1003
280
+ - pillow=9.4.0=py310h023d228_1
281
+ - pixman=0.40.0=h36c2ea0_0
282
+ - platformdirs=3.5.0=pyhd8ed1ab_0
283
+ - plotly=5.14.1=pyhd8ed1ab_0
284
+ - pluggy=1.0.0=pyhd8ed1ab_5
285
+ - ply=3.11=py_1
286
+ - pooch=1.7.0=pyha770c72_3
287
+ - preshed=3.0.8=py310hd8f1fbe_1
288
+ - prompt-toolkit=3.0.38=pyha770c72_0
289
+ - prompt_toolkit=3.0.38=hd8ed1ab_0
290
+ - psutil=5.9.5=py310h1fa729e_0
291
+ - pthread-stubs=0.4=h36c2ea0_1001
292
+ - ptyprocess=0.7.0=pyhd3deb0d_0
293
+ - pulseaudio=16.1=hcb278e6_3
294
+ - pulseaudio-client=16.1=h5195f5e_3
295
+ - pulseaudio-daemon=16.1=ha8d29e2_3
296
+ - pure_eval=0.2.2=pyhd8ed1ab_0
297
+ - py-opencv=4.7.0=py310hfdc917e_1
298
+ - pyasn1=0.4.8=py_0
299
+ - pybind11=2.10.4=py310hdf3cbec_0
300
+ - pybind11-abi=4=hd8ed1ab_3
301
+ - pybind11-global=2.10.4=py310hdf3cbec_0
302
+ - pycosat=0.6.4=py310h5764c6d_1
303
+ - pycparser=2.21=pyhd8ed1ab_0
304
+ - pydantic=1.10.7=py310h1fa729e_0
305
+ - pygments=2.15.1=pyhd8ed1ab_0
306
+ - pyopenssl=23.1.1=pyhd8ed1ab_0
307
+ - pyparsing=3.0.9=pyhd8ed1ab_0
308
+ - pyqt=5.15.7=py310hab646b1_3
309
+ - pyqt5-sip=12.11.0=py310heca2aa9_3
310
+ - pysocks=1.7.1=pyha2e5f31_6
311
+ - python=3.10.8=h4a9ceb5_0_cpython
312
+ - python-dateutil=2.8.2=pyhd8ed1ab_0
313
+ - python-tzdata=2023.3=pyhd8ed1ab_0
314
+ - python_abi=3.10=3_cp310
315
+ - pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
316
+ - pytorch-cuda=11.8=h7e8668a_3
317
+ - pytorch-mutex=1.0=cuda
318
+ - pytz=2023.3=pyhd8ed1ab_0
319
+ - pyyaml=5.4.1=py310h5764c6d_4
320
+ - pyzmq=25.0.2=py310h059b190_0
321
+ - qt-main=5.15.8=h5d23da1_6
322
+ - readline=8.2=h8228510_1
323
+ - reproc=14.2.4=h0b41bf4_0
324
+ - reproc-cpp=14.2.4=hcb278e6_0
325
+ - requests=2.28.2=pyhd8ed1ab_1
326
+ - rhash=1.4.3=h166bdaf_0
327
+ - rich=12.6.0=pyhd8ed1ab_0
328
+ - rsa=4.7.2=pyh44b312d_0
329
+ - ruamel.yaml=0.17.21=py310h1fa729e_3
330
+ - ruamel.yaml.clib=0.2.7=py310h1fa729e_1
331
+ - s3transfer=0.6.1=pyhd8ed1ab_0
332
+ - scikit-learn=1.2.2=py310h41b6a48_1
333
+ - scipy=1.10.1=py310h8deb116_2
334
+ - seaborn=0.12.2=hd8ed1ab_0
335
+ - seaborn-base=0.12.2=pyhd8ed1ab_0
336
+ - setuptools=65.6.3=pyhd8ed1ab_0
337
+ - shap=0.41.0=py310h769672d_0
338
+ - shellingham=1.5.1=pyhd8ed1ab_0
339
+ - sip=6.7.9=py310hc6cd4ac_0
340
+ - six=1.16.0=pyh6c4a22f_0
341
+ - slicer=0.0.7=pyhd8ed1ab_0
342
+ - smart_open=5.2.1=pyhd8ed1ab_0
343
+ - spacy=3.5.2=py310h5a539fb_0
344
+ - spacy-legacy=3.0.12=pyhd8ed1ab_0
345
+ - spacy-loggers=1.0.4=pyhd8ed1ab_0
346
+ - srsly=2.4.6=py310heca2aa9_0
347
+ - stack_data=0.6.2=pyhd8ed1ab_0
348
+ - statsmodels=0.14.0=py310h278f3c1_1
349
+ - svt-av1=1.4.1=hcb278e6_0
350
+ - sympy=1.11.1=pypyh9d50eac_103
351
+ - tbb=2021.9.0=hf52228f_0
352
+ - tenacity=8.2.2=pyhd8ed1ab_0
353
+ - thinc=8.1.10=py310hfb6f7a9_0
354
+ - threadpoolctl=3.1.0=pyh8a188c0_0
355
+ - tk=8.6.12=h27826a3_0
356
+ - toml=0.10.2=pyhd8ed1ab_0
357
+ - tomli=2.0.1=pyhd8ed1ab_0
358
+ - toolz=0.12.0=pyhd8ed1ab_0
359
+ - torchaudio=2.0.1=py310_cu118
360
+ - torchdata=0.6.0=py310
361
+ - torchtext=0.15.1=py310
362
+ - torchvision=0.15.1=py310_cu118
363
+ - tornado=6.3=py310h1fa729e_0
364
+ - tqdm=4.65.0=pyhd8ed1ab_1
365
+ - traitlets=5.9.0=pyhd8ed1ab_0
366
+ - typer=0.7.0=pyhd8ed1ab_0
367
+ - typing=3.10.0.0=pyhd8ed1ab_0
368
+ - typing-extensions=4.5.0=hd8ed1ab_0
369
+ - typing_extensions=4.5.0=pyha770c72_0
370
+ - tzdata=2023c=h71feb2d_0
371
+ - unicodedata2=15.0.0=py310h5764c6d_0
372
+ - urllib3=1.26.15=pyhd8ed1ab_0
373
+ - wasabi=1.1.1=py310hff52083_1
374
+ - wcwidth=0.2.6=pyhd8ed1ab_0
375
+ - wheel=0.40.0=pyhd8ed1ab_0
376
+ - x264=1!164.3095=h166bdaf_2
377
+ - x265=3.5=h924138e_3
378
+ - xcb-util=0.4.0=h516909a_0
379
+ - xcb-util-image=0.4.0=h166bdaf_0
380
+ - xcb-util-keysyms=0.4.0=h516909a_0
381
+ - xcb-util-renderutil=0.3.9=h166bdaf_0
382
+ - xcb-util-wm=0.4.1=h516909a_0
383
+ - xkeyboard-config=2.38=h0b41bf4_0
384
+ - xorg-fixesproto=5.0=h7f98852_1002
385
+ - xorg-inputproto=2.3.2=h7f98852_1002
386
+ - xorg-kbproto=1.0.7=h7f98852_1002
387
+ - xorg-libice=1.0.10=h7f98852_0
388
+ - xorg-libsm=1.2.3=hd9c2040_1000
389
+ - xorg-libx11=1.8.4=h0b41bf4_0
390
+ - xorg-libxau=1.0.9=h7f98852_0
391
+ - xorg-libxdmcp=1.1.3=h7f98852_0
392
+ - xorg-libxext=1.3.4=h0b41bf4_2
393
+ - xorg-libxfixes=5.0.3=h7f98852_1004
394
+ - xorg-libxi=1.7.10=h7f98852_0
395
+ - xorg-libxrender=0.9.10=h7f98852_1003
396
+ - xorg-renderproto=0.11.1=h7f98852_1002
397
+ - xorg-xextproto=7.3.0=h0b41bf4_1003
398
+ - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
399
+ - xorg-xproto=7.0.31=h7f98852_1007
400
+ - xyzservices=2023.2.0=pyhd8ed1ab_0
401
+ - xz=5.2.6=h166bdaf_0
402
+ - yaml=0.2.5=h7f98852_2
403
+ - yaml-cpp=0.7.0=h27087fc_2
404
+ - zeromq=4.3.4=h9c3ff4c_1
405
+ - zipp=3.15.0=pyhd8ed1ab_0
406
+ - zlib=1.2.13=h166bdaf_4
407
+ - zstandard=0.19.0=py310hdeb6495_1
408
+ - zstd=1.5.2=h3eb15da_6
409
+ - pip:
410
+ - accelerate==0.21.0
411
+ - aiohttp==3.8.5
412
+ - aiosignal==1.3.1
413
+ - apex==0.1
414
+ - appdirs==1.4.4
415
+ - argparse==1.4.0
416
+ - async-timeout==4.0.2
417
+ - attrs==22.2.0
418
+ - bcrypt==4.0.1
419
+ - bitsandbytes==0.41.0
420
+ - cmake==3.26.3
421
+ - contextlib2==21.6.0
422
+ - datasets==2.14.0
423
+ - deepspeed==0.6.1+1ea3d4b
424
+ - dill==0.3.6
425
+ - docker-pycreds==0.4.0
426
+ - einops==0.6.1
427
+ - flash-attn==0.2.8
428
+ - frozenlist==1.4.0
429
+ - fsspec==2023.5.0
430
+ - gevent==22.10.2
431
+ - gitdb==4.0.10
432
+ - gitpython==3.1.32
433
+ - google-pasta==0.2.0
434
+ - greenlet==2.0.2
435
+ - hjson==3.1.0
436
+ - horovod==0.26.1
437
+ - huggingface-hub==0.16.4
438
+ - importlib-metadata==4.13.0
439
+ - inotify-simple==1.2.1
440
+ - ipywidgets==8.0.7
441
+ - jsonpatch==1.32
442
+ - jsonpointer==2.3
443
+ - jsonschema==4.17.3
444
+ - jupyterlab-widgets==3.0.8
445
+ - lit==16.0.3
446
+ - multidict==6.0.4
447
+ - multiprocess==0.70.14
448
+ - ninja==1.11.1
449
+ - paramiko==3.1.0
450
+ - pathos==0.3.0
451
+ - pathtools==0.1.2
452
+ - peft==0.5.0.dev0
453
+ - pip==23.1.2
454
+ - pox==0.3.2
455
+ - ppft==1.7.6.6
456
+ - protobuf==3.20.3
457
+ - protobuf3-to-dict==0.1.5
458
+ - py-cpuinfo==9.0.0
459
+ - pyarrow==12.0.0
460
+ - pyfunctional==1.4.3
461
+ - pyinstrument==3.4.2
462
+ - pyinstrument-cext==0.2.4
463
+ - pynacl==1.5.0
464
+ - pyrsistent==0.19.3
465
+ - regex==2023.6.3
466
+ - retrying==1.3.4
467
+ - s3fs==0.4.2
468
+ - safetensors==0.3.1
469
+ - sagemaker==2.154.0
470
+ - sagemaker-experiments==0.1.43
471
+ - sagemaker-pytorch-training==2.8.0
472
+ - sagemaker-training==4.5.0
473
+ - schema==0.7.5
474
+ - sentry-sdk==1.28.1
475
+ - setproctitle==1.3.2
476
+ - smclarify==0.5
477
+ - smdebug==1.0.34
478
+ - smdebug-rulesconfig==1.0.1
479
+ - smdistributed-dataparallel==1.8.0
480
+ - smdistributed-modelparallel==1.15.0
481
+ - smmap==5.0.0
482
+ - tabulate==0.9.0
483
+ - tblib==1.7.0
484
+ - tokenizers==0.13.3
485
+ - torchnet==0.0.4
486
+ - transformers==4.31.0
487
+ - triton==2.0.0.dev20221202
488
+ - trl==0.4.7
489
+ - visdom==0.2.4
490
+ - wandb==0.15.7
491
+ - websocket-client==1.5.1
492
+ - werkzeug==2.3.4
493
+ - widgetsnbextension==4.0.8
494
+ - xxhash==3.2.0
495
+ - yarl==1.9.2
496
+ - zope-event==4.6
497
+ - zope-interface==6.0
498
+ prefix: /opt/conda
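Aside (not part of the uploaded files): conda-environment.yaml is the exported snapshot of the `base` env at /opt/conda, mixing conda-pinned and pip-pinned packages. A hedged sketch, assuming the file has been downloaded locally, for summarising what it contains (pyyaml, pinned as pyyaml==5.4.1 in this run, is required):

# Hedged sketch: summarise the exported conda environment above.
import yaml

with open("conda-environment.yaml") as fh:
    env = yaml.safe_load(fh)

conda_deps = [d for d in env["dependencies"] if isinstance(d, str)]
pip_deps = next((d["pip"] for d in env["dependencies"] if isinstance(d, dict)), [])

print(env["name"], env["prefix"])         # base /opt/conda
print(len(conda_deps), "conda packages")  # conda-pinned entries
print(len(pip_deps), "pip packages")      # pip-pinned entries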
wandb/run-20230727_154936-a41qiywg/files/config.yaml ADDED
@@ -0,0 +1,649 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.8
7
+ cli_version: 0.15.7
8
+ framework: huggingface
9
+ huggingface_version: 4.31.0
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1690472976.418337
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 84
24
+ - 98
25
+ 2:
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 84
35
+ - 98
36
+ 3:
37
+ - 7
38
+ - 23
39
+ 4: 3.10.8
40
+ 5: 0.15.7
41
+ 6: 4.31.0
42
+ 8:
43
+ - 1
44
+ - 5
45
+ m:
46
+ - 1: train/global_step
47
+ 6:
48
+ - 3
49
+ - 1: train/loss
50
+ 5: 1
51
+ 6:
52
+ - 1
53
+ - 1: train/learning_rate
54
+ 5: 1
55
+ 6:
56
+ - 1
57
+ - 1: train/epoch
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: train/train_runtime
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: train/train_samples_per_second
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: train/train_steps_per_second
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: train/total_flos
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: train/train_loss
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ vocab_size:
82
+ desc: null
83
+ value: 65024
84
+ hidden_size:
85
+ desc: null
86
+ value: 4544
87
+ n_layer:
88
+ desc: null
89
+ value: 32
90
+ n_head:
91
+ desc: null
92
+ value: 71
93
+ layer_norm_epsilon:
94
+ desc: null
95
+ value: 1.0e-05
96
+ initializer_range:
97
+ desc: null
98
+ value: 0.02
99
+ use_cache:
100
+ desc: null
101
+ value: false
102
+ apply_residual_connection_post_layernorm:
103
+ desc: null
104
+ value: false
105
+ hidden_dropout:
106
+ desc: null
107
+ value: 0.0
108
+ attention_dropout:
109
+ desc: null
110
+ value: 0.0
111
+ bos_token_id:
112
+ desc: null
113
+ value: 11
114
+ eos_token_id:
115
+ desc: null
116
+ value: 11
117
+ multi_query:
118
+ desc: null
119
+ value: true
120
+ alibi:
121
+ desc: null
122
+ value: false
123
+ bias:
124
+ desc: null
125
+ value: false
126
+ parallel_attn:
127
+ desc: null
128
+ value: true
129
+ return_dict:
130
+ desc: null
131
+ value: true
132
+ output_hidden_states:
133
+ desc: null
134
+ value: false
135
+ output_attentions:
136
+ desc: null
137
+ value: false
138
+ torchscript:
139
+ desc: null
140
+ value: false
141
+ torch_dtype:
142
+ desc: null
143
+ value: bfloat16
144
+ use_bfloat16:
145
+ desc: null
146
+ value: false
147
+ tf_legacy_loss:
148
+ desc: null
149
+ value: false
150
+ pruned_heads:
151
+ desc: null
152
+ value: {}
153
+ tie_word_embeddings:
154
+ desc: null
155
+ value: true
156
+ is_encoder_decoder:
157
+ desc: null
158
+ value: false
159
+ is_decoder:
160
+ desc: null
161
+ value: false
162
+ cross_attention_hidden_size:
163
+ desc: null
164
+ value: null
165
+ add_cross_attention:
166
+ desc: null
167
+ value: false
168
+ tie_encoder_decoder:
169
+ desc: null
170
+ value: false
171
+ max_length:
172
+ desc: null
173
+ value: 20
174
+ min_length:
175
+ desc: null
176
+ value: 0
177
+ do_sample:
178
+ desc: null
179
+ value: false
180
+ early_stopping:
181
+ desc: null
182
+ value: false
183
+ num_beams:
184
+ desc: null
185
+ value: 1
186
+ num_beam_groups:
187
+ desc: null
188
+ value: 1
189
+ diversity_penalty:
190
+ desc: null
191
+ value: 0.0
192
+ temperature:
193
+ desc: null
194
+ value: 1.0
195
+ top_k:
196
+ desc: null
197
+ value: 50
198
+ top_p:
199
+ desc: null
200
+ value: 1.0
201
+ typical_p:
202
+ desc: null
203
+ value: 1.0
204
+ repetition_penalty:
205
+ desc: null
206
+ value: 1.0
207
+ length_penalty:
208
+ desc: null
209
+ value: 1.0
210
+ no_repeat_ngram_size:
211
+ desc: null
212
+ value: 0
213
+ encoder_no_repeat_ngram_size:
214
+ desc: null
215
+ value: 0
216
+ bad_words_ids:
217
+ desc: null
218
+ value: null
219
+ num_return_sequences:
220
+ desc: null
221
+ value: 1
222
+ chunk_size_feed_forward:
223
+ desc: null
224
+ value: 0
225
+ output_scores:
226
+ desc: null
227
+ value: false
228
+ return_dict_in_generate:
229
+ desc: null
230
+ value: false
231
+ forced_bos_token_id:
232
+ desc: null
233
+ value: null
234
+ forced_eos_token_id:
235
+ desc: null
236
+ value: null
237
+ remove_invalid_values:
238
+ desc: null
239
+ value: false
240
+ exponential_decay_length_penalty:
241
+ desc: null
242
+ value: null
243
+ suppress_tokens:
244
+ desc: null
245
+ value: null
246
+ begin_suppress_tokens:
247
+ desc: null
248
+ value: null
249
+ architectures:
250
+ desc: null
251
+ value:
252
+ - RWForCausalLM
253
+ finetuning_task:
254
+ desc: null
255
+ value: null
256
+ id2label:
257
+ desc: null
258
+ value:
259
+ '0': LABEL_0
260
+ '1': LABEL_1
261
+ label2id:
262
+ desc: null
263
+ value:
264
+ LABEL_0: 0
265
+ LABEL_1: 1
266
+ tokenizer_class:
267
+ desc: null
268
+ value: null
269
+ prefix:
270
+ desc: null
271
+ value: null
272
+ pad_token_id:
273
+ desc: null
274
+ value: null
275
+ sep_token_id:
276
+ desc: null
277
+ value: null
278
+ decoder_start_token_id:
279
+ desc: null
280
+ value: null
281
+ task_specific_params:
282
+ desc: null
283
+ value: null
284
+ problem_type:
285
+ desc: null
286
+ value: null
287
+ _name_or_path:
288
+ desc: null
289
+ value: ybelkada/falcon-7b-sharded-bf16
290
+ transformers_version:
291
+ desc: null
292
+ value: 4.31.0
293
+ auto_map:
294
+ desc: null
295
+ value:
296
+ AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
297
+ AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
298
+ AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
299
+ AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
300
+ AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
301
+ AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
302
+ model_type:
303
+ desc: null
304
+ value: RefinedWebModel
305
+ quantization_config:
306
+ desc: null
307
+ value:
308
+ load_in_8bit: false
309
+ load_in_4bit: true
310
+ llm_int8_threshold: 6.0
311
+ llm_int8_skip_modules: null
312
+ llm_int8_enable_fp32_cpu_offload: false
313
+ llm_int8_has_fp16_weight: false
314
+ bnb_4bit_quant_type: nf4
315
+ bnb_4bit_use_double_quant: false
316
+ bnb_4bit_compute_dtype: float16
317
+ output_dir:
318
+ desc: null
319
+ value: ./results
320
+ overwrite_output_dir:
321
+ desc: null
322
+ value: false
323
+ do_train:
324
+ desc: null
325
+ value: false
326
+ do_eval:
327
+ desc: null
328
+ value: false
329
+ do_predict:
330
+ desc: null
331
+ value: false
332
+ evaluation_strategy:
333
+ desc: null
334
+ value: 'no'
335
+ prediction_loss_only:
336
+ desc: null
337
+ value: false
338
+ per_device_train_batch_size:
339
+ desc: null
340
+ value: 4
341
+ per_device_eval_batch_size:
342
+ desc: null
343
+ value: 8
344
+ per_gpu_train_batch_size:
345
+ desc: null
346
+ value: None
347
+ per_gpu_eval_batch_size:
348
+ desc: null
349
+ value: None
350
+ gradient_accumulation_steps:
351
+ desc: null
352
+ value: 4
353
+ eval_accumulation_steps:
354
+ desc: null
355
+ value: None
356
+ eval_delay:
357
+ desc: null
358
+ value: 0
359
+ learning_rate:
360
+ desc: null
361
+ value: 0.0002
362
+ weight_decay:
363
+ desc: null
364
+ value: 0.0
365
+ adam_beta1:
366
+ desc: null
367
+ value: 0.9
368
+ adam_beta2:
369
+ desc: null
370
+ value: 0.999
371
+ adam_epsilon:
372
+ desc: null
373
+ value: 1.0e-08
374
+ max_grad_norm:
375
+ desc: null
376
+ value: 0.3
377
+ num_train_epochs:
378
+ desc: null
379
+ value: 3.0
380
+ max_steps:
381
+ desc: null
382
+ value: 500
383
+ lr_scheduler_type:
384
+ desc: null
385
+ value: constant
386
+ warmup_ratio:
387
+ desc: null
388
+ value: 0.03
389
+ warmup_steps:
390
+ desc: null
391
+ value: 0
392
+ log_level:
393
+ desc: null
394
+ value: passive
395
+ log_level_replica:
396
+ desc: null
397
+ value: warning
398
+ log_on_each_node:
399
+ desc: null
400
+ value: true
401
+ logging_dir:
402
+ desc: null
403
+ value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
404
+ logging_strategy:
405
+ desc: null
406
+ value: steps
407
+ logging_first_step:
408
+ desc: null
409
+ value: false
410
+ logging_steps:
411
+ desc: null
412
+ value: 10
413
+ logging_nan_inf_filter:
414
+ desc: null
415
+ value: true
416
+ save_strategy:
417
+ desc: null
418
+ value: steps
419
+ save_steps:
420
+ desc: null
421
+ value: 10
422
+ save_total_limit:
423
+ desc: null
424
+ value: None
425
+ save_safetensors:
426
+ desc: null
427
+ value: false
428
+ save_on_each_node:
429
+ desc: null
430
+ value: false
431
+ no_cuda:
432
+ desc: null
433
+ value: false
434
+ use_mps_device:
435
+ desc: null
436
+ value: false
437
+ seed:
438
+ desc: null
439
+ value: 42
440
+ data_seed:
441
+ desc: null
442
+ value: None
443
+ jit_mode_eval:
444
+ desc: null
445
+ value: false
446
+ use_ipex:
447
+ desc: null
448
+ value: false
449
+ bf16:
450
+ desc: null
451
+ value: false
452
+ fp16:
453
+ desc: null
454
+ value: true
455
+ fp16_opt_level:
456
+ desc: null
457
+ value: O1
458
+ half_precision_backend:
459
+ desc: null
460
+ value: auto
461
+ bf16_full_eval:
462
+ desc: null
463
+ value: false
464
+ fp16_full_eval:
465
+ desc: null
466
+ value: false
467
+ tf32:
468
+ desc: null
469
+ value: None
470
+ local_rank:
471
+ desc: null
472
+ value: 0
473
+ ddp_backend:
474
+ desc: null
475
+ value: None
476
+ tpu_num_cores:
477
+ desc: null
478
+ value: None
479
+ tpu_metrics_debug:
480
+ desc: null
481
+ value: false
482
+ debug:
483
+ desc: null
484
+ value: '[]'
485
+ dataloader_drop_last:
486
+ desc: null
487
+ value: false
488
+ eval_steps:
489
+ desc: null
490
+ value: None
491
+ dataloader_num_workers:
492
+ desc: null
493
+ value: 0
494
+ past_index:
495
+ desc: null
496
+ value: -1
497
+ run_name:
498
+ desc: null
499
+ value: ./results
500
+ disable_tqdm:
501
+ desc: null
502
+ value: false
503
+ remove_unused_columns:
504
+ desc: null
505
+ value: true
506
+ label_names:
507
+ desc: null
508
+ value: None
509
+ load_best_model_at_end:
510
+ desc: null
511
+ value: false
512
+ metric_for_best_model:
513
+ desc: null
514
+ value: None
515
+ greater_is_better:
516
+ desc: null
517
+ value: None
518
+ ignore_data_skip:
519
+ desc: null
520
+ value: false
521
+ sharded_ddp:
522
+ desc: null
523
+ value: '[]'
524
+ fsdp:
525
+ desc: null
526
+ value: '[]'
527
+ fsdp_min_num_params:
528
+ desc: null
529
+ value: 0
530
+ fsdp_config:
531
+ desc: null
532
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
533
+ fsdp_transformer_layer_cls_to_wrap:
534
+ desc: null
535
+ value: None
536
+ deepspeed:
537
+ desc: null
538
+ value: None
539
+ label_smoothing_factor:
540
+ desc: null
541
+ value: 0.0
542
+ optim:
543
+ desc: null
544
+ value: paged_adamw_32bit
545
+ optim_args:
546
+ desc: null
547
+ value: None
548
+ adafactor:
549
+ desc: null
550
+ value: false
551
+ group_by_length:
552
+ desc: null
553
+ value: true
554
+ length_column_name:
555
+ desc: null
556
+ value: length
557
+ report_to:
558
+ desc: null
559
+ value: '[''wandb'']'
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: None
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: None
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: None
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ skip_memory_metrics:
573
+ desc: null
574
+ value: true
575
+ use_legacy_prediction_loop:
576
+ desc: null
577
+ value: false
578
+ push_to_hub:
579
+ desc: null
580
+ value: false
581
+ resume_from_checkpoint:
582
+ desc: null
583
+ value: None
584
+ hub_model_id:
585
+ desc: null
586
+ value: None
587
+ hub_strategy:
588
+ desc: null
589
+ value: every_save
590
+ hub_token:
591
+ desc: null
592
+ value: <HUB_TOKEN>
593
+ hub_private_repo:
594
+ desc: null
595
+ value: false
596
+ gradient_checkpointing:
597
+ desc: null
598
+ value: false
599
+ include_inputs_for_metrics:
600
+ desc: null
601
+ value: false
602
+ fp16_backend:
603
+ desc: null
604
+ value: auto
605
+ push_to_hub_model_id:
606
+ desc: null
607
+ value: None
608
+ push_to_hub_organization:
609
+ desc: null
610
+ value: None
611
+ push_to_hub_token:
612
+ desc: null
613
+ value: <PUSH_TO_HUB_TOKEN>
614
+ mp_parameters:
615
+ desc: null
616
+ value: ''
617
+ auto_find_batch_size:
618
+ desc: null
619
+ value: false
620
+ full_determinism:
621
+ desc: null
622
+ value: false
623
+ torchdynamo:
624
+ desc: null
625
+ value: None
626
+ ray_scope:
627
+ desc: null
628
+ value: last
629
+ ddp_timeout:
630
+ desc: null
631
+ value: 1800
632
+ torch_compile:
633
+ desc: null
634
+ value: false
635
+ torch_compile_backend:
636
+ desc: null
637
+ value: None
638
+ torch_compile_mode:
639
+ desc: null
640
+ value: None
641
+ xpu_backend:
642
+ desc: null
643
+ value: None
644
+ train_batch_size:
645
+ desc: null
646
+ value: 4
647
+ eval_batch_size:
648
+ desc: null
649
+ value: 8
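config.yaml above is the run configuration W&B captured: the Falcon-7B architecture fields, a 4-bit NF4 quantization_config with float16 compute, and the Trainer hyperparameters (paged_adamw_32bit, learning rate 2e-4, batch size 4 with 4 gradient-accumulation steps, max_steps 500, fp16). A minimal sketch, assuming the transformers 4.31.0 pinned in this environment, of how the recorded quantization_config maps back onto a model load; trust_remote_code and device_map are assumptions, needed here because the config routes through the custom RW* classes listed under auto_map and targets a single GPU.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Values copied from the quantization_config block recorded above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/falcon-7b-sharded-bf16",  # _name_or_path recorded above
    quantization_config=bnb_config,
    device_map="auto",                  # assumption: place the 4-bit weights on the single GPU
    trust_remote_code=True,             # assumption: required for the auto_map RW* classes
)
model.config.use_cache = False          # use_cache: false, as recorded above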
wandb/run-20230727_154936-a41qiywg/files/output.log ADDED
@@ -0,0 +1,112 @@
1
+
2
+ You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
3
+ {}
4
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
5
+ To disable this warning, you can either:
6
+ - Avoid using `tokenizers` before the fork if possible
7
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
8
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
9
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
10
+ [notice] To update, run: pip install --upgrade pip
11
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
12
+ To disable this warning, you can either:
13
+ - Avoid using `tokenizers` before the fork if possible
14
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
15
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
16
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
17
+ [notice] To update, run: pip install --upgrade pip
18
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
19
+ To disable this warning, you can either:
20
+ - Avoid using `tokenizers` before the fork if possible
21
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
22
+ Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
23
+ Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
24
+ Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
25
+ Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
26
+ Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
27
+ Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
28
+ Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
29
+ Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
30
+ Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
31
+ Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
32
+ Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
33
+ Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
34
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
35
+ Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
36
+ Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
37
+ Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
38
+ Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
39
+ Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
40
+ Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
41
+ Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
42
+ Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
43
+ Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
44
+ Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
45
+ Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
46
+ Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
47
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
48
+ Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
49
+ Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
50
+ Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
51
+ Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
52
+ Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
53
+ Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
54
+ Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
55
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
56
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
57
+ [notice] To update, run: pip install --upgrade pip
58
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
59
+ To disable this warning, you can either:
60
+ - Avoid using `tokenizers` before the fork if possible
61
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
62
+ Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
63
+ Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
64
+ Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
65
+ Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
66
+ Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
67
+ Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
68
+ Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
69
+ Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
70
+ Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
71
+ Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
72
+ Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
73
+ Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
74
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
75
+ Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
76
+ Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
77
+ Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
78
+ Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
79
+ Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
80
+ Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
81
+ Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
82
+ Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
83
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
84
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
85
+ Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
86
+ Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
87
+ Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
88
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
89
+ Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
90
+ Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
91
+ Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
92
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
93
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
94
+ [notice] To update, run: pip install --upgrade pip
95
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
96
+ To disable this warning, you can either:
97
+ - Avoid using `tokenizers` before the fork if possible
98
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
99
+ Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
100
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
101
+ Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
102
+ Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
103
+ Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
104
+ Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
105
+ Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
106
+ Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
107
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
108
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
109
+ [notice] To update, run: pip install --upgrade pip
110
+ True
111
+ /opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
112
+ warnings.warn(
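output.log above surfaces two actionable warnings: peft reports that prepare_model_for_int8_training is deprecated in favour of prepare_model_for_kbit_training, and tokenizers asks for TOKENIZERS_PARALLELISM to be set explicitly to avoid the repeated fork warning. A minimal sketch of both fixes, assuming the peft 0.5.0.dev0 and tokenizers 0.13.3 versions pinned in this environment; the model argument stands for the 4-bit model loaded as in the sketch after config.yaml.

import os

# Silence the repeated huggingface/tokenizers fork warning seen in the log above.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from peft import prepare_model_for_kbit_training


def prepare_quantized_model(model):
    # Replacement for the deprecated prepare_model_for_int8_training call
    # flagged by the FutureWarning at the end of output.log.
    return prepare_model_for_kbit_training(model)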
wandb/run-20230727_154936-a41qiywg/files/requirements.txt ADDED
@@ -0,0 +1,240 @@
1
+ accelerate==0.21.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ apex==0.1
5
+ appdirs==1.4.4
6
+ argparse==1.4.0
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==22.2.0
10
+ awscli==1.27.132
11
+ backcall==0.2.0
12
+ backports.functools-lru-cache==1.6.4
13
+ bcrypt==4.0.1
14
+ bitsandbytes==0.41.0
15
+ blis==0.7.9
16
+ bokeh==3.1.1
17
+ boto3==1.26.132
18
+ botocore==1.29.132
19
+ brotlipy==0.7.0
20
+ cached-property==1.5.2
21
+ catalogue==2.0.8
22
+ certifi==2023.5.7
23
+ cffi==1.15.1
24
+ charset-normalizer==3.1.0
25
+ click==8.1.3
26
+ cloudpickle==2.2.1
27
+ cmake==3.26.3
28
+ colorama==0.4.4
29
+ comm==0.1.3
30
+ commonmark==0.9.1
31
+ conda-content-trust==0.1.3
32
+ conda-package-handling==2.0.2
33
+ conda-package-streaming==0.7.0
34
+ conda==23.1.0
35
+ confection==0.0.4
36
+ contextlib2==21.6.0
37
+ contourpy==1.0.7
38
+ cryptography==40.0.1
39
+ cycler==0.11.0
40
+ cymem==2.0.7
41
+ cython==0.29.34
42
+ datasets==2.14.0
43
+ debugpy==1.6.7
44
+ decorator==5.1.1
45
+ deepspeed==0.6.1+1ea3d4b
46
+ dgl==1.1.0+cu118
47
+ dill==0.3.6
48
+ docker-pycreds==0.4.0
49
+ docutils==0.15.2
50
+ einops==0.6.1
51
+ executing==1.2.0
52
+ fastai==2.7.12
53
+ fastcore==1.5.29
54
+ fastdownload==0.0.7
55
+ fastprogress==1.0.3
56
+ filelock==3.12.0
57
+ flash-attn==0.2.8
58
+ fonttools==4.39.4
59
+ frozenlist==1.4.0
60
+ fsspec==2023.5.0
61
+ future==0.18.3
62
+ gevent==22.10.2
63
+ gitdb==4.0.10
64
+ gitpython==3.1.32
65
+ gmpy2==2.1.2
66
+ google-pasta==0.2.0
67
+ greenlet==2.0.2
68
+ h5py==3.8.0
69
+ hjson==3.1.0
70
+ horovod==0.26.1
71
+ huggingface-hub==0.16.4
72
+ idna==3.4
73
+ imageio==2.28.1
74
+ importlib-metadata==4.13.0
75
+ inotify-simple==1.2.1
76
+ ipykernel==6.23.0
77
+ ipython==8.13.2
78
+ ipywidgets==8.0.7
79
+ jedi==0.18.2
80
+ jinja2==3.1.2
81
+ jmespath==1.0.1
82
+ joblib==1.2.0
83
+ jsonpatch==1.32
84
+ jsonpointer==2.3
85
+ jsonschema==4.17.3
86
+ jupyter-client==8.2.0
87
+ jupyter-core==5.3.0
88
+ jupyterlab-widgets==3.0.8
89
+ kiwisolver==1.4.4
90
+ langcodes==3.3.0
91
+ libmambapy==1.4.1
92
+ lit==16.0.3
93
+ llvmlite==0.39.1
94
+ mamba==1.4.1
95
+ markupsafe==2.1.2
96
+ matplotlib-inline==0.1.6
97
+ matplotlib==3.7.1
98
+ mpi4py==3.1.4
99
+ mpmath==1.3.0
100
+ multidict==6.0.4
101
+ multiprocess==0.70.14
102
+ munkres==1.1.4
103
+ murmurhash==1.0.9
104
+ nest-asyncio==1.5.6
105
+ networkx==3.1
106
+ ninja==1.11.1
107
+ numba==0.56.4
108
+ numpy==1.23.5
109
+ opencv-python==4.7.0
110
+ packaging==23.1
111
+ pandas==2.0.1
112
+ paramiko==3.1.0
113
+ parso==0.8.3
114
+ pathos==0.3.0
115
+ pathtools==0.1.2
116
+ pathy==0.10.1
117
+ patsy==0.5.3
118
+ peft==0.5.0.dev0
119
+ pexpect==4.8.0
120
+ pickleshare==0.7.5
121
+ pillow==9.4.0
122
+ pip==23.1.2
123
+ platformdirs==3.5.0
124
+ plotly==5.14.1
125
+ pluggy==1.0.0
126
+ ply==3.11
127
+ pooch==1.7.0
128
+ pox==0.3.2
129
+ ppft==1.7.6.6
130
+ preshed==3.0.8
131
+ prompt-toolkit==3.0.38
132
+ protobuf3-to-dict==0.1.5
133
+ protobuf==3.20.3
134
+ psutil==5.9.5
135
+ ptyprocess==0.7.0
136
+ pure-eval==0.2.2
137
+ py-cpuinfo==9.0.0
138
+ pyarrow==12.0.0
139
+ pyasn1==0.4.8
140
+ pybind11-global==2.10.4
141
+ pybind11==2.10.4
142
+ pycosat==0.6.4
143
+ pycparser==2.21
144
+ pydantic==1.10.7
145
+ pyfunctional==1.4.3
146
+ pygments==2.15.1
147
+ pyinstrument-cext==0.2.4
148
+ pyinstrument==3.4.2
149
+ pynacl==1.5.0
150
+ pyopenssl==23.1.1
151
+ pyparsing==3.0.9
152
+ pyqt5-sip==12.11.0
153
+ pyqt5==5.15.7
154
+ pyrsistent==0.19.3
155
+ pysocks==1.7.1
156
+ python-dateutil==2.8.2
157
+ pytz==2023.3
158
+ pyyaml==5.4.1
159
+ pyzmq==25.0.2
160
+ regex==2023.6.3
161
+ requests==2.28.2
162
+ retrying==1.3.4
163
+ rich==12.6.0
164
+ rsa==4.7.2
165
+ ruamel.yaml.clib==0.2.7
166
+ ruamel.yaml==0.17.21
167
+ s3fs==0.4.2
168
+ s3transfer==0.6.1
169
+ safetensors==0.3.1
170
+ sagemaker-experiments==0.1.43
171
+ sagemaker-pytorch-training==2.8.0
172
+ sagemaker-training==4.5.0
173
+ sagemaker==2.154.0
174
+ schema==0.7.5
175
+ scikit-learn==1.2.2
176
+ scipy==1.10.1
177
+ seaborn==0.12.2
178
+ sentry-sdk==1.28.1
179
+ setproctitle==1.3.2
180
+ setuptools==65.6.3
181
+ shap==0.41.0
182
+ shellingham==1.5.1
183
+ sip==6.7.9
184
+ six==1.16.0
185
+ slicer==0.0.7
186
+ smart-open==5.2.1
187
+ smclarify==0.5
188
+ smdebug-rulesconfig==1.0.1
189
+ smdebug==1.0.34
190
+ smdistributed-dataparallel==1.8.0
191
+ smdistributed-modelparallel==1.15.0
192
+ smmap==5.0.0
193
+ spacy-legacy==3.0.12
194
+ spacy-loggers==1.0.4
195
+ spacy==3.5.2
196
+ srsly==2.4.6
197
+ stack-data==0.6.2
198
+ statsmodels==0.14.0
199
+ sympy==1.11.1
200
+ tabulate==0.9.0
201
+ tblib==1.7.0
202
+ tenacity==8.2.2
203
+ thinc==8.1.10
204
+ threadpoolctl==3.1.0
205
+ tokenizers==0.13.3
206
+ toml==0.10.2
207
+ tomli==2.0.1
208
+ toolz==0.12.0
209
+ torch==2.0.0
210
+ torchaudio==2.0.1
211
+ torchdata==0.6.0
212
+ torchnet==0.0.4
213
+ torchtext==0.15.1
214
+ torchvision==0.15.1
215
+ tornado==6.3
216
+ tqdm==4.65.0
217
+ traitlets==5.9.0
218
+ transformers==4.31.0
219
+ triton==2.0.0.dev20221202
220
+ trl==0.4.7
221
+ typer==0.7.0
222
+ typing-extensions==4.5.0
223
+ tzdata==2023.3
224
+ unicodedata2==15.0.0
225
+ urllib3==1.26.15
226
+ visdom==0.2.4
227
+ wandb==0.15.7
228
+ wasabi==1.1.1
229
+ wcwidth==0.2.6
230
+ websocket-client==1.5.1
231
+ werkzeug==2.3.4
232
+ wheel==0.40.0
233
+ widgetsnbextension==4.0.8
234
+ xxhash==3.2.0
235
+ xyzservices==2023.2.0
236
+ yarl==1.9.2
237
+ zipp==3.15.0
238
+ zope.event==4.6
239
+ zope.interface==6.0
240
+ zstandard==0.19.0
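requirements.txt confirms the same QLoRA-style stack seen in the conda export (transformers 4.31.0, peft 0.5.0.dev0, trl 0.4.7, bitsandbytes 0.41.0, accelerate 0.21.0). A minimal sketch of the TrainingArguments implied by the values recorded in config.yaml above; every keyword shown is taken from that file, and anything not listed keeps its transformers 4.31 default.

from transformers import TrainingArguments

# Hyperparameters as recorded in wandb/run-20230727_154936-a41qiywg/files/config.yaml.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    max_grad_norm=0.3,
    max_steps=500,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    group_by_length=True,
    fp16=True,
    logging_steps=10,
    save_steps=10,
    report_to="wandb",
)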
wandb/run-20230727_154936-a41qiywg/files/wandb-metadata.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
3
+ "python": "3.10.8",
4
+ "heartbeatAt": "2023-07-27T15:49:36.888553",
5
+ "startedAt": "2023-07-27T15:49:36.344100",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
12
+ "username": "root",
13
+ "executable": "/opt/conda/bin/python",
14
+ "cpu_count": 4,
15
+ "cpu_count_logical": 8,
16
+ "cpu_freq": {
17
+ "current": 3100.120625,
18
+ "min": 0.0,
19
+ "max": 0.0
20
+ },
21
+ "cpu_freq_per_core": [
22
+ {
23
+ "current": 3107.574,
24
+ "min": 0.0,
25
+ "max": 0.0
26
+ },
27
+ {
28
+ "current": 3102.47,
29
+ "min": 0.0,
30
+ "max": 0.0
31
+ },
32
+ {
33
+ "current": 3099.63,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 3099.058,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 3100.716,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 3099.393,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 3099.988,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 3092.136,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ }
62
+ ],
63
+ "disk": {
64
+ "total": 32.0,
65
+ "used": 0.414398193359375
66
+ },
67
+ "gpu": "Tesla T4",
68
+ "gpu_count": 1,
69
+ "gpu_devices": [
70
+ {
71
+ "name": "Tesla T4",
72
+ "memory_total": 15843721216
73
+ }
74
+ ],
75
+ "memory": {
76
+ "total": 30.947834014892578
77
+ }
78
+ }
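wandb-metadata.json records the hardware behind the run: a single Tesla T4 with 15,843,721,216 bytes (about 14.8 GiB) of GPU memory, 4 physical / 8 logical CPU cores, and roughly 31 GB of RAM on a SageMaker ml.g4dn.2xlarge host. That budget is consistent with 4-bit loading, since 7B parameters at 4 bits is roughly 3.5 GB of weights before activations and optimizer state. A small sketch for confirming the device before training, using only standard torch calls.

import torch

assert torch.cuda.is_available(), "expected the single Tesla T4 recorded in wandb-metadata.json"
props = torch.cuda.get_device_properties(0)
print(props.name, round(props.total_memory / 2**30, 1), "GiB")  # Tesla T4, ~14.8 GiB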
wandb/run-20230727_154936-a41qiywg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
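The summary above is internally consistent with the recorded hyperparameters: an effective batch of 16 examples per optimizer step (4 per device times 4 accumulation steps on the single GPU), so 500 steps cover 8,000 examples, giving 8000 / 7012.93 s ≈ 1.141 samples/s and 500 / 7012.93 s ≈ 0.071 steps/s, exactly the train_samples_per_second and train_steps_per_second values logged. Reaching epoch 5.8 at step 500 also implies a training set of roughly 8000 / 5.8 ≈ 1,380 examples. The same arithmetic as a tiny check:

# Consistency check of wandb-summary.json against the recorded trainer config.
steps = 500
effective_batch = 4 * 4 * 1      # per-device batch * grad accumulation * GPU count
runtime_s = 7012.9274

samples = steps * effective_batch        # 8000
print(samples / runtime_s)               # ~1.141 -> train/train_samples_per_second
print(steps / runtime_s)                 # ~0.071 -> train/train_steps_per_second
print(samples / 5.8)                     # ~1379  -> approximate training-set size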
wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/run-20230727_154936-a41qiywg/logs/debug.log ADDED
@@ -0,0 +1,76 @@
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
wandb/run-20230727_154936-a41qiywg/run-a41qiywg.wandb ADDED
Binary file (426 kB).
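run-a41qiywg.wandb is the binary event record for the run; once it is synced, the same config and summary can be read back through the public W&B API. A minimal sketch using the run id a41qiywg taken from the directory name above; the entity and project are placeholders, since neither appears in these files, so substitute the real ones before running.

import wandb

api = wandb.Api()
# "<entity>/<project>" are placeholders; only the run id a41qiywg is recorded in these files.
run = api.run("<entity>/<project>/a41qiywg")

print(run.summary.get("train/train_loss"))  # 2.2251... as in wandb-summary.json
print(run.config.get("learning_rate"))      # 0.0002, as in config.yaml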