mskov commited on
Commit
00e8b72
1 Parent(s): 85a3d0d

Upload 19 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
37
+ wandb/debug-internal.log filter=lfs diff=lfs merge=lfs -text
38
+ wandb/latest-run/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
39
+ wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
wandb/debug-internal.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/debug.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
wandb/latest-run/files/conda-environment.yaml ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: base
2
+ channels:
3
+ - fastai
4
+ - dglteam/label/cu118
5
+ - nvidia/label/cuda-11.8.0
6
+ - https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
7
+ - conda-forge
8
+ dependencies:
9
+ - _libgcc_mutex=0.1=conda_forge
10
+ - _openmp_mutex=4.5=2_kmp_llvm
11
+ - alsa-lib=1.2.8=h166bdaf_0
12
+ - aom=3.5.0=h27087fc_0
13
+ - asttokens=2.2.1=pyhd8ed1ab_0
14
+ - attr=2.5.1=h166bdaf_1
15
+ - aws-ofi-nccl-dlc=1.5.0=aws_0
16
+ - awscli=1.27.132=py310hff52083_0
17
+ - backcall=0.2.0=pyh9f0ad1d_0
18
+ - backports=1.0=pyhd8ed1ab_3
19
+ - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
20
+ - blas=1.0=mkl
21
+ - bokeh=3.1.1=pyhd8ed1ab_0
22
+ - boto3=1.26.132=pyhd8ed1ab_0
23
+ - botocore=1.29.132=pyhd8ed1ab_0
24
+ - brotli=1.0.9=h166bdaf_8
25
+ - brotli-bin=1.0.9=h166bdaf_8
26
+ - brotlipy=0.7.0=py310h5764c6d_1005
27
+ - bzip2=1.0.8=h7f98852_4
28
+ - c-ares=1.18.1=h7f98852_0
29
+ - ca-certificates=2023.5.7=hbcca054_0
30
+ - cached-property=1.5.2=hd8ed1ab_1
31
+ - cached_property=1.5.2=pyha770c72_1
32
+ - cairo=1.16.0=ha61ee94_1014
33
+ - catalogue=2.0.8=py310hff52083_1
34
+ - certifi=2023.5.7=pyhd8ed1ab_0
35
+ - cffi=1.15.1=py310h255011f_3
36
+ - charset-normalizer=3.1.0=pyhd8ed1ab_0
37
+ - click=8.1.3=unix_pyhd8ed1ab_2
38
+ - cloudpickle=2.2.1=pyhd8ed1ab_0
39
+ - colorama=0.4.4=pyh9f0ad1d_0
40
+ - comm=0.1.3=pyhd8ed1ab_0
41
+ - commonmark=0.9.1=py_0
42
+ - conda=23.1.0=py310hff52083_0
43
+ - conda-content-trust=0.1.3=pyhd8ed1ab_0
44
+ - conda-package-handling=2.0.2=pyh38be061_0
45
+ - conda-package-streaming=0.7.0=pyhd8ed1ab_1
46
+ - confection=0.0.4=py310hfdc917e_1
47
+ - contourpy=1.0.7=py310hdf3cbec_0
48
+ - cryptography=40.0.1=py310h34c0648_0
49
+ - cuda-cccl=11.8.89=0
50
+ - cuda-command-line-tools=11.8.0=0
51
+ - cuda-compiler=11.8.0=0
52
+ - cuda-cudart=11.8.89=0
53
+ - cuda-cudart-dev=11.8.89=0
54
+ - cuda-cuobjdump=11.8.86=0
55
+ - cuda-cupti=11.8.87=0
56
+ - cuda-cuxxfilt=11.8.86=0
57
+ - cuda-documentation=11.8.86=0
58
+ - cuda-driver-dev=11.8.89=0
59
+ - cuda-gdb=11.8.86=0
60
+ - cuda-libraries=11.8.0=0
61
+ - cuda-libraries-dev=11.8.0=0
62
+ - cuda-memcheck=11.8.86=0
63
+ - cuda-nsight=11.8.86=0
64
+ - cuda-nsight-compute=11.8.0=0
65
+ - cuda-nvcc=11.8.89=0
66
+ - cuda-nvdisasm=11.8.86=0
67
+ - cuda-nvml-dev=11.8.86=0
68
+ - cuda-nvprof=11.8.87=0
69
+ - cuda-nvprune=11.8.86=0
70
+ - cuda-nvrtc=11.8.89=0
71
+ - cuda-nvrtc-dev=11.8.89=0
72
+ - cuda-nvtx=11.8.86=0
73
+ - cuda-nvvp=11.8.87=0
74
+ - cuda-profiler-api=11.8.86=0
75
+ - cuda-runtime=11.8.0=0
76
+ - cuda-sanitizer-api=11.8.86=0
77
+ - cuda-toolkit=11.8.0=0
78
+ - cuda-tools=11.8.0=0
79
+ - cuda-visual-tools=11.8.0=0
80
+ - cycler=0.11.0=pyhd8ed1ab_0
81
+ - cymem=2.0.7=py310hd8f1fbe_1
82
+ - cython=0.29.34=py310heca2aa9_0
83
+ - cython-blis=0.7.9=py310hde88566_1
84
+ - dbus=1.13.6=h5008d03_3
85
+ - debugpy=1.6.7=py310heca2aa9_0
86
+ - decorator=5.1.1=pyhd8ed1ab_0
87
+ - dgl=1.1.0.cu118=py310_0
88
+ - docutils=0.15.2=py310hff52083_6
89
+ - executing=1.2.0=pyhd8ed1ab_0
90
+ - expat=2.5.0=hcb278e6_1
91
+ - fastai=2.7.12=py_0
92
+ - fastcore=1.5.29=py_0
93
+ - fastdownload=0.0.7=py_0
94
+ - fastprogress=1.0.3=py_0
95
+ - ffmpeg=5.1.2=gpl_h8dda1f0_106
96
+ - fftw=3.3.10=nompi_hc118613_107
97
+ - filelock=3.12.0=pyhd8ed1ab_0
98
+ - fmt=9.1.0=h924138e_0
99
+ - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
100
+ - font-ttf-inconsolata=3.000=h77eed37_0
101
+ - font-ttf-source-code-pro=2.038=h77eed37_0
102
+ - font-ttf-ubuntu=0.83=hab24e00_0
103
+ - fontconfig=2.14.2=h14ed4e7_0
104
+ - fonts-conda-ecosystem=1=0
105
+ - fonts-conda-forge=1=0
106
+ - fonttools=4.39.4=py310h2372a71_0
107
+ - freeglut=3.2.2=h9c3ff4c_1
108
+ - freetype=2.12.1=hca18f0e_1
109
+ - future=0.18.3=pyhd8ed1ab_0
110
+ - gds-tools=1.4.0.31=0
111
+ - gettext=0.21.1=h27087fc_0
112
+ - glib=2.76.2=hfc55251_0
113
+ - glib-tools=2.76.2=hfc55251_0
114
+ - gmp=6.2.1=h58526e2_0
115
+ - gmpy2=2.1.2=py310h3ec546c_1
116
+ - gnutls=3.7.8=hf3e180e_0
117
+ - graphite2=1.3.13=h58526e2_1001
118
+ - gst-plugins-base=1.22.0=h4243ec0_2
119
+ - gstreamer=1.22.0=h25f0c4b_2
120
+ - gstreamer-orc=0.4.33=h166bdaf_0
121
+ - h5py=3.8.0=nompi_py310ha66b2ad_101
122
+ - harfbuzz=6.0.0=h8e241bc_0
123
+ - hdf5=1.14.0=nompi_hb72d44e_103
124
+ - icu=70.1=h27087fc_0
125
+ - idna=3.4=pyhd8ed1ab_0
126
+ - imageio=2.28.1=pyh24c5eb1_0
127
+ - importlib_metadata=6.6.0=hd8ed1ab_0
128
+ - ipykernel=6.23.0=pyh210e3f2_0
129
+ - ipython=8.13.2=pyh41d4057_0
130
+ - jack=1.9.22=h11f4161_0
131
+ - jasper=2.0.33=h0ff4b12_1
132
+ - jedi=0.18.2=pyhd8ed1ab_0
133
+ - jinja2=3.1.2=pyhd8ed1ab_1
134
+ - jmespath=1.0.1=pyhd8ed1ab_0
135
+ - joblib=1.2.0=pyhd8ed1ab_0
136
+ - jpeg=9e=h166bdaf_2
137
+ - jupyter_client=8.2.0=pyhd8ed1ab_0
138
+ - jupyter_core=5.3.0=py310hff52083_0
139
+ - keyutils=1.6.1=h166bdaf_0
140
+ - kiwisolver=1.4.4=py310hbf28c38_1
141
+ - krb5=1.20.1=h81ceb04_0
142
+ - lame=3.100=h166bdaf_1003
143
+ - langcodes=3.3.0=pyhd8ed1ab_0
144
+ - lcms2=2.15=hfd0df8a_0
145
+ - ld_impl_linux-64=2.40=h41732ed_0
146
+ - lerc=4.0.0=h27087fc_0
147
+ - libaec=1.0.6=hcb278e6_1
148
+ - libarchive=3.6.2=h3d51595_0
149
+ - libblas=3.9.0=1_h86c2bf4_netlib
150
+ - libbrotlicommon=1.0.9=h166bdaf_8
151
+ - libbrotlidec=1.0.9=h166bdaf_8
152
+ - libbrotlienc=1.0.9=h166bdaf_8
153
+ - libcap=2.67=he9d0100_0
154
+ - libcblas=3.9.0=5_h92ddd45_netlib
155
+ - libclang=15.0.7=default_had23c3d_1
156
+ - libclang13=15.0.7=default_h3e3d535_1
157
+ - libcublas=11.11.3.6=0
158
+ - libcublas-dev=11.11.3.6=0
159
+ - libcufft=10.9.0.58=0
160
+ - libcufft-dev=10.9.0.58=0
161
+ - libcufile=1.4.0.31=0
162
+ - libcufile-dev=1.4.0.31=0
163
+ - libcups=2.3.3=h36d4200_3
164
+ - libcurand=10.3.0.86=0
165
+ - libcurand-dev=10.3.0.86=0
166
+ - libcurl=7.88.1=hdc1c0ab_1
167
+ - libcusolver=11.4.1.48=0
168
+ - libcusolver-dev=11.4.1.48=0
169
+ - libcusparse=11.7.5.86=0
170
+ - libcusparse-dev=11.7.5.86=0
171
+ - libdb=6.2.32=h9c3ff4c_0
172
+ - libdeflate=1.17=h0b41bf4_0
173
+ - libdrm=2.4.114=h166bdaf_0
174
+ - libedit=3.1.20191231=he28a2e2_2
175
+ - libev=4.33=h516909a_1
176
+ - libevent=2.1.10=h28343ad_4
177
+ - libexpat=2.5.0=hcb278e6_1
178
+ - libffi=3.4.2=h7f98852_5
179
+ - libflac=1.4.2=h27087fc_0
180
+ - libgcc=7.2.0=h69d50b8_2
181
+ - libgcc-ng=12.2.0=h65d4601_19
182
+ - libgcrypt=1.10.1=h166bdaf_0
183
+ - libgfortran-ng=12.2.0=h69a702a_19
184
+ - libgfortran5=12.2.0=h337968e_19
185
+ - libglib=2.76.2=hebfc3b9_0
186
+ - libglu=9.0.0=he1b5a44_1001
187
+ - libgomp=12.2.0=h65d4601_19
188
+ - libgpg-error=1.46=h620e276_0
189
+ - libhwloc=2.9.1=hd6dc26d_0
190
+ - libiconv=1.17=h166bdaf_0
191
+ - libidn2=2.3.4=h166bdaf_0
192
+ - libjpeg-turbo=2.1.4=h166bdaf_0
193
+ - liblapack=3.9.0=5_h92ddd45_netlib
194
+ - liblapacke=3.9.0=5_h92ddd45_netlib
195
+ - libllvm11=11.1.0=he0ac6c6_5
196
+ - libllvm15=15.0.7=hadd5161_1
197
+ - libllvm16=16.0.1=hadd5161_0
198
+ - libmamba=1.4.1=hcea66bb_0
199
+ - libmambapy=1.4.1=py310h1428755_0
200
+ - libnghttp2=1.52.0=h61bc06f_0
201
+ - libnpp=11.8.0.86=0
202
+ - libnpp-dev=11.8.0.86=0
203
+ - libnsl=2.0.0=h7f98852_0
204
+ - libnvjpeg=11.9.0.86=0
205
+ - libnvjpeg-dev=11.9.0.86=0
206
+ - libogg=1.3.4=h7f98852_1
207
+ - libopenblas=0.3.21=pthreads_h78a6416_3
208
+ - libopencv=4.7.0=py310hb48cf42_1
209
+ - libopus=1.3.1=h7f98852_1
210
+ - libpciaccess=0.17=h166bdaf_0
211
+ - libpng=1.6.39=h753d276_0
212
+ - libpq=15.3=hbcd7760_0
213
+ - libprotobuf=3.21.12=h3eb15da_0
214
+ - libsndfile=1.2.0=hb75c966_0
215
+ - libsodium=1.0.18=h36c2ea0_1
216
+ - libsolv=0.7.23=h3eb15da_0
217
+ - libsqlite=3.40.0=h753d276_0
218
+ - libssh2=1.10.0=hf14f497_3
219
+ - libstdcxx-ng=12.2.0=h46fd767_19
220
+ - libsystemd0=253=h8c4010b_1
221
+ - libtasn1=4.19.0=h166bdaf_0
222
+ - libtiff=4.5.0=h6adf6a1_2
223
+ - libtool=2.4.7=h27087fc_0
224
+ - libudev1=253=h0b41bf4_1
225
+ - libunistring=0.9.10=h7f98852_0
226
+ - libuuid=2.38.1=h0b41bf4_0
227
+ - libuv=1.44.2=h166bdaf_0
228
+ - libva=2.18.0=h0b41bf4_0
229
+ - libvorbis=1.3.7=h9c3ff4c_0
230
+ - libvpx=1.11.0=h9c3ff4c_3
231
+ - libwebp-base=1.3.0=h0b41bf4_0
232
+ - libxcb=1.13=h7f98852_1004
233
+ - libxkbcommon=1.5.0=h79f4944_1
234
+ - libxml2=2.10.3=hca2bb57_4
235
+ - libzlib=1.2.13=h166bdaf_4
236
+ - llvm-openmp=16.0.3=h4dfa4b3_0
237
+ - llvmlite=0.39.1=py310h58363a5_1
238
+ - lz4-c=1.9.4=hcb278e6_0
239
+ - lzo=2.10=h516909a_1000
240
+ - mamba=1.4.1=py310h51d5547_0
241
+ - markupsafe=2.1.2=py310h1fa729e_0
242
+ - matplotlib=3.7.1=py310hff52083_0
243
+ - matplotlib-base=3.7.1=py310he60537e_0
244
+ - matplotlib-inline=0.1.6=pyhd8ed1ab_0
245
+ - mkl=2023.1.0=h84fe81f_48680
246
+ - mkl-include=2023.1.0=h84fe81f_48680
247
+ - mpc=1.3.1=hfe3b2da_0
248
+ - mpfr=4.2.0=hb012696_0
249
+ - mpg123=1.31.3=hcb278e6_0
250
+ - mpi=1.0=openmpi
251
+ - mpi4py=3.1.4=py310h6075a6b_0
252
+ - mpmath=1.3.0=pyhd8ed1ab_0
253
+ - munkres=1.1.4=pyh9f0ad1d_0
254
+ - murmurhash=1.0.9=py310hd8f1fbe_1
255
+ - mysql-common=8.0.32=hf1915f5_2
256
+ - mysql-libs=8.0.32=hca2cd23_2
257
+ - ncurses=6.3=h27087fc_1
258
+ - nest-asyncio=1.5.6=pyhd8ed1ab_0
259
+ - nettle=3.8.1=hc379101_1
260
+ - networkx=3.1=pyhd8ed1ab_0
261
+ - nsight-compute=2022.3.0.22=0
262
+ - nspr=4.35=h27087fc_0
263
+ - nss=3.89=he45b914_0
264
+ - numba=0.56.4=py310h0e39c9b_1
265
+ - numpy=1.23.5=py310h53a5b5f_0
266
+ - opencv=4.7.0=py310hff52083_1
267
+ - openh264=2.3.1=hcb278e6_2
268
+ - openjpeg=2.5.0=hfec8fc6_2
269
+ - openmpi=4.1.5=h414af15_101
270
+ - openssl=3.1.0=hd590300_3
271
+ - p11-kit=0.24.1=hc5aa10d_0
272
+ - packaging=23.1=pyhd8ed1ab_0
273
+ - pandas=2.0.1=py310h7cbd5c2_1
274
+ - parso=0.8.3=pyhd8ed1ab_0
275
+ - pathy=0.10.1=pyhd8ed1ab_0
276
+ - patsy=0.5.3=pyhd8ed1ab_0
277
+ - pcre2=10.40=hc3806b6_0
278
+ - pexpect=4.8.0=pyh1a96a4e_2
279
+ - pickleshare=0.7.5=py_1003
280
+ - pillow=9.4.0=py310h023d228_1
281
+ - pixman=0.40.0=h36c2ea0_0
282
+ - platformdirs=3.5.0=pyhd8ed1ab_0
283
+ - plotly=5.14.1=pyhd8ed1ab_0
284
+ - pluggy=1.0.0=pyhd8ed1ab_5
285
+ - ply=3.11=py_1
286
+ - pooch=1.7.0=pyha770c72_3
287
+ - preshed=3.0.8=py310hd8f1fbe_1
288
+ - prompt-toolkit=3.0.38=pyha770c72_0
289
+ - prompt_toolkit=3.0.38=hd8ed1ab_0
290
+ - psutil=5.9.5=py310h1fa729e_0
291
+ - pthread-stubs=0.4=h36c2ea0_1001
292
+ - ptyprocess=0.7.0=pyhd3deb0d_0
293
+ - pulseaudio=16.1=hcb278e6_3
294
+ - pulseaudio-client=16.1=h5195f5e_3
295
+ - pulseaudio-daemon=16.1=ha8d29e2_3
296
+ - pure_eval=0.2.2=pyhd8ed1ab_0
297
+ - py-opencv=4.7.0=py310hfdc917e_1
298
+ - pyasn1=0.4.8=py_0
299
+ - pybind11=2.10.4=py310hdf3cbec_0
300
+ - pybind11-abi=4=hd8ed1ab_3
301
+ - pybind11-global=2.10.4=py310hdf3cbec_0
302
+ - pycosat=0.6.4=py310h5764c6d_1
303
+ - pycparser=2.21=pyhd8ed1ab_0
304
+ - pydantic=1.10.7=py310h1fa729e_0
305
+ - pygments=2.15.1=pyhd8ed1ab_0
306
+ - pyopenssl=23.1.1=pyhd8ed1ab_0
307
+ - pyparsing=3.0.9=pyhd8ed1ab_0
308
+ - pyqt=5.15.7=py310hab646b1_3
309
+ - pyqt5-sip=12.11.0=py310heca2aa9_3
310
+ - pysocks=1.7.1=pyha2e5f31_6
311
+ - python=3.10.8=h4a9ceb5_0_cpython
312
+ - python-dateutil=2.8.2=pyhd8ed1ab_0
313
+ - python-tzdata=2023.3=pyhd8ed1ab_0
314
+ - python_abi=3.10=3_cp310
315
+ - pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
316
+ - pytorch-cuda=11.8=h7e8668a_3
317
+ - pytorch-mutex=1.0=cuda
318
+ - pytz=2023.3=pyhd8ed1ab_0
319
+ - pyyaml=5.4.1=py310h5764c6d_4
320
+ - pyzmq=25.0.2=py310h059b190_0
321
+ - qt-main=5.15.8=h5d23da1_6
322
+ - readline=8.2=h8228510_1
323
+ - reproc=14.2.4=h0b41bf4_0
324
+ - reproc-cpp=14.2.4=hcb278e6_0
325
+ - requests=2.28.2=pyhd8ed1ab_1
326
+ - rhash=1.4.3=h166bdaf_0
327
+ - rich=12.6.0=pyhd8ed1ab_0
328
+ - rsa=4.7.2=pyh44b312d_0
329
+ - ruamel.yaml=0.17.21=py310h1fa729e_3
330
+ - ruamel.yaml.clib=0.2.7=py310h1fa729e_1
331
+ - s3transfer=0.6.1=pyhd8ed1ab_0
332
+ - scikit-learn=1.2.2=py310h41b6a48_1
333
+ - scipy=1.10.1=py310h8deb116_2
334
+ - seaborn=0.12.2=hd8ed1ab_0
335
+ - seaborn-base=0.12.2=pyhd8ed1ab_0
336
+ - setuptools=65.6.3=pyhd8ed1ab_0
337
+ - shap=0.41.0=py310h769672d_0
338
+ - shellingham=1.5.1=pyhd8ed1ab_0
339
+ - sip=6.7.9=py310hc6cd4ac_0
340
+ - six=1.16.0=pyh6c4a22f_0
341
+ - slicer=0.0.7=pyhd8ed1ab_0
342
+ - smart_open=5.2.1=pyhd8ed1ab_0
343
+ - spacy=3.5.2=py310h5a539fb_0
344
+ - spacy-legacy=3.0.12=pyhd8ed1ab_0
345
+ - spacy-loggers=1.0.4=pyhd8ed1ab_0
346
+ - srsly=2.4.6=py310heca2aa9_0
347
+ - stack_data=0.6.2=pyhd8ed1ab_0
348
+ - statsmodels=0.14.0=py310h278f3c1_1
349
+ - svt-av1=1.4.1=hcb278e6_0
350
+ - sympy=1.11.1=pypyh9d50eac_103
351
+ - tbb=2021.9.0=hf52228f_0
352
+ - tenacity=8.2.2=pyhd8ed1ab_0
353
+ - thinc=8.1.10=py310hfb6f7a9_0
354
+ - threadpoolctl=3.1.0=pyh8a188c0_0
355
+ - tk=8.6.12=h27826a3_0
356
+ - toml=0.10.2=pyhd8ed1ab_0
357
+ - tomli=2.0.1=pyhd8ed1ab_0
358
+ - toolz=0.12.0=pyhd8ed1ab_0
359
+ - torchaudio=2.0.1=py310_cu118
360
+ - torchdata=0.6.0=py310
361
+ - torchtext=0.15.1=py310
362
+ - torchvision=0.15.1=py310_cu118
363
+ - tornado=6.3=py310h1fa729e_0
364
+ - tqdm=4.65.0=pyhd8ed1ab_1
365
+ - traitlets=5.9.0=pyhd8ed1ab_0
366
+ - typer=0.7.0=pyhd8ed1ab_0
367
+ - typing=3.10.0.0=pyhd8ed1ab_0
368
+ - typing-extensions=4.5.0=hd8ed1ab_0
369
+ - typing_extensions=4.5.0=pyha770c72_0
370
+ - tzdata=2023c=h71feb2d_0
371
+ - unicodedata2=15.0.0=py310h5764c6d_0
372
+ - urllib3=1.26.15=pyhd8ed1ab_0
373
+ - wasabi=1.1.1=py310hff52083_1
374
+ - wcwidth=0.2.6=pyhd8ed1ab_0
375
+ - wheel=0.40.0=pyhd8ed1ab_0
376
+ - x264=1!164.3095=h166bdaf_2
377
+ - x265=3.5=h924138e_3
378
+ - xcb-util=0.4.0=h516909a_0
379
+ - xcb-util-image=0.4.0=h166bdaf_0
380
+ - xcb-util-keysyms=0.4.0=h516909a_0
381
+ - xcb-util-renderutil=0.3.9=h166bdaf_0
382
+ - xcb-util-wm=0.4.1=h516909a_0
383
+ - xkeyboard-config=2.38=h0b41bf4_0
384
+ - xorg-fixesproto=5.0=h7f98852_1002
385
+ - xorg-inputproto=2.3.2=h7f98852_1002
386
+ - xorg-kbproto=1.0.7=h7f98852_1002
387
+ - xorg-libice=1.0.10=h7f98852_0
388
+ - xorg-libsm=1.2.3=hd9c2040_1000
389
+ - xorg-libx11=1.8.4=h0b41bf4_0
390
+ - xorg-libxau=1.0.9=h7f98852_0
391
+ - xorg-libxdmcp=1.1.3=h7f98852_0
392
+ - xorg-libxext=1.3.4=h0b41bf4_2
393
+ - xorg-libxfixes=5.0.3=h7f98852_1004
394
+ - xorg-libxi=1.7.10=h7f98852_0
395
+ - xorg-libxrender=0.9.10=h7f98852_1003
396
+ - xorg-renderproto=0.11.1=h7f98852_1002
397
+ - xorg-xextproto=7.3.0=h0b41bf4_1003
398
+ - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
399
+ - xorg-xproto=7.0.31=h7f98852_1007
400
+ - xyzservices=2023.2.0=pyhd8ed1ab_0
401
+ - xz=5.2.6=h166bdaf_0
402
+ - yaml=0.2.5=h7f98852_2
403
+ - yaml-cpp=0.7.0=h27087fc_2
404
+ - zeromq=4.3.4=h9c3ff4c_1
405
+ - zipp=3.15.0=pyhd8ed1ab_0
406
+ - zlib=1.2.13=h166bdaf_4
407
+ - zstandard=0.19.0=py310hdeb6495_1
408
+ - zstd=1.5.2=h3eb15da_6
409
+ - pip:
410
+ - accelerate==0.21.0
411
+ - aiohttp==3.8.5
412
+ - aiosignal==1.3.1
413
+ - apex==0.1
414
+ - appdirs==1.4.4
415
+ - argparse==1.4.0
416
+ - async-timeout==4.0.2
417
+ - attrs==22.2.0
418
+ - bcrypt==4.0.1
419
+ - bitsandbytes==0.41.0
420
+ - cmake==3.26.3
421
+ - contextlib2==21.6.0
422
+ - datasets==2.14.0
423
+ - deepspeed==0.6.1+1ea3d4b
424
+ - dill==0.3.6
425
+ - docker-pycreds==0.4.0
426
+ - einops==0.6.1
427
+ - flash-attn==0.2.8
428
+ - frozenlist==1.4.0
429
+ - fsspec==2023.5.0
430
+ - gevent==22.10.2
431
+ - gitdb==4.0.10
432
+ - gitpython==3.1.32
433
+ - google-pasta==0.2.0
434
+ - greenlet==2.0.2
435
+ - hjson==3.1.0
436
+ - horovod==0.26.1
437
+ - huggingface-hub==0.16.4
438
+ - importlib-metadata==4.13.0
439
+ - inotify-simple==1.2.1
440
+ - ipywidgets==8.0.7
441
+ - jsonpatch==1.32
442
+ - jsonpointer==2.3
443
+ - jsonschema==4.17.3
444
+ - jupyterlab-widgets==3.0.8
445
+ - lit==16.0.3
446
+ - multidict==6.0.4
447
+ - multiprocess==0.70.14
448
+ - ninja==1.11.1
449
+ - paramiko==3.1.0
450
+ - pathos==0.3.0
451
+ - pathtools==0.1.2
452
+ - peft==0.5.0.dev0
453
+ - pip==23.1.2
454
+ - pox==0.3.2
455
+ - ppft==1.7.6.6
456
+ - protobuf==3.20.3
457
+ - protobuf3-to-dict==0.1.5
458
+ - py-cpuinfo==9.0.0
459
+ - pyarrow==12.0.0
460
+ - pyfunctional==1.4.3
461
+ - pyinstrument==3.4.2
462
+ - pyinstrument-cext==0.2.4
463
+ - pynacl==1.5.0
464
+ - pyrsistent==0.19.3
465
+ - regex==2023.6.3
466
+ - retrying==1.3.4
467
+ - s3fs==0.4.2
468
+ - safetensors==0.3.1
469
+ - sagemaker==2.154.0
470
+ - sagemaker-experiments==0.1.43
471
+ - sagemaker-pytorch-training==2.8.0
472
+ - sagemaker-training==4.5.0
473
+ - schema==0.7.5
474
+ - sentry-sdk==1.28.1
475
+ - setproctitle==1.3.2
476
+ - smclarify==0.5
477
+ - smdebug==1.0.34
478
+ - smdebug-rulesconfig==1.0.1
479
+ - smdistributed-dataparallel==1.8.0
480
+ - smdistributed-modelparallel==1.15.0
481
+ - smmap==5.0.0
482
+ - tabulate==0.9.0
483
+ - tblib==1.7.0
484
+ - tokenizers==0.13.3
485
+ - torchnet==0.0.4
486
+ - transformers==4.31.0
487
+ - triton==2.0.0.dev20221202
488
+ - trl==0.4.7
489
+ - visdom==0.2.4
490
+ - wandb==0.15.7
491
+ - websocket-client==1.5.1
492
+ - werkzeug==2.3.4
493
+ - widgetsnbextension==4.0.8
494
+ - xxhash==3.2.0
495
+ - yarl==1.9.2
496
+ - zope-event==4.6
497
+ - zope-interface==6.0
498
+ prefix: /opt/conda
wandb/latest-run/files/config.yaml ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.8
7
+ cli_version: 0.15.7
8
+ framework: huggingface
9
+ huggingface_version: 4.31.0
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1690472976.418337
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 84
24
+ - 98
25
+ 2:
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 84
35
+ - 98
36
+ 3:
37
+ - 7
38
+ - 23
39
+ 4: 3.10.8
40
+ 5: 0.15.7
41
+ 6: 4.31.0
42
+ 8:
43
+ - 1
44
+ - 5
45
+ m:
46
+ - 1: train/global_step
47
+ 6:
48
+ - 3
49
+ - 1: train/loss
50
+ 5: 1
51
+ 6:
52
+ - 1
53
+ - 1: train/learning_rate
54
+ 5: 1
55
+ 6:
56
+ - 1
57
+ - 1: train/epoch
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: train/train_runtime
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: train/train_samples_per_second
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: train/train_steps_per_second
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: train/total_flos
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: train/train_loss
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ vocab_size:
82
+ desc: null
83
+ value: 65024
84
+ hidden_size:
85
+ desc: null
86
+ value: 4544
87
+ n_layer:
88
+ desc: null
89
+ value: 32
90
+ n_head:
91
+ desc: null
92
+ value: 71
93
+ layer_norm_epsilon:
94
+ desc: null
95
+ value: 1.0e-05
96
+ initializer_range:
97
+ desc: null
98
+ value: 0.02
99
+ use_cache:
100
+ desc: null
101
+ value: false
102
+ apply_residual_connection_post_layernorm:
103
+ desc: null
104
+ value: false
105
+ hidden_dropout:
106
+ desc: null
107
+ value: 0.0
108
+ attention_dropout:
109
+ desc: null
110
+ value: 0.0
111
+ bos_token_id:
112
+ desc: null
113
+ value: 11
114
+ eos_token_id:
115
+ desc: null
116
+ value: 11
117
+ multi_query:
118
+ desc: null
119
+ value: true
120
+ alibi:
121
+ desc: null
122
+ value: false
123
+ bias:
124
+ desc: null
125
+ value: false
126
+ parallel_attn:
127
+ desc: null
128
+ value: true
129
+ return_dict:
130
+ desc: null
131
+ value: true
132
+ output_hidden_states:
133
+ desc: null
134
+ value: false
135
+ output_attentions:
136
+ desc: null
137
+ value: false
138
+ torchscript:
139
+ desc: null
140
+ value: false
141
+ torch_dtype:
142
+ desc: null
143
+ value: bfloat16
144
+ use_bfloat16:
145
+ desc: null
146
+ value: false
147
+ tf_legacy_loss:
148
+ desc: null
149
+ value: false
150
+ pruned_heads:
151
+ desc: null
152
+ value: {}
153
+ tie_word_embeddings:
154
+ desc: null
155
+ value: true
156
+ is_encoder_decoder:
157
+ desc: null
158
+ value: false
159
+ is_decoder:
160
+ desc: null
161
+ value: false
162
+ cross_attention_hidden_size:
163
+ desc: null
164
+ value: null
165
+ add_cross_attention:
166
+ desc: null
167
+ value: false
168
+ tie_encoder_decoder:
169
+ desc: null
170
+ value: false
171
+ max_length:
172
+ desc: null
173
+ value: 20
174
+ min_length:
175
+ desc: null
176
+ value: 0
177
+ do_sample:
178
+ desc: null
179
+ value: false
180
+ early_stopping:
181
+ desc: null
182
+ value: false
183
+ num_beams:
184
+ desc: null
185
+ value: 1
186
+ num_beam_groups:
187
+ desc: null
188
+ value: 1
189
+ diversity_penalty:
190
+ desc: null
191
+ value: 0.0
192
+ temperature:
193
+ desc: null
194
+ value: 1.0
195
+ top_k:
196
+ desc: null
197
+ value: 50
198
+ top_p:
199
+ desc: null
200
+ value: 1.0
201
+ typical_p:
202
+ desc: null
203
+ value: 1.0
204
+ repetition_penalty:
205
+ desc: null
206
+ value: 1.0
207
+ length_penalty:
208
+ desc: null
209
+ value: 1.0
210
+ no_repeat_ngram_size:
211
+ desc: null
212
+ value: 0
213
+ encoder_no_repeat_ngram_size:
214
+ desc: null
215
+ value: 0
216
+ bad_words_ids:
217
+ desc: null
218
+ value: null
219
+ num_return_sequences:
220
+ desc: null
221
+ value: 1
222
+ chunk_size_feed_forward:
223
+ desc: null
224
+ value: 0
225
+ output_scores:
226
+ desc: null
227
+ value: false
228
+ return_dict_in_generate:
229
+ desc: null
230
+ value: false
231
+ forced_bos_token_id:
232
+ desc: null
233
+ value: null
234
+ forced_eos_token_id:
235
+ desc: null
236
+ value: null
237
+ remove_invalid_values:
238
+ desc: null
239
+ value: false
240
+ exponential_decay_length_penalty:
241
+ desc: null
242
+ value: null
243
+ suppress_tokens:
244
+ desc: null
245
+ value: null
246
+ begin_suppress_tokens:
247
+ desc: null
248
+ value: null
249
+ architectures:
250
+ desc: null
251
+ value:
252
+ - RWForCausalLM
253
+ finetuning_task:
254
+ desc: null
255
+ value: null
256
+ id2label:
257
+ desc: null
258
+ value:
259
+ '0': LABEL_0
260
+ '1': LABEL_1
261
+ label2id:
262
+ desc: null
263
+ value:
264
+ LABEL_0: 0
265
+ LABEL_1: 1
266
+ tokenizer_class:
267
+ desc: null
268
+ value: null
269
+ prefix:
270
+ desc: null
271
+ value: null
272
+ pad_token_id:
273
+ desc: null
274
+ value: null
275
+ sep_token_id:
276
+ desc: null
277
+ value: null
278
+ decoder_start_token_id:
279
+ desc: null
280
+ value: null
281
+ task_specific_params:
282
+ desc: null
283
+ value: null
284
+ problem_type:
285
+ desc: null
286
+ value: null
287
+ _name_or_path:
288
+ desc: null
289
+ value: ybelkada/falcon-7b-sharded-bf16
290
+ transformers_version:
291
+ desc: null
292
+ value: 4.31.0
293
+ auto_map:
294
+ desc: null
295
+ value:
296
+ AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
297
+ AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
298
+ AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
299
+ AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
300
+ AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
301
+ AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
302
+ model_type:
303
+ desc: null
304
+ value: RefinedWebModel
305
+ quantization_config:
306
+ desc: null
307
+ value:
308
+ load_in_8bit: false
309
+ load_in_4bit: true
310
+ llm_int8_threshold: 6.0
311
+ llm_int8_skip_modules: null
312
+ llm_int8_enable_fp32_cpu_offload: false
313
+ llm_int8_has_fp16_weight: false
314
+ bnb_4bit_quant_type: nf4
315
+ bnb_4bit_use_double_quant: false
316
+ bnb_4bit_compute_dtype: float16
317
+ output_dir:
318
+ desc: null
319
+ value: ./results
320
+ overwrite_output_dir:
321
+ desc: null
322
+ value: false
323
+ do_train:
324
+ desc: null
325
+ value: false
326
+ do_eval:
327
+ desc: null
328
+ value: false
329
+ do_predict:
330
+ desc: null
331
+ value: false
332
+ evaluation_strategy:
333
+ desc: null
334
+ value: 'no'
335
+ prediction_loss_only:
336
+ desc: null
337
+ value: false
338
+ per_device_train_batch_size:
339
+ desc: null
340
+ value: 4
341
+ per_device_eval_batch_size:
342
+ desc: null
343
+ value: 8
344
+ per_gpu_train_batch_size:
345
+ desc: null
346
+ value: None
347
+ per_gpu_eval_batch_size:
348
+ desc: null
349
+ value: None
350
+ gradient_accumulation_steps:
351
+ desc: null
352
+ value: 4
353
+ eval_accumulation_steps:
354
+ desc: null
355
+ value: None
356
+ eval_delay:
357
+ desc: null
358
+ value: 0
359
+ learning_rate:
360
+ desc: null
361
+ value: 0.0002
362
+ weight_decay:
363
+ desc: null
364
+ value: 0.0
365
+ adam_beta1:
366
+ desc: null
367
+ value: 0.9
368
+ adam_beta2:
369
+ desc: null
370
+ value: 0.999
371
+ adam_epsilon:
372
+ desc: null
373
+ value: 1.0e-08
374
+ max_grad_norm:
375
+ desc: null
376
+ value: 0.3
377
+ num_train_epochs:
378
+ desc: null
379
+ value: 3.0
380
+ max_steps:
381
+ desc: null
382
+ value: 500
383
+ lr_scheduler_type:
384
+ desc: null
385
+ value: constant
386
+ warmup_ratio:
387
+ desc: null
388
+ value: 0.03
389
+ warmup_steps:
390
+ desc: null
391
+ value: 0
392
+ log_level:
393
+ desc: null
394
+ value: passive
395
+ log_level_replica:
396
+ desc: null
397
+ value: warning
398
+ log_on_each_node:
399
+ desc: null
400
+ value: true
401
+ logging_dir:
402
+ desc: null
403
+ value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
404
+ logging_strategy:
405
+ desc: null
406
+ value: steps
407
+ logging_first_step:
408
+ desc: null
409
+ value: false
410
+ logging_steps:
411
+ desc: null
412
+ value: 10
413
+ logging_nan_inf_filter:
414
+ desc: null
415
+ value: true
416
+ save_strategy:
417
+ desc: null
418
+ value: steps
419
+ save_steps:
420
+ desc: null
421
+ value: 10
422
+ save_total_limit:
423
+ desc: null
424
+ value: None
425
+ save_safetensors:
426
+ desc: null
427
+ value: false
428
+ save_on_each_node:
429
+ desc: null
430
+ value: false
431
+ no_cuda:
432
+ desc: null
433
+ value: false
434
+ use_mps_device:
435
+ desc: null
436
+ value: false
437
+ seed:
438
+ desc: null
439
+ value: 42
440
+ data_seed:
441
+ desc: null
442
+ value: None
443
+ jit_mode_eval:
444
+ desc: null
445
+ value: false
446
+ use_ipex:
447
+ desc: null
448
+ value: false
449
+ bf16:
450
+ desc: null
451
+ value: false
452
+ fp16:
453
+ desc: null
454
+ value: true
455
+ fp16_opt_level:
456
+ desc: null
457
+ value: O1
458
+ half_precision_backend:
459
+ desc: null
460
+ value: auto
461
+ bf16_full_eval:
462
+ desc: null
463
+ value: false
464
+ fp16_full_eval:
465
+ desc: null
466
+ value: false
467
+ tf32:
468
+ desc: null
469
+ value: None
470
+ local_rank:
471
+ desc: null
472
+ value: 0
473
+ ddp_backend:
474
+ desc: null
475
+ value: None
476
+ tpu_num_cores:
477
+ desc: null
478
+ value: None
479
+ tpu_metrics_debug:
480
+ desc: null
481
+ value: false
482
+ debug:
483
+ desc: null
484
+ value: '[]'
485
+ dataloader_drop_last:
486
+ desc: null
487
+ value: false
488
+ eval_steps:
489
+ desc: null
490
+ value: None
491
+ dataloader_num_workers:
492
+ desc: null
493
+ value: 0
494
+ past_index:
495
+ desc: null
496
+ value: -1
497
+ run_name:
498
+ desc: null
499
+ value: ./results
500
+ disable_tqdm:
501
+ desc: null
502
+ value: false
503
+ remove_unused_columns:
504
+ desc: null
505
+ value: true
506
+ label_names:
507
+ desc: null
508
+ value: None
509
+ load_best_model_at_end:
510
+ desc: null
511
+ value: false
512
+ metric_for_best_model:
513
+ desc: null
514
+ value: None
515
+ greater_is_better:
516
+ desc: null
517
+ value: None
518
+ ignore_data_skip:
519
+ desc: null
520
+ value: false
521
+ sharded_ddp:
522
+ desc: null
523
+ value: '[]'
524
+ fsdp:
525
+ desc: null
526
+ value: '[]'
527
+ fsdp_min_num_params:
528
+ desc: null
529
+ value: 0
530
+ fsdp_config:
531
+ desc: null
532
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
533
+ fsdp_transformer_layer_cls_to_wrap:
534
+ desc: null
535
+ value: None
536
+ deepspeed:
537
+ desc: null
538
+ value: None
539
+ label_smoothing_factor:
540
+ desc: null
541
+ value: 0.0
542
+ optim:
543
+ desc: null
544
+ value: paged_adamw_32bit
545
+ optim_args:
546
+ desc: null
547
+ value: None
548
+ adafactor:
549
+ desc: null
550
+ value: false
551
+ group_by_length:
552
+ desc: null
553
+ value: true
554
+ length_column_name:
555
+ desc: null
556
+ value: length
557
+ report_to:
558
+ desc: null
559
+ value: '[''wandb'']'
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: None
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: None
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: None
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ skip_memory_metrics:
573
+ desc: null
574
+ value: true
575
+ use_legacy_prediction_loop:
576
+ desc: null
577
+ value: false
578
+ push_to_hub:
579
+ desc: null
580
+ value: false
581
+ resume_from_checkpoint:
582
+ desc: null
583
+ value: None
584
+ hub_model_id:
585
+ desc: null
586
+ value: None
587
+ hub_strategy:
588
+ desc: null
589
+ value: every_save
590
+ hub_token:
591
+ desc: null
592
+ value: <HUB_TOKEN>
593
+ hub_private_repo:
594
+ desc: null
595
+ value: false
596
+ gradient_checkpointing:
597
+ desc: null
598
+ value: false
599
+ include_inputs_for_metrics:
600
+ desc: null
601
+ value: false
602
+ fp16_backend:
603
+ desc: null
604
+ value: auto
605
+ push_to_hub_model_id:
606
+ desc: null
607
+ value: None
608
+ push_to_hub_organization:
609
+ desc: null
610
+ value: None
611
+ push_to_hub_token:
612
+ desc: null
613
+ value: <PUSH_TO_HUB_TOKEN>
614
+ mp_parameters:
615
+ desc: null
616
+ value: ''
617
+ auto_find_batch_size:
618
+ desc: null
619
+ value: false
620
+ full_determinism:
621
+ desc: null
622
+ value: false
623
+ torchdynamo:
624
+ desc: null
625
+ value: None
626
+ ray_scope:
627
+ desc: null
628
+ value: last
629
+ ddp_timeout:
630
+ desc: null
631
+ value: 1800
632
+ torch_compile:
633
+ desc: null
634
+ value: false
635
+ torch_compile_backend:
636
+ desc: null
637
+ value: None
638
+ torch_compile_mode:
639
+ desc: null
640
+ value: None
641
+ xpu_backend:
642
+ desc: null
643
+ value: None
644
+ train_batch_size:
645
+ desc: null
646
+ value: 4
647
+ eval_batch_size:
648
+ desc: null
649
+ value: 8
wandb/latest-run/files/output.log ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
3
+ {}
4
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
5
+ To disable this warning, you can either:
6
+ - Avoid using `tokenizers` before the fork if possible
7
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
8
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
9
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
10
+ [notice] To update, run: pip install --upgrade pip
11
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
12
+ To disable this warning, you can either:
13
+ - Avoid using `tokenizers` before the fork if possible
14
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
15
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
16
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
17
+ [notice] To update, run: pip install --upgrade pip
18
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
19
+ To disable this warning, you can either:
20
+ - Avoid using `tokenizers` before the fork if possible
21
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
22
+ Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
23
+ Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
24
+ Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
25
+ Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
26
+ Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
27
+ Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
28
+ Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
29
+ Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
30
+ Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
31
+ Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
32
+ Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
33
+ Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
34
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
35
+ Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
36
+ Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
37
+ Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
38
+ Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
39
+ Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
40
+ Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
41
+ Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
42
+ Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
43
+ Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
44
+ Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
45
+ Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
46
+ Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
47
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
48
+ Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
49
+ Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
50
+ Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
51
+ Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
52
+ Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
53
+ Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
54
+ Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
55
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
56
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
57
+ [notice] To update, run: pip install --upgrade pip
58
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
59
+ To disable this warning, you can either:
60
+ - Avoid using `tokenizers` before the fork if possible
61
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
62
+ Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
63
+ Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
64
+ Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
65
+ Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
66
+ Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
67
+ Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
68
+ Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
69
+ Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
70
+ Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
71
+ Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
72
+ Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
73
+ Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
74
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
75
+ Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
76
+ Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
77
+ Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
78
+ Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
79
+ Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
80
+ Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
81
+ Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
82
+ Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
83
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
84
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
85
+ Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
86
+ Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
87
+ Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
88
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
89
+ Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
90
+ Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
91
+ Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
92
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
93
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
94
+ [notice] To update, run: pip install --upgrade pip
95
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
96
+ To disable this warning, you can either:
97
+ - Avoid using `tokenizers` before the fork if possible
98
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
99
+ Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
100
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
101
+ Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
102
+ Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
103
+ Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
104
+ Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
105
+ Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
106
+ Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
107
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
108
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
109
+ [notice] To update, run: pip install --upgrade pip
110
+ True
111
+ /opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
112
+ warnings.warn(
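Aside (not part of the uploaded files): the two recurring warnings in this output log each name their own remedy. A minimal hedged sketch of how they are usually addressed in the training script, assuming the script owns process startup:

# Hedged sketch: resolving the warnings logged above.
import os

# 1) The huggingface/tokenizers fork warning: set the variable before any
#    worker processes are forked (e.g. before creating DataLoaders).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# 2) The PEFT FutureWarning: call the renamed helper instead of the
#    deprecated prepare_model_for_int8_training.
from peft import prepare_model_for_kbit_training

# model = prepare_model_for_kbit_training(model)  # `model` is the quantized base model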
wandb/latest-run/files/requirements.txt ADDED
@@ -0,0 +1,240 @@
1
+ accelerate==0.21.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ apex==0.1
5
+ appdirs==1.4.4
6
+ argparse==1.4.0
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==22.2.0
10
+ awscli==1.27.132
11
+ backcall==0.2.0
12
+ backports.functools-lru-cache==1.6.4
13
+ bcrypt==4.0.1
14
+ bitsandbytes==0.41.0
15
+ blis==0.7.9
16
+ bokeh==3.1.1
17
+ boto3==1.26.132
18
+ botocore==1.29.132
19
+ brotlipy==0.7.0
20
+ cached-property==1.5.2
21
+ catalogue==2.0.8
22
+ certifi==2023.5.7
23
+ cffi==1.15.1
24
+ charset-normalizer==3.1.0
25
+ click==8.1.3
26
+ cloudpickle==2.2.1
27
+ cmake==3.26.3
28
+ colorama==0.4.4
29
+ comm==0.1.3
30
+ commonmark==0.9.1
31
+ conda-content-trust==0.1.3
32
+ conda-package-handling==2.0.2
33
+ conda-package-streaming==0.7.0
34
+ conda==23.1.0
35
+ confection==0.0.4
36
+ contextlib2==21.6.0
37
+ contourpy==1.0.7
38
+ cryptography==40.0.1
39
+ cycler==0.11.0
40
+ cymem==2.0.7
41
+ cython==0.29.34
42
+ datasets==2.14.0
43
+ debugpy==1.6.7
44
+ decorator==5.1.1
45
+ deepspeed==0.6.1+1ea3d4b
46
+ dgl==1.1.0+cu118
47
+ dill==0.3.6
48
+ docker-pycreds==0.4.0
49
+ docutils==0.15.2
50
+ einops==0.6.1
51
+ executing==1.2.0
52
+ fastai==2.7.12
53
+ fastcore==1.5.29
54
+ fastdownload==0.0.7
55
+ fastprogress==1.0.3
56
+ filelock==3.12.0
57
+ flash-attn==0.2.8
58
+ fonttools==4.39.4
59
+ frozenlist==1.4.0
60
+ fsspec==2023.5.0
61
+ future==0.18.3
62
+ gevent==22.10.2
63
+ gitdb==4.0.10
64
+ gitpython==3.1.32
65
+ gmpy2==2.1.2
66
+ google-pasta==0.2.0
67
+ greenlet==2.0.2
68
+ h5py==3.8.0
69
+ hjson==3.1.0
70
+ horovod==0.26.1
71
+ huggingface-hub==0.16.4
72
+ idna==3.4
73
+ imageio==2.28.1
74
+ importlib-metadata==4.13.0
75
+ inotify-simple==1.2.1
76
+ ipykernel==6.23.0
77
+ ipython==8.13.2
78
+ ipywidgets==8.0.7
79
+ jedi==0.18.2
80
+ jinja2==3.1.2
81
+ jmespath==1.0.1
82
+ joblib==1.2.0
83
+ jsonpatch==1.32
84
+ jsonpointer==2.3
85
+ jsonschema==4.17.3
86
+ jupyter-client==8.2.0
87
+ jupyter-core==5.3.0
88
+ jupyterlab-widgets==3.0.8
89
+ kiwisolver==1.4.4
90
+ langcodes==3.3.0
91
+ libmambapy==1.4.1
92
+ lit==16.0.3
93
+ llvmlite==0.39.1
94
+ mamba==1.4.1
95
+ markupsafe==2.1.2
96
+ matplotlib-inline==0.1.6
97
+ matplotlib==3.7.1
98
+ mpi4py==3.1.4
99
+ mpmath==1.3.0
100
+ multidict==6.0.4
101
+ multiprocess==0.70.14
102
+ munkres==1.1.4
103
+ murmurhash==1.0.9
104
+ nest-asyncio==1.5.6
105
+ networkx==3.1
106
+ ninja==1.11.1
107
+ numba==0.56.4
108
+ numpy==1.23.5
109
+ opencv-python==4.7.0
110
+ packaging==23.1
111
+ pandas==2.0.1
112
+ paramiko==3.1.0
113
+ parso==0.8.3
114
+ pathos==0.3.0
115
+ pathtools==0.1.2
116
+ pathy==0.10.1
117
+ patsy==0.5.3
118
+ peft==0.5.0.dev0
119
+ pexpect==4.8.0
120
+ pickleshare==0.7.5
121
+ pillow==9.4.0
122
+ pip==23.1.2
123
+ platformdirs==3.5.0
124
+ plotly==5.14.1
125
+ pluggy==1.0.0
126
+ ply==3.11
127
+ pooch==1.7.0
128
+ pox==0.3.2
129
+ ppft==1.7.6.6
130
+ preshed==3.0.8
131
+ prompt-toolkit==3.0.38
132
+ protobuf3-to-dict==0.1.5
133
+ protobuf==3.20.3
134
+ psutil==5.9.5
135
+ ptyprocess==0.7.0
136
+ pure-eval==0.2.2
137
+ py-cpuinfo==9.0.0
138
+ pyarrow==12.0.0
139
+ pyasn1==0.4.8
140
+ pybind11-global==2.10.4
141
+ pybind11==2.10.4
142
+ pycosat==0.6.4
143
+ pycparser==2.21
144
+ pydantic==1.10.7
145
+ pyfunctional==1.4.3
146
+ pygments==2.15.1
147
+ pyinstrument-cext==0.2.4
148
+ pyinstrument==3.4.2
149
+ pynacl==1.5.0
150
+ pyopenssl==23.1.1
151
+ pyparsing==3.0.9
152
+ pyqt5-sip==12.11.0
153
+ pyqt5==5.15.7
154
+ pyrsistent==0.19.3
155
+ pysocks==1.7.1
156
+ python-dateutil==2.8.2
157
+ pytz==2023.3
158
+ pyyaml==5.4.1
159
+ pyzmq==25.0.2
160
+ regex==2023.6.3
161
+ requests==2.28.2
162
+ retrying==1.3.4
163
+ rich==12.6.0
164
+ rsa==4.7.2
165
+ ruamel.yaml.clib==0.2.7
166
+ ruamel.yaml==0.17.21
167
+ s3fs==0.4.2
168
+ s3transfer==0.6.1
169
+ safetensors==0.3.1
170
+ sagemaker-experiments==0.1.43
171
+ sagemaker-pytorch-training==2.8.0
172
+ sagemaker-training==4.5.0
173
+ sagemaker==2.154.0
174
+ schema==0.7.5
175
+ scikit-learn==1.2.2
176
+ scipy==1.10.1
177
+ seaborn==0.12.2
178
+ sentry-sdk==1.28.1
179
+ setproctitle==1.3.2
180
+ setuptools==65.6.3
181
+ shap==0.41.0
182
+ shellingham==1.5.1
183
+ sip==6.7.9
184
+ six==1.16.0
185
+ slicer==0.0.7
186
+ smart-open==5.2.1
187
+ smclarify==0.5
188
+ smdebug-rulesconfig==1.0.1
189
+ smdebug==1.0.34
190
+ smdistributed-dataparallel==1.8.0
191
+ smdistributed-modelparallel==1.15.0
192
+ smmap==5.0.0
193
+ spacy-legacy==3.0.12
194
+ spacy-loggers==1.0.4
195
+ spacy==3.5.2
196
+ srsly==2.4.6
197
+ stack-data==0.6.2
198
+ statsmodels==0.14.0
199
+ sympy==1.11.1
200
+ tabulate==0.9.0
201
+ tblib==1.7.0
202
+ tenacity==8.2.2
203
+ thinc==8.1.10
204
+ threadpoolctl==3.1.0
205
+ tokenizers==0.13.3
206
+ toml==0.10.2
207
+ tomli==2.0.1
208
+ toolz==0.12.0
209
+ torch==2.0.0
210
+ torchaudio==2.0.1
211
+ torchdata==0.6.0
212
+ torchnet==0.0.4
213
+ torchtext==0.15.1
214
+ torchvision==0.15.1
215
+ tornado==6.3
216
+ tqdm==4.65.0
217
+ traitlets==5.9.0
218
+ transformers==4.31.0
219
+ triton==2.0.0.dev20221202
220
+ trl==0.4.7
221
+ typer==0.7.0
222
+ typing-extensions==4.5.0
223
+ tzdata==2023.3
224
+ unicodedata2==15.0.0
225
+ urllib3==1.26.15
226
+ visdom==0.2.4
227
+ wandb==0.15.7
228
+ wasabi==1.1.1
229
+ wcwidth==0.2.6
230
+ websocket-client==1.5.1
231
+ werkzeug==2.3.4
232
+ wheel==0.40.0
233
+ widgetsnbextension==4.0.8
234
+ xxhash==3.2.0
235
+ xyzservices==2023.2.0
236
+ yarl==1.9.2
237
+ zipp==3.15.0
238
+ zope.event==4.6
239
+ zope.interface==6.0
240
+ zstandard==0.19.0
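Aside (not part of the uploaded files): this requirements.txt pins the full pip environment of the run. A hedged sketch, assuming the file has been downloaded locally as requirements.txt, for comparing the pinned versions of the key training packages against the current environment:

# Hedged sketch: compare a local environment against the pinned snapshot above.
from importlib.metadata import version, PackageNotFoundError

KEY_PACKAGES = ["transformers", "peft", "bitsandbytes", "accelerate", "trl", "datasets"]

pinned = {}
with open("requirements.txt") as fh:
    for line in fh:
        line = line.strip()
        if "==" in line:
            name, _, ver = line.partition("==")
            pinned[name.lower()] = ver

for pkg in KEY_PACKAGES:
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        installed = "not installed"
    print(f"{pkg}: pinned {pinned.get(pkg, '?')} / installed {installed}")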
wandb/latest-run/files/wandb-metadata.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
3
+ "python": "3.10.8",
4
+ "heartbeatAt": "2023-07-27T15:49:36.888553",
5
+ "startedAt": "2023-07-27T15:49:36.344100",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
12
+ "username": "root",
13
+ "executable": "/opt/conda/bin/python",
14
+ "cpu_count": 4,
15
+ "cpu_count_logical": 8,
16
+ "cpu_freq": {
17
+ "current": 3100.120625,
18
+ "min": 0.0,
19
+ "max": 0.0
20
+ },
21
+ "cpu_freq_per_core": [
22
+ {
23
+ "current": 3107.574,
24
+ "min": 0.0,
25
+ "max": 0.0
26
+ },
27
+ {
28
+ "current": 3102.47,
29
+ "min": 0.0,
30
+ "max": 0.0
31
+ },
32
+ {
33
+ "current": 3099.63,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 3099.058,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 3100.716,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 3099.393,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 3099.988,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 3092.136,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ }
62
+ ],
63
+ "disk": {
64
+ "total": 32.0,
65
+ "used": 0.414398193359375
66
+ },
67
+ "gpu": "Tesla T4",
68
+ "gpu_count": 1,
69
+ "gpu_devices": [
70
+ {
71
+ "name": "Tesla T4",
72
+ "memory_total": 15843721216
73
+ }
74
+ ],
75
+ "memory": {
76
+ "total": 30.947834014892578
77
+ }
78
+ }
wandb/latest-run/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
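Aside (not part of the uploaded files): the throughput figures in this summary follow from the batch settings recorded in debug.log below (per_device_train_batch_size=4, gradient_accumulation_steps=4, max_steps=500). A small sanity-check sketch:

# Sanity check of the summary above: throughput from the logged batch settings.
train_runtime = 7012.9274          # seconds, from train/train_runtime
steps = 500                        # train/global_step
effective_batch = 4 * 4            # per_device_train_batch_size * gradient_accumulation_steps

print(round(steps / train_runtime, 3))                     # ~0.071 -> train/train_steps_per_second
print(round(steps * effective_batch / train_runtime, 3))   # ~1.141 -> train/train_samples_per_second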
wandb/latest-run/logs/debug-internal.log ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/latest-run/logs/debug.log ADDED
@@ -0,0 +1,76 @@
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
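Aside (not part of the uploaded files): the config_cb entries above record the full quantization and Trainer setup of this run. A hedged reconstruction of those logged values; this is a sketch, not the original training script, and the dataset, tokenizer, and LoRA adapter configuration are not captured in this log, so they are omitted:

# Hedged reconstruction of the settings logged by config_cb above.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

bnb_config = BitsAndBytesConfig(            # matches the logged quantization_config
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/falcon-7b-sharded-bf16",      # _name_or_path in the logged config
    quantization_config=bnb_config,
    trust_remote_code=True,                 # Falcon's custom RWForCausalLM code (see auto_map above)
)

training_args = TrainingArguments(          # values taken from the logged TrainingArguments
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_steps=500,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    fp16=True,
    optim="paged_adamw_32bit",
    logging_steps=10,
    save_steps=10,
    group_by_length=True,
    report_to=["wandb"],
)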
wandb/run-20230727_154936-a41qiywg/files/conda-environment.yaml ADDED
@@ -0,0 +1,498 @@
1
+ name: base
2
+ channels:
3
+ - fastai
4
+ - dglteam/label/cu118
5
+ - nvidia/label/cuda-11.8.0
6
+ - https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
7
+ - conda-forge
8
+ dependencies:
9
+ - _libgcc_mutex=0.1=conda_forge
10
+ - _openmp_mutex=4.5=2_kmp_llvm
11
+ - alsa-lib=1.2.8=h166bdaf_0
12
+ - aom=3.5.0=h27087fc_0
13
+ - asttokens=2.2.1=pyhd8ed1ab_0
14
+ - attr=2.5.1=h166bdaf_1
15
+ - aws-ofi-nccl-dlc=1.5.0=aws_0
16
+ - awscli=1.27.132=py310hff52083_0
17
+ - backcall=0.2.0=pyh9f0ad1d_0
18
+ - backports=1.0=pyhd8ed1ab_3
19
+ - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
20
+ - blas=1.0=mkl
21
+ - bokeh=3.1.1=pyhd8ed1ab_0
22
+ - boto3=1.26.132=pyhd8ed1ab_0
23
+ - botocore=1.29.132=pyhd8ed1ab_0
24
+ - brotli=1.0.9=h166bdaf_8
25
+ - brotli-bin=1.0.9=h166bdaf_8
26
+ - brotlipy=0.7.0=py310h5764c6d_1005
27
+ - bzip2=1.0.8=h7f98852_4
28
+ - c-ares=1.18.1=h7f98852_0
29
+ - ca-certificates=2023.5.7=hbcca054_0
30
+ - cached-property=1.5.2=hd8ed1ab_1
31
+ - cached_property=1.5.2=pyha770c72_1
32
+ - cairo=1.16.0=ha61ee94_1014
33
+ - catalogue=2.0.8=py310hff52083_1
34
+ - certifi=2023.5.7=pyhd8ed1ab_0
35
+ - cffi=1.15.1=py310h255011f_3
36
+ - charset-normalizer=3.1.0=pyhd8ed1ab_0
37
+ - click=8.1.3=unix_pyhd8ed1ab_2
38
+ - cloudpickle=2.2.1=pyhd8ed1ab_0
39
+ - colorama=0.4.4=pyh9f0ad1d_0
40
+ - comm=0.1.3=pyhd8ed1ab_0
41
+ - commonmark=0.9.1=py_0
42
+ - conda=23.1.0=py310hff52083_0
43
+ - conda-content-trust=0.1.3=pyhd8ed1ab_0
44
+ - conda-package-handling=2.0.2=pyh38be061_0
45
+ - conda-package-streaming=0.7.0=pyhd8ed1ab_1
46
+ - confection=0.0.4=py310hfdc917e_1
47
+ - contourpy=1.0.7=py310hdf3cbec_0
48
+ - cryptography=40.0.1=py310h34c0648_0
49
+ - cuda-cccl=11.8.89=0
50
+ - cuda-command-line-tools=11.8.0=0
51
+ - cuda-compiler=11.8.0=0
52
+ - cuda-cudart=11.8.89=0
53
+ - cuda-cudart-dev=11.8.89=0
54
+ - cuda-cuobjdump=11.8.86=0
55
+ - cuda-cupti=11.8.87=0
56
+ - cuda-cuxxfilt=11.8.86=0
57
+ - cuda-documentation=11.8.86=0
58
+ - cuda-driver-dev=11.8.89=0
59
+ - cuda-gdb=11.8.86=0
60
+ - cuda-libraries=11.8.0=0
61
+ - cuda-libraries-dev=11.8.0=0
62
+ - cuda-memcheck=11.8.86=0
63
+ - cuda-nsight=11.8.86=0
64
+ - cuda-nsight-compute=11.8.0=0
65
+ - cuda-nvcc=11.8.89=0
66
+ - cuda-nvdisasm=11.8.86=0
67
+ - cuda-nvml-dev=11.8.86=0
68
+ - cuda-nvprof=11.8.87=0
69
+ - cuda-nvprune=11.8.86=0
70
+ - cuda-nvrtc=11.8.89=0
71
+ - cuda-nvrtc-dev=11.8.89=0
72
+ - cuda-nvtx=11.8.86=0
73
+ - cuda-nvvp=11.8.87=0
74
+ - cuda-profiler-api=11.8.86=0
75
+ - cuda-runtime=11.8.0=0
76
+ - cuda-sanitizer-api=11.8.86=0
77
+ - cuda-toolkit=11.8.0=0
78
+ - cuda-tools=11.8.0=0
79
+ - cuda-visual-tools=11.8.0=0
80
+ - cycler=0.11.0=pyhd8ed1ab_0
81
+ - cymem=2.0.7=py310hd8f1fbe_1
82
+ - cython=0.29.34=py310heca2aa9_0
83
+ - cython-blis=0.7.9=py310hde88566_1
84
+ - dbus=1.13.6=h5008d03_3
85
+ - debugpy=1.6.7=py310heca2aa9_0
86
+ - decorator=5.1.1=pyhd8ed1ab_0
87
+ - dgl=1.1.0.cu118=py310_0
88
+ - docutils=0.15.2=py310hff52083_6
89
+ - executing=1.2.0=pyhd8ed1ab_0
90
+ - expat=2.5.0=hcb278e6_1
91
+ - fastai=2.7.12=py_0
92
+ - fastcore=1.5.29=py_0
93
+ - fastdownload=0.0.7=py_0
94
+ - fastprogress=1.0.3=py_0
95
+ - ffmpeg=5.1.2=gpl_h8dda1f0_106
96
+ - fftw=3.3.10=nompi_hc118613_107
97
+ - filelock=3.12.0=pyhd8ed1ab_0
98
+ - fmt=9.1.0=h924138e_0
99
+ - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
100
+ - font-ttf-inconsolata=3.000=h77eed37_0
101
+ - font-ttf-source-code-pro=2.038=h77eed37_0
102
+ - font-ttf-ubuntu=0.83=hab24e00_0
103
+ - fontconfig=2.14.2=h14ed4e7_0
104
+ - fonts-conda-ecosystem=1=0
105
+ - fonts-conda-forge=1=0
106
+ - fonttools=4.39.4=py310h2372a71_0
107
+ - freeglut=3.2.2=h9c3ff4c_1
108
+ - freetype=2.12.1=hca18f0e_1
109
+ - future=0.18.3=pyhd8ed1ab_0
110
+ - gds-tools=1.4.0.31=0
111
+ - gettext=0.21.1=h27087fc_0
112
+ - glib=2.76.2=hfc55251_0
113
+ - glib-tools=2.76.2=hfc55251_0
114
+ - gmp=6.2.1=h58526e2_0
115
+ - gmpy2=2.1.2=py310h3ec546c_1
116
+ - gnutls=3.7.8=hf3e180e_0
117
+ - graphite2=1.3.13=h58526e2_1001
118
+ - gst-plugins-base=1.22.0=h4243ec0_2
119
+ - gstreamer=1.22.0=h25f0c4b_2
120
+ - gstreamer-orc=0.4.33=h166bdaf_0
121
+ - h5py=3.8.0=nompi_py310ha66b2ad_101
122
+ - harfbuzz=6.0.0=h8e241bc_0
123
+ - hdf5=1.14.0=nompi_hb72d44e_103
124
+ - icu=70.1=h27087fc_0
125
+ - idna=3.4=pyhd8ed1ab_0
126
+ - imageio=2.28.1=pyh24c5eb1_0
127
+ - importlib_metadata=6.6.0=hd8ed1ab_0
128
+ - ipykernel=6.23.0=pyh210e3f2_0
129
+ - ipython=8.13.2=pyh41d4057_0
130
+ - jack=1.9.22=h11f4161_0
131
+ - jasper=2.0.33=h0ff4b12_1
132
+ - jedi=0.18.2=pyhd8ed1ab_0
133
+ - jinja2=3.1.2=pyhd8ed1ab_1
134
+ - jmespath=1.0.1=pyhd8ed1ab_0
135
+ - joblib=1.2.0=pyhd8ed1ab_0
136
+ - jpeg=9e=h166bdaf_2
137
+ - jupyter_client=8.2.0=pyhd8ed1ab_0
138
+ - jupyter_core=5.3.0=py310hff52083_0
139
+ - keyutils=1.6.1=h166bdaf_0
140
+ - kiwisolver=1.4.4=py310hbf28c38_1
141
+ - krb5=1.20.1=h81ceb04_0
142
+ - lame=3.100=h166bdaf_1003
143
+ - langcodes=3.3.0=pyhd8ed1ab_0
144
+ - lcms2=2.15=hfd0df8a_0
145
+ - ld_impl_linux-64=2.40=h41732ed_0
146
+ - lerc=4.0.0=h27087fc_0
147
+ - libaec=1.0.6=hcb278e6_1
148
+ - libarchive=3.6.2=h3d51595_0
149
+ - libblas=3.9.0=1_h86c2bf4_netlib
150
+ - libbrotlicommon=1.0.9=h166bdaf_8
151
+ - libbrotlidec=1.0.9=h166bdaf_8
152
+ - libbrotlienc=1.0.9=h166bdaf_8
153
+ - libcap=2.67=he9d0100_0
154
+ - libcblas=3.9.0=5_h92ddd45_netlib
155
+ - libclang=15.0.7=default_had23c3d_1
156
+ - libclang13=15.0.7=default_h3e3d535_1
157
+ - libcublas=11.11.3.6=0
158
+ - libcublas-dev=11.11.3.6=0
159
+ - libcufft=10.9.0.58=0
160
+ - libcufft-dev=10.9.0.58=0
161
+ - libcufile=1.4.0.31=0
162
+ - libcufile-dev=1.4.0.31=0
163
+ - libcups=2.3.3=h36d4200_3
164
+ - libcurand=10.3.0.86=0
165
+ - libcurand-dev=10.3.0.86=0
166
+ - libcurl=7.88.1=hdc1c0ab_1
167
+ - libcusolver=11.4.1.48=0
168
+ - libcusolver-dev=11.4.1.48=0
169
+ - libcusparse=11.7.5.86=0
170
+ - libcusparse-dev=11.7.5.86=0
171
+ - libdb=6.2.32=h9c3ff4c_0
172
+ - libdeflate=1.17=h0b41bf4_0
173
+ - libdrm=2.4.114=h166bdaf_0
174
+ - libedit=3.1.20191231=he28a2e2_2
175
+ - libev=4.33=h516909a_1
176
+ - libevent=2.1.10=h28343ad_4
177
+ - libexpat=2.5.0=hcb278e6_1
178
+ - libffi=3.4.2=h7f98852_5
179
+ - libflac=1.4.2=h27087fc_0
180
+ - libgcc=7.2.0=h69d50b8_2
181
+ - libgcc-ng=12.2.0=h65d4601_19
182
+ - libgcrypt=1.10.1=h166bdaf_0
183
+ - libgfortran-ng=12.2.0=h69a702a_19
184
+ - libgfortran5=12.2.0=h337968e_19
185
+ - libglib=2.76.2=hebfc3b9_0
186
+ - libglu=9.0.0=he1b5a44_1001
187
+ - libgomp=12.2.0=h65d4601_19
188
+ - libgpg-error=1.46=h620e276_0
189
+ - libhwloc=2.9.1=hd6dc26d_0
190
+ - libiconv=1.17=h166bdaf_0
191
+ - libidn2=2.3.4=h166bdaf_0
192
+ - libjpeg-turbo=2.1.4=h166bdaf_0
193
+ - liblapack=3.9.0=5_h92ddd45_netlib
194
+ - liblapacke=3.9.0=5_h92ddd45_netlib
195
+ - libllvm11=11.1.0=he0ac6c6_5
196
+ - libllvm15=15.0.7=hadd5161_1
197
+ - libllvm16=16.0.1=hadd5161_0
198
+ - libmamba=1.4.1=hcea66bb_0
199
+ - libmambapy=1.4.1=py310h1428755_0
200
+ - libnghttp2=1.52.0=h61bc06f_0
201
+ - libnpp=11.8.0.86=0
202
+ - libnpp-dev=11.8.0.86=0
203
+ - libnsl=2.0.0=h7f98852_0
204
+ - libnvjpeg=11.9.0.86=0
205
+ - libnvjpeg-dev=11.9.0.86=0
206
+ - libogg=1.3.4=h7f98852_1
207
+ - libopenblas=0.3.21=pthreads_h78a6416_3
208
+ - libopencv=4.7.0=py310hb48cf42_1
209
+ - libopus=1.3.1=h7f98852_1
210
+ - libpciaccess=0.17=h166bdaf_0
211
+ - libpng=1.6.39=h753d276_0
212
+ - libpq=15.3=hbcd7760_0
213
+ - libprotobuf=3.21.12=h3eb15da_0
214
+ - libsndfile=1.2.0=hb75c966_0
215
+ - libsodium=1.0.18=h36c2ea0_1
216
+ - libsolv=0.7.23=h3eb15da_0
217
+ - libsqlite=3.40.0=h753d276_0
218
+ - libssh2=1.10.0=hf14f497_3
219
+ - libstdcxx-ng=12.2.0=h46fd767_19
220
+ - libsystemd0=253=h8c4010b_1
221
+ - libtasn1=4.19.0=h166bdaf_0
222
+ - libtiff=4.5.0=h6adf6a1_2
223
+ - libtool=2.4.7=h27087fc_0
224
+ - libudev1=253=h0b41bf4_1
225
+ - libunistring=0.9.10=h7f98852_0
226
+ - libuuid=2.38.1=h0b41bf4_0
227
+ - libuv=1.44.2=h166bdaf_0
228
+ - libva=2.18.0=h0b41bf4_0
229
+ - libvorbis=1.3.7=h9c3ff4c_0
230
+ - libvpx=1.11.0=h9c3ff4c_3
231
+ - libwebp-base=1.3.0=h0b41bf4_0
232
+ - libxcb=1.13=h7f98852_1004
233
+ - libxkbcommon=1.5.0=h79f4944_1
234
+ - libxml2=2.10.3=hca2bb57_4
235
+ - libzlib=1.2.13=h166bdaf_4
236
+ - llvm-openmp=16.0.3=h4dfa4b3_0
237
+ - llvmlite=0.39.1=py310h58363a5_1
238
+ - lz4-c=1.9.4=hcb278e6_0
239
+ - lzo=2.10=h516909a_1000
240
+ - mamba=1.4.1=py310h51d5547_0
241
+ - markupsafe=2.1.2=py310h1fa729e_0
242
+ - matplotlib=3.7.1=py310hff52083_0
243
+ - matplotlib-base=3.7.1=py310he60537e_0
244
+ - matplotlib-inline=0.1.6=pyhd8ed1ab_0
245
+ - mkl=2023.1.0=h84fe81f_48680
246
+ - mkl-include=2023.1.0=h84fe81f_48680
247
+ - mpc=1.3.1=hfe3b2da_0
248
+ - mpfr=4.2.0=hb012696_0
249
+ - mpg123=1.31.3=hcb278e6_0
250
+ - mpi=1.0=openmpi
251
+ - mpi4py=3.1.4=py310h6075a6b_0
252
+ - mpmath=1.3.0=pyhd8ed1ab_0
253
+ - munkres=1.1.4=pyh9f0ad1d_0
254
+ - murmurhash=1.0.9=py310hd8f1fbe_1
255
+ - mysql-common=8.0.32=hf1915f5_2
256
+ - mysql-libs=8.0.32=hca2cd23_2
257
+ - ncurses=6.3=h27087fc_1
258
+ - nest-asyncio=1.5.6=pyhd8ed1ab_0
259
+ - nettle=3.8.1=hc379101_1
260
+ - networkx=3.1=pyhd8ed1ab_0
261
+ - nsight-compute=2022.3.0.22=0
262
+ - nspr=4.35=h27087fc_0
263
+ - nss=3.89=he45b914_0
264
+ - numba=0.56.4=py310h0e39c9b_1
265
+ - numpy=1.23.5=py310h53a5b5f_0
266
+ - opencv=4.7.0=py310hff52083_1
267
+ - openh264=2.3.1=hcb278e6_2
268
+ - openjpeg=2.5.0=hfec8fc6_2
269
+ - openmpi=4.1.5=h414af15_101
270
+ - openssl=3.1.0=hd590300_3
271
+ - p11-kit=0.24.1=hc5aa10d_0
272
+ - packaging=23.1=pyhd8ed1ab_0
273
+ - pandas=2.0.1=py310h7cbd5c2_1
274
+ - parso=0.8.3=pyhd8ed1ab_0
275
+ - pathy=0.10.1=pyhd8ed1ab_0
276
+ - patsy=0.5.3=pyhd8ed1ab_0
277
+ - pcre2=10.40=hc3806b6_0
278
+ - pexpect=4.8.0=pyh1a96a4e_2
279
+ - pickleshare=0.7.5=py_1003
280
+ - pillow=9.4.0=py310h023d228_1
281
+ - pixman=0.40.0=h36c2ea0_0
282
+ - platformdirs=3.5.0=pyhd8ed1ab_0
283
+ - plotly=5.14.1=pyhd8ed1ab_0
284
+ - pluggy=1.0.0=pyhd8ed1ab_5
285
+ - ply=3.11=py_1
286
+ - pooch=1.7.0=pyha770c72_3
287
+ - preshed=3.0.8=py310hd8f1fbe_1
288
+ - prompt-toolkit=3.0.38=pyha770c72_0
289
+ - prompt_toolkit=3.0.38=hd8ed1ab_0
290
+ - psutil=5.9.5=py310h1fa729e_0
291
+ - pthread-stubs=0.4=h36c2ea0_1001
292
+ - ptyprocess=0.7.0=pyhd3deb0d_0
293
+ - pulseaudio=16.1=hcb278e6_3
294
+ - pulseaudio-client=16.1=h5195f5e_3
295
+ - pulseaudio-daemon=16.1=ha8d29e2_3
296
+ - pure_eval=0.2.2=pyhd8ed1ab_0
297
+ - py-opencv=4.7.0=py310hfdc917e_1
298
+ - pyasn1=0.4.8=py_0
299
+ - pybind11=2.10.4=py310hdf3cbec_0
300
+ - pybind11-abi=4=hd8ed1ab_3
301
+ - pybind11-global=2.10.4=py310hdf3cbec_0
302
+ - pycosat=0.6.4=py310h5764c6d_1
303
+ - pycparser=2.21=pyhd8ed1ab_0
304
+ - pydantic=1.10.7=py310h1fa729e_0
305
+ - pygments=2.15.1=pyhd8ed1ab_0
306
+ - pyopenssl=23.1.1=pyhd8ed1ab_0
307
+ - pyparsing=3.0.9=pyhd8ed1ab_0
308
+ - pyqt=5.15.7=py310hab646b1_3
309
+ - pyqt5-sip=12.11.0=py310heca2aa9_3
310
+ - pysocks=1.7.1=pyha2e5f31_6
311
+ - python=3.10.8=h4a9ceb5_0_cpython
312
+ - python-dateutil=2.8.2=pyhd8ed1ab_0
313
+ - python-tzdata=2023.3=pyhd8ed1ab_0
314
+ - python_abi=3.10=3_cp310
315
+ - pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
316
+ - pytorch-cuda=11.8=h7e8668a_3
317
+ - pytorch-mutex=1.0=cuda
318
+ - pytz=2023.3=pyhd8ed1ab_0
319
+ - pyyaml=5.4.1=py310h5764c6d_4
320
+ - pyzmq=25.0.2=py310h059b190_0
321
+ - qt-main=5.15.8=h5d23da1_6
322
+ - readline=8.2=h8228510_1
323
+ - reproc=14.2.4=h0b41bf4_0
324
+ - reproc-cpp=14.2.4=hcb278e6_0
325
+ - requests=2.28.2=pyhd8ed1ab_1
326
+ - rhash=1.4.3=h166bdaf_0
327
+ - rich=12.6.0=pyhd8ed1ab_0
328
+ - rsa=4.7.2=pyh44b312d_0
329
+ - ruamel.yaml=0.17.21=py310h1fa729e_3
330
+ - ruamel.yaml.clib=0.2.7=py310h1fa729e_1
331
+ - s3transfer=0.6.1=pyhd8ed1ab_0
332
+ - scikit-learn=1.2.2=py310h41b6a48_1
333
+ - scipy=1.10.1=py310h8deb116_2
334
+ - seaborn=0.12.2=hd8ed1ab_0
335
+ - seaborn-base=0.12.2=pyhd8ed1ab_0
336
+ - setuptools=65.6.3=pyhd8ed1ab_0
337
+ - shap=0.41.0=py310h769672d_0
338
+ - shellingham=1.5.1=pyhd8ed1ab_0
339
+ - sip=6.7.9=py310hc6cd4ac_0
340
+ - six=1.16.0=pyh6c4a22f_0
341
+ - slicer=0.0.7=pyhd8ed1ab_0
342
+ - smart_open=5.2.1=pyhd8ed1ab_0
343
+ - spacy=3.5.2=py310h5a539fb_0
344
+ - spacy-legacy=3.0.12=pyhd8ed1ab_0
345
+ - spacy-loggers=1.0.4=pyhd8ed1ab_0
346
+ - srsly=2.4.6=py310heca2aa9_0
347
+ - stack_data=0.6.2=pyhd8ed1ab_0
348
+ - statsmodels=0.14.0=py310h278f3c1_1
349
+ - svt-av1=1.4.1=hcb278e6_0
350
+ - sympy=1.11.1=pypyh9d50eac_103
351
+ - tbb=2021.9.0=hf52228f_0
352
+ - tenacity=8.2.2=pyhd8ed1ab_0
353
+ - thinc=8.1.10=py310hfb6f7a9_0
354
+ - threadpoolctl=3.1.0=pyh8a188c0_0
355
+ - tk=8.6.12=h27826a3_0
356
+ - toml=0.10.2=pyhd8ed1ab_0
357
+ - tomli=2.0.1=pyhd8ed1ab_0
358
+ - toolz=0.12.0=pyhd8ed1ab_0
359
+ - torchaudio=2.0.1=py310_cu118
360
+ - torchdata=0.6.0=py310
361
+ - torchtext=0.15.1=py310
362
+ - torchvision=0.15.1=py310_cu118
363
+ - tornado=6.3=py310h1fa729e_0
364
+ - tqdm=4.65.0=pyhd8ed1ab_1
365
+ - traitlets=5.9.0=pyhd8ed1ab_0
366
+ - typer=0.7.0=pyhd8ed1ab_0
367
+ - typing=3.10.0.0=pyhd8ed1ab_0
368
+ - typing-extensions=4.5.0=hd8ed1ab_0
369
+ - typing_extensions=4.5.0=pyha770c72_0
370
+ - tzdata=2023c=h71feb2d_0
371
+ - unicodedata2=15.0.0=py310h5764c6d_0
372
+ - urllib3=1.26.15=pyhd8ed1ab_0
373
+ - wasabi=1.1.1=py310hff52083_1
374
+ - wcwidth=0.2.6=pyhd8ed1ab_0
375
+ - wheel=0.40.0=pyhd8ed1ab_0
376
+ - x264=1!164.3095=h166bdaf_2
377
+ - x265=3.5=h924138e_3
378
+ - xcb-util=0.4.0=h516909a_0
379
+ - xcb-util-image=0.4.0=h166bdaf_0
380
+ - xcb-util-keysyms=0.4.0=h516909a_0
381
+ - xcb-util-renderutil=0.3.9=h166bdaf_0
382
+ - xcb-util-wm=0.4.1=h516909a_0
383
+ - xkeyboard-config=2.38=h0b41bf4_0
384
+ - xorg-fixesproto=5.0=h7f98852_1002
385
+ - xorg-inputproto=2.3.2=h7f98852_1002
386
+ - xorg-kbproto=1.0.7=h7f98852_1002
387
+ - xorg-libice=1.0.10=h7f98852_0
388
+ - xorg-libsm=1.2.3=hd9c2040_1000
389
+ - xorg-libx11=1.8.4=h0b41bf4_0
390
+ - xorg-libxau=1.0.9=h7f98852_0
391
+ - xorg-libxdmcp=1.1.3=h7f98852_0
392
+ - xorg-libxext=1.3.4=h0b41bf4_2
393
+ - xorg-libxfixes=5.0.3=h7f98852_1004
394
+ - xorg-libxi=1.7.10=h7f98852_0
395
+ - xorg-libxrender=0.9.10=h7f98852_1003
396
+ - xorg-renderproto=0.11.1=h7f98852_1002
397
+ - xorg-xextproto=7.3.0=h0b41bf4_1003
398
+ - xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
399
+ - xorg-xproto=7.0.31=h7f98852_1007
400
+ - xyzservices=2023.2.0=pyhd8ed1ab_0
401
+ - xz=5.2.6=h166bdaf_0
402
+ - yaml=0.2.5=h7f98852_2
403
+ - yaml-cpp=0.7.0=h27087fc_2
404
+ - zeromq=4.3.4=h9c3ff4c_1
405
+ - zipp=3.15.0=pyhd8ed1ab_0
406
+ - zlib=1.2.13=h166bdaf_4
407
+ - zstandard=0.19.0=py310hdeb6495_1
408
+ - zstd=1.5.2=h3eb15da_6
409
+ - pip:
410
+ - accelerate==0.21.0
411
+ - aiohttp==3.8.5
412
+ - aiosignal==1.3.1
413
+ - apex==0.1
414
+ - appdirs==1.4.4
415
+ - argparse==1.4.0
416
+ - async-timeout==4.0.2
417
+ - attrs==22.2.0
418
+ - bcrypt==4.0.1
419
+ - bitsandbytes==0.41.0
420
+ - cmake==3.26.3
421
+ - contextlib2==21.6.0
422
+ - datasets==2.14.0
423
+ - deepspeed==0.6.1+1ea3d4b
424
+ - dill==0.3.6
425
+ - docker-pycreds==0.4.0
426
+ - einops==0.6.1
427
+ - flash-attn==0.2.8
428
+ - frozenlist==1.4.0
429
+ - fsspec==2023.5.0
430
+ - gevent==22.10.2
431
+ - gitdb==4.0.10
432
+ - gitpython==3.1.32
433
+ - google-pasta==0.2.0
434
+ - greenlet==2.0.2
435
+ - hjson==3.1.0
436
+ - horovod==0.26.1
437
+ - huggingface-hub==0.16.4
438
+ - importlib-metadata==4.13.0
439
+ - inotify-simple==1.2.1
440
+ - ipywidgets==8.0.7
441
+ - jsonpatch==1.32
442
+ - jsonpointer==2.3
443
+ - jsonschema==4.17.3
444
+ - jupyterlab-widgets==3.0.8
445
+ - lit==16.0.3
446
+ - multidict==6.0.4
447
+ - multiprocess==0.70.14
448
+ - ninja==1.11.1
449
+ - paramiko==3.1.0
450
+ - pathos==0.3.0
451
+ - pathtools==0.1.2
452
+ - peft==0.5.0.dev0
453
+ - pip==23.1.2
454
+ - pox==0.3.2
455
+ - ppft==1.7.6.6
456
+ - protobuf==3.20.3
457
+ - protobuf3-to-dict==0.1.5
458
+ - py-cpuinfo==9.0.0
459
+ - pyarrow==12.0.0
460
+ - pyfunctional==1.4.3
461
+ - pyinstrument==3.4.2
462
+ - pyinstrument-cext==0.2.4
463
+ - pynacl==1.5.0
464
+ - pyrsistent==0.19.3
465
+ - regex==2023.6.3
466
+ - retrying==1.3.4
467
+ - s3fs==0.4.2
468
+ - safetensors==0.3.1
469
+ - sagemaker==2.154.0
470
+ - sagemaker-experiments==0.1.43
471
+ - sagemaker-pytorch-training==2.8.0
472
+ - sagemaker-training==4.5.0
473
+ - schema==0.7.5
474
+ - sentry-sdk==1.28.1
475
+ - setproctitle==1.3.2
476
+ - smclarify==0.5
477
+ - smdebug==1.0.34
478
+ - smdebug-rulesconfig==1.0.1
479
+ - smdistributed-dataparallel==1.8.0
480
+ - smdistributed-modelparallel==1.15.0
481
+ - smmap==5.0.0
482
+ - tabulate==0.9.0
483
+ - tblib==1.7.0
484
+ - tokenizers==0.13.3
485
+ - torchnet==0.0.4
486
+ - transformers==4.31.0
487
+ - triton==2.0.0.dev20221202
488
+ - trl==0.4.7
489
+ - visdom==0.2.4
490
+ - wandb==0.15.7
491
+ - websocket-client==1.5.1
492
+ - werkzeug==2.3.4
493
+ - widgetsnbextension==4.0.8
494
+ - xxhash==3.2.0
495
+ - yarl==1.9.2
496
+ - zope-event==4.6
497
+ - zope-interface==6.0
498
+ prefix: /opt/conda
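Aside (not part of the uploaded files): conda-environment.yaml is the exported snapshot of the `base` env at /opt/conda, mixing conda-pinned and pip-pinned packages. A hedged sketch, assuming the file has been downloaded locally, for summarising what it contains (pyyaml, pinned as pyyaml==5.4.1 in this run, is required):

# Hedged sketch: summarise the exported conda environment above.
import yaml

with open("conda-environment.yaml") as fh:
    env = yaml.safe_load(fh)

conda_deps = [d for d in env["dependencies"] if isinstance(d, str)]
pip_deps = next((d["pip"] for d in env["dependencies"] if isinstance(d, dict)), [])

print(env["name"], env["prefix"])         # base /opt/conda
print(len(conda_deps), "conda packages")  # conda-pinned entries
print(len(pip_deps), "pip packages")      # pip-pinned entries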
wandb/run-20230727_154936-a41qiywg/files/config.yaml ADDED
@@ -0,0 +1,649 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.8
7
+ cli_version: 0.15.7
8
+ framework: huggingface
9
+ huggingface_version: 4.31.0
10
+ is_jupyter_run: true
11
+ is_kaggle_kernel: false
12
+ start_time: 1690472976.418337
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 84
24
+ - 98
25
+ 2:
26
+ - 1
27
+ - 5
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 53
32
+ - 55
33
+ - 71
34
+ - 84
35
+ - 98
36
+ 3:
37
+ - 7
38
+ - 23
39
+ 4: 3.10.8
40
+ 5: 0.15.7
41
+ 6: 4.31.0
42
+ 8:
43
+ - 1
44
+ - 5
45
+ m:
46
+ - 1: train/global_step
47
+ 6:
48
+ - 3
49
+ - 1: train/loss
50
+ 5: 1
51
+ 6:
52
+ - 1
53
+ - 1: train/learning_rate
54
+ 5: 1
55
+ 6:
56
+ - 1
57
+ - 1: train/epoch
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: train/train_runtime
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: train/train_samples_per_second
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: train/train_steps_per_second
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: train/total_flos
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: train/train_loss
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ vocab_size:
82
+ desc: null
83
+ value: 65024
84
+ hidden_size:
85
+ desc: null
86
+ value: 4544
87
+ n_layer:
88
+ desc: null
89
+ value: 32
90
+ n_head:
91
+ desc: null
92
+ value: 71
93
+ layer_norm_epsilon:
94
+ desc: null
95
+ value: 1.0e-05
96
+ initializer_range:
97
+ desc: null
98
+ value: 0.02
99
+ use_cache:
100
+ desc: null
101
+ value: false
102
+ apply_residual_connection_post_layernorm:
103
+ desc: null
104
+ value: false
105
+ hidden_dropout:
106
+ desc: null
107
+ value: 0.0
108
+ attention_dropout:
109
+ desc: null
110
+ value: 0.0
111
+ bos_token_id:
112
+ desc: null
113
+ value: 11
114
+ eos_token_id:
115
+ desc: null
116
+ value: 11
117
+ multi_query:
118
+ desc: null
119
+ value: true
120
+ alibi:
121
+ desc: null
122
+ value: false
123
+ bias:
124
+ desc: null
125
+ value: false
126
+ parallel_attn:
127
+ desc: null
128
+ value: true
129
+ return_dict:
130
+ desc: null
131
+ value: true
132
+ output_hidden_states:
133
+ desc: null
134
+ value: false
135
+ output_attentions:
136
+ desc: null
137
+ value: false
138
+ torchscript:
139
+ desc: null
140
+ value: false
141
+ torch_dtype:
142
+ desc: null
143
+ value: bfloat16
144
+ use_bfloat16:
145
+ desc: null
146
+ value: false
147
+ tf_legacy_loss:
148
+ desc: null
149
+ value: false
150
+ pruned_heads:
151
+ desc: null
152
+ value: {}
153
+ tie_word_embeddings:
154
+ desc: null
155
+ value: true
156
+ is_encoder_decoder:
157
+ desc: null
158
+ value: false
159
+ is_decoder:
160
+ desc: null
161
+ value: false
162
+ cross_attention_hidden_size:
163
+ desc: null
164
+ value: null
165
+ add_cross_attention:
166
+ desc: null
167
+ value: false
168
+ tie_encoder_decoder:
169
+ desc: null
170
+ value: false
171
+ max_length:
172
+ desc: null
173
+ value: 20
174
+ min_length:
175
+ desc: null
176
+ value: 0
177
+ do_sample:
178
+ desc: null
179
+ value: false
180
+ early_stopping:
181
+ desc: null
182
+ value: false
183
+ num_beams:
184
+ desc: null
185
+ value: 1
186
+ num_beam_groups:
187
+ desc: null
188
+ value: 1
189
+ diversity_penalty:
190
+ desc: null
191
+ value: 0.0
192
+ temperature:
193
+ desc: null
194
+ value: 1.0
195
+ top_k:
196
+ desc: null
197
+ value: 50
198
+ top_p:
199
+ desc: null
200
+ value: 1.0
201
+ typical_p:
202
+ desc: null
203
+ value: 1.0
204
+ repetition_penalty:
205
+ desc: null
206
+ value: 1.0
207
+ length_penalty:
208
+ desc: null
209
+ value: 1.0
210
+ no_repeat_ngram_size:
211
+ desc: null
212
+ value: 0
213
+ encoder_no_repeat_ngram_size:
214
+ desc: null
215
+ value: 0
216
+ bad_words_ids:
217
+ desc: null
218
+ value: null
219
+ num_return_sequences:
220
+ desc: null
221
+ value: 1
222
+ chunk_size_feed_forward:
223
+ desc: null
224
+ value: 0
225
+ output_scores:
226
+ desc: null
227
+ value: false
228
+ return_dict_in_generate:
229
+ desc: null
230
+ value: false
231
+ forced_bos_token_id:
232
+ desc: null
233
+ value: null
234
+ forced_eos_token_id:
235
+ desc: null
236
+ value: null
237
+ remove_invalid_values:
238
+ desc: null
239
+ value: false
240
+ exponential_decay_length_penalty:
241
+ desc: null
242
+ value: null
243
+ suppress_tokens:
244
+ desc: null
245
+ value: null
246
+ begin_suppress_tokens:
247
+ desc: null
248
+ value: null
249
+ architectures:
250
+ desc: null
251
+ value:
252
+ - RWForCausalLM
253
+ finetuning_task:
254
+ desc: null
255
+ value: null
256
+ id2label:
257
+ desc: null
258
+ value:
259
+ '0': LABEL_0
260
+ '1': LABEL_1
261
+ label2id:
262
+ desc: null
263
+ value:
264
+ LABEL_0: 0
265
+ LABEL_1: 1
266
+ tokenizer_class:
267
+ desc: null
268
+ value: null
269
+ prefix:
270
+ desc: null
271
+ value: null
272
+ pad_token_id:
273
+ desc: null
274
+ value: null
275
+ sep_token_id:
276
+ desc: null
277
+ value: null
278
+ decoder_start_token_id:
279
+ desc: null
280
+ value: null
281
+ task_specific_params:
282
+ desc: null
283
+ value: null
284
+ problem_type:
285
+ desc: null
286
+ value: null
287
+ _name_or_path:
288
+ desc: null
289
+ value: ybelkada/falcon-7b-sharded-bf16
290
+ transformers_version:
291
+ desc: null
292
+ value: 4.31.0
293
+ auto_map:
294
+ desc: null
295
+ value:
296
+ AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
297
+ AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
298
+ AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
299
+ AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
300
+ AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
301
+ AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
302
+ model_type:
303
+ desc: null
304
+ value: RefinedWebModel
305
+ quantization_config:
306
+ desc: null
307
+ value:
308
+ load_in_8bit: false
309
+ load_in_4bit: true
310
+ llm_int8_threshold: 6.0
311
+ llm_int8_skip_modules: null
312
+ llm_int8_enable_fp32_cpu_offload: false
313
+ llm_int8_has_fp16_weight: false
314
+ bnb_4bit_quant_type: nf4
315
+ bnb_4bit_use_double_quant: false
316
+ bnb_4bit_compute_dtype: float16
317
+ output_dir:
318
+ desc: null
319
+ value: ./results
320
+ overwrite_output_dir:
321
+ desc: null
322
+ value: false
323
+ do_train:
324
+ desc: null
325
+ value: false
326
+ do_eval:
327
+ desc: null
328
+ value: false
329
+ do_predict:
330
+ desc: null
331
+ value: false
332
+ evaluation_strategy:
333
+ desc: null
334
+ value: 'no'
335
+ prediction_loss_only:
336
+ desc: null
337
+ value: false
338
+ per_device_train_batch_size:
339
+ desc: null
340
+ value: 4
341
+ per_device_eval_batch_size:
342
+ desc: null
343
+ value: 8
344
+ per_gpu_train_batch_size:
345
+ desc: null
346
+ value: None
347
+ per_gpu_eval_batch_size:
348
+ desc: null
349
+ value: None
350
+ gradient_accumulation_steps:
351
+ desc: null
352
+ value: 4
353
+ eval_accumulation_steps:
354
+ desc: null
355
+ value: None
356
+ eval_delay:
357
+ desc: null
358
+ value: 0
359
+ learning_rate:
360
+ desc: null
361
+ value: 0.0002
362
+ weight_decay:
363
+ desc: null
364
+ value: 0.0
365
+ adam_beta1:
366
+ desc: null
367
+ value: 0.9
368
+ adam_beta2:
369
+ desc: null
370
+ value: 0.999
371
+ adam_epsilon:
372
+ desc: null
373
+ value: 1.0e-08
374
+ max_grad_norm:
375
+ desc: null
376
+ value: 0.3
377
+ num_train_epochs:
378
+ desc: null
379
+ value: 3.0
380
+ max_steps:
381
+ desc: null
382
+ value: 500
383
+ lr_scheduler_type:
384
+ desc: null
385
+ value: constant
386
+ warmup_ratio:
387
+ desc: null
388
+ value: 0.03
389
+ warmup_steps:
390
+ desc: null
391
+ value: 0
392
+ log_level:
393
+ desc: null
394
+ value: passive
395
+ log_level_replica:
396
+ desc: null
397
+ value: warning
398
+ log_on_each_node:
399
+ desc: null
400
+ value: true
401
+ logging_dir:
402
+ desc: null
403
+ value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
404
+ logging_strategy:
405
+ desc: null
406
+ value: steps
407
+ logging_first_step:
408
+ desc: null
409
+ value: false
410
+ logging_steps:
411
+ desc: null
412
+ value: 10
413
+ logging_nan_inf_filter:
414
+ desc: null
415
+ value: true
416
+ save_strategy:
417
+ desc: null
418
+ value: steps
419
+ save_steps:
420
+ desc: null
421
+ value: 10
422
+ save_total_limit:
423
+ desc: null
424
+ value: None
425
+ save_safetensors:
426
+ desc: null
427
+ value: false
428
+ save_on_each_node:
429
+ desc: null
430
+ value: false
431
+ no_cuda:
432
+ desc: null
433
+ value: false
434
+ use_mps_device:
435
+ desc: null
436
+ value: false
437
+ seed:
438
+ desc: null
439
+ value: 42
440
+ data_seed:
441
+ desc: null
442
+ value: None
443
+ jit_mode_eval:
444
+ desc: null
445
+ value: false
446
+ use_ipex:
447
+ desc: null
448
+ value: false
449
+ bf16:
450
+ desc: null
451
+ value: false
452
+ fp16:
453
+ desc: null
454
+ value: true
455
+ fp16_opt_level:
456
+ desc: null
457
+ value: O1
458
+ half_precision_backend:
459
+ desc: null
460
+ value: auto
461
+ bf16_full_eval:
462
+ desc: null
463
+ value: false
464
+ fp16_full_eval:
465
+ desc: null
466
+ value: false
467
+ tf32:
468
+ desc: null
469
+ value: None
470
+ local_rank:
471
+ desc: null
472
+ value: 0
473
+ ddp_backend:
474
+ desc: null
475
+ value: None
476
+ tpu_num_cores:
477
+ desc: null
478
+ value: None
479
+ tpu_metrics_debug:
480
+ desc: null
481
+ value: false
482
+ debug:
483
+ desc: null
484
+ value: '[]'
485
+ dataloader_drop_last:
486
+ desc: null
487
+ value: false
488
+ eval_steps:
489
+ desc: null
490
+ value: None
491
+ dataloader_num_workers:
492
+ desc: null
493
+ value: 0
494
+ past_index:
495
+ desc: null
496
+ value: -1
497
+ run_name:
498
+ desc: null
499
+ value: ./results
500
+ disable_tqdm:
501
+ desc: null
502
+ value: false
503
+ remove_unused_columns:
504
+ desc: null
505
+ value: true
506
+ label_names:
507
+ desc: null
508
+ value: None
509
+ load_best_model_at_end:
510
+ desc: null
511
+ value: false
512
+ metric_for_best_model:
513
+ desc: null
514
+ value: None
515
+ greater_is_better:
516
+ desc: null
517
+ value: None
518
+ ignore_data_skip:
519
+ desc: null
520
+ value: false
521
+ sharded_ddp:
522
+ desc: null
523
+ value: '[]'
524
+ fsdp:
525
+ desc: null
526
+ value: '[]'
527
+ fsdp_min_num_params:
528
+ desc: null
529
+ value: 0
530
+ fsdp_config:
531
+ desc: null
532
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
533
+ fsdp_transformer_layer_cls_to_wrap:
534
+ desc: null
535
+ value: None
536
+ deepspeed:
537
+ desc: null
538
+ value: None
539
+ label_smoothing_factor:
540
+ desc: null
541
+ value: 0.0
542
+ optim:
543
+ desc: null
544
+ value: paged_adamw_32bit
545
+ optim_args:
546
+ desc: null
547
+ value: None
548
+ adafactor:
549
+ desc: null
550
+ value: false
551
+ group_by_length:
552
+ desc: null
553
+ value: true
554
+ length_column_name:
555
+ desc: null
556
+ value: length
557
+ report_to:
558
+ desc: null
559
+ value: '[''wandb'']'
560
+ ddp_find_unused_parameters:
561
+ desc: null
562
+ value: None
563
+ ddp_bucket_cap_mb:
564
+ desc: null
565
+ value: None
566
+ ddp_broadcast_buffers:
567
+ desc: null
568
+ value: None
569
+ dataloader_pin_memory:
570
+ desc: null
571
+ value: true
572
+ skip_memory_metrics:
573
+ desc: null
574
+ value: true
575
+ use_legacy_prediction_loop:
576
+ desc: null
577
+ value: false
578
+ push_to_hub:
579
+ desc: null
580
+ value: false
581
+ resume_from_checkpoint:
582
+ desc: null
583
+ value: None
584
+ hub_model_id:
585
+ desc: null
586
+ value: None
587
+ hub_strategy:
588
+ desc: null
589
+ value: every_save
590
+ hub_token:
591
+ desc: null
592
+ value: <HUB_TOKEN>
593
+ hub_private_repo:
594
+ desc: null
595
+ value: false
596
+ gradient_checkpointing:
597
+ desc: null
598
+ value: false
599
+ include_inputs_for_metrics:
600
+ desc: null
601
+ value: false
602
+ fp16_backend:
603
+ desc: null
604
+ value: auto
605
+ push_to_hub_model_id:
606
+ desc: null
607
+ value: None
608
+ push_to_hub_organization:
609
+ desc: null
610
+ value: None
611
+ push_to_hub_token:
612
+ desc: null
613
+ value: <PUSH_TO_HUB_TOKEN>
614
+ mp_parameters:
615
+ desc: null
616
+ value: ''
617
+ auto_find_batch_size:
618
+ desc: null
619
+ value: false
620
+ full_determinism:
621
+ desc: null
622
+ value: false
623
+ torchdynamo:
624
+ desc: null
625
+ value: None
626
+ ray_scope:
627
+ desc: null
628
+ value: last
629
+ ddp_timeout:
630
+ desc: null
631
+ value: 1800
632
+ torch_compile:
633
+ desc: null
634
+ value: false
635
+ torch_compile_backend:
636
+ desc: null
637
+ value: None
638
+ torch_compile_mode:
639
+ desc: null
640
+ value: None
641
+ xpu_backend:
642
+ desc: null
643
+ value: None
644
+ train_batch_size:
645
+ desc: null
646
+ value: 4
647
+ eval_batch_size:
648
+ desc: null
649
+ value: 8
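config.yaml above is the run configuration W&B captured: the Falcon-7B architecture fields, a 4-bit NF4 quantization_config with float16 compute, and the Trainer hyperparameters (paged_adamw_32bit, learning rate 2e-4, batch size 4 with 4 gradient-accumulation steps, max_steps 500, fp16). A minimal sketch, assuming the transformers 4.31.0 pinned in this environment, of how the recorded quantization_config maps back onto a model load; trust_remote_code and device_map are assumptions, needed here because the config routes through the custom RW* classes listed under auto_map and targets a single GPU.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Values copied from the quantization_config block recorded above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "ybelkada/falcon-7b-sharded-bf16",  # _name_or_path recorded above
    quantization_config=bnb_config,
    device_map="auto",                  # assumption: place the 4-bit weights on the single GPU
    trust_remote_code=True,             # assumption: required for the auto_map RW* classes
)
model.config.use_cache = False          # use_cache: false, as recorded above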
wandb/run-20230727_154936-a41qiywg/files/output.log ADDED
@@ -0,0 +1,112 @@
1
+
2
+ You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
3
+ {}
4
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
5
+ To disable this warning, you can either:
6
+ - Avoid using `tokenizers` before the fork if possible
7
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
8
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
9
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
10
+ [notice] To update, run: pip install --upgrade pip
11
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
12
+ To disable this warning, you can either:
13
+ - Avoid using `tokenizers` before the fork if possible
14
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
15
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
16
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
17
+ [notice] To update, run: pip install --upgrade pip
18
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
19
+ To disable this warning, you can either:
20
+ - Avoid using `tokenizers` before the fork if possible
21
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
22
+ Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
23
+ Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
24
+ Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
25
+ Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
26
+ Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
27
+ Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
28
+ Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
29
+ Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
30
+ Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
31
+ Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
32
+ Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
33
+ Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
34
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
35
+ Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
36
+ Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
37
+ Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
38
+ Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
39
+ Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
40
+ Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
41
+ Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
42
+ Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
43
+ Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
44
+ Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
45
+ Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
46
+ Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
47
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
48
+ Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
49
+ Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
50
+ Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
51
+ Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
52
+ Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
53
+ Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
54
+ Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
55
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
56
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
57
+ [notice] To update, run: pip install --upgrade pip
58
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
59
+ To disable this warning, you can either:
60
+ - Avoid using `tokenizers` before the fork if possible
61
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
62
+ Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
63
+ Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
64
+ Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
65
+ Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
66
+ Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
67
+ Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
68
+ Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
69
+ Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
70
+ Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
71
+ Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
72
+ Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
73
+ Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
74
+ Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
75
+ Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
76
+ Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
77
+ Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
78
+ Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
79
+ Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
80
+ Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
81
+ Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
82
+ Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
83
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
84
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
85
+ Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
86
+ Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
87
+ Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
88
+ Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
89
+ Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
90
+ Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
91
+ Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
92
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
93
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
94
+ [notice] To update, run: pip install --upgrade pip
95
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
96
+ To disable this warning, you can either:
97
+ - Avoid using `tokenizers` before the fork if possible
98
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
99
+ Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
100
+ Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
101
+ Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
102
+ Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
103
+ Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
104
+ Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
105
+ Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
106
+ Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
107
+ WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
108
+ [notice] A new release of pip is available: 23.1.2 -> 23.2.1
109
+ [notice] To update, run: pip install --upgrade pip
110
+ True
111
+ /opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
112
+ warnings.warn(
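output.log above surfaces two actionable warnings: peft reports that prepare_model_for_int8_training is deprecated in favour of prepare_model_for_kbit_training, and tokenizers asks for TOKENIZERS_PARALLELISM to be set explicitly to avoid the repeated fork warning. A minimal sketch of both fixes, assuming the peft 0.5.0.dev0 and tokenizers 0.13.3 versions pinned in this environment; the model argument stands for the 4-bit model loaded as in the sketch after config.yaml.

import os

# Silence the repeated huggingface/tokenizers fork warning seen in the log above.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from peft import prepare_model_for_kbit_training


def prepare_quantized_model(model):
    # Replacement for the deprecated prepare_model_for_int8_training call
    # flagged by the FutureWarning at the end of output.log.
    return prepare_model_for_kbit_training(model)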
wandb/run-20230727_154936-a41qiywg/files/requirements.txt ADDED
@@ -0,0 +1,240 @@
1
+ accelerate==0.21.0
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ apex==0.1
5
+ appdirs==1.4.4
6
+ argparse==1.4.0
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==22.2.0
10
+ awscli==1.27.132
11
+ backcall==0.2.0
12
+ backports.functools-lru-cache==1.6.4
13
+ bcrypt==4.0.1
14
+ bitsandbytes==0.41.0
15
+ blis==0.7.9
16
+ bokeh==3.1.1
17
+ boto3==1.26.132
18
+ botocore==1.29.132
19
+ brotlipy==0.7.0
20
+ cached-property==1.5.2
21
+ catalogue==2.0.8
22
+ certifi==2023.5.7
23
+ cffi==1.15.1
24
+ charset-normalizer==3.1.0
25
+ click==8.1.3
26
+ cloudpickle==2.2.1
27
+ cmake==3.26.3
28
+ colorama==0.4.4
29
+ comm==0.1.3
30
+ commonmark==0.9.1
31
+ conda-content-trust==0.1.3
32
+ conda-package-handling==2.0.2
33
+ conda-package-streaming==0.7.0
34
+ conda==23.1.0
35
+ confection==0.0.4
36
+ contextlib2==21.6.0
37
+ contourpy==1.0.7
38
+ cryptography==40.0.1
39
+ cycler==0.11.0
40
+ cymem==2.0.7
41
+ cython==0.29.34
42
+ datasets==2.14.0
43
+ debugpy==1.6.7
44
+ decorator==5.1.1
45
+ deepspeed==0.6.1+1ea3d4b
46
+ dgl==1.1.0+cu118
47
+ dill==0.3.6
48
+ docker-pycreds==0.4.0
49
+ docutils==0.15.2
50
+ einops==0.6.1
51
+ executing==1.2.0
52
+ fastai==2.7.12
53
+ fastcore==1.5.29
54
+ fastdownload==0.0.7
55
+ fastprogress==1.0.3
56
+ filelock==3.12.0
57
+ flash-attn==0.2.8
58
+ fonttools==4.39.4
59
+ frozenlist==1.4.0
60
+ fsspec==2023.5.0
61
+ future==0.18.3
62
+ gevent==22.10.2
63
+ gitdb==4.0.10
64
+ gitpython==3.1.32
65
+ gmpy2==2.1.2
66
+ google-pasta==0.2.0
67
+ greenlet==2.0.2
68
+ h5py==3.8.0
69
+ hjson==3.1.0
70
+ horovod==0.26.1
71
+ huggingface-hub==0.16.4
72
+ idna==3.4
73
+ imageio==2.28.1
74
+ importlib-metadata==4.13.0
75
+ inotify-simple==1.2.1
76
+ ipykernel==6.23.0
77
+ ipython==8.13.2
78
+ ipywidgets==8.0.7
79
+ jedi==0.18.2
80
+ jinja2==3.1.2
81
+ jmespath==1.0.1
82
+ joblib==1.2.0
83
+ jsonpatch==1.32
84
+ jsonpointer==2.3
85
+ jsonschema==4.17.3
86
+ jupyter-client==8.2.0
87
+ jupyter-core==5.3.0
88
+ jupyterlab-widgets==3.0.8
89
+ kiwisolver==1.4.4
90
+ langcodes==3.3.0
91
+ libmambapy==1.4.1
92
+ lit==16.0.3
93
+ llvmlite==0.39.1
94
+ mamba==1.4.1
95
+ markupsafe==2.1.2
96
+ matplotlib-inline==0.1.6
97
+ matplotlib==3.7.1
98
+ mpi4py==3.1.4
99
+ mpmath==1.3.0
100
+ multidict==6.0.4
101
+ multiprocess==0.70.14
102
+ munkres==1.1.4
103
+ murmurhash==1.0.9
104
+ nest-asyncio==1.5.6
105
+ networkx==3.1
106
+ ninja==1.11.1
107
+ numba==0.56.4
108
+ numpy==1.23.5
109
+ opencv-python==4.7.0
110
+ packaging==23.1
111
+ pandas==2.0.1
112
+ paramiko==3.1.0
113
+ parso==0.8.3
114
+ pathos==0.3.0
115
+ pathtools==0.1.2
116
+ pathy==0.10.1
117
+ patsy==0.5.3
118
+ peft==0.5.0.dev0
119
+ pexpect==4.8.0
120
+ pickleshare==0.7.5
121
+ pillow==9.4.0
122
+ pip==23.1.2
123
+ platformdirs==3.5.0
124
+ plotly==5.14.1
125
+ pluggy==1.0.0
126
+ ply==3.11
127
+ pooch==1.7.0
128
+ pox==0.3.2
129
+ ppft==1.7.6.6
130
+ preshed==3.0.8
131
+ prompt-toolkit==3.0.38
132
+ protobuf3-to-dict==0.1.5
133
+ protobuf==3.20.3
134
+ psutil==5.9.5
135
+ ptyprocess==0.7.0
136
+ pure-eval==0.2.2
137
+ py-cpuinfo==9.0.0
138
+ pyarrow==12.0.0
139
+ pyasn1==0.4.8
140
+ pybind11-global==2.10.4
141
+ pybind11==2.10.4
142
+ pycosat==0.6.4
143
+ pycparser==2.21
144
+ pydantic==1.10.7
145
+ pyfunctional==1.4.3
146
+ pygments==2.15.1
147
+ pyinstrument-cext==0.2.4
148
+ pyinstrument==3.4.2
149
+ pynacl==1.5.0
150
+ pyopenssl==23.1.1
151
+ pyparsing==3.0.9
152
+ pyqt5-sip==12.11.0
153
+ pyqt5==5.15.7
154
+ pyrsistent==0.19.3
155
+ pysocks==1.7.1
156
+ python-dateutil==2.8.2
157
+ pytz==2023.3
158
+ pyyaml==5.4.1
159
+ pyzmq==25.0.2
160
+ regex==2023.6.3
161
+ requests==2.28.2
162
+ retrying==1.3.4
163
+ rich==12.6.0
164
+ rsa==4.7.2
165
+ ruamel.yaml.clib==0.2.7
166
+ ruamel.yaml==0.17.21
167
+ s3fs==0.4.2
168
+ s3transfer==0.6.1
169
+ safetensors==0.3.1
170
+ sagemaker-experiments==0.1.43
171
+ sagemaker-pytorch-training==2.8.0
172
+ sagemaker-training==4.5.0
173
+ sagemaker==2.154.0
174
+ schema==0.7.5
175
+ scikit-learn==1.2.2
176
+ scipy==1.10.1
177
+ seaborn==0.12.2
178
+ sentry-sdk==1.28.1
179
+ setproctitle==1.3.2
180
+ setuptools==65.6.3
181
+ shap==0.41.0
182
+ shellingham==1.5.1
183
+ sip==6.7.9
184
+ six==1.16.0
185
+ slicer==0.0.7
186
+ smart-open==5.2.1
187
+ smclarify==0.5
188
+ smdebug-rulesconfig==1.0.1
189
+ smdebug==1.0.34
190
+ smdistributed-dataparallel==1.8.0
191
+ smdistributed-modelparallel==1.15.0
192
+ smmap==5.0.0
193
+ spacy-legacy==3.0.12
194
+ spacy-loggers==1.0.4
195
+ spacy==3.5.2
196
+ srsly==2.4.6
197
+ stack-data==0.6.2
198
+ statsmodels==0.14.0
199
+ sympy==1.11.1
200
+ tabulate==0.9.0
201
+ tblib==1.7.0
202
+ tenacity==8.2.2
203
+ thinc==8.1.10
204
+ threadpoolctl==3.1.0
205
+ tokenizers==0.13.3
206
+ toml==0.10.2
207
+ tomli==2.0.1
208
+ toolz==0.12.0
209
+ torch==2.0.0
210
+ torchaudio==2.0.1
211
+ torchdata==0.6.0
212
+ torchnet==0.0.4
213
+ torchtext==0.15.1
214
+ torchvision==0.15.1
215
+ tornado==6.3
216
+ tqdm==4.65.0
217
+ traitlets==5.9.0
218
+ transformers==4.31.0
219
+ triton==2.0.0.dev20221202
220
+ trl==0.4.7
221
+ typer==0.7.0
222
+ typing-extensions==4.5.0
223
+ tzdata==2023.3
224
+ unicodedata2==15.0.0
225
+ urllib3==1.26.15
226
+ visdom==0.2.4
227
+ wandb==0.15.7
228
+ wasabi==1.1.1
229
+ wcwidth==0.2.6
230
+ websocket-client==1.5.1
231
+ werkzeug==2.3.4
232
+ wheel==0.40.0
233
+ widgetsnbextension==4.0.8
234
+ xxhash==3.2.0
235
+ xyzservices==2023.2.0
236
+ yarl==1.9.2
237
+ zipp==3.15.0
238
+ zope.event==4.6
239
+ zope.interface==6.0
240
+ zstandard==0.19.0
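requirements.txt confirms the same QLoRA-style stack seen in the conda export (transformers 4.31.0, peft 0.5.0.dev0, trl 0.4.7, bitsandbytes 0.41.0, accelerate 0.21.0). A minimal sketch of the TrainingArguments implied by the values recorded in config.yaml above; every keyword shown is taken from that file, and anything not listed keeps its transformers 4.31 default.

from transformers import TrainingArguments

# Hyperparameters as recorded in wandb/run-20230727_154936-a41qiywg/files/config.yaml.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    max_grad_norm=0.3,
    max_steps=500,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    group_by_length=True,
    fp16=True,
    logging_steps=10,
    save_steps=10,
    report_to="wandb",
)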
wandb/run-20230727_154936-a41qiywg/files/wandb-metadata.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
3
+ "python": "3.10.8",
4
+ "heartbeatAt": "2023-07-27T15:49:36.888553",
5
+ "startedAt": "2023-07-27T15:49:36.344100",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "<python with no main file>",
11
+ "host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
12
+ "username": "root",
13
+ "executable": "/opt/conda/bin/python",
14
+ "cpu_count": 4,
15
+ "cpu_count_logical": 8,
16
+ "cpu_freq": {
17
+ "current": 3100.120625,
18
+ "min": 0.0,
19
+ "max": 0.0
20
+ },
21
+ "cpu_freq_per_core": [
22
+ {
23
+ "current": 3107.574,
24
+ "min": 0.0,
25
+ "max": 0.0
26
+ },
27
+ {
28
+ "current": 3102.47,
29
+ "min": 0.0,
30
+ "max": 0.0
31
+ },
32
+ {
33
+ "current": 3099.63,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 3099.058,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 3100.716,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ },
47
+ {
48
+ "current": 3099.393,
49
+ "min": 0.0,
50
+ "max": 0.0
51
+ },
52
+ {
53
+ "current": 3099.988,
54
+ "min": 0.0,
55
+ "max": 0.0
56
+ },
57
+ {
58
+ "current": 3092.136,
59
+ "min": 0.0,
60
+ "max": 0.0
61
+ }
62
+ ],
63
+ "disk": {
64
+ "total": 32.0,
65
+ "used": 0.414398193359375
66
+ },
67
+ "gpu": "Tesla T4",
68
+ "gpu_count": 1,
69
+ "gpu_devices": [
70
+ {
71
+ "name": "Tesla T4",
72
+ "memory_total": 15843721216
73
+ }
74
+ ],
75
+ "memory": {
76
+ "total": 30.947834014892578
77
+ }
78
+ }
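wandb-metadata.json records the hardware behind the run: a single Tesla T4 with 15,843,721,216 bytes (about 14.8 GiB) of GPU memory, 4 physical / 8 logical CPU cores, and roughly 31 GB of RAM on a SageMaker ml.g4dn.2xlarge host. That budget is consistent with 4-bit loading, since 7B parameters at 4 bits is roughly 3.5 GB of weights before activations and optimizer state. A small sketch for confirming the device before training, using only standard torch calls.

import torch

assert torch.cuda.is_available(), "expected the single Tesla T4 recorded in wandb-metadata.json"
props = torch.cuda.get_device_properties(0)
print(props.name, round(props.total_memory / 2**30, 1), "GiB")  # Tesla T4, ~14.8 GiB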
wandb/run-20230727_154936-a41qiywg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
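The summary above is internally consistent with the recorded hyperparameters: an effective batch of 16 examples per optimizer step (4 per device times 4 accumulation steps on the single GPU), so 500 steps cover 8,000 examples, giving 8000 / 7012.93 s ≈ 1.141 samples/s and 500 / 7012.93 s ≈ 0.071 steps/s, exactly the train_samples_per_second and train_steps_per_second values logged. Reaching epoch 5.8 at step 500 also implies a training set of roughly 8000 / 5.8 ≈ 1,380 examples. The same arithmetic as a tiny check:

# Consistency check of wandb-summary.json against the recorded trainer config.
steps = 500
effective_batch = 4 * 4 * 1      # per-device batch * grad accumulation * GPU count
runtime_s = 7012.9274

samples = steps * effective_batch        # 8000
print(samples / runtime_s)               # ~1.141 -> train/train_samples_per_second
print(steps / runtime_s)                 # ~0.071 -> train/train_steps_per_second
print(samples / 5.8)                     # ~1379  -> approximate training-set size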
wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
3
+ size 16040370
wandb/run-20230727_154936-a41qiywg/logs/debug.log ADDED
@@ -0,0 +1,76 @@
1
+ 2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
2
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
3
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
5
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
8
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
10
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
11
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
12
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
13
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
16
+ 2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
17
+ 2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
19
+ 2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
20
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
21
+ 2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
22
+ 2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
23
+ 2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
24
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
25
+ 2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
26
+ 2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
27
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
28
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
29
+ 2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
30
+ 2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
31
+ 2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
32
+ 2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
33
+ 2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
34
+ 2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
35
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
36
+ 2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
37
+ 2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
38
+ 2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
39
+ 2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
40
+ 2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
41
+ 2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
42
+ 2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
43
+ 2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
44
+ 2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
45
+ 2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
46
+ 2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
47
+ 2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
48
+ 2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
49
+ 2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
50
+ 2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
51
+ 2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
52
+ 2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
53
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
54
+ 2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
55
+ 2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
56
+ 2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
57
+ 2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
58
+ 2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
59
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
60
+ 2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
61
+ 2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
62
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
63
+ 2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
64
+ 2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
65
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
66
+ 2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
67
+ 2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
68
+ 2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
69
+ 2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
70
+ 2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
71
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
72
+ 2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
73
+ 2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
74
+ 2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 
'./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
75
+ 2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
76
+ 2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
wandb/run-20230727_154936-a41qiywg/run-a41qiywg.wandb ADDED
Binary file (426 kB).
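run-a41qiywg.wandb is the binary event record for the run; once it is synced, the same config and summary can be read back through the public W&B API. A minimal sketch using the run id a41qiywg taken from the directory name above; the entity and project are placeholders, since neither appears in these files, so substitute the real ones before running.

import wandb

api = wandb.Api()
# "<entity>/<project>" are placeholders; only the run id a41qiywg is recorded in these files.
run = api.run("<entity>/<project>/a41qiywg")

print(run.summary.get("train/train_loss"))  # 2.2251... as in wandb-summary.json
print(run.config.get("learning_rate"))      # 0.0002, as in config.yaml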