Upload 19 files
Browse files- .gitattributes +3 -0
- wandb/debug-internal.log +3 -0
- wandb/debug.log +76 -0
- wandb/latest-run/files/conda-environment.yaml +498 -0
- wandb/latest-run/files/config.yaml +649 -0
- wandb/latest-run/files/output.log +112 -0
- wandb/latest-run/files/requirements.txt +240 -0
- wandb/latest-run/files/wandb-metadata.json +78 -0
- wandb/latest-run/files/wandb-summary.json +1 -0
- wandb/latest-run/logs/debug-internal.log +3 -0
- wandb/latest-run/logs/debug.log +76 -0
- wandb/run-20230727_154936-a41qiywg/files/conda-environment.yaml +498 -0
- wandb/run-20230727_154936-a41qiywg/files/config.yaml +649 -0
- wandb/run-20230727_154936-a41qiywg/files/output.log +112 -0
- wandb/run-20230727_154936-a41qiywg/files/requirements.txt +240 -0
- wandb/run-20230727_154936-a41qiywg/files/wandb-metadata.json +78 -0
- wandb/run-20230727_154936-a41qiywg/files/wandb-summary.json +1 -0
- wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log +3 -0
- wandb/run-20230727_154936-a41qiywg/logs/debug.log +76 -0
- wandb/run-20230727_154936-a41qiywg/run-a41qiywg.wandb +0 -0
.gitattributes
CHANGED
@@ -34,3 +34,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
results/checkpoint-100/Unconfirmed[[:space:]]828739.crdownload filter=lfs diff=lfs merge=lfs -text
|
37 |
+
wandb/debug-internal.log filter=lfs diff=lfs merge=lfs -text
|
38 |
+
wandb/latest-run/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
|
39 |
+
wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
|
wandb/debug-internal.log
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
|
3 |
+
size 16040370
|
wandb/debug.log
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
|
2 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
|
3 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
|
5 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
8 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
9 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
|
10 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
|
11 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
|
12 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
|
13 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
|
14 |
+
config: {}
|
15 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
|
16 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
|
17 |
+
2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
|
19 |
+
2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
|
20 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
|
21 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
|
22 |
+
2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
|
23 |
+
2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
|
24 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
|
25 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
|
26 |
+
2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
|
27 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
|
28 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
|
29 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
|
30 |
+
2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
|
31 |
+
2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
32 |
+
2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
33 |
+
2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
34 |
+
2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
35 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
36 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
37 |
+
2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
38 |
+
2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
39 |
+
2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
40 |
+
2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
41 |
+
2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
42 |
+
2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
43 |
+
2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
44 |
+
2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
45 |
+
2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
46 |
+
2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
47 |
+
2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
48 |
+
2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
49 |
+
2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
50 |
+
2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
51 |
+
2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
52 |
+
2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
53 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
54 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
55 |
+
2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
56 |
+
2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
57 |
+
2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
58 |
+
2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
59 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
60 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
61 |
+
2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
62 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
63 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
64 |
+
2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
65 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
66 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
67 |
+
2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
68 |
+
2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
69 |
+
2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
70 |
+
2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
71 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
72 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
73 |
+
2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
74 |
+
2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
75 |
+
2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
76 |
+
2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
wandb/latest-run/files/conda-environment.yaml
ADDED
@@ -0,0 +1,498 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: base
|
2 |
+
channels:
|
3 |
+
- fastai
|
4 |
+
- dglteam/label/cu118
|
5 |
+
- nvidia/label/cuda-11.8.0
|
6 |
+
- https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
|
7 |
+
- conda-forge
|
8 |
+
dependencies:
|
9 |
+
- _libgcc_mutex=0.1=conda_forge
|
10 |
+
- _openmp_mutex=4.5=2_kmp_llvm
|
11 |
+
- alsa-lib=1.2.8=h166bdaf_0
|
12 |
+
- aom=3.5.0=h27087fc_0
|
13 |
+
- asttokens=2.2.1=pyhd8ed1ab_0
|
14 |
+
- attr=2.5.1=h166bdaf_1
|
15 |
+
- aws-ofi-nccl-dlc=1.5.0=aws_0
|
16 |
+
- awscli=1.27.132=py310hff52083_0
|
17 |
+
- backcall=0.2.0=pyh9f0ad1d_0
|
18 |
+
- backports=1.0=pyhd8ed1ab_3
|
19 |
+
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
|
20 |
+
- blas=1.0=mkl
|
21 |
+
- bokeh=3.1.1=pyhd8ed1ab_0
|
22 |
+
- boto3=1.26.132=pyhd8ed1ab_0
|
23 |
+
- botocore=1.29.132=pyhd8ed1ab_0
|
24 |
+
- brotli=1.0.9=h166bdaf_8
|
25 |
+
- brotli-bin=1.0.9=h166bdaf_8
|
26 |
+
- brotlipy=0.7.0=py310h5764c6d_1005
|
27 |
+
- bzip2=1.0.8=h7f98852_4
|
28 |
+
- c-ares=1.18.1=h7f98852_0
|
29 |
+
- ca-certificates=2023.5.7=hbcca054_0
|
30 |
+
- cached-property=1.5.2=hd8ed1ab_1
|
31 |
+
- cached_property=1.5.2=pyha770c72_1
|
32 |
+
- cairo=1.16.0=ha61ee94_1014
|
33 |
+
- catalogue=2.0.8=py310hff52083_1
|
34 |
+
- certifi=2023.5.7=pyhd8ed1ab_0
|
35 |
+
- cffi=1.15.1=py310h255011f_3
|
36 |
+
- charset-normalizer=3.1.0=pyhd8ed1ab_0
|
37 |
+
- click=8.1.3=unix_pyhd8ed1ab_2
|
38 |
+
- cloudpickle=2.2.1=pyhd8ed1ab_0
|
39 |
+
- colorama=0.4.4=pyh9f0ad1d_0
|
40 |
+
- comm=0.1.3=pyhd8ed1ab_0
|
41 |
+
- commonmark=0.9.1=py_0
|
42 |
+
- conda=23.1.0=py310hff52083_0
|
43 |
+
- conda-content-trust=0.1.3=pyhd8ed1ab_0
|
44 |
+
- conda-package-handling=2.0.2=pyh38be061_0
|
45 |
+
- conda-package-streaming=0.7.0=pyhd8ed1ab_1
|
46 |
+
- confection=0.0.4=py310hfdc917e_1
|
47 |
+
- contourpy=1.0.7=py310hdf3cbec_0
|
48 |
+
- cryptography=40.0.1=py310h34c0648_0
|
49 |
+
- cuda-cccl=11.8.89=0
|
50 |
+
- cuda-command-line-tools=11.8.0=0
|
51 |
+
- cuda-compiler=11.8.0=0
|
52 |
+
- cuda-cudart=11.8.89=0
|
53 |
+
- cuda-cudart-dev=11.8.89=0
|
54 |
+
- cuda-cuobjdump=11.8.86=0
|
55 |
+
- cuda-cupti=11.8.87=0
|
56 |
+
- cuda-cuxxfilt=11.8.86=0
|
57 |
+
- cuda-documentation=11.8.86=0
|
58 |
+
- cuda-driver-dev=11.8.89=0
|
59 |
+
- cuda-gdb=11.8.86=0
|
60 |
+
- cuda-libraries=11.8.0=0
|
61 |
+
- cuda-libraries-dev=11.8.0=0
|
62 |
+
- cuda-memcheck=11.8.86=0
|
63 |
+
- cuda-nsight=11.8.86=0
|
64 |
+
- cuda-nsight-compute=11.8.0=0
|
65 |
+
- cuda-nvcc=11.8.89=0
|
66 |
+
- cuda-nvdisasm=11.8.86=0
|
67 |
+
- cuda-nvml-dev=11.8.86=0
|
68 |
+
- cuda-nvprof=11.8.87=0
|
69 |
+
- cuda-nvprune=11.8.86=0
|
70 |
+
- cuda-nvrtc=11.8.89=0
|
71 |
+
- cuda-nvrtc-dev=11.8.89=0
|
72 |
+
- cuda-nvtx=11.8.86=0
|
73 |
+
- cuda-nvvp=11.8.87=0
|
74 |
+
- cuda-profiler-api=11.8.86=0
|
75 |
+
- cuda-runtime=11.8.0=0
|
76 |
+
- cuda-sanitizer-api=11.8.86=0
|
77 |
+
- cuda-toolkit=11.8.0=0
|
78 |
+
- cuda-tools=11.8.0=0
|
79 |
+
- cuda-visual-tools=11.8.0=0
|
80 |
+
- cycler=0.11.0=pyhd8ed1ab_0
|
81 |
+
- cymem=2.0.7=py310hd8f1fbe_1
|
82 |
+
- cython=0.29.34=py310heca2aa9_0
|
83 |
+
- cython-blis=0.7.9=py310hde88566_1
|
84 |
+
- dbus=1.13.6=h5008d03_3
|
85 |
+
- debugpy=1.6.7=py310heca2aa9_0
|
86 |
+
- decorator=5.1.1=pyhd8ed1ab_0
|
87 |
+
- dgl=1.1.0.cu118=py310_0
|
88 |
+
- docutils=0.15.2=py310hff52083_6
|
89 |
+
- executing=1.2.0=pyhd8ed1ab_0
|
90 |
+
- expat=2.5.0=hcb278e6_1
|
91 |
+
- fastai=2.7.12=py_0
|
92 |
+
- fastcore=1.5.29=py_0
|
93 |
+
- fastdownload=0.0.7=py_0
|
94 |
+
- fastprogress=1.0.3=py_0
|
95 |
+
- ffmpeg=5.1.2=gpl_h8dda1f0_106
|
96 |
+
- fftw=3.3.10=nompi_hc118613_107
|
97 |
+
- filelock=3.12.0=pyhd8ed1ab_0
|
98 |
+
- fmt=9.1.0=h924138e_0
|
99 |
+
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
100 |
+
- font-ttf-inconsolata=3.000=h77eed37_0
|
101 |
+
- font-ttf-source-code-pro=2.038=h77eed37_0
|
102 |
+
- font-ttf-ubuntu=0.83=hab24e00_0
|
103 |
+
- fontconfig=2.14.2=h14ed4e7_0
|
104 |
+
- fonts-conda-ecosystem=1=0
|
105 |
+
- fonts-conda-forge=1=0
|
106 |
+
- fonttools=4.39.4=py310h2372a71_0
|
107 |
+
- freeglut=3.2.2=h9c3ff4c_1
|
108 |
+
- freetype=2.12.1=hca18f0e_1
|
109 |
+
- future=0.18.3=pyhd8ed1ab_0
|
110 |
+
- gds-tools=1.4.0.31=0
|
111 |
+
- gettext=0.21.1=h27087fc_0
|
112 |
+
- glib=2.76.2=hfc55251_0
|
113 |
+
- glib-tools=2.76.2=hfc55251_0
|
114 |
+
- gmp=6.2.1=h58526e2_0
|
115 |
+
- gmpy2=2.1.2=py310h3ec546c_1
|
116 |
+
- gnutls=3.7.8=hf3e180e_0
|
117 |
+
- graphite2=1.3.13=h58526e2_1001
|
118 |
+
- gst-plugins-base=1.22.0=h4243ec0_2
|
119 |
+
- gstreamer=1.22.0=h25f0c4b_2
|
120 |
+
- gstreamer-orc=0.4.33=h166bdaf_0
|
121 |
+
- h5py=3.8.0=nompi_py310ha66b2ad_101
|
122 |
+
- harfbuzz=6.0.0=h8e241bc_0
|
123 |
+
- hdf5=1.14.0=nompi_hb72d44e_103
|
124 |
+
- icu=70.1=h27087fc_0
|
125 |
+
- idna=3.4=pyhd8ed1ab_0
|
126 |
+
- imageio=2.28.1=pyh24c5eb1_0
|
127 |
+
- importlib_metadata=6.6.0=hd8ed1ab_0
|
128 |
+
- ipykernel=6.23.0=pyh210e3f2_0
|
129 |
+
- ipython=8.13.2=pyh41d4057_0
|
130 |
+
- jack=1.9.22=h11f4161_0
|
131 |
+
- jasper=2.0.33=h0ff4b12_1
|
132 |
+
- jedi=0.18.2=pyhd8ed1ab_0
|
133 |
+
- jinja2=3.1.2=pyhd8ed1ab_1
|
134 |
+
- jmespath=1.0.1=pyhd8ed1ab_0
|
135 |
+
- joblib=1.2.0=pyhd8ed1ab_0
|
136 |
+
- jpeg=9e=h166bdaf_2
|
137 |
+
- jupyter_client=8.2.0=pyhd8ed1ab_0
|
138 |
+
- jupyter_core=5.3.0=py310hff52083_0
|
139 |
+
- keyutils=1.6.1=h166bdaf_0
|
140 |
+
- kiwisolver=1.4.4=py310hbf28c38_1
|
141 |
+
- krb5=1.20.1=h81ceb04_0
|
142 |
+
- lame=3.100=h166bdaf_1003
|
143 |
+
- langcodes=3.3.0=pyhd8ed1ab_0
|
144 |
+
- lcms2=2.15=hfd0df8a_0
|
145 |
+
- ld_impl_linux-64=2.40=h41732ed_0
|
146 |
+
- lerc=4.0.0=h27087fc_0
|
147 |
+
- libaec=1.0.6=hcb278e6_1
|
148 |
+
- libarchive=3.6.2=h3d51595_0
|
149 |
+
- libblas=3.9.0=1_h86c2bf4_netlib
|
150 |
+
- libbrotlicommon=1.0.9=h166bdaf_8
|
151 |
+
- libbrotlidec=1.0.9=h166bdaf_8
|
152 |
+
- libbrotlienc=1.0.9=h166bdaf_8
|
153 |
+
- libcap=2.67=he9d0100_0
|
154 |
+
- libcblas=3.9.0=5_h92ddd45_netlib
|
155 |
+
- libclang=15.0.7=default_had23c3d_1
|
156 |
+
- libclang13=15.0.7=default_h3e3d535_1
|
157 |
+
- libcublas=11.11.3.6=0
|
158 |
+
- libcublas-dev=11.11.3.6=0
|
159 |
+
- libcufft=10.9.0.58=0
|
160 |
+
- libcufft-dev=10.9.0.58=0
|
161 |
+
- libcufile=1.4.0.31=0
|
162 |
+
- libcufile-dev=1.4.0.31=0
|
163 |
+
- libcups=2.3.3=h36d4200_3
|
164 |
+
- libcurand=10.3.0.86=0
|
165 |
+
- libcurand-dev=10.3.0.86=0
|
166 |
+
- libcurl=7.88.1=hdc1c0ab_1
|
167 |
+
- libcusolver=11.4.1.48=0
|
168 |
+
- libcusolver-dev=11.4.1.48=0
|
169 |
+
- libcusparse=11.7.5.86=0
|
170 |
+
- libcusparse-dev=11.7.5.86=0
|
171 |
+
- libdb=6.2.32=h9c3ff4c_0
|
172 |
+
- libdeflate=1.17=h0b41bf4_0
|
173 |
+
- libdrm=2.4.114=h166bdaf_0
|
174 |
+
- libedit=3.1.20191231=he28a2e2_2
|
175 |
+
- libev=4.33=h516909a_1
|
176 |
+
- libevent=2.1.10=h28343ad_4
|
177 |
+
- libexpat=2.5.0=hcb278e6_1
|
178 |
+
- libffi=3.4.2=h7f98852_5
|
179 |
+
- libflac=1.4.2=h27087fc_0
|
180 |
+
- libgcc=7.2.0=h69d50b8_2
|
181 |
+
- libgcc-ng=12.2.0=h65d4601_19
|
182 |
+
- libgcrypt=1.10.1=h166bdaf_0
|
183 |
+
- libgfortran-ng=12.2.0=h69a702a_19
|
184 |
+
- libgfortran5=12.2.0=h337968e_19
|
185 |
+
- libglib=2.76.2=hebfc3b9_0
|
186 |
+
- libglu=9.0.0=he1b5a44_1001
|
187 |
+
- libgomp=12.2.0=h65d4601_19
|
188 |
+
- libgpg-error=1.46=h620e276_0
|
189 |
+
- libhwloc=2.9.1=hd6dc26d_0
|
190 |
+
- libiconv=1.17=h166bdaf_0
|
191 |
+
- libidn2=2.3.4=h166bdaf_0
|
192 |
+
- libjpeg-turbo=2.1.4=h166bdaf_0
|
193 |
+
- liblapack=3.9.0=5_h92ddd45_netlib
|
194 |
+
- liblapacke=3.9.0=5_h92ddd45_netlib
|
195 |
+
- libllvm11=11.1.0=he0ac6c6_5
|
196 |
+
- libllvm15=15.0.7=hadd5161_1
|
197 |
+
- libllvm16=16.0.1=hadd5161_0
|
198 |
+
- libmamba=1.4.1=hcea66bb_0
|
199 |
+
- libmambapy=1.4.1=py310h1428755_0
|
200 |
+
- libnghttp2=1.52.0=h61bc06f_0
|
201 |
+
- libnpp=11.8.0.86=0
|
202 |
+
- libnpp-dev=11.8.0.86=0
|
203 |
+
- libnsl=2.0.0=h7f98852_0
|
204 |
+
- libnvjpeg=11.9.0.86=0
|
205 |
+
- libnvjpeg-dev=11.9.0.86=0
|
206 |
+
- libogg=1.3.4=h7f98852_1
|
207 |
+
- libopenblas=0.3.21=pthreads_h78a6416_3
|
208 |
+
- libopencv=4.7.0=py310hb48cf42_1
|
209 |
+
- libopus=1.3.1=h7f98852_1
|
210 |
+
- libpciaccess=0.17=h166bdaf_0
|
211 |
+
- libpng=1.6.39=h753d276_0
|
212 |
+
- libpq=15.3=hbcd7760_0
|
213 |
+
- libprotobuf=3.21.12=h3eb15da_0
|
214 |
+
- libsndfile=1.2.0=hb75c966_0
|
215 |
+
- libsodium=1.0.18=h36c2ea0_1
|
216 |
+
- libsolv=0.7.23=h3eb15da_0
|
217 |
+
- libsqlite=3.40.0=h753d276_0
|
218 |
+
- libssh2=1.10.0=hf14f497_3
|
219 |
+
- libstdcxx-ng=12.2.0=h46fd767_19
|
220 |
+
- libsystemd0=253=h8c4010b_1
|
221 |
+
- libtasn1=4.19.0=h166bdaf_0
|
222 |
+
- libtiff=4.5.0=h6adf6a1_2
|
223 |
+
- libtool=2.4.7=h27087fc_0
|
224 |
+
- libudev1=253=h0b41bf4_1
|
225 |
+
- libunistring=0.9.10=h7f98852_0
|
226 |
+
- libuuid=2.38.1=h0b41bf4_0
|
227 |
+
- libuv=1.44.2=h166bdaf_0
|
228 |
+
- libva=2.18.0=h0b41bf4_0
|
229 |
+
- libvorbis=1.3.7=h9c3ff4c_0
|
230 |
+
- libvpx=1.11.0=h9c3ff4c_3
|
231 |
+
- libwebp-base=1.3.0=h0b41bf4_0
|
232 |
+
- libxcb=1.13=h7f98852_1004
|
233 |
+
- libxkbcommon=1.5.0=h79f4944_1
|
234 |
+
- libxml2=2.10.3=hca2bb57_4
|
235 |
+
- libzlib=1.2.13=h166bdaf_4
|
236 |
+
- llvm-openmp=16.0.3=h4dfa4b3_0
|
237 |
+
- llvmlite=0.39.1=py310h58363a5_1
|
238 |
+
- lz4-c=1.9.4=hcb278e6_0
|
239 |
+
- lzo=2.10=h516909a_1000
|
240 |
+
- mamba=1.4.1=py310h51d5547_0
|
241 |
+
- markupsafe=2.1.2=py310h1fa729e_0
|
242 |
+
- matplotlib=3.7.1=py310hff52083_0
|
243 |
+
- matplotlib-base=3.7.1=py310he60537e_0
|
244 |
+
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
|
245 |
+
- mkl=2023.1.0=h84fe81f_48680
|
246 |
+
- mkl-include=2023.1.0=h84fe81f_48680
|
247 |
+
- mpc=1.3.1=hfe3b2da_0
|
248 |
+
- mpfr=4.2.0=hb012696_0
|
249 |
+
- mpg123=1.31.3=hcb278e6_0
|
250 |
+
- mpi=1.0=openmpi
|
251 |
+
- mpi4py=3.1.4=py310h6075a6b_0
|
252 |
+
- mpmath=1.3.0=pyhd8ed1ab_0
|
253 |
+
- munkres=1.1.4=pyh9f0ad1d_0
|
254 |
+
- murmurhash=1.0.9=py310hd8f1fbe_1
|
255 |
+
- mysql-common=8.0.32=hf1915f5_2
|
256 |
+
- mysql-libs=8.0.32=hca2cd23_2
|
257 |
+
- ncurses=6.3=h27087fc_1
|
258 |
+
- nest-asyncio=1.5.6=pyhd8ed1ab_0
|
259 |
+
- nettle=3.8.1=hc379101_1
|
260 |
+
- networkx=3.1=pyhd8ed1ab_0
|
261 |
+
- nsight-compute=2022.3.0.22=0
|
262 |
+
- nspr=4.35=h27087fc_0
|
263 |
+
- nss=3.89=he45b914_0
|
264 |
+
- numba=0.56.4=py310h0e39c9b_1
|
265 |
+
- numpy=1.23.5=py310h53a5b5f_0
|
266 |
+
- opencv=4.7.0=py310hff52083_1
|
267 |
+
- openh264=2.3.1=hcb278e6_2
|
268 |
+
- openjpeg=2.5.0=hfec8fc6_2
|
269 |
+
- openmpi=4.1.5=h414af15_101
|
270 |
+
- openssl=3.1.0=hd590300_3
|
271 |
+
- p11-kit=0.24.1=hc5aa10d_0
|
272 |
+
- packaging=23.1=pyhd8ed1ab_0
|
273 |
+
- pandas=2.0.1=py310h7cbd5c2_1
|
274 |
+
- parso=0.8.3=pyhd8ed1ab_0
|
275 |
+
- pathy=0.10.1=pyhd8ed1ab_0
|
276 |
+
- patsy=0.5.3=pyhd8ed1ab_0
|
277 |
+
- pcre2=10.40=hc3806b6_0
|
278 |
+
- pexpect=4.8.0=pyh1a96a4e_2
|
279 |
+
- pickleshare=0.7.5=py_1003
|
280 |
+
- pillow=9.4.0=py310h023d228_1
|
281 |
+
- pixman=0.40.0=h36c2ea0_0
|
282 |
+
- platformdirs=3.5.0=pyhd8ed1ab_0
|
283 |
+
- plotly=5.14.1=pyhd8ed1ab_0
|
284 |
+
- pluggy=1.0.0=pyhd8ed1ab_5
|
285 |
+
- ply=3.11=py_1
|
286 |
+
- pooch=1.7.0=pyha770c72_3
|
287 |
+
- preshed=3.0.8=py310hd8f1fbe_1
|
288 |
+
- prompt-toolkit=3.0.38=pyha770c72_0
|
289 |
+
- prompt_toolkit=3.0.38=hd8ed1ab_0
|
290 |
+
- psutil=5.9.5=py310h1fa729e_0
|
291 |
+
- pthread-stubs=0.4=h36c2ea0_1001
|
292 |
+
- ptyprocess=0.7.0=pyhd3deb0d_0
|
293 |
+
- pulseaudio=16.1=hcb278e6_3
|
294 |
+
- pulseaudio-client=16.1=h5195f5e_3
|
295 |
+
- pulseaudio-daemon=16.1=ha8d29e2_3
|
296 |
+
- pure_eval=0.2.2=pyhd8ed1ab_0
|
297 |
+
- py-opencv=4.7.0=py310hfdc917e_1
|
298 |
+
- pyasn1=0.4.8=py_0
|
299 |
+
- pybind11=2.10.4=py310hdf3cbec_0
|
300 |
+
- pybind11-abi=4=hd8ed1ab_3
|
301 |
+
- pybind11-global=2.10.4=py310hdf3cbec_0
|
302 |
+
- pycosat=0.6.4=py310h5764c6d_1
|
303 |
+
- pycparser=2.21=pyhd8ed1ab_0
|
304 |
+
- pydantic=1.10.7=py310h1fa729e_0
|
305 |
+
- pygments=2.15.1=pyhd8ed1ab_0
|
306 |
+
- pyopenssl=23.1.1=pyhd8ed1ab_0
|
307 |
+
- pyparsing=3.0.9=pyhd8ed1ab_0
|
308 |
+
- pyqt=5.15.7=py310hab646b1_3
|
309 |
+
- pyqt5-sip=12.11.0=py310heca2aa9_3
|
310 |
+
- pysocks=1.7.1=pyha2e5f31_6
|
311 |
+
- python=3.10.8=h4a9ceb5_0_cpython
|
312 |
+
- python-dateutil=2.8.2=pyhd8ed1ab_0
|
313 |
+
- python-tzdata=2023.3=pyhd8ed1ab_0
|
314 |
+
- python_abi=3.10=3_cp310
|
315 |
+
- pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
|
316 |
+
- pytorch-cuda=11.8=h7e8668a_3
|
317 |
+
- pytorch-mutex=1.0=cuda
|
318 |
+
- pytz=2023.3=pyhd8ed1ab_0
|
319 |
+
- pyyaml=5.4.1=py310h5764c6d_4
|
320 |
+
- pyzmq=25.0.2=py310h059b190_0
|
321 |
+
- qt-main=5.15.8=h5d23da1_6
|
322 |
+
- readline=8.2=h8228510_1
|
323 |
+
- reproc=14.2.4=h0b41bf4_0
|
324 |
+
- reproc-cpp=14.2.4=hcb278e6_0
|
325 |
+
- requests=2.28.2=pyhd8ed1ab_1
|
326 |
+
- rhash=1.4.3=h166bdaf_0
|
327 |
+
- rich=12.6.0=pyhd8ed1ab_0
|
328 |
+
- rsa=4.7.2=pyh44b312d_0
|
329 |
+
- ruamel.yaml=0.17.21=py310h1fa729e_3
|
330 |
+
- ruamel.yaml.clib=0.2.7=py310h1fa729e_1
|
331 |
+
- s3transfer=0.6.1=pyhd8ed1ab_0
|
332 |
+
- scikit-learn=1.2.2=py310h41b6a48_1
|
333 |
+
- scipy=1.10.1=py310h8deb116_2
|
334 |
+
- seaborn=0.12.2=hd8ed1ab_0
|
335 |
+
- seaborn-base=0.12.2=pyhd8ed1ab_0
|
336 |
+
- setuptools=65.6.3=pyhd8ed1ab_0
|
337 |
+
- shap=0.41.0=py310h769672d_0
|
338 |
+
- shellingham=1.5.1=pyhd8ed1ab_0
|
339 |
+
- sip=6.7.9=py310hc6cd4ac_0
|
340 |
+
- six=1.16.0=pyh6c4a22f_0
|
341 |
+
- slicer=0.0.7=pyhd8ed1ab_0
|
342 |
+
- smart_open=5.2.1=pyhd8ed1ab_0
|
343 |
+
- spacy=3.5.2=py310h5a539fb_0
|
344 |
+
- spacy-legacy=3.0.12=pyhd8ed1ab_0
|
345 |
+
- spacy-loggers=1.0.4=pyhd8ed1ab_0
|
346 |
+
- srsly=2.4.6=py310heca2aa9_0
|
347 |
+
- stack_data=0.6.2=pyhd8ed1ab_0
|
348 |
+
- statsmodels=0.14.0=py310h278f3c1_1
|
349 |
+
- svt-av1=1.4.1=hcb278e6_0
|
350 |
+
- sympy=1.11.1=pypyh9d50eac_103
|
351 |
+
- tbb=2021.9.0=hf52228f_0
|
352 |
+
- tenacity=8.2.2=pyhd8ed1ab_0
|
353 |
+
- thinc=8.1.10=py310hfb6f7a9_0
|
354 |
+
- threadpoolctl=3.1.0=pyh8a188c0_0
|
355 |
+
- tk=8.6.12=h27826a3_0
|
356 |
+
- toml=0.10.2=pyhd8ed1ab_0
|
357 |
+
- tomli=2.0.1=pyhd8ed1ab_0
|
358 |
+
- toolz=0.12.0=pyhd8ed1ab_0
|
359 |
+
- torchaudio=2.0.1=py310_cu118
|
360 |
+
- torchdata=0.6.0=py310
|
361 |
+
- torchtext=0.15.1=py310
|
362 |
+
- torchvision=0.15.1=py310_cu118
|
363 |
+
- tornado=6.3=py310h1fa729e_0
|
364 |
+
- tqdm=4.65.0=pyhd8ed1ab_1
|
365 |
+
- traitlets=5.9.0=pyhd8ed1ab_0
|
366 |
+
- typer=0.7.0=pyhd8ed1ab_0
|
367 |
+
- typing=3.10.0.0=pyhd8ed1ab_0
|
368 |
+
- typing-extensions=4.5.0=hd8ed1ab_0
|
369 |
+
- typing_extensions=4.5.0=pyha770c72_0
|
370 |
+
- tzdata=2023c=h71feb2d_0
|
371 |
+
- unicodedata2=15.0.0=py310h5764c6d_0
|
372 |
+
- urllib3=1.26.15=pyhd8ed1ab_0
|
373 |
+
- wasabi=1.1.1=py310hff52083_1
|
374 |
+
- wcwidth=0.2.6=pyhd8ed1ab_0
|
375 |
+
- wheel=0.40.0=pyhd8ed1ab_0
|
376 |
+
- x264=1!164.3095=h166bdaf_2
|
377 |
+
- x265=3.5=h924138e_3
|
378 |
+
- xcb-util=0.4.0=h516909a_0
|
379 |
+
- xcb-util-image=0.4.0=h166bdaf_0
|
380 |
+
- xcb-util-keysyms=0.4.0=h516909a_0
|
381 |
+
- xcb-util-renderutil=0.3.9=h166bdaf_0
|
382 |
+
- xcb-util-wm=0.4.1=h516909a_0
|
383 |
+
- xkeyboard-config=2.38=h0b41bf4_0
|
384 |
+
- xorg-fixesproto=5.0=h7f98852_1002
|
385 |
+
- xorg-inputproto=2.3.2=h7f98852_1002
|
386 |
+
- xorg-kbproto=1.0.7=h7f98852_1002
|
387 |
+
- xorg-libice=1.0.10=h7f98852_0
|
388 |
+
- xorg-libsm=1.2.3=hd9c2040_1000
|
389 |
+
- xorg-libx11=1.8.4=h0b41bf4_0
|
390 |
+
- xorg-libxau=1.0.9=h7f98852_0
|
391 |
+
- xorg-libxdmcp=1.1.3=h7f98852_0
|
392 |
+
- xorg-libxext=1.3.4=h0b41bf4_2
|
393 |
+
- xorg-libxfixes=5.0.3=h7f98852_1004
|
394 |
+
- xorg-libxi=1.7.10=h7f98852_0
|
395 |
+
- xorg-libxrender=0.9.10=h7f98852_1003
|
396 |
+
- xorg-renderproto=0.11.1=h7f98852_1002
|
397 |
+
- xorg-xextproto=7.3.0=h0b41bf4_1003
|
398 |
+
- xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
|
399 |
+
- xorg-xproto=7.0.31=h7f98852_1007
|
400 |
+
- xyzservices=2023.2.0=pyhd8ed1ab_0
|
401 |
+
- xz=5.2.6=h166bdaf_0
|
402 |
+
- yaml=0.2.5=h7f98852_2
|
403 |
+
- yaml-cpp=0.7.0=h27087fc_2
|
404 |
+
- zeromq=4.3.4=h9c3ff4c_1
|
405 |
+
- zipp=3.15.0=pyhd8ed1ab_0
|
406 |
+
- zlib=1.2.13=h166bdaf_4
|
407 |
+
- zstandard=0.19.0=py310hdeb6495_1
|
408 |
+
- zstd=1.5.2=h3eb15da_6
|
409 |
+
- pip:
|
410 |
+
- accelerate==0.21.0
|
411 |
+
- aiohttp==3.8.5
|
412 |
+
- aiosignal==1.3.1
|
413 |
+
- apex==0.1
|
414 |
+
- appdirs==1.4.4
|
415 |
+
- argparse==1.4.0
|
416 |
+
- async-timeout==4.0.2
|
417 |
+
- attrs==22.2.0
|
418 |
+
- bcrypt==4.0.1
|
419 |
+
- bitsandbytes==0.41.0
|
420 |
+
- cmake==3.26.3
|
421 |
+
- contextlib2==21.6.0
|
422 |
+
- datasets==2.14.0
|
423 |
+
- deepspeed==0.6.1+1ea3d4b
|
424 |
+
- dill==0.3.6
|
425 |
+
- docker-pycreds==0.4.0
|
426 |
+
- einops==0.6.1
|
427 |
+
- flash-attn==0.2.8
|
428 |
+
- frozenlist==1.4.0
|
429 |
+
- fsspec==2023.5.0
|
430 |
+
- gevent==22.10.2
|
431 |
+
- gitdb==4.0.10
|
432 |
+
- gitpython==3.1.32
|
433 |
+
- google-pasta==0.2.0
|
434 |
+
- greenlet==2.0.2
|
435 |
+
- hjson==3.1.0
|
436 |
+
- horovod==0.26.1
|
437 |
+
- huggingface-hub==0.16.4
|
438 |
+
- importlib-metadata==4.13.0
|
439 |
+
- inotify-simple==1.2.1
|
440 |
+
- ipywidgets==8.0.7
|
441 |
+
- jsonpatch==1.32
|
442 |
+
- jsonpointer==2.3
|
443 |
+
- jsonschema==4.17.3
|
444 |
+
- jupyterlab-widgets==3.0.8
|
445 |
+
- lit==16.0.3
|
446 |
+
- multidict==6.0.4
|
447 |
+
- multiprocess==0.70.14
|
448 |
+
- ninja==1.11.1
|
449 |
+
- paramiko==3.1.0
|
450 |
+
- pathos==0.3.0
|
451 |
+
- pathtools==0.1.2
|
452 |
+
- peft==0.5.0.dev0
|
453 |
+
- pip==23.1.2
|
454 |
+
- pox==0.3.2
|
455 |
+
- ppft==1.7.6.6
|
456 |
+
- protobuf==3.20.3
|
457 |
+
- protobuf3-to-dict==0.1.5
|
458 |
+
- py-cpuinfo==9.0.0
|
459 |
+
- pyarrow==12.0.0
|
460 |
+
- pyfunctional==1.4.3
|
461 |
+
- pyinstrument==3.4.2
|
462 |
+
- pyinstrument-cext==0.2.4
|
463 |
+
- pynacl==1.5.0
|
464 |
+
- pyrsistent==0.19.3
|
465 |
+
- regex==2023.6.3
|
466 |
+
- retrying==1.3.4
|
467 |
+
- s3fs==0.4.2
|
468 |
+
- safetensors==0.3.1
|
469 |
+
- sagemaker==2.154.0
|
470 |
+
- sagemaker-experiments==0.1.43
|
471 |
+
- sagemaker-pytorch-training==2.8.0
|
472 |
+
- sagemaker-training==4.5.0
|
473 |
+
- schema==0.7.5
|
474 |
+
- sentry-sdk==1.28.1
|
475 |
+
- setproctitle==1.3.2
|
476 |
+
- smclarify==0.5
|
477 |
+
- smdebug==1.0.34
|
478 |
+
- smdebug-rulesconfig==1.0.1
|
479 |
+
- smdistributed-dataparallel==1.8.0
|
480 |
+
- smdistributed-modelparallel==1.15.0
|
481 |
+
- smmap==5.0.0
|
482 |
+
- tabulate==0.9.0
|
483 |
+
- tblib==1.7.0
|
484 |
+
- tokenizers==0.13.3
|
485 |
+
- torchnet==0.0.4
|
486 |
+
- transformers==4.31.0
|
487 |
+
- triton==2.0.0.dev20221202
|
488 |
+
- trl==0.4.7
|
489 |
+
- visdom==0.2.4
|
490 |
+
- wandb==0.15.7
|
491 |
+
- websocket-client==1.5.1
|
492 |
+
- werkzeug==2.3.4
|
493 |
+
- widgetsnbextension==4.0.8
|
494 |
+
- xxhash==3.2.0
|
495 |
+
- yarl==1.9.2
|
496 |
+
- zope-event==4.6
|
497 |
+
- zope-interface==6.0
|
498 |
+
prefix: /opt/conda
|
wandb/latest-run/files/config.yaml
ADDED
@@ -0,0 +1,649 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
python_version: 3.10.8
|
7 |
+
cli_version: 0.15.7
|
8 |
+
framework: huggingface
|
9 |
+
huggingface_version: 4.31.0
|
10 |
+
is_jupyter_run: true
|
11 |
+
is_kaggle_kernel: false
|
12 |
+
start_time: 1690472976.418337
|
13 |
+
t:
|
14 |
+
1:
|
15 |
+
- 1
|
16 |
+
- 5
|
17 |
+
- 11
|
18 |
+
- 49
|
19 |
+
- 51
|
20 |
+
- 53
|
21 |
+
- 55
|
22 |
+
- 71
|
23 |
+
- 84
|
24 |
+
- 98
|
25 |
+
2:
|
26 |
+
- 1
|
27 |
+
- 5
|
28 |
+
- 11
|
29 |
+
- 49
|
30 |
+
- 51
|
31 |
+
- 53
|
32 |
+
- 55
|
33 |
+
- 71
|
34 |
+
- 84
|
35 |
+
- 98
|
36 |
+
3:
|
37 |
+
- 7
|
38 |
+
- 23
|
39 |
+
4: 3.10.8
|
40 |
+
5: 0.15.7
|
41 |
+
6: 4.31.0
|
42 |
+
8:
|
43 |
+
- 1
|
44 |
+
- 5
|
45 |
+
m:
|
46 |
+
- 1: train/global_step
|
47 |
+
6:
|
48 |
+
- 3
|
49 |
+
- 1: train/loss
|
50 |
+
5: 1
|
51 |
+
6:
|
52 |
+
- 1
|
53 |
+
- 1: train/learning_rate
|
54 |
+
5: 1
|
55 |
+
6:
|
56 |
+
- 1
|
57 |
+
- 1: train/epoch
|
58 |
+
5: 1
|
59 |
+
6:
|
60 |
+
- 1
|
61 |
+
- 1: train/train_runtime
|
62 |
+
5: 1
|
63 |
+
6:
|
64 |
+
- 1
|
65 |
+
- 1: train/train_samples_per_second
|
66 |
+
5: 1
|
67 |
+
6:
|
68 |
+
- 1
|
69 |
+
- 1: train/train_steps_per_second
|
70 |
+
5: 1
|
71 |
+
6:
|
72 |
+
- 1
|
73 |
+
- 1: train/total_flos
|
74 |
+
5: 1
|
75 |
+
6:
|
76 |
+
- 1
|
77 |
+
- 1: train/train_loss
|
78 |
+
5: 1
|
79 |
+
6:
|
80 |
+
- 1
|
81 |
+
vocab_size:
|
82 |
+
desc: null
|
83 |
+
value: 65024
|
84 |
+
hidden_size:
|
85 |
+
desc: null
|
86 |
+
value: 4544
|
87 |
+
n_layer:
|
88 |
+
desc: null
|
89 |
+
value: 32
|
90 |
+
n_head:
|
91 |
+
desc: null
|
92 |
+
value: 71
|
93 |
+
layer_norm_epsilon:
|
94 |
+
desc: null
|
95 |
+
value: 1.0e-05
|
96 |
+
initializer_range:
|
97 |
+
desc: null
|
98 |
+
value: 0.02
|
99 |
+
use_cache:
|
100 |
+
desc: null
|
101 |
+
value: false
|
102 |
+
apply_residual_connection_post_layernorm:
|
103 |
+
desc: null
|
104 |
+
value: false
|
105 |
+
hidden_dropout:
|
106 |
+
desc: null
|
107 |
+
value: 0.0
|
108 |
+
attention_dropout:
|
109 |
+
desc: null
|
110 |
+
value: 0.0
|
111 |
+
bos_token_id:
|
112 |
+
desc: null
|
113 |
+
value: 11
|
114 |
+
eos_token_id:
|
115 |
+
desc: null
|
116 |
+
value: 11
|
117 |
+
multi_query:
|
118 |
+
desc: null
|
119 |
+
value: true
|
120 |
+
alibi:
|
121 |
+
desc: null
|
122 |
+
value: false
|
123 |
+
bias:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
parallel_attn:
|
127 |
+
desc: null
|
128 |
+
value: true
|
129 |
+
return_dict:
|
130 |
+
desc: null
|
131 |
+
value: true
|
132 |
+
output_hidden_states:
|
133 |
+
desc: null
|
134 |
+
value: false
|
135 |
+
output_attentions:
|
136 |
+
desc: null
|
137 |
+
value: false
|
138 |
+
torchscript:
|
139 |
+
desc: null
|
140 |
+
value: false
|
141 |
+
torch_dtype:
|
142 |
+
desc: null
|
143 |
+
value: bfloat16
|
144 |
+
use_bfloat16:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
tf_legacy_loss:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
pruned_heads:
|
151 |
+
desc: null
|
152 |
+
value: {}
|
153 |
+
tie_word_embeddings:
|
154 |
+
desc: null
|
155 |
+
value: true
|
156 |
+
is_encoder_decoder:
|
157 |
+
desc: null
|
158 |
+
value: false
|
159 |
+
is_decoder:
|
160 |
+
desc: null
|
161 |
+
value: false
|
162 |
+
cross_attention_hidden_size:
|
163 |
+
desc: null
|
164 |
+
value: null
|
165 |
+
add_cross_attention:
|
166 |
+
desc: null
|
167 |
+
value: false
|
168 |
+
tie_encoder_decoder:
|
169 |
+
desc: null
|
170 |
+
value: false
|
171 |
+
max_length:
|
172 |
+
desc: null
|
173 |
+
value: 20
|
174 |
+
min_length:
|
175 |
+
desc: null
|
176 |
+
value: 0
|
177 |
+
do_sample:
|
178 |
+
desc: null
|
179 |
+
value: false
|
180 |
+
early_stopping:
|
181 |
+
desc: null
|
182 |
+
value: false
|
183 |
+
num_beams:
|
184 |
+
desc: null
|
185 |
+
value: 1
|
186 |
+
num_beam_groups:
|
187 |
+
desc: null
|
188 |
+
value: 1
|
189 |
+
diversity_penalty:
|
190 |
+
desc: null
|
191 |
+
value: 0.0
|
192 |
+
temperature:
|
193 |
+
desc: null
|
194 |
+
value: 1.0
|
195 |
+
top_k:
|
196 |
+
desc: null
|
197 |
+
value: 50
|
198 |
+
top_p:
|
199 |
+
desc: null
|
200 |
+
value: 1.0
|
201 |
+
typical_p:
|
202 |
+
desc: null
|
203 |
+
value: 1.0
|
204 |
+
repetition_penalty:
|
205 |
+
desc: null
|
206 |
+
value: 1.0
|
207 |
+
length_penalty:
|
208 |
+
desc: null
|
209 |
+
value: 1.0
|
210 |
+
no_repeat_ngram_size:
|
211 |
+
desc: null
|
212 |
+
value: 0
|
213 |
+
encoder_no_repeat_ngram_size:
|
214 |
+
desc: null
|
215 |
+
value: 0
|
216 |
+
bad_words_ids:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
num_return_sequences:
|
220 |
+
desc: null
|
221 |
+
value: 1
|
222 |
+
chunk_size_feed_forward:
|
223 |
+
desc: null
|
224 |
+
value: 0
|
225 |
+
output_scores:
|
226 |
+
desc: null
|
227 |
+
value: false
|
228 |
+
return_dict_in_generate:
|
229 |
+
desc: null
|
230 |
+
value: false
|
231 |
+
forced_bos_token_id:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
forced_eos_token_id:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_invalid_values:
|
238 |
+
desc: null
|
239 |
+
value: false
|
240 |
+
exponential_decay_length_penalty:
|
241 |
+
desc: null
|
242 |
+
value: null
|
243 |
+
suppress_tokens:
|
244 |
+
desc: null
|
245 |
+
value: null
|
246 |
+
begin_suppress_tokens:
|
247 |
+
desc: null
|
248 |
+
value: null
|
249 |
+
architectures:
|
250 |
+
desc: null
|
251 |
+
value:
|
252 |
+
- RWForCausalLM
|
253 |
+
finetuning_task:
|
254 |
+
desc: null
|
255 |
+
value: null
|
256 |
+
id2label:
|
257 |
+
desc: null
|
258 |
+
value:
|
259 |
+
'0': LABEL_0
|
260 |
+
'1': LABEL_1
|
261 |
+
label2id:
|
262 |
+
desc: null
|
263 |
+
value:
|
264 |
+
LABEL_0: 0
|
265 |
+
LABEL_1: 1
|
266 |
+
tokenizer_class:
|
267 |
+
desc: null
|
268 |
+
value: null
|
269 |
+
prefix:
|
270 |
+
desc: null
|
271 |
+
value: null
|
272 |
+
pad_token_id:
|
273 |
+
desc: null
|
274 |
+
value: null
|
275 |
+
sep_token_id:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
decoder_start_token_id:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
task_specific_params:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
problem_type:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
_name_or_path:
|
288 |
+
desc: null
|
289 |
+
value: ybelkada/falcon-7b-sharded-bf16
|
290 |
+
transformers_version:
|
291 |
+
desc: null
|
292 |
+
value: 4.31.0
|
293 |
+
auto_map:
|
294 |
+
desc: null
|
295 |
+
value:
|
296 |
+
AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
|
297 |
+
AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
|
298 |
+
AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
|
299 |
+
AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
|
300 |
+
AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
|
301 |
+
AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
|
302 |
+
model_type:
|
303 |
+
desc: null
|
304 |
+
value: RefinedWebModel
|
305 |
+
quantization_config:
|
306 |
+
desc: null
|
307 |
+
value:
|
308 |
+
load_in_8bit: false
|
309 |
+
load_in_4bit: true
|
310 |
+
llm_int8_threshold: 6.0
|
311 |
+
llm_int8_skip_modules: null
|
312 |
+
llm_int8_enable_fp32_cpu_offload: false
|
313 |
+
llm_int8_has_fp16_weight: false
|
314 |
+
bnb_4bit_quant_type: nf4
|
315 |
+
bnb_4bit_use_double_quant: false
|
316 |
+
bnb_4bit_compute_dtype: float16
|
317 |
+
output_dir:
|
318 |
+
desc: null
|
319 |
+
value: ./results
|
320 |
+
overwrite_output_dir:
|
321 |
+
desc: null
|
322 |
+
value: false
|
323 |
+
do_train:
|
324 |
+
desc: null
|
325 |
+
value: false
|
326 |
+
do_eval:
|
327 |
+
desc: null
|
328 |
+
value: false
|
329 |
+
do_predict:
|
330 |
+
desc: null
|
331 |
+
value: false
|
332 |
+
evaluation_strategy:
|
333 |
+
desc: null
|
334 |
+
value: 'no'
|
335 |
+
prediction_loss_only:
|
336 |
+
desc: null
|
337 |
+
value: false
|
338 |
+
per_device_train_batch_size:
|
339 |
+
desc: null
|
340 |
+
value: 4
|
341 |
+
per_device_eval_batch_size:
|
342 |
+
desc: null
|
343 |
+
value: 8
|
344 |
+
per_gpu_train_batch_size:
|
345 |
+
desc: null
|
346 |
+
value: None
|
347 |
+
per_gpu_eval_batch_size:
|
348 |
+
desc: null
|
349 |
+
value: None
|
350 |
+
gradient_accumulation_steps:
|
351 |
+
desc: null
|
352 |
+
value: 4
|
353 |
+
eval_accumulation_steps:
|
354 |
+
desc: null
|
355 |
+
value: None
|
356 |
+
eval_delay:
|
357 |
+
desc: null
|
358 |
+
value: 0
|
359 |
+
learning_rate:
|
360 |
+
desc: null
|
361 |
+
value: 0.0002
|
362 |
+
weight_decay:
|
363 |
+
desc: null
|
364 |
+
value: 0.0
|
365 |
+
adam_beta1:
|
366 |
+
desc: null
|
367 |
+
value: 0.9
|
368 |
+
adam_beta2:
|
369 |
+
desc: null
|
370 |
+
value: 0.999
|
371 |
+
adam_epsilon:
|
372 |
+
desc: null
|
373 |
+
value: 1.0e-08
|
374 |
+
max_grad_norm:
|
375 |
+
desc: null
|
376 |
+
value: 0.3
|
377 |
+
num_train_epochs:
|
378 |
+
desc: null
|
379 |
+
value: 3.0
|
380 |
+
max_steps:
|
381 |
+
desc: null
|
382 |
+
value: 500
|
383 |
+
lr_scheduler_type:
|
384 |
+
desc: null
|
385 |
+
value: constant
|
386 |
+
warmup_ratio:
|
387 |
+
desc: null
|
388 |
+
value: 0.03
|
389 |
+
warmup_steps:
|
390 |
+
desc: null
|
391 |
+
value: 0
|
392 |
+
log_level:
|
393 |
+
desc: null
|
394 |
+
value: passive
|
395 |
+
log_level_replica:
|
396 |
+
desc: null
|
397 |
+
value: warning
|
398 |
+
log_on_each_node:
|
399 |
+
desc: null
|
400 |
+
value: true
|
401 |
+
logging_dir:
|
402 |
+
desc: null
|
403 |
+
value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
|
404 |
+
logging_strategy:
|
405 |
+
desc: null
|
406 |
+
value: steps
|
407 |
+
logging_first_step:
|
408 |
+
desc: null
|
409 |
+
value: false
|
410 |
+
logging_steps:
|
411 |
+
desc: null
|
412 |
+
value: 10
|
413 |
+
logging_nan_inf_filter:
|
414 |
+
desc: null
|
415 |
+
value: true
|
416 |
+
save_strategy:
|
417 |
+
desc: null
|
418 |
+
value: steps
|
419 |
+
save_steps:
|
420 |
+
desc: null
|
421 |
+
value: 10
|
422 |
+
save_total_limit:
|
423 |
+
desc: null
|
424 |
+
value: None
|
425 |
+
save_safetensors:
|
426 |
+
desc: null
|
427 |
+
value: false
|
428 |
+
save_on_each_node:
|
429 |
+
desc: null
|
430 |
+
value: false
|
431 |
+
no_cuda:
|
432 |
+
desc: null
|
433 |
+
value: false
|
434 |
+
use_mps_device:
|
435 |
+
desc: null
|
436 |
+
value: false
|
437 |
+
seed:
|
438 |
+
desc: null
|
439 |
+
value: 42
|
440 |
+
data_seed:
|
441 |
+
desc: null
|
442 |
+
value: None
|
443 |
+
jit_mode_eval:
|
444 |
+
desc: null
|
445 |
+
value: false
|
446 |
+
use_ipex:
|
447 |
+
desc: null
|
448 |
+
value: false
|
449 |
+
bf16:
|
450 |
+
desc: null
|
451 |
+
value: false
|
452 |
+
fp16:
|
453 |
+
desc: null
|
454 |
+
value: true
|
455 |
+
fp16_opt_level:
|
456 |
+
desc: null
|
457 |
+
value: O1
|
458 |
+
half_precision_backend:
|
459 |
+
desc: null
|
460 |
+
value: auto
|
461 |
+
bf16_full_eval:
|
462 |
+
desc: null
|
463 |
+
value: false
|
464 |
+
fp16_full_eval:
|
465 |
+
desc: null
|
466 |
+
value: false
|
467 |
+
tf32:
|
468 |
+
desc: null
|
469 |
+
value: None
|
470 |
+
local_rank:
|
471 |
+
desc: null
|
472 |
+
value: 0
|
473 |
+
ddp_backend:
|
474 |
+
desc: null
|
475 |
+
value: None
|
476 |
+
tpu_num_cores:
|
477 |
+
desc: null
|
478 |
+
value: None
|
479 |
+
tpu_metrics_debug:
|
480 |
+
desc: null
|
481 |
+
value: false
|
482 |
+
debug:
|
483 |
+
desc: null
|
484 |
+
value: '[]'
|
485 |
+
dataloader_drop_last:
|
486 |
+
desc: null
|
487 |
+
value: false
|
488 |
+
eval_steps:
|
489 |
+
desc: null
|
490 |
+
value: None
|
491 |
+
dataloader_num_workers:
|
492 |
+
desc: null
|
493 |
+
value: 0
|
494 |
+
past_index:
|
495 |
+
desc: null
|
496 |
+
value: -1
|
497 |
+
run_name:
|
498 |
+
desc: null
|
499 |
+
value: ./results
|
500 |
+
disable_tqdm:
|
501 |
+
desc: null
|
502 |
+
value: false
|
503 |
+
remove_unused_columns:
|
504 |
+
desc: null
|
505 |
+
value: true
|
506 |
+
label_names:
|
507 |
+
desc: null
|
508 |
+
value: None
|
509 |
+
load_best_model_at_end:
|
510 |
+
desc: null
|
511 |
+
value: false
|
512 |
+
metric_for_best_model:
|
513 |
+
desc: null
|
514 |
+
value: None
|
515 |
+
greater_is_better:
|
516 |
+
desc: null
|
517 |
+
value: None
|
518 |
+
ignore_data_skip:
|
519 |
+
desc: null
|
520 |
+
value: false
|
521 |
+
sharded_ddp:
|
522 |
+
desc: null
|
523 |
+
value: '[]'
|
524 |
+
fsdp:
|
525 |
+
desc: null
|
526 |
+
value: '[]'
|
527 |
+
fsdp_min_num_params:
|
528 |
+
desc: null
|
529 |
+
value: 0
|
530 |
+
fsdp_config:
|
531 |
+
desc: null
|
532 |
+
value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
|
533 |
+
fsdp_transformer_layer_cls_to_wrap:
|
534 |
+
desc: null
|
535 |
+
value: None
|
536 |
+
deepspeed:
|
537 |
+
desc: null
|
538 |
+
value: None
|
539 |
+
label_smoothing_factor:
|
540 |
+
desc: null
|
541 |
+
value: 0.0
|
542 |
+
optim:
|
543 |
+
desc: null
|
544 |
+
value: paged_adamw_32bit
|
545 |
+
optim_args:
|
546 |
+
desc: null
|
547 |
+
value: None
|
548 |
+
adafactor:
|
549 |
+
desc: null
|
550 |
+
value: false
|
551 |
+
group_by_length:
|
552 |
+
desc: null
|
553 |
+
value: true
|
554 |
+
length_column_name:
|
555 |
+
desc: null
|
556 |
+
value: length
|
557 |
+
report_to:
|
558 |
+
desc: null
|
559 |
+
value: '[''wandb'']'
|
560 |
+
ddp_find_unused_parameters:
|
561 |
+
desc: null
|
562 |
+
value: None
|
563 |
+
ddp_bucket_cap_mb:
|
564 |
+
desc: null
|
565 |
+
value: None
|
566 |
+
ddp_broadcast_buffers:
|
567 |
+
desc: null
|
568 |
+
value: None
|
569 |
+
dataloader_pin_memory:
|
570 |
+
desc: null
|
571 |
+
value: true
|
572 |
+
skip_memory_metrics:
|
573 |
+
desc: null
|
574 |
+
value: true
|
575 |
+
use_legacy_prediction_loop:
|
576 |
+
desc: null
|
577 |
+
value: false
|
578 |
+
push_to_hub:
|
579 |
+
desc: null
|
580 |
+
value: false
|
581 |
+
resume_from_checkpoint:
|
582 |
+
desc: null
|
583 |
+
value: None
|
584 |
+
hub_model_id:
|
585 |
+
desc: null
|
586 |
+
value: None
|
587 |
+
hub_strategy:
|
588 |
+
desc: null
|
589 |
+
value: every_save
|
590 |
+
hub_token:
|
591 |
+
desc: null
|
592 |
+
value: <HUB_TOKEN>
|
593 |
+
hub_private_repo:
|
594 |
+
desc: null
|
595 |
+
value: false
|
596 |
+
gradient_checkpointing:
|
597 |
+
desc: null
|
598 |
+
value: false
|
599 |
+
include_inputs_for_metrics:
|
600 |
+
desc: null
|
601 |
+
value: false
|
602 |
+
fp16_backend:
|
603 |
+
desc: null
|
604 |
+
value: auto
|
605 |
+
push_to_hub_model_id:
|
606 |
+
desc: null
|
607 |
+
value: None
|
608 |
+
push_to_hub_organization:
|
609 |
+
desc: null
|
610 |
+
value: None
|
611 |
+
push_to_hub_token:
|
612 |
+
desc: null
|
613 |
+
value: <PUSH_TO_HUB_TOKEN>
|
614 |
+
mp_parameters:
|
615 |
+
desc: null
|
616 |
+
value: ''
|
617 |
+
auto_find_batch_size:
|
618 |
+
desc: null
|
619 |
+
value: false
|
620 |
+
full_determinism:
|
621 |
+
desc: null
|
622 |
+
value: false
|
623 |
+
torchdynamo:
|
624 |
+
desc: null
|
625 |
+
value: None
|
626 |
+
ray_scope:
|
627 |
+
desc: null
|
628 |
+
value: last
|
629 |
+
ddp_timeout:
|
630 |
+
desc: null
|
631 |
+
value: 1800
|
632 |
+
torch_compile:
|
633 |
+
desc: null
|
634 |
+
value: false
|
635 |
+
torch_compile_backend:
|
636 |
+
desc: null
|
637 |
+
value: None
|
638 |
+
torch_compile_mode:
|
639 |
+
desc: null
|
640 |
+
value: None
|
641 |
+
xpu_backend:
|
642 |
+
desc: null
|
643 |
+
value: None
|
644 |
+
train_batch_size:
|
645 |
+
desc: null
|
646 |
+
value: 4
|
647 |
+
eval_batch_size:
|
648 |
+
desc: null
|
649 |
+
value: 8
|
wandb/latest-run/files/output.log
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
|
3 |
+
{}
|
4 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
5 |
+
To disable this warning, you can either:
|
6 |
+
- Avoid using `tokenizers` before the fork if possible
|
7 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
8 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
9 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
10 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
11 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
12 |
+
To disable this warning, you can either:
|
13 |
+
- Avoid using `tokenizers` before the fork if possible
|
14 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
15 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
16 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
17 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
18 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
19 |
+
To disable this warning, you can either:
|
20 |
+
- Avoid using `tokenizers` before the fork if possible
|
21 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
22 |
+
Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
|
23 |
+
Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
|
24 |
+
Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
|
25 |
+
Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
|
26 |
+
Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
|
27 |
+
Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
|
28 |
+
Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
|
29 |
+
Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
|
30 |
+
Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
|
31 |
+
Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
|
32 |
+
Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
|
33 |
+
Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
|
34 |
+
Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
|
35 |
+
Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
|
36 |
+
Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
|
37 |
+
Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
|
38 |
+
Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
|
39 |
+
Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
|
40 |
+
Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
|
41 |
+
Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
|
42 |
+
Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
|
43 |
+
Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
|
44 |
+
Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
|
45 |
+
Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
|
46 |
+
Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
|
47 |
+
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
|
48 |
+
Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
|
49 |
+
Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
|
50 |
+
Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
|
51 |
+
Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
|
52 |
+
Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
|
53 |
+
Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
|
54 |
+
Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
|
55 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
56 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
57 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
58 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
59 |
+
To disable this warning, you can either:
|
60 |
+
- Avoid using `tokenizers` before the fork if possible
|
61 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
62 |
+
Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
|
63 |
+
Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
|
64 |
+
Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
|
65 |
+
Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
|
66 |
+
Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
|
67 |
+
Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
|
68 |
+
Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
|
69 |
+
Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
|
70 |
+
Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
|
71 |
+
Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
|
72 |
+
Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
|
73 |
+
Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
|
74 |
+
Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
|
75 |
+
Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
|
76 |
+
Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
|
77 |
+
Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
|
78 |
+
Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
|
79 |
+
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
|
80 |
+
Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
|
81 |
+
Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
|
82 |
+
Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
|
83 |
+
Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
|
84 |
+
Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
|
85 |
+
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
|
86 |
+
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
|
87 |
+
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
|
88 |
+
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
|
89 |
+
Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
|
90 |
+
Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
|
91 |
+
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
|
92 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
93 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
94 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
95 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
96 |
+
To disable this warning, you can either:
|
97 |
+
- Avoid using `tokenizers` before the fork if possible
|
98 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
99 |
+
Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
|
100 |
+
Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
|
101 |
+
Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
|
102 |
+
Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
|
103 |
+
Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
|
104 |
+
Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
|
105 |
+
Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
|
106 |
+
Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
|
107 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
108 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
109 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
110 |
+
True
|
111 |
+
/opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
|
112 |
+
warnings.warn(
|
wandb/latest-run/files/requirements.txt
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.21.0
|
2 |
+
aiohttp==3.8.5
|
3 |
+
aiosignal==1.3.1
|
4 |
+
apex==0.1
|
5 |
+
appdirs==1.4.4
|
6 |
+
argparse==1.4.0
|
7 |
+
asttokens==2.2.1
|
8 |
+
async-timeout==4.0.2
|
9 |
+
attrs==22.2.0
|
10 |
+
awscli==1.27.132
|
11 |
+
backcall==0.2.0
|
12 |
+
backports.functools-lru-cache==1.6.4
|
13 |
+
bcrypt==4.0.1
|
14 |
+
bitsandbytes==0.41.0
|
15 |
+
blis==0.7.9
|
16 |
+
bokeh==3.1.1
|
17 |
+
boto3==1.26.132
|
18 |
+
botocore==1.29.132
|
19 |
+
brotlipy==0.7.0
|
20 |
+
cached-property==1.5.2
|
21 |
+
catalogue==2.0.8
|
22 |
+
certifi==2023.5.7
|
23 |
+
cffi==1.15.1
|
24 |
+
charset-normalizer==3.1.0
|
25 |
+
click==8.1.3
|
26 |
+
cloudpickle==2.2.1
|
27 |
+
cmake==3.26.3
|
28 |
+
colorama==0.4.4
|
29 |
+
comm==0.1.3
|
30 |
+
commonmark==0.9.1
|
31 |
+
conda-content-trust==0.1.3
|
32 |
+
conda-package-handling==2.0.2
|
33 |
+
conda-package-streaming==0.7.0
|
34 |
+
conda==23.1.0
|
35 |
+
confection==0.0.4
|
36 |
+
contextlib2==21.6.0
|
37 |
+
contourpy==1.0.7
|
38 |
+
cryptography==40.0.1
|
39 |
+
cycler==0.11.0
|
40 |
+
cymem==2.0.7
|
41 |
+
cython==0.29.34
|
42 |
+
datasets==2.14.0
|
43 |
+
debugpy==1.6.7
|
44 |
+
decorator==5.1.1
|
45 |
+
deepspeed==0.6.1+1ea3d4b
|
46 |
+
dgl==1.1.0+cu118
|
47 |
+
dill==0.3.6
|
48 |
+
docker-pycreds==0.4.0
|
49 |
+
docutils==0.15.2
|
50 |
+
einops==0.6.1
|
51 |
+
executing==1.2.0
|
52 |
+
fastai==2.7.12
|
53 |
+
fastcore==1.5.29
|
54 |
+
fastdownload==0.0.7
|
55 |
+
fastprogress==1.0.3
|
56 |
+
filelock==3.12.0
|
57 |
+
flash-attn==0.2.8
|
58 |
+
fonttools==4.39.4
|
59 |
+
frozenlist==1.4.0
|
60 |
+
fsspec==2023.5.0
|
61 |
+
future==0.18.3
|
62 |
+
gevent==22.10.2
|
63 |
+
gitdb==4.0.10
|
64 |
+
gitpython==3.1.32
|
65 |
+
gmpy2==2.1.2
|
66 |
+
google-pasta==0.2.0
|
67 |
+
greenlet==2.0.2
|
68 |
+
h5py==3.8.0
|
69 |
+
hjson==3.1.0
|
70 |
+
horovod==0.26.1
|
71 |
+
huggingface-hub==0.16.4
|
72 |
+
idna==3.4
|
73 |
+
imageio==2.28.1
|
74 |
+
importlib-metadata==4.13.0
|
75 |
+
inotify-simple==1.2.1
|
76 |
+
ipykernel==6.23.0
|
77 |
+
ipython==8.13.2
|
78 |
+
ipywidgets==8.0.7
|
79 |
+
jedi==0.18.2
|
80 |
+
jinja2==3.1.2
|
81 |
+
jmespath==1.0.1
|
82 |
+
joblib==1.2.0
|
83 |
+
jsonpatch==1.32
|
84 |
+
jsonpointer==2.3
|
85 |
+
jsonschema==4.17.3
|
86 |
+
jupyter-client==8.2.0
|
87 |
+
jupyter-core==5.3.0
|
88 |
+
jupyterlab-widgets==3.0.8
|
89 |
+
kiwisolver==1.4.4
|
90 |
+
langcodes==3.3.0
|
91 |
+
libmambapy==1.4.1
|
92 |
+
lit==16.0.3
|
93 |
+
llvmlite==0.39.1
|
94 |
+
mamba==1.4.1
|
95 |
+
markupsafe==2.1.2
|
96 |
+
matplotlib-inline==0.1.6
|
97 |
+
matplotlib==3.7.1
|
98 |
+
mpi4py==3.1.4
|
99 |
+
mpmath==1.3.0
|
100 |
+
multidict==6.0.4
|
101 |
+
multiprocess==0.70.14
|
102 |
+
munkres==1.1.4
|
103 |
+
murmurhash==1.0.9
|
104 |
+
nest-asyncio==1.5.6
|
105 |
+
networkx==3.1
|
106 |
+
ninja==1.11.1
|
107 |
+
numba==0.56.4
|
108 |
+
numpy==1.23.5
|
109 |
+
opencv-python==4.7.0
|
110 |
+
packaging==23.1
|
111 |
+
pandas==2.0.1
|
112 |
+
paramiko==3.1.0
|
113 |
+
parso==0.8.3
|
114 |
+
pathos==0.3.0
|
115 |
+
pathtools==0.1.2
|
116 |
+
pathy==0.10.1
|
117 |
+
patsy==0.5.3
|
118 |
+
peft==0.5.0.dev0
|
119 |
+
pexpect==4.8.0
|
120 |
+
pickleshare==0.7.5
|
121 |
+
pillow==9.4.0
|
122 |
+
pip==23.1.2
|
123 |
+
platformdirs==3.5.0
|
124 |
+
plotly==5.14.1
|
125 |
+
pluggy==1.0.0
|
126 |
+
ply==3.11
|
127 |
+
pooch==1.7.0
|
128 |
+
pox==0.3.2
|
129 |
+
ppft==1.7.6.6
|
130 |
+
preshed==3.0.8
|
131 |
+
prompt-toolkit==3.0.38
|
132 |
+
protobuf3-to-dict==0.1.5
|
133 |
+
protobuf==3.20.3
|
134 |
+
psutil==5.9.5
|
135 |
+
ptyprocess==0.7.0
|
136 |
+
pure-eval==0.2.2
|
137 |
+
py-cpuinfo==9.0.0
|
138 |
+
pyarrow==12.0.0
|
139 |
+
pyasn1==0.4.8
|
140 |
+
pybind11-global==2.10.4
|
141 |
+
pybind11==2.10.4
|
142 |
+
pycosat==0.6.4
|
143 |
+
pycparser==2.21
|
144 |
+
pydantic==1.10.7
|
145 |
+
pyfunctional==1.4.3
|
146 |
+
pygments==2.15.1
|
147 |
+
pyinstrument-cext==0.2.4
|
148 |
+
pyinstrument==3.4.2
|
149 |
+
pynacl==1.5.0
|
150 |
+
pyopenssl==23.1.1
|
151 |
+
pyparsing==3.0.9
|
152 |
+
pyqt5-sip==12.11.0
|
153 |
+
pyqt5==5.15.7
|
154 |
+
pyrsistent==0.19.3
|
155 |
+
pysocks==1.7.1
|
156 |
+
python-dateutil==2.8.2
|
157 |
+
pytz==2023.3
|
158 |
+
pyyaml==5.4.1
|
159 |
+
pyzmq==25.0.2
|
160 |
+
regex==2023.6.3
|
161 |
+
requests==2.28.2
|
162 |
+
retrying==1.3.4
|
163 |
+
rich==12.6.0
|
164 |
+
rsa==4.7.2
|
165 |
+
ruamel.yaml.clib==0.2.7
|
166 |
+
ruamel.yaml==0.17.21
|
167 |
+
s3fs==0.4.2
|
168 |
+
s3transfer==0.6.1
|
169 |
+
safetensors==0.3.1
|
170 |
+
sagemaker-experiments==0.1.43
|
171 |
+
sagemaker-pytorch-training==2.8.0
|
172 |
+
sagemaker-training==4.5.0
|
173 |
+
sagemaker==2.154.0
|
174 |
+
schema==0.7.5
|
175 |
+
scikit-learn==1.2.2
|
176 |
+
scipy==1.10.1
|
177 |
+
seaborn==0.12.2
|
178 |
+
sentry-sdk==1.28.1
|
179 |
+
setproctitle==1.3.2
|
180 |
+
setuptools==65.6.3
|
181 |
+
shap==0.41.0
|
182 |
+
shellingham==1.5.1
|
183 |
+
sip==6.7.9
|
184 |
+
six==1.16.0
|
185 |
+
slicer==0.0.7
|
186 |
+
smart-open==5.2.1
|
187 |
+
smclarify==0.5
|
188 |
+
smdebug-rulesconfig==1.0.1
|
189 |
+
smdebug==1.0.34
|
190 |
+
smdistributed-dataparallel==1.8.0
|
191 |
+
smdistributed-modelparallel==1.15.0
|
192 |
+
smmap==5.0.0
|
193 |
+
spacy-legacy==3.0.12
|
194 |
+
spacy-loggers==1.0.4
|
195 |
+
spacy==3.5.2
|
196 |
+
srsly==2.4.6
|
197 |
+
stack-data==0.6.2
|
198 |
+
statsmodels==0.14.0
|
199 |
+
sympy==1.11.1
|
200 |
+
tabulate==0.9.0
|
201 |
+
tblib==1.7.0
|
202 |
+
tenacity==8.2.2
|
203 |
+
thinc==8.1.10
|
204 |
+
threadpoolctl==3.1.0
|
205 |
+
tokenizers==0.13.3
|
206 |
+
toml==0.10.2
|
207 |
+
tomli==2.0.1
|
208 |
+
toolz==0.12.0
|
209 |
+
torch==2.0.0
|
210 |
+
torchaudio==2.0.1
|
211 |
+
torchdata==0.6.0
|
212 |
+
torchnet==0.0.4
|
213 |
+
torchtext==0.15.1
|
214 |
+
torchvision==0.15.1
|
215 |
+
tornado==6.3
|
216 |
+
tqdm==4.65.0
|
217 |
+
traitlets==5.9.0
|
218 |
+
transformers==4.31.0
|
219 |
+
triton==2.0.0.dev20221202
|
220 |
+
trl==0.4.7
|
221 |
+
typer==0.7.0
|
222 |
+
typing-extensions==4.5.0
|
223 |
+
tzdata==2023.3
|
224 |
+
unicodedata2==15.0.0
|
225 |
+
urllib3==1.26.15
|
226 |
+
visdom==0.2.4
|
227 |
+
wandb==0.15.7
|
228 |
+
wasabi==1.1.1
|
229 |
+
wcwidth==0.2.6
|
230 |
+
websocket-client==1.5.1
|
231 |
+
werkzeug==2.3.4
|
232 |
+
wheel==0.40.0
|
233 |
+
widgetsnbextension==4.0.8
|
234 |
+
xxhash==3.2.0
|
235 |
+
xyzservices==2023.2.0
|
236 |
+
yarl==1.9.2
|
237 |
+
zipp==3.15.0
|
238 |
+
zope.event==4.6
|
239 |
+
zope.interface==6.0
|
240 |
+
zstandard==0.19.0
|
wandb/latest-run/files/wandb-metadata.json
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.10.8",
|
4 |
+
"heartbeatAt": "2023-07-27T15:49:36.888553",
|
5 |
+
"startedAt": "2023-07-27T15:49:36.344100",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [],
|
9 |
+
"state": "running",
|
10 |
+
"program": "<python with no main file>",
|
11 |
+
"host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
|
12 |
+
"username": "root",
|
13 |
+
"executable": "/opt/conda/bin/python",
|
14 |
+
"cpu_count": 4,
|
15 |
+
"cpu_count_logical": 8,
|
16 |
+
"cpu_freq": {
|
17 |
+
"current": 3100.120625,
|
18 |
+
"min": 0.0,
|
19 |
+
"max": 0.0
|
20 |
+
},
|
21 |
+
"cpu_freq_per_core": [
|
22 |
+
{
|
23 |
+
"current": 3107.574,
|
24 |
+
"min": 0.0,
|
25 |
+
"max": 0.0
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"current": 3102.47,
|
29 |
+
"min": 0.0,
|
30 |
+
"max": 0.0
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"current": 3099.63,
|
34 |
+
"min": 0.0,
|
35 |
+
"max": 0.0
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"current": 3099.058,
|
39 |
+
"min": 0.0,
|
40 |
+
"max": 0.0
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"current": 3100.716,
|
44 |
+
"min": 0.0,
|
45 |
+
"max": 0.0
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"current": 3099.393,
|
49 |
+
"min": 0.0,
|
50 |
+
"max": 0.0
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"current": 3099.988,
|
54 |
+
"min": 0.0,
|
55 |
+
"max": 0.0
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"current": 3092.136,
|
59 |
+
"min": 0.0,
|
60 |
+
"max": 0.0
|
61 |
+
}
|
62 |
+
],
|
63 |
+
"disk": {
|
64 |
+
"total": 32.0,
|
65 |
+
"used": 0.414398193359375
|
66 |
+
},
|
67 |
+
"gpu": "Tesla T4",
|
68 |
+
"gpu_count": 1,
|
69 |
+
"gpu_devices": [
|
70 |
+
{
|
71 |
+
"name": "Tesla T4",
|
72 |
+
"memory_total": 15843721216
|
73 |
+
}
|
74 |
+
],
|
75 |
+
"memory": {
|
76 |
+
"total": 30.947834014892578
|
77 |
+
}
|
78 |
+
}
|
wandb/latest-run/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
|
wandb/latest-run/logs/debug-internal.log
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
|
3 |
+
size 16040370
|
wandb/latest-run/logs/debug.log
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
|
2 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
|
3 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
|
5 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
8 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
9 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
|
10 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
|
11 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
|
12 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
|
13 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
|
14 |
+
config: {}
|
15 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
|
16 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
|
17 |
+
2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
|
19 |
+
2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
|
20 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
|
21 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
|
22 |
+
2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
|
23 |
+
2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
|
24 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
|
25 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
|
26 |
+
2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
|
27 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
|
28 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
|
29 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
|
30 |
+
2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
|
31 |
+
2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
32 |
+
2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
33 |
+
2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
34 |
+
2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
35 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
36 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
37 |
+
2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
38 |
+
2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
39 |
+
2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
40 |
+
2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
41 |
+
2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
42 |
+
2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
43 |
+
2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
44 |
+
2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
45 |
+
2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
46 |
+
2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
47 |
+
2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
48 |
+
2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
49 |
+
2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
50 |
+
2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
51 |
+
2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
52 |
+
2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
53 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
54 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
55 |
+
2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
56 |
+
2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
57 |
+
2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
58 |
+
2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
59 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
60 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
61 |
+
2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
62 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
63 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
64 |
+
2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
65 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
66 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
67 |
+
2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
68 |
+
2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
69 |
+
2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
70 |
+
2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
71 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
72 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
73 |
+
2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
74 |
+
2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
75 |
+
2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
76 |
+
2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
wandb/run-20230727_154936-a41qiywg/files/conda-environment.yaml
ADDED
@@ -0,0 +1,498 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: base
|
2 |
+
channels:
|
3 |
+
- fastai
|
4 |
+
- dglteam/label/cu118
|
5 |
+
- nvidia/label/cuda-11.8.0
|
6 |
+
- https://aws-ml-conda-pre-prod-ec2.s3.us-west-2.amazonaws.com
|
7 |
+
- conda-forge
|
8 |
+
dependencies:
|
9 |
+
- _libgcc_mutex=0.1=conda_forge
|
10 |
+
- _openmp_mutex=4.5=2_kmp_llvm
|
11 |
+
- alsa-lib=1.2.8=h166bdaf_0
|
12 |
+
- aom=3.5.0=h27087fc_0
|
13 |
+
- asttokens=2.2.1=pyhd8ed1ab_0
|
14 |
+
- attr=2.5.1=h166bdaf_1
|
15 |
+
- aws-ofi-nccl-dlc=1.5.0=aws_0
|
16 |
+
- awscli=1.27.132=py310hff52083_0
|
17 |
+
- backcall=0.2.0=pyh9f0ad1d_0
|
18 |
+
- backports=1.0=pyhd8ed1ab_3
|
19 |
+
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
|
20 |
+
- blas=1.0=mkl
|
21 |
+
- bokeh=3.1.1=pyhd8ed1ab_0
|
22 |
+
- boto3=1.26.132=pyhd8ed1ab_0
|
23 |
+
- botocore=1.29.132=pyhd8ed1ab_0
|
24 |
+
- brotli=1.0.9=h166bdaf_8
|
25 |
+
- brotli-bin=1.0.9=h166bdaf_8
|
26 |
+
- brotlipy=0.7.0=py310h5764c6d_1005
|
27 |
+
- bzip2=1.0.8=h7f98852_4
|
28 |
+
- c-ares=1.18.1=h7f98852_0
|
29 |
+
- ca-certificates=2023.5.7=hbcca054_0
|
30 |
+
- cached-property=1.5.2=hd8ed1ab_1
|
31 |
+
- cached_property=1.5.2=pyha770c72_1
|
32 |
+
- cairo=1.16.0=ha61ee94_1014
|
33 |
+
- catalogue=2.0.8=py310hff52083_1
|
34 |
+
- certifi=2023.5.7=pyhd8ed1ab_0
|
35 |
+
- cffi=1.15.1=py310h255011f_3
|
36 |
+
- charset-normalizer=3.1.0=pyhd8ed1ab_0
|
37 |
+
- click=8.1.3=unix_pyhd8ed1ab_2
|
38 |
+
- cloudpickle=2.2.1=pyhd8ed1ab_0
|
39 |
+
- colorama=0.4.4=pyh9f0ad1d_0
|
40 |
+
- comm=0.1.3=pyhd8ed1ab_0
|
41 |
+
- commonmark=0.9.1=py_0
|
42 |
+
- conda=23.1.0=py310hff52083_0
|
43 |
+
- conda-content-trust=0.1.3=pyhd8ed1ab_0
|
44 |
+
- conda-package-handling=2.0.2=pyh38be061_0
|
45 |
+
- conda-package-streaming=0.7.0=pyhd8ed1ab_1
|
46 |
+
- confection=0.0.4=py310hfdc917e_1
|
47 |
+
- contourpy=1.0.7=py310hdf3cbec_0
|
48 |
+
- cryptography=40.0.1=py310h34c0648_0
|
49 |
+
- cuda-cccl=11.8.89=0
|
50 |
+
- cuda-command-line-tools=11.8.0=0
|
51 |
+
- cuda-compiler=11.8.0=0
|
52 |
+
- cuda-cudart=11.8.89=0
|
53 |
+
- cuda-cudart-dev=11.8.89=0
|
54 |
+
- cuda-cuobjdump=11.8.86=0
|
55 |
+
- cuda-cupti=11.8.87=0
|
56 |
+
- cuda-cuxxfilt=11.8.86=0
|
57 |
+
- cuda-documentation=11.8.86=0
|
58 |
+
- cuda-driver-dev=11.8.89=0
|
59 |
+
- cuda-gdb=11.8.86=0
|
60 |
+
- cuda-libraries=11.8.0=0
|
61 |
+
- cuda-libraries-dev=11.8.0=0
|
62 |
+
- cuda-memcheck=11.8.86=0
|
63 |
+
- cuda-nsight=11.8.86=0
|
64 |
+
- cuda-nsight-compute=11.8.0=0
|
65 |
+
- cuda-nvcc=11.8.89=0
|
66 |
+
- cuda-nvdisasm=11.8.86=0
|
67 |
+
- cuda-nvml-dev=11.8.86=0
|
68 |
+
- cuda-nvprof=11.8.87=0
|
69 |
+
- cuda-nvprune=11.8.86=0
|
70 |
+
- cuda-nvrtc=11.8.89=0
|
71 |
+
- cuda-nvrtc-dev=11.8.89=0
|
72 |
+
- cuda-nvtx=11.8.86=0
|
73 |
+
- cuda-nvvp=11.8.87=0
|
74 |
+
- cuda-profiler-api=11.8.86=0
|
75 |
+
- cuda-runtime=11.8.0=0
|
76 |
+
- cuda-sanitizer-api=11.8.86=0
|
77 |
+
- cuda-toolkit=11.8.0=0
|
78 |
+
- cuda-tools=11.8.0=0
|
79 |
+
- cuda-visual-tools=11.8.0=0
|
80 |
+
- cycler=0.11.0=pyhd8ed1ab_0
|
81 |
+
- cymem=2.0.7=py310hd8f1fbe_1
|
82 |
+
- cython=0.29.34=py310heca2aa9_0
|
83 |
+
- cython-blis=0.7.9=py310hde88566_1
|
84 |
+
- dbus=1.13.6=h5008d03_3
|
85 |
+
- debugpy=1.6.7=py310heca2aa9_0
|
86 |
+
- decorator=5.1.1=pyhd8ed1ab_0
|
87 |
+
- dgl=1.1.0.cu118=py310_0
|
88 |
+
- docutils=0.15.2=py310hff52083_6
|
89 |
+
- executing=1.2.0=pyhd8ed1ab_0
|
90 |
+
- expat=2.5.0=hcb278e6_1
|
91 |
+
- fastai=2.7.12=py_0
|
92 |
+
- fastcore=1.5.29=py_0
|
93 |
+
- fastdownload=0.0.7=py_0
|
94 |
+
- fastprogress=1.0.3=py_0
|
95 |
+
- ffmpeg=5.1.2=gpl_h8dda1f0_106
|
96 |
+
- fftw=3.3.10=nompi_hc118613_107
|
97 |
+
- filelock=3.12.0=pyhd8ed1ab_0
|
98 |
+
- fmt=9.1.0=h924138e_0
|
99 |
+
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
100 |
+
- font-ttf-inconsolata=3.000=h77eed37_0
|
101 |
+
- font-ttf-source-code-pro=2.038=h77eed37_0
|
102 |
+
- font-ttf-ubuntu=0.83=hab24e00_0
|
103 |
+
- fontconfig=2.14.2=h14ed4e7_0
|
104 |
+
- fonts-conda-ecosystem=1=0
|
105 |
+
- fonts-conda-forge=1=0
|
106 |
+
- fonttools=4.39.4=py310h2372a71_0
|
107 |
+
- freeglut=3.2.2=h9c3ff4c_1
|
108 |
+
- freetype=2.12.1=hca18f0e_1
|
109 |
+
- future=0.18.3=pyhd8ed1ab_0
|
110 |
+
- gds-tools=1.4.0.31=0
|
111 |
+
- gettext=0.21.1=h27087fc_0
|
112 |
+
- glib=2.76.2=hfc55251_0
|
113 |
+
- glib-tools=2.76.2=hfc55251_0
|
114 |
+
- gmp=6.2.1=h58526e2_0
|
115 |
+
- gmpy2=2.1.2=py310h3ec546c_1
|
116 |
+
- gnutls=3.7.8=hf3e180e_0
|
117 |
+
- graphite2=1.3.13=h58526e2_1001
|
118 |
+
- gst-plugins-base=1.22.0=h4243ec0_2
|
119 |
+
- gstreamer=1.22.0=h25f0c4b_2
|
120 |
+
- gstreamer-orc=0.4.33=h166bdaf_0
|
121 |
+
- h5py=3.8.0=nompi_py310ha66b2ad_101
|
122 |
+
- harfbuzz=6.0.0=h8e241bc_0
|
123 |
+
- hdf5=1.14.0=nompi_hb72d44e_103
|
124 |
+
- icu=70.1=h27087fc_0
|
125 |
+
- idna=3.4=pyhd8ed1ab_0
|
126 |
+
- imageio=2.28.1=pyh24c5eb1_0
|
127 |
+
- importlib_metadata=6.6.0=hd8ed1ab_0
|
128 |
+
- ipykernel=6.23.0=pyh210e3f2_0
|
129 |
+
- ipython=8.13.2=pyh41d4057_0
|
130 |
+
- jack=1.9.22=h11f4161_0
|
131 |
+
- jasper=2.0.33=h0ff4b12_1
|
132 |
+
- jedi=0.18.2=pyhd8ed1ab_0
|
133 |
+
- jinja2=3.1.2=pyhd8ed1ab_1
|
134 |
+
- jmespath=1.0.1=pyhd8ed1ab_0
|
135 |
+
- joblib=1.2.0=pyhd8ed1ab_0
|
136 |
+
- jpeg=9e=h166bdaf_2
|
137 |
+
- jupyter_client=8.2.0=pyhd8ed1ab_0
|
138 |
+
- jupyter_core=5.3.0=py310hff52083_0
|
139 |
+
- keyutils=1.6.1=h166bdaf_0
|
140 |
+
- kiwisolver=1.4.4=py310hbf28c38_1
|
141 |
+
- krb5=1.20.1=h81ceb04_0
|
142 |
+
- lame=3.100=h166bdaf_1003
|
143 |
+
- langcodes=3.3.0=pyhd8ed1ab_0
|
144 |
+
- lcms2=2.15=hfd0df8a_0
|
145 |
+
- ld_impl_linux-64=2.40=h41732ed_0
|
146 |
+
- lerc=4.0.0=h27087fc_0
|
147 |
+
- libaec=1.0.6=hcb278e6_1
|
148 |
+
- libarchive=3.6.2=h3d51595_0
|
149 |
+
- libblas=3.9.0=1_h86c2bf4_netlib
|
150 |
+
- libbrotlicommon=1.0.9=h166bdaf_8
|
151 |
+
- libbrotlidec=1.0.9=h166bdaf_8
|
152 |
+
- libbrotlienc=1.0.9=h166bdaf_8
|
153 |
+
- libcap=2.67=he9d0100_0
|
154 |
+
- libcblas=3.9.0=5_h92ddd45_netlib
|
155 |
+
- libclang=15.0.7=default_had23c3d_1
|
156 |
+
- libclang13=15.0.7=default_h3e3d535_1
|
157 |
+
- libcublas=11.11.3.6=0
|
158 |
+
- libcublas-dev=11.11.3.6=0
|
159 |
+
- libcufft=10.9.0.58=0
|
160 |
+
- libcufft-dev=10.9.0.58=0
|
161 |
+
- libcufile=1.4.0.31=0
|
162 |
+
- libcufile-dev=1.4.0.31=0
|
163 |
+
- libcups=2.3.3=h36d4200_3
|
164 |
+
- libcurand=10.3.0.86=0
|
165 |
+
- libcurand-dev=10.3.0.86=0
|
166 |
+
- libcurl=7.88.1=hdc1c0ab_1
|
167 |
+
- libcusolver=11.4.1.48=0
|
168 |
+
- libcusolver-dev=11.4.1.48=0
|
169 |
+
- libcusparse=11.7.5.86=0
|
170 |
+
- libcusparse-dev=11.7.5.86=0
|
171 |
+
- libdb=6.2.32=h9c3ff4c_0
|
172 |
+
- libdeflate=1.17=h0b41bf4_0
|
173 |
+
- libdrm=2.4.114=h166bdaf_0
|
174 |
+
- libedit=3.1.20191231=he28a2e2_2
|
175 |
+
- libev=4.33=h516909a_1
|
176 |
+
- libevent=2.1.10=h28343ad_4
|
177 |
+
- libexpat=2.5.0=hcb278e6_1
|
178 |
+
- libffi=3.4.2=h7f98852_5
|
179 |
+
- libflac=1.4.2=h27087fc_0
|
180 |
+
- libgcc=7.2.0=h69d50b8_2
|
181 |
+
- libgcc-ng=12.2.0=h65d4601_19
|
182 |
+
- libgcrypt=1.10.1=h166bdaf_0
|
183 |
+
- libgfortran-ng=12.2.0=h69a702a_19
|
184 |
+
- libgfortran5=12.2.0=h337968e_19
|
185 |
+
- libglib=2.76.2=hebfc3b9_0
|
186 |
+
- libglu=9.0.0=he1b5a44_1001
|
187 |
+
- libgomp=12.2.0=h65d4601_19
|
188 |
+
- libgpg-error=1.46=h620e276_0
|
189 |
+
- libhwloc=2.9.1=hd6dc26d_0
|
190 |
+
- libiconv=1.17=h166bdaf_0
|
191 |
+
- libidn2=2.3.4=h166bdaf_0
|
192 |
+
- libjpeg-turbo=2.1.4=h166bdaf_0
|
193 |
+
- liblapack=3.9.0=5_h92ddd45_netlib
|
194 |
+
- liblapacke=3.9.0=5_h92ddd45_netlib
|
195 |
+
- libllvm11=11.1.0=he0ac6c6_5
|
196 |
+
- libllvm15=15.0.7=hadd5161_1
|
197 |
+
- libllvm16=16.0.1=hadd5161_0
|
198 |
+
- libmamba=1.4.1=hcea66bb_0
|
199 |
+
- libmambapy=1.4.1=py310h1428755_0
|
200 |
+
- libnghttp2=1.52.0=h61bc06f_0
|
201 |
+
- libnpp=11.8.0.86=0
|
202 |
+
- libnpp-dev=11.8.0.86=0
|
203 |
+
- libnsl=2.0.0=h7f98852_0
|
204 |
+
- libnvjpeg=11.9.0.86=0
|
205 |
+
- libnvjpeg-dev=11.9.0.86=0
|
206 |
+
- libogg=1.3.4=h7f98852_1
|
207 |
+
- libopenblas=0.3.21=pthreads_h78a6416_3
|
208 |
+
- libopencv=4.7.0=py310hb48cf42_1
|
209 |
+
- libopus=1.3.1=h7f98852_1
|
210 |
+
- libpciaccess=0.17=h166bdaf_0
|
211 |
+
- libpng=1.6.39=h753d276_0
|
212 |
+
- libpq=15.3=hbcd7760_0
|
213 |
+
- libprotobuf=3.21.12=h3eb15da_0
|
214 |
+
- libsndfile=1.2.0=hb75c966_0
|
215 |
+
- libsodium=1.0.18=h36c2ea0_1
|
216 |
+
- libsolv=0.7.23=h3eb15da_0
|
217 |
+
- libsqlite=3.40.0=h753d276_0
|
218 |
+
- libssh2=1.10.0=hf14f497_3
|
219 |
+
- libstdcxx-ng=12.2.0=h46fd767_19
|
220 |
+
- libsystemd0=253=h8c4010b_1
|
221 |
+
- libtasn1=4.19.0=h166bdaf_0
|
222 |
+
- libtiff=4.5.0=h6adf6a1_2
|
223 |
+
- libtool=2.4.7=h27087fc_0
|
224 |
+
- libudev1=253=h0b41bf4_1
|
225 |
+
- libunistring=0.9.10=h7f98852_0
|
226 |
+
- libuuid=2.38.1=h0b41bf4_0
|
227 |
+
- libuv=1.44.2=h166bdaf_0
|
228 |
+
- libva=2.18.0=h0b41bf4_0
|
229 |
+
- libvorbis=1.3.7=h9c3ff4c_0
|
230 |
+
- libvpx=1.11.0=h9c3ff4c_3
|
231 |
+
- libwebp-base=1.3.0=h0b41bf4_0
|
232 |
+
- libxcb=1.13=h7f98852_1004
|
233 |
+
- libxkbcommon=1.5.0=h79f4944_1
|
234 |
+
- libxml2=2.10.3=hca2bb57_4
|
235 |
+
- libzlib=1.2.13=h166bdaf_4
|
236 |
+
- llvm-openmp=16.0.3=h4dfa4b3_0
|
237 |
+
- llvmlite=0.39.1=py310h58363a5_1
|
238 |
+
- lz4-c=1.9.4=hcb278e6_0
|
239 |
+
- lzo=2.10=h516909a_1000
|
240 |
+
- mamba=1.4.1=py310h51d5547_0
|
241 |
+
- markupsafe=2.1.2=py310h1fa729e_0
|
242 |
+
- matplotlib=3.7.1=py310hff52083_0
|
243 |
+
- matplotlib-base=3.7.1=py310he60537e_0
|
244 |
+
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
|
245 |
+
- mkl=2023.1.0=h84fe81f_48680
|
246 |
+
- mkl-include=2023.1.0=h84fe81f_48680
|
247 |
+
- mpc=1.3.1=hfe3b2da_0
|
248 |
+
- mpfr=4.2.0=hb012696_0
|
249 |
+
- mpg123=1.31.3=hcb278e6_0
|
250 |
+
- mpi=1.0=openmpi
|
251 |
+
- mpi4py=3.1.4=py310h6075a6b_0
|
252 |
+
- mpmath=1.3.0=pyhd8ed1ab_0
|
253 |
+
- munkres=1.1.4=pyh9f0ad1d_0
|
254 |
+
- murmurhash=1.0.9=py310hd8f1fbe_1
|
255 |
+
- mysql-common=8.0.32=hf1915f5_2
|
256 |
+
- mysql-libs=8.0.32=hca2cd23_2
|
257 |
+
- ncurses=6.3=h27087fc_1
|
258 |
+
- nest-asyncio=1.5.6=pyhd8ed1ab_0
|
259 |
+
- nettle=3.8.1=hc379101_1
|
260 |
+
- networkx=3.1=pyhd8ed1ab_0
|
261 |
+
- nsight-compute=2022.3.0.22=0
|
262 |
+
- nspr=4.35=h27087fc_0
|
263 |
+
- nss=3.89=he45b914_0
|
264 |
+
- numba=0.56.4=py310h0e39c9b_1
|
265 |
+
- numpy=1.23.5=py310h53a5b5f_0
|
266 |
+
- opencv=4.7.0=py310hff52083_1
|
267 |
+
- openh264=2.3.1=hcb278e6_2
|
268 |
+
- openjpeg=2.5.0=hfec8fc6_2
|
269 |
+
- openmpi=4.1.5=h414af15_101
|
270 |
+
- openssl=3.1.0=hd590300_3
|
271 |
+
- p11-kit=0.24.1=hc5aa10d_0
|
272 |
+
- packaging=23.1=pyhd8ed1ab_0
|
273 |
+
- pandas=2.0.1=py310h7cbd5c2_1
|
274 |
+
- parso=0.8.3=pyhd8ed1ab_0
|
275 |
+
- pathy=0.10.1=pyhd8ed1ab_0
|
276 |
+
- patsy=0.5.3=pyhd8ed1ab_0
|
277 |
+
- pcre2=10.40=hc3806b6_0
|
278 |
+
- pexpect=4.8.0=pyh1a96a4e_2
|
279 |
+
- pickleshare=0.7.5=py_1003
|
280 |
+
- pillow=9.4.0=py310h023d228_1
|
281 |
+
- pixman=0.40.0=h36c2ea0_0
|
282 |
+
- platformdirs=3.5.0=pyhd8ed1ab_0
|
283 |
+
- plotly=5.14.1=pyhd8ed1ab_0
|
284 |
+
- pluggy=1.0.0=pyhd8ed1ab_5
|
285 |
+
- ply=3.11=py_1
|
286 |
+
- pooch=1.7.0=pyha770c72_3
|
287 |
+
- preshed=3.0.8=py310hd8f1fbe_1
|
288 |
+
- prompt-toolkit=3.0.38=pyha770c72_0
|
289 |
+
- prompt_toolkit=3.0.38=hd8ed1ab_0
|
290 |
+
- psutil=5.9.5=py310h1fa729e_0
|
291 |
+
- pthread-stubs=0.4=h36c2ea0_1001
|
292 |
+
- ptyprocess=0.7.0=pyhd3deb0d_0
|
293 |
+
- pulseaudio=16.1=hcb278e6_3
|
294 |
+
- pulseaudio-client=16.1=h5195f5e_3
|
295 |
+
- pulseaudio-daemon=16.1=ha8d29e2_3
|
296 |
+
- pure_eval=0.2.2=pyhd8ed1ab_0
|
297 |
+
- py-opencv=4.7.0=py310hfdc917e_1
|
298 |
+
- pyasn1=0.4.8=py_0
|
299 |
+
- pybind11=2.10.4=py310hdf3cbec_0
|
300 |
+
- pybind11-abi=4=hd8ed1ab_3
|
301 |
+
- pybind11-global=2.10.4=py310hdf3cbec_0
|
302 |
+
- pycosat=0.6.4=py310h5764c6d_1
|
303 |
+
- pycparser=2.21=pyhd8ed1ab_0
|
304 |
+
- pydantic=1.10.7=py310h1fa729e_0
|
305 |
+
- pygments=2.15.1=pyhd8ed1ab_0
|
306 |
+
- pyopenssl=23.1.1=pyhd8ed1ab_0
|
307 |
+
- pyparsing=3.0.9=pyhd8ed1ab_0
|
308 |
+
- pyqt=5.15.7=py310hab646b1_3
|
309 |
+
- pyqt5-sip=12.11.0=py310heca2aa9_3
|
310 |
+
- pysocks=1.7.1=pyha2e5f31_6
|
311 |
+
- python=3.10.8=h4a9ceb5_0_cpython
|
312 |
+
- python-dateutil=2.8.2=pyhd8ed1ab_0
|
313 |
+
- python-tzdata=2023.3=pyhd8ed1ab_0
|
314 |
+
- python_abi=3.10=3_cp310
|
315 |
+
- pytorch=2.0.0=aws_py3.10_cuda11.8_cudnn8.7.0_0
|
316 |
+
- pytorch-cuda=11.8=h7e8668a_3
|
317 |
+
- pytorch-mutex=1.0=cuda
|
318 |
+
- pytz=2023.3=pyhd8ed1ab_0
|
319 |
+
- pyyaml=5.4.1=py310h5764c6d_4
|
320 |
+
- pyzmq=25.0.2=py310h059b190_0
|
321 |
+
- qt-main=5.15.8=h5d23da1_6
|
322 |
+
- readline=8.2=h8228510_1
|
323 |
+
- reproc=14.2.4=h0b41bf4_0
|
324 |
+
- reproc-cpp=14.2.4=hcb278e6_0
|
325 |
+
- requests=2.28.2=pyhd8ed1ab_1
|
326 |
+
- rhash=1.4.3=h166bdaf_0
|
327 |
+
- rich=12.6.0=pyhd8ed1ab_0
|
328 |
+
- rsa=4.7.2=pyh44b312d_0
|
329 |
+
- ruamel.yaml=0.17.21=py310h1fa729e_3
|
330 |
+
- ruamel.yaml.clib=0.2.7=py310h1fa729e_1
|
331 |
+
- s3transfer=0.6.1=pyhd8ed1ab_0
|
332 |
+
- scikit-learn=1.2.2=py310h41b6a48_1
|
333 |
+
- scipy=1.10.1=py310h8deb116_2
|
334 |
+
- seaborn=0.12.2=hd8ed1ab_0
|
335 |
+
- seaborn-base=0.12.2=pyhd8ed1ab_0
|
336 |
+
- setuptools=65.6.3=pyhd8ed1ab_0
|
337 |
+
- shap=0.41.0=py310h769672d_0
|
338 |
+
- shellingham=1.5.1=pyhd8ed1ab_0
|
339 |
+
- sip=6.7.9=py310hc6cd4ac_0
|
340 |
+
- six=1.16.0=pyh6c4a22f_0
|
341 |
+
- slicer=0.0.7=pyhd8ed1ab_0
|
342 |
+
- smart_open=5.2.1=pyhd8ed1ab_0
|
343 |
+
- spacy=3.5.2=py310h5a539fb_0
|
344 |
+
- spacy-legacy=3.0.12=pyhd8ed1ab_0
|
345 |
+
- spacy-loggers=1.0.4=pyhd8ed1ab_0
|
346 |
+
- srsly=2.4.6=py310heca2aa9_0
|
347 |
+
- stack_data=0.6.2=pyhd8ed1ab_0
|
348 |
+
- statsmodels=0.14.0=py310h278f3c1_1
|
349 |
+
- svt-av1=1.4.1=hcb278e6_0
|
350 |
+
- sympy=1.11.1=pypyh9d50eac_103
|
351 |
+
- tbb=2021.9.0=hf52228f_0
|
352 |
+
- tenacity=8.2.2=pyhd8ed1ab_0
|
353 |
+
- thinc=8.1.10=py310hfb6f7a9_0
|
354 |
+
- threadpoolctl=3.1.0=pyh8a188c0_0
|
355 |
+
- tk=8.6.12=h27826a3_0
|
356 |
+
- toml=0.10.2=pyhd8ed1ab_0
|
357 |
+
- tomli=2.0.1=pyhd8ed1ab_0
|
358 |
+
- toolz=0.12.0=pyhd8ed1ab_0
|
359 |
+
- torchaudio=2.0.1=py310_cu118
|
360 |
+
- torchdata=0.6.0=py310
|
361 |
+
- torchtext=0.15.1=py310
|
362 |
+
- torchvision=0.15.1=py310_cu118
|
363 |
+
- tornado=6.3=py310h1fa729e_0
|
364 |
+
- tqdm=4.65.0=pyhd8ed1ab_1
|
365 |
+
- traitlets=5.9.0=pyhd8ed1ab_0
|
366 |
+
- typer=0.7.0=pyhd8ed1ab_0
|
367 |
+
- typing=3.10.0.0=pyhd8ed1ab_0
|
368 |
+
- typing-extensions=4.5.0=hd8ed1ab_0
|
369 |
+
- typing_extensions=4.5.0=pyha770c72_0
|
370 |
+
- tzdata=2023c=h71feb2d_0
|
371 |
+
- unicodedata2=15.0.0=py310h5764c6d_0
|
372 |
+
- urllib3=1.26.15=pyhd8ed1ab_0
|
373 |
+
- wasabi=1.1.1=py310hff52083_1
|
374 |
+
- wcwidth=0.2.6=pyhd8ed1ab_0
|
375 |
+
- wheel=0.40.0=pyhd8ed1ab_0
|
376 |
+
- x264=1!164.3095=h166bdaf_2
|
377 |
+
- x265=3.5=h924138e_3
|
378 |
+
- xcb-util=0.4.0=h516909a_0
|
379 |
+
- xcb-util-image=0.4.0=h166bdaf_0
|
380 |
+
- xcb-util-keysyms=0.4.0=h516909a_0
|
381 |
+
- xcb-util-renderutil=0.3.9=h166bdaf_0
|
382 |
+
- xcb-util-wm=0.4.1=h516909a_0
|
383 |
+
- xkeyboard-config=2.38=h0b41bf4_0
|
384 |
+
- xorg-fixesproto=5.0=h7f98852_1002
|
385 |
+
- xorg-inputproto=2.3.2=h7f98852_1002
|
386 |
+
- xorg-kbproto=1.0.7=h7f98852_1002
|
387 |
+
- xorg-libice=1.0.10=h7f98852_0
|
388 |
+
- xorg-libsm=1.2.3=hd9c2040_1000
|
389 |
+
- xorg-libx11=1.8.4=h0b41bf4_0
|
390 |
+
- xorg-libxau=1.0.9=h7f98852_0
|
391 |
+
- xorg-libxdmcp=1.1.3=h7f98852_0
|
392 |
+
- xorg-libxext=1.3.4=h0b41bf4_2
|
393 |
+
- xorg-libxfixes=5.0.3=h7f98852_1004
|
394 |
+
- xorg-libxi=1.7.10=h7f98852_0
|
395 |
+
- xorg-libxrender=0.9.10=h7f98852_1003
|
396 |
+
- xorg-renderproto=0.11.1=h7f98852_1002
|
397 |
+
- xorg-xextproto=7.3.0=h0b41bf4_1003
|
398 |
+
- xorg-xf86vidmodeproto=2.3.1=h7f98852_1002
|
399 |
+
- xorg-xproto=7.0.31=h7f98852_1007
|
400 |
+
- xyzservices=2023.2.0=pyhd8ed1ab_0
|
401 |
+
- xz=5.2.6=h166bdaf_0
|
402 |
+
- yaml=0.2.5=h7f98852_2
|
403 |
+
- yaml-cpp=0.7.0=h27087fc_2
|
404 |
+
- zeromq=4.3.4=h9c3ff4c_1
|
405 |
+
- zipp=3.15.0=pyhd8ed1ab_0
|
406 |
+
- zlib=1.2.13=h166bdaf_4
|
407 |
+
- zstandard=0.19.0=py310hdeb6495_1
|
408 |
+
- zstd=1.5.2=h3eb15da_6
|
409 |
+
- pip:
|
410 |
+
- accelerate==0.21.0
|
411 |
+
- aiohttp==3.8.5
|
412 |
+
- aiosignal==1.3.1
|
413 |
+
- apex==0.1
|
414 |
+
- appdirs==1.4.4
|
415 |
+
- argparse==1.4.0
|
416 |
+
- async-timeout==4.0.2
|
417 |
+
- attrs==22.2.0
|
418 |
+
- bcrypt==4.0.1
|
419 |
+
- bitsandbytes==0.41.0
|
420 |
+
- cmake==3.26.3
|
421 |
+
- contextlib2==21.6.0
|
422 |
+
- datasets==2.14.0
|
423 |
+
- deepspeed==0.6.1+1ea3d4b
|
424 |
+
- dill==0.3.6
|
425 |
+
- docker-pycreds==0.4.0
|
426 |
+
- einops==0.6.1
|
427 |
+
- flash-attn==0.2.8
|
428 |
+
- frozenlist==1.4.0
|
429 |
+
- fsspec==2023.5.0
|
430 |
+
- gevent==22.10.2
|
431 |
+
- gitdb==4.0.10
|
432 |
+
- gitpython==3.1.32
|
433 |
+
- google-pasta==0.2.0
|
434 |
+
- greenlet==2.0.2
|
435 |
+
- hjson==3.1.0
|
436 |
+
- horovod==0.26.1
|
437 |
+
- huggingface-hub==0.16.4
|
438 |
+
- importlib-metadata==4.13.0
|
439 |
+
- inotify-simple==1.2.1
|
440 |
+
- ipywidgets==8.0.7
|
441 |
+
- jsonpatch==1.32
|
442 |
+
- jsonpointer==2.3
|
443 |
+
- jsonschema==4.17.3
|
444 |
+
- jupyterlab-widgets==3.0.8
|
445 |
+
- lit==16.0.3
|
446 |
+
- multidict==6.0.4
|
447 |
+
- multiprocess==0.70.14
|
448 |
+
- ninja==1.11.1
|
449 |
+
- paramiko==3.1.0
|
450 |
+
- pathos==0.3.0
|
451 |
+
- pathtools==0.1.2
|
452 |
+
- peft==0.5.0.dev0
|
453 |
+
- pip==23.1.2
|
454 |
+
- pox==0.3.2
|
455 |
+
- ppft==1.7.6.6
|
456 |
+
- protobuf==3.20.3
|
457 |
+
- protobuf3-to-dict==0.1.5
|
458 |
+
- py-cpuinfo==9.0.0
|
459 |
+
- pyarrow==12.0.0
|
460 |
+
- pyfunctional==1.4.3
|
461 |
+
- pyinstrument==3.4.2
|
462 |
+
- pyinstrument-cext==0.2.4
|
463 |
+
- pynacl==1.5.0
|
464 |
+
- pyrsistent==0.19.3
|
465 |
+
- regex==2023.6.3
|
466 |
+
- retrying==1.3.4
|
467 |
+
- s3fs==0.4.2
|
468 |
+
- safetensors==0.3.1
|
469 |
+
- sagemaker==2.154.0
|
470 |
+
- sagemaker-experiments==0.1.43
|
471 |
+
- sagemaker-pytorch-training==2.8.0
|
472 |
+
- sagemaker-training==4.5.0
|
473 |
+
- schema==0.7.5
|
474 |
+
- sentry-sdk==1.28.1
|
475 |
+
- setproctitle==1.3.2
|
476 |
+
- smclarify==0.5
|
477 |
+
- smdebug==1.0.34
|
478 |
+
- smdebug-rulesconfig==1.0.1
|
479 |
+
- smdistributed-dataparallel==1.8.0
|
480 |
+
- smdistributed-modelparallel==1.15.0
|
481 |
+
- smmap==5.0.0
|
482 |
+
- tabulate==0.9.0
|
483 |
+
- tblib==1.7.0
|
484 |
+
- tokenizers==0.13.3
|
485 |
+
- torchnet==0.0.4
|
486 |
+
- transformers==4.31.0
|
487 |
+
- triton==2.0.0.dev20221202
|
488 |
+
- trl==0.4.7
|
489 |
+
- visdom==0.2.4
|
490 |
+
- wandb==0.15.7
|
491 |
+
- websocket-client==1.5.1
|
492 |
+
- werkzeug==2.3.4
|
493 |
+
- widgetsnbextension==4.0.8
|
494 |
+
- xxhash==3.2.0
|
495 |
+
- yarl==1.9.2
|
496 |
+
- zope-event==4.6
|
497 |
+
- zope-interface==6.0
|
498 |
+
prefix: /opt/conda
|
wandb/run-20230727_154936-a41qiywg/files/config.yaml
ADDED
@@ -0,0 +1,649 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
python_version: 3.10.8
|
7 |
+
cli_version: 0.15.7
|
8 |
+
framework: huggingface
|
9 |
+
huggingface_version: 4.31.0
|
10 |
+
is_jupyter_run: true
|
11 |
+
is_kaggle_kernel: false
|
12 |
+
start_time: 1690472976.418337
|
13 |
+
t:
|
14 |
+
1:
|
15 |
+
- 1
|
16 |
+
- 5
|
17 |
+
- 11
|
18 |
+
- 49
|
19 |
+
- 51
|
20 |
+
- 53
|
21 |
+
- 55
|
22 |
+
- 71
|
23 |
+
- 84
|
24 |
+
- 98
|
25 |
+
2:
|
26 |
+
- 1
|
27 |
+
- 5
|
28 |
+
- 11
|
29 |
+
- 49
|
30 |
+
- 51
|
31 |
+
- 53
|
32 |
+
- 55
|
33 |
+
- 71
|
34 |
+
- 84
|
35 |
+
- 98
|
36 |
+
3:
|
37 |
+
- 7
|
38 |
+
- 23
|
39 |
+
4: 3.10.8
|
40 |
+
5: 0.15.7
|
41 |
+
6: 4.31.0
|
42 |
+
8:
|
43 |
+
- 1
|
44 |
+
- 5
|
45 |
+
m:
|
46 |
+
- 1: train/global_step
|
47 |
+
6:
|
48 |
+
- 3
|
49 |
+
- 1: train/loss
|
50 |
+
5: 1
|
51 |
+
6:
|
52 |
+
- 1
|
53 |
+
- 1: train/learning_rate
|
54 |
+
5: 1
|
55 |
+
6:
|
56 |
+
- 1
|
57 |
+
- 1: train/epoch
|
58 |
+
5: 1
|
59 |
+
6:
|
60 |
+
- 1
|
61 |
+
- 1: train/train_runtime
|
62 |
+
5: 1
|
63 |
+
6:
|
64 |
+
- 1
|
65 |
+
- 1: train/train_samples_per_second
|
66 |
+
5: 1
|
67 |
+
6:
|
68 |
+
- 1
|
69 |
+
- 1: train/train_steps_per_second
|
70 |
+
5: 1
|
71 |
+
6:
|
72 |
+
- 1
|
73 |
+
- 1: train/total_flos
|
74 |
+
5: 1
|
75 |
+
6:
|
76 |
+
- 1
|
77 |
+
- 1: train/train_loss
|
78 |
+
5: 1
|
79 |
+
6:
|
80 |
+
- 1
|
81 |
+
vocab_size:
|
82 |
+
desc: null
|
83 |
+
value: 65024
|
84 |
+
hidden_size:
|
85 |
+
desc: null
|
86 |
+
value: 4544
|
87 |
+
n_layer:
|
88 |
+
desc: null
|
89 |
+
value: 32
|
90 |
+
n_head:
|
91 |
+
desc: null
|
92 |
+
value: 71
|
93 |
+
layer_norm_epsilon:
|
94 |
+
desc: null
|
95 |
+
value: 1.0e-05
|
96 |
+
initializer_range:
|
97 |
+
desc: null
|
98 |
+
value: 0.02
|
99 |
+
use_cache:
|
100 |
+
desc: null
|
101 |
+
value: false
|
102 |
+
apply_residual_connection_post_layernorm:
|
103 |
+
desc: null
|
104 |
+
value: false
|
105 |
+
hidden_dropout:
|
106 |
+
desc: null
|
107 |
+
value: 0.0
|
108 |
+
attention_dropout:
|
109 |
+
desc: null
|
110 |
+
value: 0.0
|
111 |
+
bos_token_id:
|
112 |
+
desc: null
|
113 |
+
value: 11
|
114 |
+
eos_token_id:
|
115 |
+
desc: null
|
116 |
+
value: 11
|
117 |
+
multi_query:
|
118 |
+
desc: null
|
119 |
+
value: true
|
120 |
+
alibi:
|
121 |
+
desc: null
|
122 |
+
value: false
|
123 |
+
bias:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
parallel_attn:
|
127 |
+
desc: null
|
128 |
+
value: true
|
129 |
+
return_dict:
|
130 |
+
desc: null
|
131 |
+
value: true
|
132 |
+
output_hidden_states:
|
133 |
+
desc: null
|
134 |
+
value: false
|
135 |
+
output_attentions:
|
136 |
+
desc: null
|
137 |
+
value: false
|
138 |
+
torchscript:
|
139 |
+
desc: null
|
140 |
+
value: false
|
141 |
+
torch_dtype:
|
142 |
+
desc: null
|
143 |
+
value: bfloat16
|
144 |
+
use_bfloat16:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
tf_legacy_loss:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
pruned_heads:
|
151 |
+
desc: null
|
152 |
+
value: {}
|
153 |
+
tie_word_embeddings:
|
154 |
+
desc: null
|
155 |
+
value: true
|
156 |
+
is_encoder_decoder:
|
157 |
+
desc: null
|
158 |
+
value: false
|
159 |
+
is_decoder:
|
160 |
+
desc: null
|
161 |
+
value: false
|
162 |
+
cross_attention_hidden_size:
|
163 |
+
desc: null
|
164 |
+
value: null
|
165 |
+
add_cross_attention:
|
166 |
+
desc: null
|
167 |
+
value: false
|
168 |
+
tie_encoder_decoder:
|
169 |
+
desc: null
|
170 |
+
value: false
|
171 |
+
max_length:
|
172 |
+
desc: null
|
173 |
+
value: 20
|
174 |
+
min_length:
|
175 |
+
desc: null
|
176 |
+
value: 0
|
177 |
+
do_sample:
|
178 |
+
desc: null
|
179 |
+
value: false
|
180 |
+
early_stopping:
|
181 |
+
desc: null
|
182 |
+
value: false
|
183 |
+
num_beams:
|
184 |
+
desc: null
|
185 |
+
value: 1
|
186 |
+
num_beam_groups:
|
187 |
+
desc: null
|
188 |
+
value: 1
|
189 |
+
diversity_penalty:
|
190 |
+
desc: null
|
191 |
+
value: 0.0
|
192 |
+
temperature:
|
193 |
+
desc: null
|
194 |
+
value: 1.0
|
195 |
+
top_k:
|
196 |
+
desc: null
|
197 |
+
value: 50
|
198 |
+
top_p:
|
199 |
+
desc: null
|
200 |
+
value: 1.0
|
201 |
+
typical_p:
|
202 |
+
desc: null
|
203 |
+
value: 1.0
|
204 |
+
repetition_penalty:
|
205 |
+
desc: null
|
206 |
+
value: 1.0
|
207 |
+
length_penalty:
|
208 |
+
desc: null
|
209 |
+
value: 1.0
|
210 |
+
no_repeat_ngram_size:
|
211 |
+
desc: null
|
212 |
+
value: 0
|
213 |
+
encoder_no_repeat_ngram_size:
|
214 |
+
desc: null
|
215 |
+
value: 0
|
216 |
+
bad_words_ids:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
num_return_sequences:
|
220 |
+
desc: null
|
221 |
+
value: 1
|
222 |
+
chunk_size_feed_forward:
|
223 |
+
desc: null
|
224 |
+
value: 0
|
225 |
+
output_scores:
|
226 |
+
desc: null
|
227 |
+
value: false
|
228 |
+
return_dict_in_generate:
|
229 |
+
desc: null
|
230 |
+
value: false
|
231 |
+
forced_bos_token_id:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
forced_eos_token_id:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_invalid_values:
|
238 |
+
desc: null
|
239 |
+
value: false
|
240 |
+
exponential_decay_length_penalty:
|
241 |
+
desc: null
|
242 |
+
value: null
|
243 |
+
suppress_tokens:
|
244 |
+
desc: null
|
245 |
+
value: null
|
246 |
+
begin_suppress_tokens:
|
247 |
+
desc: null
|
248 |
+
value: null
|
249 |
+
architectures:
|
250 |
+
desc: null
|
251 |
+
value:
|
252 |
+
- RWForCausalLM
|
253 |
+
finetuning_task:
|
254 |
+
desc: null
|
255 |
+
value: null
|
256 |
+
id2label:
|
257 |
+
desc: null
|
258 |
+
value:
|
259 |
+
'0': LABEL_0
|
260 |
+
'1': LABEL_1
|
261 |
+
label2id:
|
262 |
+
desc: null
|
263 |
+
value:
|
264 |
+
LABEL_0: 0
|
265 |
+
LABEL_1: 1
|
266 |
+
tokenizer_class:
|
267 |
+
desc: null
|
268 |
+
value: null
|
269 |
+
prefix:
|
270 |
+
desc: null
|
271 |
+
value: null
|
272 |
+
pad_token_id:
|
273 |
+
desc: null
|
274 |
+
value: null
|
275 |
+
sep_token_id:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
decoder_start_token_id:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
task_specific_params:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
problem_type:
|
285 |
+
desc: null
|
286 |
+
value: null
|
287 |
+
_name_or_path:
|
288 |
+
desc: null
|
289 |
+
value: ybelkada/falcon-7b-sharded-bf16
|
290 |
+
transformers_version:
|
291 |
+
desc: null
|
292 |
+
value: 4.31.0
|
293 |
+
auto_map:
|
294 |
+
desc: null
|
295 |
+
value:
|
296 |
+
AutoConfig: tiiuae/falcon-7b--configuration_RW.RWConfig
|
297 |
+
AutoModel: tiiuae/falcon-7b--modelling_RW.RWModel
|
298 |
+
AutoModelForCausalLM: tiiuae/falcon-7b--modelling_RW.RWForCausalLM
|
299 |
+
AutoModelForQuestionAnswering: tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering
|
300 |
+
AutoModelForSequenceClassification: tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification
|
301 |
+
AutoModelForTokenClassification: tiiuae/falcon-7b--modelling_RW.RWForTokenClassification
|
302 |
+
model_type:
|
303 |
+
desc: null
|
304 |
+
value: RefinedWebModel
|
305 |
+
quantization_config:
|
306 |
+
desc: null
|
307 |
+
value:
|
308 |
+
load_in_8bit: false
|
309 |
+
load_in_4bit: true
|
310 |
+
llm_int8_threshold: 6.0
|
311 |
+
llm_int8_skip_modules: null
|
312 |
+
llm_int8_enable_fp32_cpu_offload: false
|
313 |
+
llm_int8_has_fp16_weight: false
|
314 |
+
bnb_4bit_quant_type: nf4
|
315 |
+
bnb_4bit_use_double_quant: false
|
316 |
+
bnb_4bit_compute_dtype: float16
|
317 |
+
output_dir:
|
318 |
+
desc: null
|
319 |
+
value: ./results
|
320 |
+
overwrite_output_dir:
|
321 |
+
desc: null
|
322 |
+
value: false
|
323 |
+
do_train:
|
324 |
+
desc: null
|
325 |
+
value: false
|
326 |
+
do_eval:
|
327 |
+
desc: null
|
328 |
+
value: false
|
329 |
+
do_predict:
|
330 |
+
desc: null
|
331 |
+
value: false
|
332 |
+
evaluation_strategy:
|
333 |
+
desc: null
|
334 |
+
value: 'no'
|
335 |
+
prediction_loss_only:
|
336 |
+
desc: null
|
337 |
+
value: false
|
338 |
+
per_device_train_batch_size:
|
339 |
+
desc: null
|
340 |
+
value: 4
|
341 |
+
per_device_eval_batch_size:
|
342 |
+
desc: null
|
343 |
+
value: 8
|
344 |
+
per_gpu_train_batch_size:
|
345 |
+
desc: null
|
346 |
+
value: None
|
347 |
+
per_gpu_eval_batch_size:
|
348 |
+
desc: null
|
349 |
+
value: None
|
350 |
+
gradient_accumulation_steps:
|
351 |
+
desc: null
|
352 |
+
value: 4
|
353 |
+
eval_accumulation_steps:
|
354 |
+
desc: null
|
355 |
+
value: None
|
356 |
+
eval_delay:
|
357 |
+
desc: null
|
358 |
+
value: 0
|
359 |
+
learning_rate:
|
360 |
+
desc: null
|
361 |
+
value: 0.0002
|
362 |
+
weight_decay:
|
363 |
+
desc: null
|
364 |
+
value: 0.0
|
365 |
+
adam_beta1:
|
366 |
+
desc: null
|
367 |
+
value: 0.9
|
368 |
+
adam_beta2:
|
369 |
+
desc: null
|
370 |
+
value: 0.999
|
371 |
+
adam_epsilon:
|
372 |
+
desc: null
|
373 |
+
value: 1.0e-08
|
374 |
+
max_grad_norm:
|
375 |
+
desc: null
|
376 |
+
value: 0.3
|
377 |
+
num_train_epochs:
|
378 |
+
desc: null
|
379 |
+
value: 3.0
|
380 |
+
max_steps:
|
381 |
+
desc: null
|
382 |
+
value: 500
|
383 |
+
lr_scheduler_type:
|
384 |
+
desc: null
|
385 |
+
value: constant
|
386 |
+
warmup_ratio:
|
387 |
+
desc: null
|
388 |
+
value: 0.03
|
389 |
+
warmup_steps:
|
390 |
+
desc: null
|
391 |
+
value: 0
|
392 |
+
log_level:
|
393 |
+
desc: null
|
394 |
+
value: passive
|
395 |
+
log_level_replica:
|
396 |
+
desc: null
|
397 |
+
value: warning
|
398 |
+
log_on_each_node:
|
399 |
+
desc: null
|
400 |
+
value: true
|
401 |
+
logging_dir:
|
402 |
+
desc: null
|
403 |
+
value: ./results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80
|
404 |
+
logging_strategy:
|
405 |
+
desc: null
|
406 |
+
value: steps
|
407 |
+
logging_first_step:
|
408 |
+
desc: null
|
409 |
+
value: false
|
410 |
+
logging_steps:
|
411 |
+
desc: null
|
412 |
+
value: 10
|
413 |
+
logging_nan_inf_filter:
|
414 |
+
desc: null
|
415 |
+
value: true
|
416 |
+
save_strategy:
|
417 |
+
desc: null
|
418 |
+
value: steps
|
419 |
+
save_steps:
|
420 |
+
desc: null
|
421 |
+
value: 10
|
422 |
+
save_total_limit:
|
423 |
+
desc: null
|
424 |
+
value: None
|
425 |
+
save_safetensors:
|
426 |
+
desc: null
|
427 |
+
value: false
|
428 |
+
save_on_each_node:
|
429 |
+
desc: null
|
430 |
+
value: false
|
431 |
+
no_cuda:
|
432 |
+
desc: null
|
433 |
+
value: false
|
434 |
+
use_mps_device:
|
435 |
+
desc: null
|
436 |
+
value: false
|
437 |
+
seed:
|
438 |
+
desc: null
|
439 |
+
value: 42
|
440 |
+
data_seed:
|
441 |
+
desc: null
|
442 |
+
value: None
|
443 |
+
jit_mode_eval:
|
444 |
+
desc: null
|
445 |
+
value: false
|
446 |
+
use_ipex:
|
447 |
+
desc: null
|
448 |
+
value: false
|
449 |
+
bf16:
|
450 |
+
desc: null
|
451 |
+
value: false
|
452 |
+
fp16:
|
453 |
+
desc: null
|
454 |
+
value: true
|
455 |
+
fp16_opt_level:
|
456 |
+
desc: null
|
457 |
+
value: O1
|
458 |
+
half_precision_backend:
|
459 |
+
desc: null
|
460 |
+
value: auto
|
461 |
+
bf16_full_eval:
|
462 |
+
desc: null
|
463 |
+
value: false
|
464 |
+
fp16_full_eval:
|
465 |
+
desc: null
|
466 |
+
value: false
|
467 |
+
tf32:
|
468 |
+
desc: null
|
469 |
+
value: None
|
470 |
+
local_rank:
|
471 |
+
desc: null
|
472 |
+
value: 0
|
473 |
+
ddp_backend:
|
474 |
+
desc: null
|
475 |
+
value: None
|
476 |
+
tpu_num_cores:
|
477 |
+
desc: null
|
478 |
+
value: None
|
479 |
+
tpu_metrics_debug:
|
480 |
+
desc: null
|
481 |
+
value: false
|
482 |
+
debug:
|
483 |
+
desc: null
|
484 |
+
value: '[]'
|
485 |
+
dataloader_drop_last:
|
486 |
+
desc: null
|
487 |
+
value: false
|
488 |
+
eval_steps:
|
489 |
+
desc: null
|
490 |
+
value: None
|
491 |
+
dataloader_num_workers:
|
492 |
+
desc: null
|
493 |
+
value: 0
|
494 |
+
past_index:
|
495 |
+
desc: null
|
496 |
+
value: -1
|
497 |
+
run_name:
|
498 |
+
desc: null
|
499 |
+
value: ./results
|
500 |
+
disable_tqdm:
|
501 |
+
desc: null
|
502 |
+
value: false
|
503 |
+
remove_unused_columns:
|
504 |
+
desc: null
|
505 |
+
value: true
|
506 |
+
label_names:
|
507 |
+
desc: null
|
508 |
+
value: None
|
509 |
+
load_best_model_at_end:
|
510 |
+
desc: null
|
511 |
+
value: false
|
512 |
+
metric_for_best_model:
|
513 |
+
desc: null
|
514 |
+
value: None
|
515 |
+
greater_is_better:
|
516 |
+
desc: null
|
517 |
+
value: None
|
518 |
+
ignore_data_skip:
|
519 |
+
desc: null
|
520 |
+
value: false
|
521 |
+
sharded_ddp:
|
522 |
+
desc: null
|
523 |
+
value: '[]'
|
524 |
+
fsdp:
|
525 |
+
desc: null
|
526 |
+
value: '[]'
|
527 |
+
fsdp_min_num_params:
|
528 |
+
desc: null
|
529 |
+
value: 0
|
530 |
+
fsdp_config:
|
531 |
+
desc: null
|
532 |
+
value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
|
533 |
+
fsdp_transformer_layer_cls_to_wrap:
|
534 |
+
desc: null
|
535 |
+
value: None
|
536 |
+
deepspeed:
|
537 |
+
desc: null
|
538 |
+
value: None
|
539 |
+
label_smoothing_factor:
|
540 |
+
desc: null
|
541 |
+
value: 0.0
|
542 |
+
optim:
|
543 |
+
desc: null
|
544 |
+
value: paged_adamw_32bit
|
545 |
+
optim_args:
|
546 |
+
desc: null
|
547 |
+
value: None
|
548 |
+
adafactor:
|
549 |
+
desc: null
|
550 |
+
value: false
|
551 |
+
group_by_length:
|
552 |
+
desc: null
|
553 |
+
value: true
|
554 |
+
length_column_name:
|
555 |
+
desc: null
|
556 |
+
value: length
|
557 |
+
report_to:
|
558 |
+
desc: null
|
559 |
+
value: '[''wandb'']'
|
560 |
+
ddp_find_unused_parameters:
|
561 |
+
desc: null
|
562 |
+
value: None
|
563 |
+
ddp_bucket_cap_mb:
|
564 |
+
desc: null
|
565 |
+
value: None
|
566 |
+
ddp_broadcast_buffers:
|
567 |
+
desc: null
|
568 |
+
value: None
|
569 |
+
dataloader_pin_memory:
|
570 |
+
desc: null
|
571 |
+
value: true
|
572 |
+
skip_memory_metrics:
|
573 |
+
desc: null
|
574 |
+
value: true
|
575 |
+
use_legacy_prediction_loop:
|
576 |
+
desc: null
|
577 |
+
value: false
|
578 |
+
push_to_hub:
|
579 |
+
desc: null
|
580 |
+
value: false
|
581 |
+
resume_from_checkpoint:
|
582 |
+
desc: null
|
583 |
+
value: None
|
584 |
+
hub_model_id:
|
585 |
+
desc: null
|
586 |
+
value: None
|
587 |
+
hub_strategy:
|
588 |
+
desc: null
|
589 |
+
value: every_save
|
590 |
+
hub_token:
|
591 |
+
desc: null
|
592 |
+
value: <HUB_TOKEN>
|
593 |
+
hub_private_repo:
|
594 |
+
desc: null
|
595 |
+
value: false
|
596 |
+
gradient_checkpointing:
|
597 |
+
desc: null
|
598 |
+
value: false
|
599 |
+
include_inputs_for_metrics:
|
600 |
+
desc: null
|
601 |
+
value: false
|
602 |
+
fp16_backend:
|
603 |
+
desc: null
|
604 |
+
value: auto
|
605 |
+
push_to_hub_model_id:
|
606 |
+
desc: null
|
607 |
+
value: None
|
608 |
+
push_to_hub_organization:
|
609 |
+
desc: null
|
610 |
+
value: None
|
611 |
+
push_to_hub_token:
|
612 |
+
desc: null
|
613 |
+
value: <PUSH_TO_HUB_TOKEN>
|
614 |
+
mp_parameters:
|
615 |
+
desc: null
|
616 |
+
value: ''
|
617 |
+
auto_find_batch_size:
|
618 |
+
desc: null
|
619 |
+
value: false
|
620 |
+
full_determinism:
|
621 |
+
desc: null
|
622 |
+
value: false
|
623 |
+
torchdynamo:
|
624 |
+
desc: null
|
625 |
+
value: None
|
626 |
+
ray_scope:
|
627 |
+
desc: null
|
628 |
+
value: last
|
629 |
+
ddp_timeout:
|
630 |
+
desc: null
|
631 |
+
value: 1800
|
632 |
+
torch_compile:
|
633 |
+
desc: null
|
634 |
+
value: false
|
635 |
+
torch_compile_backend:
|
636 |
+
desc: null
|
637 |
+
value: None
|
638 |
+
torch_compile_mode:
|
639 |
+
desc: null
|
640 |
+
value: None
|
641 |
+
xpu_backend:
|
642 |
+
desc: null
|
643 |
+
value: None
|
644 |
+
train_batch_size:
|
645 |
+
desc: null
|
646 |
+
value: 4
|
647 |
+
eval_batch_size:
|
648 |
+
desc: null
|
649 |
+
value: 8
|
wandb/run-20230727_154936-a41qiywg/files/output.log
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
|
3 |
+
{}
|
4 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
5 |
+
To disable this warning, you can either:
|
6 |
+
- Avoid using `tokenizers` before the fork if possible
|
7 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
8 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
9 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
10 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
11 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
12 |
+
To disable this warning, you can either:
|
13 |
+
- Avoid using `tokenizers` before the fork if possible
|
14 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
15 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
16 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
17 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
18 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
19 |
+
To disable this warning, you can either:
|
20 |
+
- Avoid using `tokenizers` before the fork if possible
|
21 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
22 |
+
Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.10/site-packages (8.0.7)
|
23 |
+
Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (6.23.0)
|
24 |
+
Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (8.13.2)
|
25 |
+
Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (5.9.0)
|
26 |
+
Requirement already satisfied: widgetsnbextension~=4.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (4.0.8)
|
27 |
+
Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /opt/conda/lib/python3.10/site-packages (from ipywidgets) (3.0.8)
|
28 |
+
Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.3)
|
29 |
+
Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.6.7)
|
30 |
+
Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (8.2.0)
|
31 |
+
Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.3.0)
|
32 |
+
Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.6)
|
33 |
+
Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (1.5.6)
|
34 |
+
Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (23.1)
|
35 |
+
Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (5.9.5)
|
36 |
+
Requirement already satisfied: pyzmq>=20 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (25.0.2)
|
37 |
+
Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.3)
|
38 |
+
Requirement already satisfied: backcall in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.0)
|
39 |
+
Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)
|
40 |
+
Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.18.2)
|
41 |
+
Requirement already satisfied: pickleshare in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.7.5)
|
42 |
+
Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.38)
|
43 |
+
Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (2.15.1)
|
44 |
+
Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)
|
45 |
+
Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)
|
46 |
+
Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)
|
47 |
+
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets) (2.8.2)
|
48 |
+
Requirement already satisfied: platformdirs>=2.5 in /opt/conda/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets) (3.5.0)
|
49 |
+
Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
|
50 |
+
Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets) (0.2.6)
|
51 |
+
Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (1.2.0)
|
52 |
+
Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.2.1)
|
53 |
+
Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)
|
54 |
+
Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)
|
55 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
56 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
57 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
58 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
59 |
+
To disable this warning, you can either:
|
60 |
+
- Avoid using `tokenizers` before the fork if possible
|
61 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
62 |
+
Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.0)
|
63 |
+
Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)
|
64 |
+
Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)
|
65 |
+
Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)
|
66 |
+
Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)
|
67 |
+
Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)
|
68 |
+
Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)
|
69 |
+
Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.2.0)
|
70 |
+
Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)
|
71 |
+
Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)
|
72 |
+
Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)
|
73 |
+
Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)
|
74 |
+
Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)
|
75 |
+
Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)
|
76 |
+
Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)
|
77 |
+
Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)
|
78 |
+
Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)
|
79 |
+
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)
|
80 |
+
Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)
|
81 |
+
Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)
|
82 |
+
Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)
|
83 |
+
Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)
|
84 |
+
Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)
|
85 |
+
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)
|
86 |
+
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)
|
87 |
+
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)
|
88 |
+
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)
|
89 |
+
Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
|
90 |
+
Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)
|
91 |
+
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)
|
92 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
93 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
94 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
95 |
+
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
96 |
+
To disable this warning, you can either:
|
97 |
+
- Avoid using `tokenizers` before the fork if possible
|
98 |
+
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
99 |
+
Requirement already satisfied: torch in /opt/conda/lib/python3.10/site-packages (2.0.0)
|
100 |
+
Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch) (3.12.0)
|
101 |
+
Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch) (4.5.0)
|
102 |
+
Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch) (1.11.1)
|
103 |
+
Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch) (3.1)
|
104 |
+
Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch) (3.1.2)
|
105 |
+
Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch) (2.1.2)
|
106 |
+
Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch) (1.3.0)
|
107 |
+
[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
|
108 |
+
[1m[[34m[22mnotice[39m[1m][22m A new release of pip is available: [31m23.1.2[39m -> [32m23.2.1
|
109 |
+
[1m[[34m[22mnotice[39m[1m][22m To update, run: [32mpip install --upgrade pip
|
110 |
+
True
|
111 |
+
/opt/conda/lib/python3.10/site-packages/peft/utils/other.py:104: FutureWarning: prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.
|
112 |
+
warnings.warn(
|
wandb/run-20230727_154936-a41qiywg/files/requirements.txt
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.21.0
|
2 |
+
aiohttp==3.8.5
|
3 |
+
aiosignal==1.3.1
|
4 |
+
apex==0.1
|
5 |
+
appdirs==1.4.4
|
6 |
+
argparse==1.4.0
|
7 |
+
asttokens==2.2.1
|
8 |
+
async-timeout==4.0.2
|
9 |
+
attrs==22.2.0
|
10 |
+
awscli==1.27.132
|
11 |
+
backcall==0.2.0
|
12 |
+
backports.functools-lru-cache==1.6.4
|
13 |
+
bcrypt==4.0.1
|
14 |
+
bitsandbytes==0.41.0
|
15 |
+
blis==0.7.9
|
16 |
+
bokeh==3.1.1
|
17 |
+
boto3==1.26.132
|
18 |
+
botocore==1.29.132
|
19 |
+
brotlipy==0.7.0
|
20 |
+
cached-property==1.5.2
|
21 |
+
catalogue==2.0.8
|
22 |
+
certifi==2023.5.7
|
23 |
+
cffi==1.15.1
|
24 |
+
charset-normalizer==3.1.0
|
25 |
+
click==8.1.3
|
26 |
+
cloudpickle==2.2.1
|
27 |
+
cmake==3.26.3
|
28 |
+
colorama==0.4.4
|
29 |
+
comm==0.1.3
|
30 |
+
commonmark==0.9.1
|
31 |
+
conda-content-trust==0.1.3
|
32 |
+
conda-package-handling==2.0.2
|
33 |
+
conda-package-streaming==0.7.0
|
34 |
+
conda==23.1.0
|
35 |
+
confection==0.0.4
|
36 |
+
contextlib2==21.6.0
|
37 |
+
contourpy==1.0.7
|
38 |
+
cryptography==40.0.1
|
39 |
+
cycler==0.11.0
|
40 |
+
cymem==2.0.7
|
41 |
+
cython==0.29.34
|
42 |
+
datasets==2.14.0
|
43 |
+
debugpy==1.6.7
|
44 |
+
decorator==5.1.1
|
45 |
+
deepspeed==0.6.1+1ea3d4b
|
46 |
+
dgl==1.1.0+cu118
|
47 |
+
dill==0.3.6
|
48 |
+
docker-pycreds==0.4.0
|
49 |
+
docutils==0.15.2
|
50 |
+
einops==0.6.1
|
51 |
+
executing==1.2.0
|
52 |
+
fastai==2.7.12
|
53 |
+
fastcore==1.5.29
|
54 |
+
fastdownload==0.0.7
|
55 |
+
fastprogress==1.0.3
|
56 |
+
filelock==3.12.0
|
57 |
+
flash-attn==0.2.8
|
58 |
+
fonttools==4.39.4
|
59 |
+
frozenlist==1.4.0
|
60 |
+
fsspec==2023.5.0
|
61 |
+
future==0.18.3
|
62 |
+
gevent==22.10.2
|
63 |
+
gitdb==4.0.10
|
64 |
+
gitpython==3.1.32
|
65 |
+
gmpy2==2.1.2
|
66 |
+
google-pasta==0.2.0
|
67 |
+
greenlet==2.0.2
|
68 |
+
h5py==3.8.0
|
69 |
+
hjson==3.1.0
|
70 |
+
horovod==0.26.1
|
71 |
+
huggingface-hub==0.16.4
|
72 |
+
idna==3.4
|
73 |
+
imageio==2.28.1
|
74 |
+
importlib-metadata==4.13.0
|
75 |
+
inotify-simple==1.2.1
|
76 |
+
ipykernel==6.23.0
|
77 |
+
ipython==8.13.2
|
78 |
+
ipywidgets==8.0.7
|
79 |
+
jedi==0.18.2
|
80 |
+
jinja2==3.1.2
|
81 |
+
jmespath==1.0.1
|
82 |
+
joblib==1.2.0
|
83 |
+
jsonpatch==1.32
|
84 |
+
jsonpointer==2.3
|
85 |
+
jsonschema==4.17.3
|
86 |
+
jupyter-client==8.2.0
|
87 |
+
jupyter-core==5.3.0
|
88 |
+
jupyterlab-widgets==3.0.8
|
89 |
+
kiwisolver==1.4.4
|
90 |
+
langcodes==3.3.0
|
91 |
+
libmambapy==1.4.1
|
92 |
+
lit==16.0.3
|
93 |
+
llvmlite==0.39.1
|
94 |
+
mamba==1.4.1
|
95 |
+
markupsafe==2.1.2
|
96 |
+
matplotlib-inline==0.1.6
|
97 |
+
matplotlib==3.7.1
|
98 |
+
mpi4py==3.1.4
|
99 |
+
mpmath==1.3.0
|
100 |
+
multidict==6.0.4
|
101 |
+
multiprocess==0.70.14
|
102 |
+
munkres==1.1.4
|
103 |
+
murmurhash==1.0.9
|
104 |
+
nest-asyncio==1.5.6
|
105 |
+
networkx==3.1
|
106 |
+
ninja==1.11.1
|
107 |
+
numba==0.56.4
|
108 |
+
numpy==1.23.5
|
109 |
+
opencv-python==4.7.0
|
110 |
+
packaging==23.1
|
111 |
+
pandas==2.0.1
|
112 |
+
paramiko==3.1.0
|
113 |
+
parso==0.8.3
|
114 |
+
pathos==0.3.0
|
115 |
+
pathtools==0.1.2
|
116 |
+
pathy==0.10.1
|
117 |
+
patsy==0.5.3
|
118 |
+
peft==0.5.0.dev0
|
119 |
+
pexpect==4.8.0
|
120 |
+
pickleshare==0.7.5
|
121 |
+
pillow==9.4.0
|
122 |
+
pip==23.1.2
|
123 |
+
platformdirs==3.5.0
|
124 |
+
plotly==5.14.1
|
125 |
+
pluggy==1.0.0
|
126 |
+
ply==3.11
|
127 |
+
pooch==1.7.0
|
128 |
+
pox==0.3.2
|
129 |
+
ppft==1.7.6.6
|
130 |
+
preshed==3.0.8
|
131 |
+
prompt-toolkit==3.0.38
|
132 |
+
protobuf3-to-dict==0.1.5
|
133 |
+
protobuf==3.20.3
|
134 |
+
psutil==5.9.5
|
135 |
+
ptyprocess==0.7.0
|
136 |
+
pure-eval==0.2.2
|
137 |
+
py-cpuinfo==9.0.0
|
138 |
+
pyarrow==12.0.0
|
139 |
+
pyasn1==0.4.8
|
140 |
+
pybind11-global==2.10.4
|
141 |
+
pybind11==2.10.4
|
142 |
+
pycosat==0.6.4
|
143 |
+
pycparser==2.21
|
144 |
+
pydantic==1.10.7
|
145 |
+
pyfunctional==1.4.3
|
146 |
+
pygments==2.15.1
|
147 |
+
pyinstrument-cext==0.2.4
|
148 |
+
pyinstrument==3.4.2
|
149 |
+
pynacl==1.5.0
|
150 |
+
pyopenssl==23.1.1
|
151 |
+
pyparsing==3.0.9
|
152 |
+
pyqt5-sip==12.11.0
|
153 |
+
pyqt5==5.15.7
|
154 |
+
pyrsistent==0.19.3
|
155 |
+
pysocks==1.7.1
|
156 |
+
python-dateutil==2.8.2
|
157 |
+
pytz==2023.3
|
158 |
+
pyyaml==5.4.1
|
159 |
+
pyzmq==25.0.2
|
160 |
+
regex==2023.6.3
|
161 |
+
requests==2.28.2
|
162 |
+
retrying==1.3.4
|
163 |
+
rich==12.6.0
|
164 |
+
rsa==4.7.2
|
165 |
+
ruamel.yaml.clib==0.2.7
|
166 |
+
ruamel.yaml==0.17.21
|
167 |
+
s3fs==0.4.2
|
168 |
+
s3transfer==0.6.1
|
169 |
+
safetensors==0.3.1
|
170 |
+
sagemaker-experiments==0.1.43
|
171 |
+
sagemaker-pytorch-training==2.8.0
|
172 |
+
sagemaker-training==4.5.0
|
173 |
+
sagemaker==2.154.0
|
174 |
+
schema==0.7.5
|
175 |
+
scikit-learn==1.2.2
|
176 |
+
scipy==1.10.1
|
177 |
+
seaborn==0.12.2
|
178 |
+
sentry-sdk==1.28.1
|
179 |
+
setproctitle==1.3.2
|
180 |
+
setuptools==65.6.3
|
181 |
+
shap==0.41.0
|
182 |
+
shellingham==1.5.1
|
183 |
+
sip==6.7.9
|
184 |
+
six==1.16.0
|
185 |
+
slicer==0.0.7
|
186 |
+
smart-open==5.2.1
|
187 |
+
smclarify==0.5
|
188 |
+
smdebug-rulesconfig==1.0.1
|
189 |
+
smdebug==1.0.34
|
190 |
+
smdistributed-dataparallel==1.8.0
|
191 |
+
smdistributed-modelparallel==1.15.0
|
192 |
+
smmap==5.0.0
|
193 |
+
spacy-legacy==3.0.12
|
194 |
+
spacy-loggers==1.0.4
|
195 |
+
spacy==3.5.2
|
196 |
+
srsly==2.4.6
|
197 |
+
stack-data==0.6.2
|
198 |
+
statsmodels==0.14.0
|
199 |
+
sympy==1.11.1
|
200 |
+
tabulate==0.9.0
|
201 |
+
tblib==1.7.0
|
202 |
+
tenacity==8.2.2
|
203 |
+
thinc==8.1.10
|
204 |
+
threadpoolctl==3.1.0
|
205 |
+
tokenizers==0.13.3
|
206 |
+
toml==0.10.2
|
207 |
+
tomli==2.0.1
|
208 |
+
toolz==0.12.0
|
209 |
+
torch==2.0.0
|
210 |
+
torchaudio==2.0.1
|
211 |
+
torchdata==0.6.0
|
212 |
+
torchnet==0.0.4
|
213 |
+
torchtext==0.15.1
|
214 |
+
torchvision==0.15.1
|
215 |
+
tornado==6.3
|
216 |
+
tqdm==4.65.0
|
217 |
+
traitlets==5.9.0
|
218 |
+
transformers==4.31.0
|
219 |
+
triton==2.0.0.dev20221202
|
220 |
+
trl==0.4.7
|
221 |
+
typer==0.7.0
|
222 |
+
typing-extensions==4.5.0
|
223 |
+
tzdata==2023.3
|
224 |
+
unicodedata2==15.0.0
|
225 |
+
urllib3==1.26.15
|
226 |
+
visdom==0.2.4
|
227 |
+
wandb==0.15.7
|
228 |
+
wasabi==1.1.1
|
229 |
+
wcwidth==0.2.6
|
230 |
+
websocket-client==1.5.1
|
231 |
+
werkzeug==2.3.4
|
232 |
+
wheel==0.40.0
|
233 |
+
widgetsnbextension==4.0.8
|
234 |
+
xxhash==3.2.0
|
235 |
+
xyzservices==2023.2.0
|
236 |
+
yarl==1.9.2
|
237 |
+
zipp==3.15.0
|
238 |
+
zope.event==4.6
|
239 |
+
zope.interface==6.0
|
240 |
+
zstandard==0.19.0
|
wandb/run-20230727_154936-a41qiywg/files/wandb-metadata.json
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-4.14.318-241.531.amzn2.x86_64-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.10.8",
|
4 |
+
"heartbeatAt": "2023-07-27T15:49:36.888553",
|
5 |
+
"startedAt": "2023-07-27T15:49:36.344100",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [],
|
9 |
+
"state": "running",
|
10 |
+
"program": "<python with no main file>",
|
11 |
+
"host": "pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80",
|
12 |
+
"username": "root",
|
13 |
+
"executable": "/opt/conda/bin/python",
|
14 |
+
"cpu_count": 4,
|
15 |
+
"cpu_count_logical": 8,
|
16 |
+
"cpu_freq": {
|
17 |
+
"current": 3100.120625,
|
18 |
+
"min": 0.0,
|
19 |
+
"max": 0.0
|
20 |
+
},
|
21 |
+
"cpu_freq_per_core": [
|
22 |
+
{
|
23 |
+
"current": 3107.574,
|
24 |
+
"min": 0.0,
|
25 |
+
"max": 0.0
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"current": 3102.47,
|
29 |
+
"min": 0.0,
|
30 |
+
"max": 0.0
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"current": 3099.63,
|
34 |
+
"min": 0.0,
|
35 |
+
"max": 0.0
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"current": 3099.058,
|
39 |
+
"min": 0.0,
|
40 |
+
"max": 0.0
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"current": 3100.716,
|
44 |
+
"min": 0.0,
|
45 |
+
"max": 0.0
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"current": 3099.393,
|
49 |
+
"min": 0.0,
|
50 |
+
"max": 0.0
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"current": 3099.988,
|
54 |
+
"min": 0.0,
|
55 |
+
"max": 0.0
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"current": 3092.136,
|
59 |
+
"min": 0.0,
|
60 |
+
"max": 0.0
|
61 |
+
}
|
62 |
+
],
|
63 |
+
"disk": {
|
64 |
+
"total": 32.0,
|
65 |
+
"used": 0.414398193359375
|
66 |
+
},
|
67 |
+
"gpu": "Tesla T4",
|
68 |
+
"gpu_count": 1,
|
69 |
+
"gpu_devices": [
|
70 |
+
{
|
71 |
+
"name": "Tesla T4",
|
72 |
+
"memory_total": 15843721216
|
73 |
+
}
|
74 |
+
],
|
75 |
+
"memory": {
|
76 |
+
"total": 30.947834014892578
|
77 |
+
}
|
78 |
+
}
|
wandb/run-20230727_154936-a41qiywg/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/loss": 1.5234, "train/learning_rate": 0.0002, "train/epoch": 5.8, "train/global_step": 500, "_timestamp": 1690823397.7400424, "_runtime": 350421.32170534134, "_step": 101, "train/train_runtime": 7012.9274, "train/train_samples_per_second": 1.141, "train/train_steps_per_second": 0.071, "train/total_flos": 2.3703947270255616e+16, "train/train_loss": 2.225116060256958}
|
wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d82385c7c91ccf548be984016744cafe22c0bffbe4c56266892c862cde84fe4
|
3 |
+
size 16040370
|
wandb/run-20230727_154936-a41qiywg/logs/debug.log
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-07-27 15:49:36,411 INFO MainThread:21 [wandb_setup.py:_flush():76] Current SDK version is 0.15.7
|
2 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Configure stats pid to 21
|
3 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from /root/mskov/falcon7b_quant/wandb/settings
|
5 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
8 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
9 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():507] Logging user logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug.log
|
10 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_log_setup():508] Logging internal logs to /root/mskov/falcon7b_quant/wandb/run-20230727_154936-a41qiywg/logs/debug-internal.log
|
11 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:_jupyter_setup():453] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f468db73070>
|
12 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():547] calling init triggers
|
13 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
|
14 |
+
config: {}
|
15 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():596] starting backend
|
16 |
+
2023-07-27 15:49:36,412 INFO MainThread:21 [wandb_init.py:init():600] setting up manager
|
17 |
+
2023-07-27 15:49:36,414 INFO MainThread:21 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2023-07-27 15:49:36,416 INFO MainThread:21 [wandb_init.py:init():606] backend started and connected
|
19 |
+
2023-07-27 15:49:36,424 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1234] probe notebook
|
20 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_run.py:_label_probe_notebook():1244] Unable to probe notebook: 'NoneType' object has no attribute 'get'
|
21 |
+
2023-07-27 15:49:36,429 INFO MainThread:21 [wandb_init.py:init():697] updated telemetry
|
22 |
+
2023-07-27 15:49:36,450 INFO MainThread:21 [wandb_init.py:init():730] communicating run to backend with 60.0 second timeout
|
23 |
+
2023-07-27 15:49:36,781 INFO MainThread:21 [wandb_run.py:_on_init():2174] communicating current version
|
24 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_run.py:_on_init():2183] got version response
|
25 |
+
2023-07-27 15:49:36,852 INFO MainThread:21 [wandb_init.py:init():781] starting run threads in backend
|
26 |
+
2023-07-27 15:49:44,828 INFO MainThread:21 [wandb_run.py:_console_start():2153] atexit reg
|
27 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2008] redirect: wrap_raw
|
28 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2073] Wrapping output streams.
|
29 |
+
2023-07-27 15:49:44,830 INFO MainThread:21 [wandb_run.py:_redirect():2098] Redirects installed.
|
30 |
+
2023-07-27 15:49:44,832 INFO MainThread:21 [wandb_init.py:init():822] run started, returning control to user process
|
31 |
+
2023-07-27 15:49:44,835 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul27_15-48-23_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
32 |
+
2023-07-27 17:45:31,239 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
33 |
+
2023-07-27 17:45:31,240 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
34 |
+
2023-07-31 14:45:09,605 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
35 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
36 |
+
2023-07-31 14:45:09,630 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
37 |
+
2023-07-31 15:11:17,481 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
38 |
+
2023-07-31 15:11:29,927 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
39 |
+
2023-07-31 15:11:29,929 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
40 |
+
2023-07-31 15:11:29,934 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
41 |
+
2023-07-31 15:11:32,706 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
42 |
+
2023-07-31 15:11:32,707 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
43 |
+
2023-07-31 15:11:32,712 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
44 |
+
2023-07-31 15:11:35,511 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
45 |
+
2023-07-31 15:11:35,512 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
46 |
+
2023-07-31 15:11:35,517 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
47 |
+
2023-07-31 15:11:38,405 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
48 |
+
2023-07-31 15:11:38,407 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
49 |
+
2023-07-31 15:11:39,706 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
50 |
+
2023-07-31 15:11:42,399 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
51 |
+
2023-07-31 15:11:42,400 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
52 |
+
2023-07-31 15:11:42,759 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
53 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
54 |
+
2023-07-31 15:11:42,762 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
55 |
+
2023-07-31 15:11:47,781 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
56 |
+
2023-07-31 15:12:05,813 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
57 |
+
2023-07-31 15:12:05,815 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
58 |
+
2023-07-31 15:12:05,839 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
59 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
60 |
+
2023-07-31 15:12:06,211 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
61 |
+
2023-07-31 15:12:06,217 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
62 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
63 |
+
2023-07-31 15:12:06,218 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
64 |
+
2023-07-31 15:12:06,224 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
65 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
66 |
+
2023-07-31 15:12:06,301 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
67 |
+
2023-07-31 15:12:11,043 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
68 |
+
2023-07-31 15:13:04,229 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
69 |
+
2023-07-31 15:13:04,231 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
70 |
+
2023-07-31 15:13:04,236 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
71 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
72 |
+
2023-07-31 15:13:04,244 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
73 |
+
2023-07-31 15:13:04,249 INFO MainThread:21 [wandb_init.py:_resume_backend():423] resuming backend
|
74 |
+
2023-07-31 15:13:04,818 INFO MainThread:21 [wandb_run.py:_config_callback():1282] config_cb None None {'vocab_size': 65024, 'hidden_size': 4544, 'n_layer': 32, 'n_head': 71, 'layer_norm_epsilon': 1e-05, 'initializer_range': 0.02, 'use_cache': False, 'apply_residual_connection_post_layernorm': False, 'hidden_dropout': 0.0, 'attention_dropout': 0.0, 'bos_token_id': 11, 'eos_token_id': 11, 'multi_query': True, 'alibi': False, 'bias': False, 'parallel_attn': True, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'bfloat16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['RWForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'pad_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'ybelkada/falcon-7b-sharded-bf16', 'transformers_version': '4.31.0', 'auto_map': {'AutoConfig': 'tiiuae/falcon-7b--configuration_RW.RWConfig', 'AutoModel': 'tiiuae/falcon-7b--modelling_RW.RWModel', 'AutoModelForCausalLM': 'tiiuae/falcon-7b--modelling_RW.RWForCausalLM', 'AutoModelForQuestionAnswering': 'tiiuae/falcon-7b--modelling_RW.RWForQuestionAnswering', 'AutoModelForSequenceClassification': 'tiiuae/falcon-7b--modelling_RW.RWForSequenceClassification', 'AutoModelForTokenClassification': 'tiiuae/falcon-7b--modelling_RW.RWForTokenClassification'}, 'model_type': 'RefinedWebModel', 'quantization_config': {'load_in_8bit': False, 'load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16'}, 'output_dir': './results', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'no', 'prediction_loss_only': False, 'per_device_train_batch_size': 4, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': 'None', 'per_gpu_eval_batch_size': 'None', 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': 'None', 'eval_delay': 0, 'learning_rate': 0.0002, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 0.3, 'num_train_epochs': 3.0, 'max_steps': 500, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.03, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './results/runs/Jul31_15-12-06_pytorch-2-0-0-gpu--ml-g4dn-2xlarge-9a500aed7fe4dadadc562adc1e80', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 10, 'save_total_limit': 'None', 'save_safetensors': False, 'save_on_each_node': False, 'no_cuda': False, 'use_mps_device': False, 'seed': 42, 'data_seed': 'None', 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': 'None', 'local_rank': 0, 'ddp_backend': 'None', 'tpu_num_cores': 'None', 'tpu_metrics_debug': False, 'debug': '[]', 'dataloader_drop_last': False, 'eval_steps': 'None', 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './results', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': 'None', 'load_best_model_at_end': False, 'metric_for_best_model': 'None', 'greater_is_better': 'None', 'ignore_data_skip': False, 'sharded_ddp': '[]', 'fsdp': '[]', 'fsdp_min_num_params': 0, 'fsdp_config': "{'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}", 'fsdp_transformer_layer_cls_to_wrap': 'None', 'deepspeed': 'None', 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': 'None', 'adafactor': False, 'group_by_length': True, 'length_column_name': 'length', 'report_to': "['wandb']", 'ddp_find_unused_parameters': 'None', 'ddp_bucket_cap_mb': 'None', 'ddp_broadcast_buffers': 'None', 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': 'None', 'hub_model_id': 'None', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'gradient_checkpointing': False, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'None', 'push_to_hub_organization': 'None', 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': 'None', 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': 'None', 'torch_compile_mode': 'None', 'xpu_backend': 'None', 'train_batch_size': 4, 'eval_batch_size': 8}
|
75 |
+
2023-07-31 17:09:57,806 INFO MainThread:21 [jupyter.py:save_ipynb():373] not saving jupyter notebook
|
76 |
+
2023-07-31 17:09:57,808 INFO MainThread:21 [wandb_init.py:_pause_backend():418] pausing backend
|
wandb/run-20230727_154936-a41qiywg/run-a41qiywg.wandb
ADDED
Binary file (426 kB). View file
|
|