|
python -m mlc_chat gen_config /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2 --quantization q0f16 --conv-template gpt2 --output /tmp/tmpif4lzkrt
|
[2023-12-28 08:13:57] INFO auto_config.py:115: Found model configuration: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/config.json
[2023-12-28 08:13:57] INFO auto_config.py:151: Found model type: gpt2. Use `--model-type` to override.
[2023-12-28 08:13:57] INFO gpt2_model.py:44: context_window_size not found in config.json. Falling back to n_positions (1024)
[2023-12-28 08:13:57] INFO gen_config.py:115: [generation_config.json] Setting bos_token_id: 50256
[2023-12-28 08:13:57] INFO gen_config.py:115: [generation_config.json] Setting eos_token_id: 50256
[2023-12-28 08:13:57] INFO gen_config.py:129: Not found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/tokenizer.model
[2023-12-28 08:13:57] INFO gen_config.py:127: Found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/tokenizer.json. Copying to /tmp/tmpif4lzkrt/tokenizer.json
[2023-12-28 08:13:57] INFO gen_config.py:127: Found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/vocab.json. Copying to /tmp/tmpif4lzkrt/vocab.json
[2023-12-28 08:13:57] INFO gen_config.py:127: Found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/merges.txt. Copying to /tmp/tmpif4lzkrt/merges.txt
[2023-12-28 08:13:57] INFO gen_config.py:129: Not found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/added_tokens.json
[2023-12-28 08:13:57] INFO gen_config.py:129: Not found tokenizer config: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/tokenizer_config.json
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting pad_token_id: 0
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting temperature: 0.7
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting repetition_penalty: 1.0
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting top_p: 0.95
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting mean_gen_len: 128
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting max_gen_len: 512
[2023-12-28 08:13:57] INFO gen_config.py:69: [System default] Setting shift_fill_factor: 0.3
[2023-12-28 08:13:57] INFO gen_config.py:157: Dumping configuration file to: /tmp/tmpif4lzkrt/mlc-chat-config.json
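The defaults above are written into /tmp/tmpif4lzkrt/mlc-chat-config.json. A minimal sketch for inspecting the dumped file, assuming the top-level key names mirror the names printed in the log (adjust if your version of the config nests them differently):

import json
from pathlib import Path

# Path taken from the log above; substitute your own --output directory.
config = json.loads(Path("/tmp/tmpif4lzkrt/mlc-chat-config.json").read_text())

# Key names assumed from the log messages; missing keys print as None.
for key in ("conv_template", "temperature", "repetition_penalty", "top_p",
            "mean_gen_len", "max_gen_len", "bos_token_id", "eos_token_id",
            "pad_token_id"):
    print(f"{key}: {config.get(key)}")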
|
python -m mlc_chat convert_weight /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2 --quantization q0f16 --source-format auto --output /tmp/tmpif4lzkrt
|
[2023-12-28 08:13:57] INFO auto_config.py:115: Found model configuration: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/config.json
[2023-12-28 08:13:57] INFO auto_device.py:75: Found device: cuda:0
[2023-12-28 08:13:57] INFO auto_device.py:75: Found device: cuda:1
[2023-12-28 08:13:58] INFO auto_device.py:84: Not found device: rocm:0
[2023-12-28 08:13:58] INFO auto_device.py:84: Not found device: metal:0
[2023-12-28 08:13:58] INFO auto_device.py:75: Found device: vulkan:0
[2023-12-28 08:13:58] INFO auto_device.py:75: Found device: vulkan:1
[2023-12-28 08:13:58] INFO auto_device.py:75: Found device: vulkan:2
[2023-12-28 08:13:58] INFO auto_device.py:84: Not found device: opencl:0
[2023-12-28 08:13:58] INFO auto_device.py:33: Using device: cuda:0
[2023-12-28 08:13:58] INFO auto_weight.py:70: Finding weights in: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2
[2023-12-28 08:13:58] INFO auto_weight.py:129: Found source weight format: huggingface-torch. Source configuration: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/pytorch_model.bin
[2023-12-28 08:13:58] INFO auto_weight.py:149: Not found Huggingface Safetensor
[2023-12-28 08:13:58] INFO auto_weight.py:106: Using source weight configuration: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/pytorch_model.bin. Use `--source` to override.
[2023-12-28 08:13:58] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
[2023-12-28 08:13:58] INFO auto_config.py:151: Found model type: gpt2. Use `--model-type` to override.
[2023-12-28 08:13:58] INFO gpt2_model.py:44: context_window_size not found in config.json. Falling back to n_positions (1024)
[2023-12-28 08:13:59] INFO huggingface_loader.py:169: Loading HF parameters from: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/pytorch_model.bin

Weight conversion with arguments:
  --config /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/config.json
  --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
  --model-type gpt2
  --device cuda:0
  --source /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/pytorch_model.bin
  --source-format huggingface-torch
  --output /tmp/tmpif4lzkrt
|
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "lm_head.weight", shape: (50257, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.wte.weight", shape: (50257, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.wpe.weight", shape: (1024, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.0.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.1.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.2.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.3.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.4.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.5.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.6.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.7.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.8.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.9.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.10.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.ln_1.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.ln_1.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.attn.c_attn.weight", shape: (2304, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.attn.c_attn.bias", shape: (2304,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.attn.c_proj.weight", shape: (768, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.attn.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.ln_2.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.ln_2.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.mlp.c_fc.weight", shape: (3072, 768), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.mlp.c_fc.bias", shape: (3072,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.mlp.c_proj.weight", shape: (768, 3072), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.h.11.mlp.c_proj.bias", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.ln_f.weight", shape: (768,), dtype: float16
[2023-12-28 08:14:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "transformer.ln_f.bias", shape: (768,), dtype: float16
100%|████████████████████████████████████████| 149/149 [00:01<00:00, 138.80it/s]
|
[2023-12-28 08:14:02] INFO huggingface_loader.py:179: Unloading HF weight file: /ssd1/cfruan/mlc-llm-repos/mlc-llm-head/dist/models/gpt2/pytorch_model.bin
[2023-12-28 08:14:02] INFO stats.py:71: Time usage: HF loading: 2.135 sec; Pre-quantization mapping: 0.693 sec; Quantization: 0.000 sec
[2023-12-28 08:14:02] INFO stats.py:85: RAM usage: Peak RAM: 0.510 GB. Total bytes loaded from disk: 0.510 GB
[2023-12-28 08:14:02] INFO convert_weight.py:110: Parameter size after quantization: 0.304 GB
[2023-12-28 08:14:02] INFO convert_weight.py:115: Total parameters: 163,037,184
[2023-12-28 08:14:02] INFO convert_weight.py:116: Bits per parameter: 16.000
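The reported parameter size follows directly from the parameter count and the 16-bit dtype; a quick sanity check (plain arithmetic, not part of the tool's output, and the stat is evidently measured in GiB):

total_params = 163_037_184          # "Total parameters" from the log above
bytes_per_param = 2                 # q0f16 keeps weights in float16, i.e. 16 bits
size_gib = total_params * bytes_per_param / (1 << 30)
print(f"{size_gib:.3f} GiB")        # ~0.304, matching "Parameter size after quantization"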
|
Start storing to cache /tmp/tmpif4lzkrt
|
[0001/0149] saving lm_head.weight
[0002/0149] saving transformer.wte.weight
[0003/0149] saving transformer.wpe.weight
[0004/0149] saving transformer.h.0.ln_1.weight
[0005/0149] saving transformer.h.0.ln_1.bias
[0006/0149] saving transformer.h.0.attn.c_attn.weight
[0007/0149] saving transformer.h.0.attn.c_attn.bias
[0008/0149] saving transformer.h.0.attn.c_proj.weight
[0009/0149] saving transformer.h.0.attn.c_proj.bias
[0010/0149] saving transformer.h.0.ln_2.weight
[0011/0149] saving transformer.h.0.ln_2.bias
[0012/0149] saving transformer.h.0.mlp.c_fc.weight
[0013/0149] saving transformer.h.0.mlp.c_fc.bias
[0014/0149] saving transformer.h.0.mlp.c_proj.weight
[0015/0149] saving transformer.h.0.mlp.c_proj.bias
[0016/0149] saving transformer.h.1.ln_1.weight
[0017/0149] saving transformer.h.1.ln_1.bias
[0018/0149] saving transformer.h.1.attn.c_attn.weight
[0019/0149] saving transformer.h.1.attn.c_attn.bias
[0020/0149] saving transformer.h.1.attn.c_proj.weight
[0021/0149] saving transformer.h.1.attn.c_proj.bias
[0022/0149] saving transformer.h.1.ln_2.weight
[0023/0149] saving transformer.h.1.ln_2.bias
[0024/0149] saving transformer.h.1.mlp.c_fc.weight
[0025/0149] saving transformer.h.1.mlp.c_fc.bias
[0026/0149] saving transformer.h.1.mlp.c_proj.weight
[0027/0149] saving transformer.h.1.mlp.c_proj.bias
[0028/0149] saving transformer.h.2.ln_1.weight
[0029/0149] saving transformer.h.2.ln_1.bias
[0030/0149] saving transformer.h.2.attn.c_attn.weight
[0031/0149] saving transformer.h.2.attn.c_attn.bias
[0032/0149] saving transformer.h.2.attn.c_proj.weight
[0033/0149] saving transformer.h.2.attn.c_proj.bias
[0034/0149] saving transformer.h.2.ln_2.weight
[0035/0149] saving transformer.h.2.ln_2.bias
[0036/0149] saving transformer.h.2.mlp.c_fc.weight
[0037/0149] saving transformer.h.2.mlp.c_fc.bias
[0038/0149] saving transformer.h.2.mlp.c_proj.weight
[0039/0149] saving transformer.h.2.mlp.c_proj.bias
[0040/0149] saving transformer.h.3.ln_1.weight
[0041/0149] saving transformer.h.3.ln_1.bias
[0042/0149] saving transformer.h.3.attn.c_attn.weight
[0043/0149] saving transformer.h.3.attn.c_attn.bias
[0044/0149] saving transformer.h.3.attn.c_proj.weight
[0045/0149] saving transformer.h.3.attn.c_proj.bias
[0046/0149] saving transformer.h.3.ln_2.weight
[0047/0149] saving transformer.h.3.ln_2.bias
[0048/0149] saving transformer.h.3.mlp.c_fc.weight
[0049/0149] saving transformer.h.3.mlp.c_fc.bias
[0050/0149] saving transformer.h.3.mlp.c_proj.weight
[0051/0149] saving transformer.h.3.mlp.c_proj.bias
[0052/0149] saving transformer.h.4.ln_1.weight
[0053/0149] saving transformer.h.4.ln_1.bias
[0054/0149] saving transformer.h.4.attn.c_attn.weight
[0055/0149] saving transformer.h.4.attn.c_attn.bias
[0056/0149] saving transformer.h.4.attn.c_proj.weight
[0057/0149] saving transformer.h.4.attn.c_proj.bias
[0058/0149] saving transformer.h.4.ln_2.weight
[0059/0149] saving transformer.h.4.ln_2.bias
[0060/0149] saving transformer.h.4.mlp.c_fc.weight
[0061/0149] saving transformer.h.4.mlp.c_fc.bias
[0062/0149] saving transformer.h.4.mlp.c_proj.weight
[0063/0149] saving transformer.h.4.mlp.c_proj.bias
[0064/0149] saving transformer.h.5.ln_1.weight
[0065/0149] saving transformer.h.5.ln_1.bias
[0066/0149] saving transformer.h.5.attn.c_attn.weight
[0067/0149] saving transformer.h.5.attn.c_attn.bias
[0068/0149] saving transformer.h.5.attn.c_proj.weight
[0069/0149] saving transformer.h.5.attn.c_proj.bias
[0070/0149] saving transformer.h.5.ln_2.weight
[0071/0149] saving transformer.h.5.ln_2.bias
[0072/0149] saving transformer.h.5.mlp.c_fc.weight
[0073/0149] saving transformer.h.5.mlp.c_fc.bias
[0074/0149] saving transformer.h.5.mlp.c_proj.weight
[0075/0149] saving transformer.h.5.mlp.c_proj.bias
[0076/0149] saving transformer.h.6.ln_1.weight
[0077/0149] saving transformer.h.6.ln_1.bias
[0078/0149] saving transformer.h.6.attn.c_attn.weight
[0079/0149] saving transformer.h.6.attn.c_attn.bias
[2023-12-28 08:14:03] INFO convert_weight.py:132: Saved to directory: /tmp/tmpif4lzkrt
[0080/0149] saving transformer.h.6.attn.c_proj.weight
[0081/0149] saving transformer.h.6.attn.c_proj.bias
[0082/0149] saving transformer.h.6.ln_2.weight
[0083/0149] saving transformer.h.6.ln_2.bias
[0084/0149] saving transformer.h.6.mlp.c_fc.weight
[0085/0149] saving transformer.h.6.mlp.c_fc.bias
[0086/0149] saving transformer.h.6.mlp.c_proj.weight
[0087/0149] saving transformer.h.6.mlp.c_proj.bias
[0088/0149] saving transformer.h.7.ln_1.weight
[0089/0149] saving transformer.h.7.ln_1.bias
[0090/0149] saving transformer.h.7.attn.c_attn.weight
[0091/0149] saving transformer.h.7.attn.c_attn.bias
[0092/0149] saving transformer.h.7.attn.c_proj.weight
[0093/0149] saving transformer.h.7.attn.c_proj.bias
[0094/0149] saving transformer.h.7.ln_2.weight
[0095/0149] saving transformer.h.7.ln_2.bias
[0096/0149] saving transformer.h.7.mlp.c_fc.weight
[0097/0149] saving transformer.h.7.mlp.c_fc.bias
[0098/0149] saving transformer.h.7.mlp.c_proj.weight
[0099/0149] saving transformer.h.7.mlp.c_proj.bias
[0100/0149] saving transformer.h.8.ln_1.weight
[0101/0149] saving transformer.h.8.ln_1.bias
[0102/0149] saving transformer.h.8.attn.c_attn.weight
[0103/0149] saving transformer.h.8.attn.c_attn.bias
[0104/0149] saving transformer.h.8.attn.c_proj.weight
[0105/0149] saving transformer.h.8.attn.c_proj.bias
[0106/0149] saving transformer.h.8.ln_2.weight
[0107/0149] saving transformer.h.8.ln_2.bias
[0108/0149] saving transformer.h.8.mlp.c_fc.weight
[0109/0149] saving transformer.h.8.mlp.c_fc.bias
[0110/0149] saving transformer.h.8.mlp.c_proj.weight
[0111/0149] saving transformer.h.8.mlp.c_proj.bias
[0112/0149] saving transformer.h.9.ln_1.weight
[0113/0149] saving transformer.h.9.ln_1.bias
[0114/0149] saving transformer.h.9.attn.c_attn.weight
[0115/0149] saving transformer.h.9.attn.c_attn.bias
[0116/0149] saving transformer.h.9.attn.c_proj.weight
[0117/0149] saving transformer.h.9.attn.c_proj.bias
[0118/0149] saving transformer.h.9.ln_2.weight
[0119/0149] saving transformer.h.9.ln_2.bias
[0120/0149] saving transformer.h.9.mlp.c_fc.weight
[0121/0149] saving transformer.h.9.mlp.c_fc.bias
[0122/0149] saving transformer.h.9.mlp.c_proj.weight
[0123/0149] saving transformer.h.9.mlp.c_proj.bias
[0124/0149] saving transformer.h.10.ln_1.weight
[0125/0149] saving transformer.h.10.ln_1.bias
[0126/0149] saving transformer.h.10.attn.c_attn.weight
[0127/0149] saving transformer.h.10.attn.c_attn.bias
[0128/0149] saving transformer.h.10.attn.c_proj.weight
[0129/0149] saving transformer.h.10.attn.c_proj.bias
[0130/0149] saving transformer.h.10.ln_2.weight
[0131/0149] saving transformer.h.10.ln_2.bias
[0132/0149] saving transformer.h.10.mlp.c_fc.weight
[0133/0149] saving transformer.h.10.mlp.c_fc.bias
[0134/0149] saving transformer.h.10.mlp.c_proj.weight
[0135/0149] saving transformer.h.10.mlp.c_proj.bias
[0136/0149] saving transformer.h.11.ln_1.weight
[0137/0149] saving transformer.h.11.ln_1.bias
[0138/0149] saving transformer.h.11.attn.c_attn.weight
[0139/0149] saving transformer.h.11.attn.c_attn.bias
[0140/0149] saving transformer.h.11.attn.c_proj.weight
[0141/0149] saving transformer.h.11.attn.c_proj.bias
[0142/0149] saving transformer.h.11.ln_2.weight
[0143/0149] saving transformer.h.11.ln_2.bias
[0144/0149] saving transformer.h.11.mlp.c_fc.weight
[0145/0149] saving transformer.h.11.mlp.c_fc.bias
[0146/0149] saving transformer.h.11.mlp.c_proj.weight
[0147/0149] saving transformer.h.11.mlp.c_proj.bias
[0148/0149] saving transformer.ln_f.weight
[0149/0149] saving transformer.ln_f.bias
|
All finished, 8 total shards committed, record saved to /tmp/tmpif4lzkrt/ndarray-cache.json |
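The committed shards and per-tensor records are described by ndarray-cache.json. A minimal sketch for cross-checking the totals above, assuming the common two-level layout in which the top-level "records" list holds one entry per shard and each shard entry holds its own "records" list with the tensor name and shape (adjust key names if your version of the cache differs):

import json
import math
from pathlib import Path

# Path taken from the log above; substitute your own --output directory.
cache = json.loads(Path("/tmp/tmpif4lzkrt/ndarray-cache.json").read_text())

shards = cache.get("records", [])
tensors = [rec for shard in shards for rec in shard.get("records", [])]
total_params = sum(math.prod(rec["shape"]) for rec in tensors)

print(f"shards: {len(shards)}")                # expected: 8
print(f"tensors: {len(tensors)}")              # expected: 149
print(f"total parameters: {total_params:,}")   # expected: 163,037,184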
|
|