Initial commit
- logs.txt +111 -0
- mlc-chat-config.json +71 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +37 -0
logs.txt
ADDED
@@ -0,0 +1,111 @@
+/home/floriadmin/miniforge3/envs/mlc/bin/python -m mlc_llm gen_config ../dist/models/ToolLLaMA-2-7b-v2 --quantization q4f32_1 --conv-template llama-2 --output /tmp/tmpxjsa38do --tensor-parallel-shards 2
+[2024-03-18 21:03:53] INFO auto_config.py:115: Found model configuration: ../dist/models/ToolLLaMA-2-7b-v2/config.json
+[2024-03-18 21:03:53] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
+[2024-03-18 21:03:53] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
+[2024-03-18 21:03:53] INFO llama_model.py:72: prefill_chunk_size defaults to context_window_size (4096)
+[2024-03-18 21:03:53] INFO config.py:106: Overriding max_batch_size from 1 to 80
+[2024-03-18 21:03:53] INFO config.py:106: Overriding tensor_parallel_shards from 1 to 2
+[2024-03-18 21:03:53] INFO gen_config.py:133: [generation_config.json] Setting bos_token_id: 1
+[2024-03-18 21:03:53] INFO gen_config.py:133: [generation_config.json] Setting eos_token_id: 2
+[2024-03-18 21:03:53] INFO gen_config.py:145: Found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer.model. Copying to /tmp/tmpxjsa38do/tokenizer.model
+[2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer.json
+[2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/vocab.json
+[2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/merges.txt
+[2024-03-18 21:03:53] INFO gen_config.py:147: Not found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/added_tokens.json
+[2024-03-18 21:03:53] INFO gen_config.py:145: Found tokenizer config: ../dist/models/ToolLLaMA-2-7b-v2/tokenizer_config.json. Copying to /tmp/tmpxjsa38do/tokenizer_config.json
+[2024-03-18 21:03:53] INFO gen_config.py:153: The model has `tokenizer.model` but not `tokenizer.json`. It is always recommended to prefer JSON instead. Attempting to convert using HuggingFace transformers library
+You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+[2024-03-18 21:03:54] INFO gen_config.py:167: Succesfully converted `tokenizer.model` to: /tmp/tmpxjsa38do/tokenizer.json
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting pad_token_id: 0
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting temperature: 0.7
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting presence_penalty: 0.0
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting frequency_penalty: 0.0
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting repetition_penalty: 1.0
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting top_p: 0.95
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting mean_gen_len: 128
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting max_gen_len: 512
+[2024-03-18 21:03:54] INFO gen_config.py:75: [System default] Setting shift_fill_factor: 0.3
+[2024-03-18 21:03:54] INFO gen_config.py:198: Dumping configuration file to: /tmp/tmpxjsa38do/mlc-chat-config.json
+/home/floriadmin/miniforge3/envs/mlc/bin/python -m mlc_llm convert_weight ../dist/models/ToolLLaMA-2-7b-v2 --quantization q4f32_1 --source-format auto --output /tmp/tmpxjsa38do
+[2024-03-18 21:03:55] INFO auto_config.py:115: Found model configuration: ../dist/models/ToolLLaMA-2-7b-v2/config.json
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:0
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:1
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:2
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:3
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:4
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:5
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:6
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:7
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:8
+[2024-03-18 21:03:56] INFO auto_device.py:76: Found device: cuda:9
+[2024-03-18 21:03:57] INFO auto_device.py:85: Not found device: rocm:0
+[2024-03-18 21:03:58] INFO auto_device.py:85: Not found device: metal:0
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:0
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:1
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:2
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:3
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:4
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:5
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:6
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:7
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:8
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:9
+[2024-03-18 21:04:02] INFO auto_device.py:76: Found device: vulkan:10
+[2024-03-18 21:04:03] INFO auto_device.py:85: Not found device: opencl:0
+[2024-03-18 21:04:03] INFO auto_device.py:33: Using device: cuda:0
+[2024-03-18 21:04:03] INFO auto_weight.py:70: Finding weights in: ../dist/models/ToolLLaMA-2-7b-v2
+[2024-03-18 21:04:03] INFO auto_weight.py:120: Found source weight format: huggingface-torch. Source configuration: ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json
+[2024-03-18 21:04:03] INFO auto_weight.py:167: Not found Huggingface Safetensor
+[2024-03-18 21:04:03] INFO auto_weight.py:106: Using source weight configuration: ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json. Use `--source` to override.
+[2024-03-18 21:04:03] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
+[2024-03-18 21:04:03] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
+[2024-03-18 21:04:03] INFO llama_model.py:52: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
+[2024-03-18 21:04:03] INFO llama_model.py:72: prefill_chunk_size defaults to context_window_size (4096)
+Weight conversion with arguments:
+  --config          ../dist/models/ToolLLaMA-2-7b-v2/config.json
+  --quantization    GroupQuantize(name='q4f32_1', kind='group-quant', group_size=40, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float32', linear_weight_layout='NK', quantize_embedding=True, quantize_final_fc=True, num_elem_per_storage=8, num_storage_per_group=5, max_int_value=7)
+  --model-type      llama
+  --device          cuda:0
+  --source          ../dist/models/ToolLLaMA-2-7b-v2/pytorch_model.bin.index.json
+  --source-format   huggingface-torch
+  --output          /tmp/tmpxjsa38do
+Start storing to cache /tmp/tmpxjsa38do
+  0%| | 0/195 [00:00<?, ?it/s]
+  0%| | 0/195 [00:00<?, ?it/s]
+  0%| | 0/195 [00:00<?, ?it/s]
+Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/__main__.py", line 47, in <module>
+    main()
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/__main__.py", line 28, in main
+    cli.main(sys.argv[2:])
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/cli/convert_weight.py", line 87, in main
+    convert_weight(
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 182, in convert_weight
+    _convert_args(args)
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 146, in _convert_args
+    tvmjs.dump_ndarray_cache(
+  File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/tvm/contrib/tvmjs.py", line 210, in dump_ndarray_cache
+    for k, origin_v in param_generator:
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/interface/convert_weight.py", line 130, in _param_generator
+    for name, param in loader.load(device=args.device, preshard_funcs=preshard_funcs):
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 117, in load
+    param = self._load_mlc_param(mlc_name, device=device)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 147, in _load_mlc_param
+    self._load_file(path)
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/huggingface_loader.py", line 186, in _load_file
+    for name, param in load_func(path):
+  File "/home/floriadmin/mlc-llm/python/mlc_llm/loader/utils.py", line 42, in load_torch_shard
+    for name, param in torch.load(path, map_location=torch.device("cpu")).items():
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 998, in load
+    with _open_file_like(f, 'rb') as opened_file:
+         ^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 445, in _open_file_like
+    return _open_file(name_or_buffer, mode)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/floriadmin/miniforge3/envs/mlc/lib/python3.11/site-packages/torch/serialization.py", line 426, in __init__
+    super().__init__(open(name, mode))
+                     ^^^^^^^^^^^^^^^^
+FileNotFoundError: [Errno 2] No such file or directory: '../dist/models/ToolLLaMA-2-7b-v2/pytorch_model-00003-of-00003.bin'
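The run above fails because the HF index references three PyTorch shards but pytorch_model-00003-of-00003.bin is absent from the model directory, most likely an incomplete download or an unpulled LFS object. A pre-flight check like the sketch below would catch this before convert_weight starts; it is not part of mlc_llm and assumes only the standard `weight_map` layout of pytorch_model.bin.index.json:

import json
from pathlib import Path

# Path taken from the log above.
model_dir = Path("../dist/models/ToolLLaMA-2-7b-v2")
index = json.loads((model_dir / "pytorch_model.bin.index.json").read_text())

# weight_map maps each tensor name to the shard file that stores it.
shards = sorted(set(index["weight_map"].values()))
missing = [s for s in shards if not (model_dir / s).exists()]
if missing:
    raise SystemExit(f"missing shard(s): {missing}")
print(f"all {len(shards)} shard(s) present")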
mlc-chat-config.json
ADDED
@@ -0,0 +1,71 @@
+{
+  "model_type": "llama",
+  "quantization": "q4f32_1",
+  "model_config": {
+    "hidden_size": 4096,
+    "intermediate_size": 11008,
+    "num_attention_heads": 32,
+    "num_hidden_layers": 32,
+    "rms_norm_eps": 1e-05,
+    "vocab_size": 32000,
+    "position_embedding_base": 10000.0,
+    "context_window_size": 4096,
+    "prefill_chunk_size": 4096,
+    "num_key_value_heads": 32,
+    "head_dim": 128,
+    "tensor_parallel_shards": 2,
+    "max_batch_size": 80
+  },
+  "vocab_size": 32000,
+  "context_window_size": 4096,
+  "sliding_window_size": -1,
+  "prefill_chunk_size": 4096,
+  "attention_sink_size": -1,
+  "tensor_parallel_shards": 2,
+  "mean_gen_len": 128,
+  "max_gen_len": 512,
+  "shift_fill_factor": 0.3,
+  "temperature": 0.7,
+  "presence_penalty": 0.0,
+  "frequency_penalty": 0.0,
+  "repetition_penalty": 1.0,
+  "top_p": 0.95,
+  "conv_template": {
+    "name": "llama-2",
+    "system_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n ",
+    "system_message": "You are a helpful, respectful and honest assistant.",
+    "roles": {
+      "user": "[INST]",
+      "assistant": "[/INST]",
+      "tool": "[INST]"
+    },
+    "role_templates": {
+      "user": "{user_message}",
+      "assistant": "{assistant_message}",
+      "tool": "{tool_message}"
+    },
+    "messages": [],
+    "seps": [
+      " "
+    ],
+    "role_content_sep": " ",
+    "role_empty_sep": " ",
+    "stop_str": [
+      "[INST]"
+    ],
+    "stop_token_ids": [
+      2
+    ],
+    "function_string": "",
+    "use_function_calling": false
+  },
+  "pad_token_id": 0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "tokenizer_files": [
+    "tokenizer.model",
+    "tokenizer_config.json",
+    "tokenizer.json"
+  ],
+  "version": "0.1.0"
+}
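The conv_template block above determines how a chat turn is flattened into model input: the system message is wrapped by system_template inside the first [INST] block, the user text follows, and "[/INST]" hands off to the assistant, with the single-space seps joining turns. A minimal sketch of that composition (render_prompt is a hypothetical helper mimicking the fields shown, not mlc_llm's actual renderer):

def render_prompt(system_message: str, user_message: str) -> str:
    # system_template from the config, with {system_message} filled in.
    system = "[INST] <<SYS>>\n" + system_message + "\n<</SYS>>\n\n "
    # The system block already opens the first [INST] turn, so the user text
    # is appended directly; roles["assistant"] = "[/INST]" closes it.
    return system + user_message + " [/INST]"

print(render_prompt("You are a helpful, respectful and honest assistant.",
                    "Hello!"))

Generation stops on stop_token_ids (EOS, id 2) or when the model emits the stop_str "[INST]".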
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
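Because tokenizer.model is stored as a git-lfs pointer, a clone without LFS leaves this 3-line stub in place of the real 499,723-byte SentencePiece file. An illustrative check against the pointer's own oid and size (a proper git-lfs checkout does this for you):

import hashlib
from pathlib import Path

data = Path("tokenizer.model").read_bytes()
assert len(data) == 499723, "size mismatch: file may still be an LFS pointer"
expected = "9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347"
assert hashlib.sha256(data).hexdigest() == expected, "sha256 mismatch"
print("tokenizer.model matches its LFS pointer")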
tokenizer_config.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": null,
+  "model_max_length": 8192,
+  "pad_token": null,
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "use_default_system_prompt": true
+}
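The tokenizer.json committed above is the fast-tokenizer conversion that gen_config performed from tokenizer.model (see the log's "Attempting to convert using HuggingFace transformers library" line). A sketch of the same round trip with plain transformers; the exact call mlc_llm makes internally may differ:

from transformers import AutoTokenizer

# Loading with the default use_fast=True converts the SentencePiece model
# on the fly when no tokenizer.json is present.
tok = AutoTokenizer.from_pretrained("../dist/models/ToolLLaMA-2-7b-v2")
# The backing `tokenizers.Tokenizer` serializes to the single-file format.
tok.backend_tokenizer.save("tokenizer.json")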