Lin-K76 committed
Commit 9f5dfe3
1 Parent(s): c1c4b1f

Upload folder using huggingface_hub
config.json CHANGED
@@ -1,10 +1,51 @@
 {
+  "_name_or_path": "/home/meta-llama/Meta-Llama-3.1-70B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
+  "compression_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "naive-quantized",
+    "global_compression_ratio": 1.463576153555163,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen"
+  },
   "eos_token_id": [
     128001,
     128008,
@@ -24,15 +65,15 @@
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
     "factor": 8.0,
-    "low_freq_factor": 1.0,
     "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
     "original_max_position_embeddings": 8192,
     "rope_type": "llama3"
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.3",
+  "torch_dtype": "float16",
+  "transformers_version": "4.43.1",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
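
Below is a minimal sketch of what the per-tensor, symmetric 8-bit float scheme in the new "compression_config" amounts to (num_bits=8, type="float", strategy="tensor", minmax observer, with lm_head left unquantized). It assumes PyTorch >= 2.1 for torch.float8_e4m3fn, and it assumes the e4m3 variant, which the config itself does not spell out; this is an illustration, not the compressed-tensors implementation.

    import torch

    FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # 448.0 for e4m3

    def quantize_per_tensor_fp8(x: torch.Tensor):
        # "observer": "minmax" with "strategy": "tensor" -> one scale per
        # tensor, taken from the absolute maximum;
        # "symmetric": true -> no zero point.
        scale = x.abs().max() / FP8_MAX
        q = (x / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
        return q, scale

    def dequantize_fp8(q: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
        return q.to(torch.float16) * scale

    w = torch.randn(8, 8, dtype=torch.float16)  # stand-in for a Linear weight
    q, s = quantize_per_tensor_fp8(w)
    print((dequantize_fp8(q, s) - w).abs().max())  # small round-off error
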
generation_config.json CHANGED
@@ -8,5 +8,5 @@
   ],
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.42.3"
-}
+  "transformers_version": "4.43.1"
+}
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
-  }
+  },
+  "pad_token": "<|eot_id|>"
 }
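
With "<|eot_id|>" registered as the pad token, batched tokenization works without the usual "no padding token" error. A quick check, assuming a local clone of this checkpoint (the commit page does not name the repo id, so the path below is hypothetical):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical local path
    print(tok.pad_token)  # "<|eot_id|>" after this commit
    batch = tok(["Hi", "A much longer prompt"], padding=True, return_tensors="pt")
    print(batch["input_ids"].shape)  # both rows padded to the same length
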
tokenizer.json CHANGED
@@ -2329,69 +2329,10 @@
     ]
   },
   "post_processor": {
-    "type": "Sequence",
-    "processors": [
-      {
-        "type": "ByteLevel",
-        "add_prefix_space": true,
-        "trim_offsets": false,
-        "use_regex": true
-      },
-      {
-        "type": "TemplateProcessing",
-        "single": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          }
-        ],
-        "pair": [
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 0
-            }
-          },
-          {
-            "Sequence": {
-              "id": "A",
-              "type_id": 0
-            }
-          },
-          {
-            "SpecialToken": {
-              "id": "<|begin_of_text|>",
-              "type_id": 1
-            }
-          },
-          {
-            "Sequence": {
-              "id": "B",
-              "type_id": 1
-            }
-          }
-        ],
-        "special_tokens": {
-          "<|begin_of_text|>": {
-            "id": "<|begin_of_text|>",
-            "ids": [
-              128000
-            ],
-            "tokens": [
-              "<|begin_of_text|>"
-            ]
-          }
-        }
-      }
-    ]
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
tokenizer_config.json CHANGED
@@ -2058,5 +2058,6 @@
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|eot_id|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
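
Since the checkpoint is stored in compressed-tensors FP8 format, a natural way to run it is through an engine that understands that format. A minimal serving sketch, assuming a recent vLLM build with compressed-tensors support; the repo id below is hypothetical, since the commit page does not name it. The sampling values mirror generation_config.json.

    from vllm import LLM, SamplingParams

    llm = LLM(model="Lin-K76/Meta-Llama-3.1-70B-Instruct-FP8")  # hypothetical repo id
    params = SamplingParams(temperature=0.6, top_p=0.9)  # defaults from generation_config.json
    print(llm.generate(["Hello"], params)[0].outputs[0].text)
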