mtasic85 committed on
Commit
e12b6ee
2 Parent(s): 11609de 8e24900

Merge branch 'main' of hf.co:tangledgroup/tangled-llama-a-128k-base-v0.1

Browse files
evaluate-long/results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e815922bf6f45ba3b7c865892e367873df0eae9ee8509804fef35b0c64a44e
3
+ size 6193231
out/converted_model/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "tangledgroup/tangled-llama-a-32k-base-v0.1",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "head_dim": 64,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 512,
11
+ "intermediate_size": 2048,
12
+ "max_position_embeddings": 8192,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 8,
16
+ "num_key_value_heads": 8,
17
+ "rms_norm_eps": 1e-05,
18
+ "rope_scaling": null,
19
+ "tie_word_embeddings": true,
20
+ "torch_dtype": "bfloat16",
21
+ "transformers_version": "4.44.2",
22
+ "use_cache": true,
23
+ "vocab_size": 32768
24
+ }
out/converted_model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a791c0cdb7c0d71374f797b1ac7ba7824c4b0167739ee4c61f5be026d146c3c6
3
+ size 159417495