Fix model file path to match repo structure

#6
by Pi3141 - opened
training_files/convert-hf-to-pth-16b.py CHANGED
@@ -1,14 +1,14 @@
1
- #Convert hf to pth
2
  import os
3
  import json
4
 
5
  import torch
6
  from transformers import LlamaTokenizer, LlamaForCausalLM
7
 
8
- tokenizer = LlamaTokenizer.from_pretrained("./llama-7b-hf")
9
 
10
  base_model = LlamaForCausalLM.from_pretrained(
11
- "output_7b",
12
  load_in_8bit=False,
13
  torch_dtype=torch.float16,
14
  device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
29
  dim = params["dim"]
30
  dims_per_head = dim // n_heads
31
  base = 10000.0
32
- inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
 
33
 
34
 
35
  def permute(w):
36
  return (
37
- w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
 
38
  )
39
 
40
 
41
  def unpermute(w):
42
  return (
43
- w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
 
44
  )
45
 
46
 
@@ -96,7 +99,7 @@ torch.save(new_state_dict, "consolidated.00.pth")
96
  with open("params.json", "w") as f:
97
  json.dump(params, f)
98
 
99
- #Resize tensors
100
  model = torch.load("consolidated.00.pth", map_location=torch.device('cpu'))
101
  x = model["tok_embeddings.weight"]
102
  y = model["output.weight"]
@@ -106,4 +109,4 @@ y = y[:row_exclude]
106
  model["tok_embeddings.weight"] = x
107
  model["output.weight"] = y
108
  torch.save(model, "consolidated.01.pth")
109
- #Delete consolidated.00.pth and rename consolidated.01.pth into consolidated.00.pth
 
1
+ # Convert hf to pth
2
  import os
3
  import json
4
 
5
  import torch
6
  from transformers import LlamaTokenizer, LlamaForCausalLM
7
 
8
+ tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")
9
 
10
  base_model = LlamaForCausalLM.from_pretrained(
11
+ "../7B-2nd-train",
12
  load_in_8bit=False,
13
  torch_dtype=torch.float16,
14
  device_map={"": "cpu"},
 
29
  dim = params["dim"]
30
  dims_per_head = dim // n_heads
31
  base = 10000.0
32
+ inv_freq = 1.0 / \
33
+ (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
34
 
35
 
36
  def permute(w):
37
  return (
38
+ w.view(n_heads, dim // n_heads // 2, 2,
39
+ dim).transpose(1, 2).reshape(dim, dim)
40
  )
41
 
42
 
43
  def unpermute(w):
44
  return (
45
+ w.view(n_heads, 2, dim // n_heads // 2,
46
+ dim).transpose(1, 2).reshape(dim, dim)
47
  )
48
 
49
 
 
99
  with open("params.json", "w") as f:
100
  json.dump(params, f)
101
 
102
+ # Resize tensors
103
  model = torch.load("consolidated.00.pth", map_location=torch.device('cpu'))
104
  x = model["tok_embeddings.weight"]
105
  y = model["output.weight"]
 
109
  model["tok_embeddings.weight"] = x
110
  model["output.weight"] = y
111
  torch.save(model, "consolidated.01.pth")
112
+ # Delete consolidated.00.pth, then rename consolidated.01.pth to consolidated.00.pth
training_files/convert-hf-to-pth-32b.py CHANGED
@@ -1,14 +1,14 @@
1
- #Convert hf to pth
2
  import os
3
  import json
4
 
5
  import torch
6
  from transformers import LlamaTokenizer, LlamaForCausalLM
7
 
8
- tokenizer = LlamaTokenizer.from_pretrained("./llama-7b-hf")
9
 
10
  base_model = LlamaForCausalLM.from_pretrained(
11
- "output_7b",
12
  load_in_8bit=False,
13
  torch_dtype=torch.float16,
14
  device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
29
  dim = params["dim"]
30
  dims_per_head = dim // n_heads
31
  base = 10000.0
32
- inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
 
33
 
34
 
35
  def permute(w):
36
  return (
37
- w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
 
38
  )
39
 
40
 
41
  def unpermute(w):
42
  return (
43
- w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
 
44
  )
45
 
46
 
 
1
+ # Convert hf to pth
2
  import os
3
  import json
4
 
5
  import torch
6
  from transformers import LlamaTokenizer, LlamaForCausalLM
7
 
8
+ tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")
9
 
10
  base_model = LlamaForCausalLM.from_pretrained(
11
+ "../7B-2nd-train",
12
  load_in_8bit=False,
13
  torch_dtype=torch.float16,
14
  device_map={"": "cpu"},
 
29
  dim = params["dim"]
30
  dims_per_head = dim // n_heads
31
  base = 10000.0
32
+ inv_freq = 1.0 / \
33
+ (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
34
 
35
 
36
  def permute(w):
37
  return (
38
+ w.view(n_heads, dim // n_heads // 2, 2,
39
+ dim).transpose(1, 2).reshape(dim, dim)
40
  )
41
 
42
 
43
  def unpermute(w):
44
  return (
45
+ w.view(n_heads, 2, dim // n_heads // 2,
46
+ dim).transpose(1, 2).reshape(dim, dim)
47
  )
48
 
49