LeroyDyer committed
Commit 037f1de
1 Parent(s): 7f9319c

Upload Model_LOADER.py

Files changed (1)
  1. Model_LOADER.py +210 -0
Model_LOADER.py ADDED
@@ -0,0 +1,210 @@
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig, BitsAndBytesConfig
import time
import torch
torch.backends.cuda.matmul.allow_tf32 = True
import random
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig
# from accelerate import infer_auto_device_map, init_empty_weights, dispatch_model
from torch.nn import CrossEntropyLoss
torch.autograd.set_detect_anomaly(True)
random_seed = 42
torch.manual_seed(random_seed)
random.seed(random_seed)
# Set the device for each process
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.set_device(device)


n_ahead_talk_global = 4
n_passes_global = 2
n_ahead_global = 8
n_examples = 0

def model_init(params):
    original = False
    if params is None:
        params = {}
    else:
        params = params.params
    # save params to file
    n_ahead = params.get("n_ahead", n_ahead_global if not original else 1)
    n_ahead_talk = params.get("n_ahead_talk", n_ahead_talk_global if not original else 1)
    n_passes = params.get("n_passes", n_passes_global if not original else 1)
    gumbel_temperature = params.get("gumbel_temperature", 1)
    use_start_thought_token = params.get("use_start_thought_token", True)
    use_end_thought_token = params.get("use_end_thought_token", True)
    include_policy_loss = params.get("include_policy_loss", True)
    gumbel_detach = params.get("gumbel_detach", True)
    merged_talk_heads = params.get("merged_talk_heads", True)
    residual_think_head = params.get("residual_think_head", False)
    optimize_lm_head_only_at_start = params.get("optimize_lm_head_only_at_start", False)

    model_id = "LeroyDyer/_Spydaz_Web_AI_V2_Aligned"
    tokenizer_id = model_id
    print("Loading model")

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        max_thoughts=n_ahead + n_ahead_talk + 1,
        merged_talk_heads=merged_talk_heads,
        merged_lm_and_talk_heads=False,
        merged_lm_and_think_heads=True,
        use_concat_talk_head=True,
        use_shallow_think=True,
        use_shallow_talk=False,
        use_complex_think_head=False,
        use_complex_talk_head=True,
        use_weighted_talk_head=True,
        trust_remote_code=True,
        device_map="auto",
    )
    print("Loaded model")

    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, truncation=True, padding_side="right")
    tokenizer.pad_token_id = tokenizer.eos_token_id

    special_tokens_to_add = []
    if model.use_start_thought_token:
        special_tokens_to_add.append("<|startthought|>")
    if model.use_end_thought_token:
        special_tokens_to_add.append("<|endthought|>")
    if special_tokens_to_add:
        tokenizer.add_special_tokens({"additional_special_tokens": special_tokens_to_add})
        model.resize_token_embeddings(len(tokenizer))
    model.tokenizer = tokenizer
    for name, module in model.named_modules():
        if "embed" in name:
            print(module, flush=True)

    model.gumbel_detach = gumbel_detach
    model.include_policy_loss = include_policy_loss
    model.use_end_thought_token = use_end_thought_token
    model.use_start_thought_token = use_start_thought_token
    model.n_ahead = n_ahead
    model.n_ahead_talk = n_ahead_talk
    model.n_passes = n_passes
    model.residual_think_head = residual_think_head
    model.optimize_lm_head_only_at_start = optimize_lm_head_only_at_start
    model.gumbel_temperature = gumbel_temperature
    model.original_mode = original
    model.config_params = params
    # return the tokenizer as well so the unpacking below works
    return model, tokenizer

model, tokenizer = model_init(None)
tokenizer.save_pretrained("IpretrainedModel")
model.save_pretrained("IpretrainedModel")
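# The local "IpretrainedModel" save above keeps the resized embedding table and the newly
# added <|startthought|> / <|endthought|> special tokens paired with their tokenizer before
# any fine-tuning is run.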

## TRAINING:

peft_config = LoraConfig(
    r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
    lora_alpha = 32,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",
    use_dora=True,
)

from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, AutoConfig
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig

## DATA
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass
dataset = load_dataset("gate369/Alpaca-Star", split = "train[:2000]")
dataset = dataset.shuffle(seed=3704)
dataset = dataset.map(formatting_prompts_func, batched = True,)
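# Optional sanity check (an illustrative addition, not in the uploaded script): print one
# mapped record to confirm the Alpaca template and the trailing EOS token were applied.
print(dataset[0]["text"][:300])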
## TRAIN
model.train()  # switch the model into training mode
max_seq_length = 32000
training_args = TrainingArguments(
    output_dir="./out",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_checkpointing=False,
    gradient_accumulation_steps=8,
    optim="lion_32bit",
    logging_steps=1,
    save_strategy="steps",
    save_steps=300,
    max_steps=1000,
    bf16=True,
    tf32=False,
    learning_rate=6e-05,
    max_grad_norm=0.3,
    warmup_ratio=0.06,
    lr_scheduler_type="cosine",
    push_to_hub=False,
)
trainer = SFTTrainer(
    args=training_args,
    train_dataset=dataset,
    model=model,
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    dataset_text_field="text",
    peft_config=peft_config,
)
trainer.train()

## SAVE
tokenizer.save_pretrained("SFTTrainerModel")
model.save_pretrained("SFTTrainerModel")

import os
import huggingface_hub
from huggingface_hub import notebook_login
from huggingface_hub import create_repo, HfApi
from huggingface_hub import hf_hub_download
from huggingface_hub import snapshot_download

MODEL_NAME = "_Spydaz_Web_AI_MistralStar"
Folderinput = "SFTTrainerModel"
WRITE_TOKEN = ""
username = "LeroyDyer"
huggingface_hub.login(WRITE_TOKEN)
api = HfApi(token=WRITE_TOKEN)
# Create empty repo
api.create_repo(
    repo_id = f"{username}/{MODEL_NAME}",
    repo_type="model",
    exist_ok=True,
)

api.upload_folder(
    repo_id = f"{username}/{MODEL_NAME}",
    folder_path = Folderinput
)
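# Example usage (a minimal sketch, not part of the uploaded script): pull the pushed repo
# back down and generate from it. Assumes the repo holds full model weights (e.g. after
# merging adapters as sketched above) and that its custom code loads via trust_remote_code.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

repo_id = f"{username}/{MODEL_NAME}"
tok = AutoTokenizer.from_pretrained(repo_id)
mdl = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
prompt = alpaca_prompt.format("Summarise what this training script does.", "", "")
inputs = tok(prompt, return_tensors="pt").to(mdl.device)
out = mdl.generate(**inputs, max_new_tokens=128)
print(tok.decode(out[0], skip_special_tokens=True))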