crumb committed on
Commit 7024b4c
1 parent: 026b770

Upload model

Files changed (2)
  1. model.safetensors +2 -2
  2. modeling_t2.py +7 -15
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1daa5083d32e5c14f1f024b693d30a7414cce7cf7814b9ce1dc1b0eb03dfb46b
-size 385823368
+oid sha256:1946078d3452868c8afe7c88aff1e9364a13ecd67361113eb7515f652a70c302
+size 409219512
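The new pointer is roughly 23 MB larger than the old one, which lines up with this commit swapping the factorized embedding and LM head for full-width modules (see the modeling_t2.py diff below). A rough sketch of reading the size delta, assuming the file size is dominated by tensor data (safetensors also carries a small JSON header) and leaving the checkpoint dtype open:

```python
# Sizes copied from the git-lfs pointers above.
old_size = 385_823_368
new_size = 409_219_512

delta = new_size - old_size
print(f"delta: {delta:,} bytes")  # 23,396,144 bytes, roughly 23 MB

# Rough parameter-count equivalents; the checkpoint's dtype is not
# visible from the pointer file, so both common widths are shown.
for dtype, nbytes in [("fp32", 4), ("fp16/bf16", 2)]:
    print(f"~{delta // nbytes:,} parameters at {dtype}")
```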
modeling_t2.py CHANGED
@@ -322,11 +322,7 @@ class TransformerPreTrainedModel(PreTrainedModel):
 class TransformerModel(TransformerPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        # self.wte = nn.Embedding(config.vocab_size, config.hidden_size)
-        self.wte = nn.Sequential(
-            nn.Embedding(config.vocab_size, config.stage_0_hidden_size),
-            StageLinear(config.stage_0_hidden_size, config.hidden_size, bias=False, stage=0, config=config)
-        )
+        self.wte = nn.Embedding(config.vocab_size, config.hidden_size)
         self.h = nn.ModuleList(
             [TransformerBlock(config, stage=1) for i in range(config.num_hidden_layers)]
         )
@@ -337,10 +333,10 @@ class TransformerModel(TransformerPreTrainedModel):
         self.post_init()
 
     def get_input_embeddings(self):
-        return self.wte[0]
+        return self.wte
 
     def set_input_embeddings(self, new_embeddings):
-        self.wte[0] = new_embeddings
+        self.wte = new_embeddings
 
     def forward(
         self,
@@ -500,7 +496,7 @@ class TransformerModel(TransformerPreTrainedModel):
         )
 
 class TransformerModelForCausalLM(TransformerPreTrainedModel):
-    _tied_weights_keys = ["lm_head.1.weight"]
+    _tied_weights_keys = ["lm_head.weight"]
     _tied_weights_keys = []
     def __init__(self, config):
         super().__init__(config)
@@ -508,20 +504,16 @@ class TransformerModelForCausalLM(TransformerPreTrainedModel):
         # self.lm_head = nn.Linear(
         #     config.hidden_size, config.vocab_size, bias=False
         # )
-        self.lm_head = nn.Sequential(
-            StageLinear(config.hidden_size, config.stage_0_hidden_size, bias=False, stage=0, config=config),
-            nn.Linear(config.stage_0_hidden_size, config.vocab_size),
-        )
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
         self.model_parallel = False
         self.device_map = None
         self.post_init()
 
     def get_output_embeddings(self):
-        return self.lm_head[1]
+        return self.lm_head
 
     def set_output_embeddings(self, new_embeddings):
-        # print("Huggingface is inexplicably trying to tie the lm head no matter how many times i'm saying tie_weights False but I'm not letting it")
-        self.lm_head[1] = new_embeddings
+        self.lm_head = new_embeddings
 
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
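Taken together, the hunks drop the two-stage factorization: the input embedding is no longer a narrow nn.Embedding followed by a StageLinear up-projection, and the LM head is no longer a StageLinear down-projection followed by an nn.Linear over the vocabulary; both become single full-width modules, and the accessor methods stop indexing into nn.Sequential containers. A minimal before/after sketch of the embedding side, using made-up dimensions and a plain nn.Linear standing in for StageLinear (which is defined elsewhere in modeling_t2.py):

```python
import torch.nn as nn

# Hypothetical sizes for illustration only; the real values come from the config.
vocab_size, hidden_size, stage_0_hidden_size = 50_257, 1_024, 256

# Old layout (removed in this commit): narrow embedding + projection to hidden_size.
# A plain nn.Linear stands in for the repo's StageLinear here.
factorized_wte = nn.Sequential(
    nn.Embedding(vocab_size, stage_0_hidden_size),
    nn.Linear(stage_0_hidden_size, hidden_size, bias=False),
)

# New layout: one full-width embedding table.
full_wte = nn.Embedding(vocab_size, hidden_size)

def n_params(module: nn.Module) -> int:
    return sum(p.numel() for p in module.parameters())

print(n_params(factorized_wte))  # vocab_size*stage_0 + stage_0*hidden
print(n_params(full_wte))        # vocab_size*hidden, larger for these sizes
```

With `self.wte` now a plain nn.Embedding, `get_input_embeddings`/`set_input_embeddings` return and assign the module directly instead of reaching into `self.wte[0]`, which is the shape the Hugging Face resize/tie machinery expects. The same simplification applies to `lm_head` and its `get_output_embeddings`/`set_output_embeddings` accessors.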
 
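A smaller change rides along: the first class-level `_tied_weights_keys` assignment is updated from `"lm_head.1.weight"` to `"lm_head.weight"` to match the now-flat head, but it is still immediately overridden by the second assignment `_tied_weights_keys = []`, and the removed in-code comment about Hugging Face trying to tie the head suggests tying is deliberately left off. A quick, hedged way to check that input and output embeddings stay untied after loading (the repo id below is a placeholder, and this custom architecture needs trust_remote_code=True):

```python
from transformers import AutoModelForCausalLM

# Placeholder repo id for illustration; substitute the actual model repo.
model = AutoModelForCausalLM.from_pretrained(
    "your-username/your-t2-checkpoint", trust_remote_code=True
)

wte = model.get_input_embeddings()       # nn.Embedding after this commit
lm_head = model.get_output_embeddings()  # nn.Linear after this commit

# If word embeddings are untied, the two weight tensors live in distinct storage.
print(getattr(model.config, "tie_word_embeddings", None))
print(wte.weight.data_ptr() == lm_head.weight.data_ptr())  # False when untied
```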