Upload model
Files changed:
- model.safetensors +2 -2
- modeling_t2.py +7 -15
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1946078d3452868c8afe7c88aff1e9364a13ecd67361113eb7515f652a70c302
+size 409219512
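The pointer above is a Git LFS stub: the repository only records the SHA-256 and byte size of the real ~409 MB model.safetensors, which is stored out of band. As a minimal sketch (assuming the resolved weights file has already been downloaded locally; the path is illustrative, not part of this commit), a local copy can be checked against the new pointer like this:

import hashlib
from pathlib import Path

POINTER_OID = "1946078d3452868c8afe7c88aff1e9364a13ecd67361113eb7515f652a70c302"
POINTER_SIZE = 409219512  # bytes, from the pointer above

path = Path("model.safetensors")  # the resolved LFS object, not the 3-line pointer
assert path.stat().st_size == POINTER_SIZE, "size does not match the LFS pointer"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == POINTER_OID, "sha256 does not match the LFS pointer"
print("local model.safetensors matches the uploaded pointer")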
modeling_t2.py
CHANGED
@@ -322,11 +322,7 @@ class TransformerPreTrainedModel(PreTrainedModel):
 class TransformerModel(TransformerPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-
-        self.wte = nn.Sequential(
-            nn.Embedding(config.vocab_size, config.stage_0_hidden_size),
-            StageLinear(config.stage_0_hidden_size, config.hidden_size, bias=False, stage=0, config=config)
-        )
+        self.wte = nn.Embedding(config.vocab_size, config.hidden_size)
         self.h = nn.ModuleList(
             [TransformerBlock(config, stage=1) for i in range(config.num_hidden_layers)]
         )
@@ -337,10 +333,10 @@ class TransformerModel(TransformerPreTrainedModel):
         self.post_init()
 
     def get_input_embeddings(self):
-        return self.wte
+        return self.wte
 
     def set_input_embeddings(self, new_embeddings):
-        self.wte
+        self.wte = new_embeddings
 
     def forward(
         self,
@@ -500,7 +496,7 @@ class TransformerModel(TransformerPreTrainedModel):
 )
 
 class TransformerModelForCausalLM(TransformerPreTrainedModel):
-    _tied_weights_keys = ["lm_head.
+    _tied_weights_keys = ["lm_head.weight"]
     _tied_weights_keys = []
     def __init__(self, config):
         super().__init__(config)
@@ -508,20 +504,16 @@ class TransformerModelForCausalLM(TransformerPreTrainedModel):
         # self.lm_head = nn.Linear(
         #     config.hidden_size, config.vocab_size, bias=False
         # )
-        self.lm_head = nn.Sequential(
-            StageLinear(config.hidden_size, config.stage_0_hidden_size, bias=False, stage=0, config=config),
-            nn.Linear(config.stage_0_hidden_size, config.vocab_size),
-        )
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
         self.model_parallel = False
         self.device_map = None
         self.post_init()
 
     def get_output_embeddings(self):
-        return self.lm_head
+        return self.lm_head
 
     def set_output_embeddings(self, new_embeddings):
-
-        self.lm_head[1] = new_embeddings
+        self.lm_head = new_embeddings
 
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
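Net effect of the modeling change: wte goes back to a plain nn.Embedding and lm_head to a plain nn.Linear, so both expose a single (vocab_size, hidden_size) weight matrix that Transformers can tie between input and output embeddings when the config enables word-embedding tying; the added _tied_weights_keys = ["lm_head.weight"] entry names that shared key (the pre-existing _tied_weights_keys = [] line after it is left in place). A minimal sketch of the sharing in plain PyTorch, with illustrative sizes rather than the repo's config values and not the repo's own code:

import torch
from torch import nn

# Illustrative sizes only; the real ones come from config.vocab_size / config.hidden_size.
vocab_size, hidden_size = 32000, 768

wte = nn.Embedding(vocab_size, hidden_size)   # input embeddings, as in the new __init__
lm_head = nn.Linear(hidden_size, vocab_size)  # output head, as in the new __init__

# Weight tying amounts to the head reusing the embedding matrix: both weights
# have shape (vocab_size, hidden_size), so one Parameter can back both modules.
lm_head.weight = wte.weight
assert lm_head.weight.data_ptr() == wte.weight.data_ptr()

# Logits are hidden states dotted with the shared matrix (plus the head's
# untied bias); any update to wte.weight is seen by the head as well.
hidden = torch.randn(1, 4, hidden_size)
assert lm_head(hidden).shape == (1, 4, vocab_size)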