Upload T5MIMOForConditionalGeneration
Browse files- model.safetensors +2 -2
- modeling_t5mimo.py +3 -3
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d117251674a519aec18caa013c1f8afdcb2d9b0c30432fbb38083b9d9002d171
|
3 |
+
size 33649068
|
modeling_t5mimo.py
CHANGED
@@ -1325,7 +1325,7 @@ class T5MIMOForConditionalGeneration(T5PreTrainedModel):
|
|
1325 |
self.decoder = T5Stack(decoder_config, self.shared)
|
1326 |
|
1327 |
|
1328 |
-
|
1329 |
self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
|
1330 |
|
1331 |
# Initialize weights and apply final processing
|
@@ -1518,8 +1518,8 @@ class T5MIMOForConditionalGeneration(T5PreTrainedModel):
|
|
1518 |
sequence_output = decoder_outputs[0]
|
1519 |
|
1520 |
|
1521 |
-
|
1522 |
-
|
1523 |
|
1524 |
# Set device for model parallelism
|
1525 |
if self.model_parallel:
|
|
|
1325 |
self.decoder = T5Stack(decoder_config, self.shared)
|
1326 |
|
1327 |
|
1328 |
+
self.conv_block = MultivariateConvBlock(config)
|
1329 |
self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
|
1330 |
|
1331 |
# Initialize weights and apply final processing
|
|
|
1518 |
sequence_output = decoder_outputs[0]
|
1519 |
|
1520 |
|
1521 |
+
if use_conv:
|
1522 |
+
sequence_output = self.conv_block(sequence_output)
|
1523 |
|
1524 |
# Set device for model parallelism
|
1525 |
if self.model_parallel:
|