mlinmg committed
Commit 7bd6c2d
1 Parent(s): a3c5a68

Update configuration_quasar.py

Files changed (1)
  1. configuration_quasar.py +4 -47
configuration_quasar.py CHANGED
@@ -23,10 +23,10 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 QUASAR_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "AstraMindAI/AstraQuasar-4.5B": "https://huggingface.co/AstraMindAI/AstraQuasar-4.5B/resolve/main/config.json",
+    "AstraMindAI/AstraQuasar-4B": "https://huggingface.co/AstraMindAI/AstraQuasar-4B/resolve/main/config.json",
 }
 
-#From phi-2 Phi -> Quasar
+#from microsoft/phi-2, Phi -> Quasar
 class QuasarConfig(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a [`QuasarModel`]. It is used to instantiate a Quasar
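Note: this hunk renames the checkpoint repo from AstraQuasar-4.5B to AstraQuasar-4B. A minimal sketch of loading the updated config through the standard remote-code path (assuming the renamed repo is live and ships this file; `trust_remote_code=True` is needed because `QuasarConfig` is defined in the repo rather than in transformers):

    from transformers import AutoConfig

    # hypothetical usage against the repo id introduced by this commit
    config = AutoConfig.from_pretrained("AstraMindAI/AstraQuasar-4B", trust_remote_code=True)
    print(type(config).__name__)  # QuasarConfig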
@@ -138,11 +138,7 @@ class QuasarConfig(PretrainedConfig):
         qk_layernorm=False,
         bos_token_id=1,
         eos_token_id=2,
-        ## Added
-        #duplicate_trick_v2=True,
-        #duplicate_rank=8,
-        #duplicate_dropout=0.0,
-        sliding_window=4096,
+        sliding_window=2048,
         simple_norm=False,
         remove_ff_bias=True,
         gated_activation=False,
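Note: the default `sliding_window` drops from 4096 to 2048, and the commented-out v2 arguments leave the signature entirely. A quick sketch of the effect (assuming this file is importable locally as `configuration_quasar`):

    from configuration_quasar import QuasarConfig

    config = QuasarConfig()
    print(config.sliding_window)   # 2048 after this commit

    # the old default can still be passed explicitly
    config = QuasarConfig(sliding_window=4096)
    print(config.sliding_window)   # 4096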
@@ -151,7 +147,7 @@ class QuasarConfig(PretrainedConfig):
         layer_ranges=[[0, 16],[8, 21],[12, 25],[16, 29],[25, 32]],
         **kwargs,
     ):
-        ## Added
+
         self.sliding_window = sliding_window
         self.simple_norm = simple_norm
         self.remove_ff_bias = remove_ff_bias
@@ -160,15 +156,6 @@ class QuasarConfig(PretrainedConfig):
         self.duplicate_grad = duplicate_grad
         self.layer_ranges = layer_ranges if layer_ranges is not None else []
 
-        ##V2###
-        #self.duplicate_trick_v2 = duplicate_trick_v2
-        #self.layer_ranges_duplicate_v2 = []
-        #self._assing_layer_ranges_duplicate_v2()
-        #self.duplicate_rank = duplicate_rank
-        #self.duplicate_dropout = duplicate_dropout
-        #self._duplicate_trick_v2_validation()
-        ####
-
         self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
@@ -200,36 +187,6 @@ class QuasarConfig(PretrainedConfig):
             **kwargs,
         )
 
-    def _assing_layer_ranges_duplicate_v2(self):
-        # Compute the starting offsets for each range in the single flat list
-        offsets = [0]
-        for i in range(1, len(self.layer_ranges)):
-            offset = offsets[-1] + self.layer_ranges[i - 1][1] - self.layer_ranges[i - 1][0]
-            offsets.append(offset)
-
-        # Select only the odd-indexed ranges and compute their absolute positions
-        odd_intervals_positions = []
-        for i in range(1, len(self.layer_ranges), 2):
-            start, end = self.layer_ranges[i]
-            for n in range(start, end):
-                position = offsets[i] + (n - start)
-                odd_intervals_positions.append(position)
-
-        self.layer_ranges_duplicate_v2 = list(set(odd_intervals_positions))
-
-
-    def _duplicate_trick_v2_validation(self):
-        if self.duplicate_trick_v2 and self.duplicate_trick:
-            # warn just once that only one of the two flags will be used
-            logger.warning(
-                "Both `duplicate_trick` and `duplicate_trick_v2` are set to True. Only `duplicate_trick_v2` will be used."
-            )
-        if self.duplicate_trick_v2 and self.duplicate_rank < 1:
-            raise ValueError("`duplicate_rank` must be a positive integer")
-        if self.duplicate_trick_v2 and not self.layer_ranges:
-            raise ValueError("`layer_ranges` must be set when `duplicate_trick_v2` is True")
-
-
     # Copied from transformers.models.llama.configuration_llama.LlamaConfig._rope_scaling_validation
     def _rope_scaling_validation(self):
         """
 