vikhyatk committed
Commit 6214f6c (parent c02e7e4)

Upload Moondream

config.json CHANGED
@@ -11,5 +11,5 @@
     "model_type": "phi"
   },
   "torch_dtype": "float16",
-  "transformers_version": "4.38.2"
+  "transformers_version": "4.36.2"
 }
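
The only change here is metadata: the transformers_version field, which records the library release that wrote the file, moves from 4.38.2 to 4.36.2. A minimal sketch of comparing that recorded value against the locally installed library; the check is not part of this commit and assumes config.json sits in the working directory.

import json

import transformers

with open("config.json") as f:
    config = json.load(f)

# "4.36.2" after this commit
recorded = config.get("transformers_version")
installed = transformers.__version__
if recorded != installed:
    print(f"config.json was written with transformers {recorded}, but {installed} is installed.")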
configuration_moondream.py CHANGED
@@ -25,7 +25,6 @@ class PhiConfig(PretrainedConfig):
         rope_theta=10000.0,
         rope_scaling=None,
         partial_rotary_factor=0.5,
-        qk_layernorm=False,
         bos_token_id=1,
         eos_token_id=2,
         **kwargs,
@@ -51,7 +50,6 @@ class PhiConfig(PretrainedConfig):
         self.rope_theta = rope_theta
         self.rope_scaling = rope_scaling
         self.partial_rotary_factor = partial_rotary_factor
-        self.qk_layernorm = qk_layernorm
         self._rope_scaling_validation()
 
         super().__init__(
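
The config drops its qk_layernorm flag (previously defaulting to False) from both the constructor signature and the stored attributes. A minimal sketch of constructing the config after this commit, assuming configuration_moondream.py is importable from the working directory; the values shown are the defaults visible in the diff.

from configuration_moondream import PhiConfig

config = PhiConfig(
    rope_theta=10000.0,
    rope_scaling=None,
    partial_rotary_factor=0.5,
    bos_token_id=1,
    eos_token_id=2,
)
print(config.partial_rotary_factor)  # 0.5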
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.38.2"
+  "transformers_version": "4.36.2"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7840817a7015edf729fa3d60099c35f08fc30511a1dc8ea231acd0e9a6555bb8
+oid sha256:927694193ed81f83b9b269c0d1ffa8dc823dec90bce4703a54b22ebd6c9632b6
 size 3733912224
modeling_phi.py CHANGED
@@ -16,14 +16,13 @@
 """ PyTorch Phi model."""
 
 
-import math
 from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch import nn
-from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from torch.nn import CrossEntropyLoss
 
 from transformers.activations import ACT2FN
 from transformers.cache_utils import Cache, DynamicCache
@@ -31,7 +30,6 @@ from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_m
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,
-    SequenceClassifierOutputWithPast,
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import (
@@ -287,19 +285,6 @@ class PhiAttention(nn.Module):
             self.num_heads * self.head_dim, self.hidden_size, bias=True
         )
 
-        self.qk_layernorm = config.qk_layernorm
-        if self.qk_layernorm:
-            self.q_layernorm = nn.LayerNorm(
-                config.hidden_size // self.num_heads,
-                eps=config.layer_norm_eps,
-                elementwise_affine=True,
-            )
-            self.k_layernorm = nn.LayerNorm(
-                config.hidden_size // self.num_heads,
-                eps=config.layer_norm_eps,
-                elementwise_affine=True,
-            )
-
         self._init_rope()
 
     def _init_rope(self):
@@ -344,10 +329,6 @@ class PhiAttention(nn.Module):
             3, dim=-1
         )
 
-        if self.qk_layernorm:
-            query_states = self.q_layernorm(query_states)
-            key_states = self.k_layernorm(key_states)
-
         query_states = query_states.view(
             bsz, q_len, self.num_heads, self.head_dim
         ).transpose(1, 2)
@@ -451,10 +432,6 @@ class PhiFlashAttention2(PhiAttention):
             3, dim=-1
         )
 
-        if self.qk_layernorm:
-            query_states = self.q_layernorm(query_states)
-            key_states = self.k_layernorm(key_states)
-
         # Flash attention requires the input to have the shape
         # batch_size x seq_length x head_dim x hidden_dim
         # therefore we just need to keep the original shape
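
Two kinds of cleanup happen in this file. The unused imports go away: math, plus the loss and output classes that a sequence-classification head would use, leaving CrossEntropyLoss for the causal-LM head. Both attention classes also lose their if self.qk_layernorm: branches; since the flag removed from PhiConfig above defaulted to False, those branches were never taken for this checkpoint, so the forward pass should be unchanged. A rough, self-contained sketch of the shifted next-token loss a causal-LM head computes with CrossEntropyLoss, the one loss class the file still imports; the tensor shapes are illustrative and not taken from this model.

import torch
from torch.nn import CrossEntropyLoss

vocab_size = 8
logits = torch.randn(2, 5, vocab_size)         # (batch, seq, vocab)
labels = torch.randint(0, vocab_size, (2, 5))  # (batch, seq)

# Predict token t+1 from position t, then flatten for the loss.
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
loss = CrossEntropyLoss()(shift_logits.view(-1, vocab_size), shift_labels.view(-1))
print(loss.item())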
moondream.py CHANGED
@@ -59,6 +59,9 @@ class Moondream(PreTrainedModel):
 
         return torch.cat(embeds, dim=1)
 
+    def get_input_embeddings(self):
+        return self.text_model.get_input_embeddings()
+
     def generate(
         self,
         image_embeds,
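
The new get_input_embeddings simply forwards to the wrapped text model, exposing its token-embedding table through the standard PreTrainedModel accessor that transformers utilities (for example resize_token_embeddings, or generation paths that build inputs_embeds) look up on the top-level model. A toy stand-in for that delegation pattern, not the actual Moondream classes:

import torch
from torch import nn


class TextModel(nn.Module):
    def __init__(self, vocab_size=16, hidden_size=8):
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)

    def get_input_embeddings(self):
        return self.embed_tokens


class Wrapper(nn.Module):
    """Stands in for Moondream: delegates to the inner text model."""

    def __init__(self):
        super().__init__()
        self.text_model = TextModel()

    def get_input_embeddings(self):
        return self.text_model.get_input_embeddings()


wrapper = Wrapper()
token_ids = torch.tensor([[1, 2, 3]])
print(wrapper.get_input_embeddings()(token_ids).shape)  # torch.Size([1, 3, 8])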