Logic123456789 committed
Commit fd6a002
Parent: e16b4bd

change the models.py

Files changed (1)
  1. models.py +59 -49
models.py CHANGED
@@ -3,8 +3,10 @@ import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist

-from simcse.modeling_glm import GLMModel, GLMPreTrainedModel
-import simcse.mse_loss
+# from simcse.modeling_glm import GLMModel, GLMPreTrainedModel
+
+# import simcse.readEmbeddings
+# import simcse.mse_loss

import transformers
from transformers import RobertaTokenizer, AutoModel, PreTrainedModel
@@ -23,7 +25,7 @@ glm_model = None

def init_glm(path):
    global glm_model
-    glm_model = GLMModel.from_pretrained(path, trust_remote_code=True).to("cuda:0")
+    glm_model = AutoModel.from_pretrained(path, trust_remote_code=True).to("cuda:0")
    for param in glm_model.parameters():
        param.requires_grad = False

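For context, the changed line above keeps the frozen-feature-extractor pattern: the embedding model is loaded once (trust_remote_code=True lets the checkpoint's custom GLM modeling code run, and AutoModel resolves the concrete class from the checkpoint's config), and every parameter has requires_grad disabled so the model is only used for inference. A minimal sketch of that pattern, with a placeholder checkpoint path; the eval() call is an addition of the sketch, not part of the diff:

from transformers import AutoModel

def load_frozen_encoder(path, device="cuda:0"):
    # trust_remote_code=True allows the custom modeling code shipped with the
    # checkpoint; AutoModel picks the concrete model class from its config.
    model = AutoModel.from_pretrained(path, trust_remote_code=True).to(device)
    model.eval()                      # inference only (disables dropout, etc.)
    for param in model.parameters():
        param.requires_grad = False   # keep the encoder out of the optimizer / backprop
    return model

# Usage (path is a placeholder for the GLM checkpoint passed to init_glm):
# glm = load_frozen_encoder("path/to/glm-checkpoint")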
@@ -129,9 +131,6 @@ def cl_forward(cls,
    return_dict=None,
    mlm_input_ids=None,
    mlm_labels=None,
-    left_emb=None,
-    right_emb=None,
-    kl_loss=False
):
    return_dict = return_dict if return_dict is not None else cls.config.use_return_dict
    ori_input_ids = input_ids
@@ -184,13 +183,29 @@ def cl_forward(cls,
    # If using "cls", we add an extra MLP layer
    # (same as BERT's original implementation) over the representation.
    if cls.pooler_type == "cls":
+        # print("this pooler is cls and running mlp")
        pooler_output = cls.mlp(pooler_output)

    # Separate representation
    z1, z2 = pooler_output[:, 0], pooler_output[:, 1]

-    tensor_left = left_emb
-    tensor_right = right_emb
+    # simcse.mse_loss.global_num += 8
+    # print(simcse.mse_loss.global_num)
+    tensor_left, tensor_right = simcse.mse_loss.giveMeBatchEmbeddings(simcse.mse_loss.global_num,
+                                                                      simcse.readEmbeddings.data)
+    simcse.mse_loss.global_num += 32
+    # print(F.mse_loss(z1, tensor_left))
+    # print(F.mse_loss(z2, tensor_right))
+
+    # print(tensor_left.size())
+    # print(tensor_right.size())
+    # print(len(pooler_output[:,]))
+    # print(len(z1))
+    # print(len(z2))
+    # print(len(z1[0]))
+    # print(len(z2[0]))
+
+    # print(F.mse_loss(z1[0], z2[0]))

    # Hard negative
    if num_sent == 3:
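simcse.mse_loss.giveMeBatchEmbeddings and simcse.readEmbeddings.data are repo-internal helpers that are not part of this diff. Judging only from the call site above, the helper appears to slice a batch of precomputed reference embeddings out of a preloaded table, using a running cursor (simcse.mse_loss.global_num, advanced by the batch size of 32 after each call). A hypothetical sketch of such a lookup, assuming the table stores one left/right embedding pair per training example; names and shapes are illustrative, not taken from the repo:

import torch

# Hypothetical stand-in for simcse.readEmbeddings.data: a (num_pairs, 2, hidden)
# tensor with one precomputed reference embedding per sentence of each pair.
data = torch.randn(1000, 2, 768)

global_num = 0  # running cursor over the dataset, like simcse.mse_loss.global_num

def give_me_batch_embeddings(start, table, batch_size=32, device="cpu"):
    """Return the reference embeddings for training rows [start, start + batch_size)."""
    batch = table[start:start + batch_size]
    tensor_left = batch[:, 0].to(device)   # reference embedding of the first sentence
    tensor_right = batch[:, 1].to(device)  # reference embedding of the second sentence
    return tensor_left, tensor_right

# tensor_left, tensor_right = give_me_batch_embeddings(global_num, data)
# global_num += 32  # mirrors the increment in cl_forward above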
@@ -219,45 +234,44 @@ def cl_forward(cls,
    # Get full batch embeddings: (bs x N, hidden)
    z1 = torch.cat(z1_list, 0)
    z2 = torch.cat(z2_list, 0)
-
-    mse_loss = F.mse_loss(z1, tensor_left) + F.mse_loss(z2, tensor_right)

-    # softmax_row, softmax_col = simcse.mse_loss.giveMeMatrix(tensor_left, tensor_right)
-    # softmax_row_model, softmax_col_model = simcse.mse_loss.giveMeMatrix(z1,z2)
-    # ziang_labels = torch.tensor([i for i in range(8)], device='cuda:0')
+    ziang_loss = F.mse_loss(z1, tensor_left) + F.mse_loss(z2, tensor_right)
+    # print("\n MSE Loss is : ", ziang_loss)
+
+    softmax_row, softmax_col = simcse.mse_loss.giveMeMatrix(tensor_left, tensor_right)
+    softmax_row_model, softmax_col_model = simcse.mse_loss.giveMeMatrix(z1,z2)
+
+    ziang_labels = torch.tensor([i for i in range(32)], device='cuda:0')
+
+    """
+    this is cross entropy loss
+    """
+    row_loss = F.cross_entropy(softmax_row, ziang_labels)
+    col_loss = F.cross_entropy(softmax_col, ziang_labels)
+    softmax_loss = (row_loss + col_loss) / 2

    """
    this is KL div loss
    """
-
-    KL_loss = nn.KLDivLoss(reduction="batchmean")
-    beta = 5
-
-    # OpenAI embeddings: giveMeMatrix returns the matrix product of the normalized left/right vectors
-    cos_sim_matrix_openai = simcse.mse_loss.giveMeMatrix(tensor_left, tensor_right)
-    beta_scaled_cos_sim_matrix_openai = beta * cos_sim_matrix_openai
-
-    # our embeddings: giveMeMatrix returns the matrix product of the normalized left/right vectors
-    cos_sim_matrix_data = simcse.mse_loss.giveMeMatrix(z1, z2)
-    beta_scaled_cos_sim_matrix_data = beta * cos_sim_matrix_data
-
-    beta_scaled_cos_sim_matrix_openai_vertical = beta_scaled_cos_sim_matrix_openai.softmax(dim=1)
-    beta_scaled_cos_sim_matrix_openai_horizontal = beta_scaled_cos_sim_matrix_openai.softmax(dim=0)
-
-    beta_scaled_cos_sim_matrix_data_vertical = beta_scaled_cos_sim_matrix_data.softmax(dim=1)
-    beta_scaled_cos_sim_matrix_data_horizontal = beta_scaled_cos_sim_matrix_data.softmax(dim=0)
-
-    # remove reduction="batchmean"
-    KL_vertical_loss = KL_loss(beta_scaled_cos_sim_matrix_data_vertical.log(), beta_scaled_cos_sim_matrix_openai_vertical)
-    KL_horizontal_loss = KL_loss(beta_scaled_cos_sim_matrix_data_horizontal.log(), beta_scaled_cos_sim_matrix_openai_horizontal)
-
-    KL_loss = (KL_vertical_loss + KL_horizontal_loss) / 2
-
-    # KL_row_loss = F.kl_div(softmax_row_model.log(), softmax_row, reduction='batchmean')
-    # KL_col_loss = F.kl_div(softmax_col_model.log(), softmax_col, reduction='batchmean')
-    # KL_loss = (KL_row_loss + KL_col_loss) / 2
+    KL_row_loss = F.kl_div(softmax_row_model.log(), softmax_row, reduction='batchmean')
+    KL_col_loss = F.kl_div(softmax_col_model.log(), softmax_col, reduction='batchmean')
+    KL_loss = (KL_row_loss + KL_col_loss) / 2

-    ziang_loss = KL_loss + mse_loss
+    ziang_loss = KL_loss + ziang_loss + softmax_loss
+    # ziang_loss = softmax_loss + ziang_loss
+
+    # ziang_loss = F.mse_loss(
+    #     torch.nn.functional.cosine_similarity(tensor_left, tensor_right),
+    #     torch.nn.functional.cosine_similarity(z1,z2)
+    # )
+    # ziang_loss /= 0.5
+    # print("\n Softmax Loss is : ", softmax_loss)
+    # print("\n Openai Cos Similarity between two paragraph: \n", torch.nn.functional.cosine_similarity(tensor_left, tensor_right))
+    # print("\nCos Similarity between two paragraph: \n", torch.nn.functional.cosine_similarity(z1, z2))
+    # print("\n My total loss currently: ", ziang_loss)
+
+    # print(z1.size())
+    # print(z2.size())

    cos_sim = cls.sim(z1.unsqueeze(1), z2.unsqueeze(0))

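After this change the objective returned as ziang_loss combines three terms: an MSE term pulling z1/z2 towards the reference embeddings tensor_left/tensor_right, a row/column cross-entropy over a similarity matrix, and a symmetric KL divergence between the model's similarity distribution and the reference one. The sketch below is a self-contained approximation of that objective; similarity_matrix is a hypothetical stand-in for simcse.mse_loss.giveMeMatrix (not shown in this diff), and the sketch applies softmax/log_softmax explicitly once, whereas the helper's return values are already named softmax_row/softmax_col, so the exact normalization may differ:

import torch
import torch.nn.functional as F

def similarity_matrix(a, b):
    # Hypothetical stand-in for simcse.mse_loss.giveMeMatrix:
    # (bs, bs) cosine-similarity matrix between L2-normalized rows of a and b.
    return F.normalize(a, dim=-1) @ F.normalize(b, dim=-1).T

def combined_loss(z1, z2, ref_left, ref_right):
    bs = z1.size(0)
    labels = torch.arange(bs, device=z1.device)  # diagonal entries are the matching pairs

    # 1) MSE between model embeddings and the reference embeddings.
    mse = F.mse_loss(z1, ref_left) + F.mse_loss(z2, ref_right)

    sim_ref = similarity_matrix(ref_left, ref_right)
    sim_model = similarity_matrix(z1, z2)

    # 2) Row/column cross-entropy. As in the diff, it is computed on the
    #    reference matrix, so this term carries no gradient for the model itself.
    ce = (F.cross_entropy(sim_ref, labels) + F.cross_entropy(sim_ref.t(), labels)) / 2

    # 3) Symmetric KL between the model's and the reference similarity
    #    distributions (F.kl_div expects log-probabilities as its first input).
    kl_row = F.kl_div(sim_model.log_softmax(dim=-1), sim_ref.softmax(dim=-1),
                      reduction="batchmean")
    kl_col = F.kl_div(sim_model.t().log_softmax(dim=-1), sim_ref.t().softmax(dim=-1),
                      reduction="batchmean")
    kl = (kl_row + kl_col) / 2

    return mse + ce + kl

# Example with random tensors (batch size 32 as in the diff; 768 is just an example hidden size):
# z1, z2 = torch.randn(32, 768, requires_grad=True), torch.randn(32, 768, requires_grad=True)
# ref_l, ref_r = torch.randn(32, 768), torch.randn(32, 768)
# loss = combined_loss(z1, z2, ref_l, ref_r)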
@@ -292,10 +306,14 @@ def cl_forward(cls,
        output = (cos_sim,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

+    # print("original " , loss)
+
    return SequenceClassifierOutput(
+        # loss=loss,
        loss=ziang_loss,
        logits=cos_sim,
        hidden_states=outputs.hidden_states,
+        # attentions=outputs.attentions,
    )


@@ -378,8 +396,6 @@ class BertForCL(BertPreTrainedModel):
        sent_emb=False,
        mlm_input_ids=None,
        mlm_labels=None,
-        left_emb=None,
-        right_emb=None,
    ):
        if self.model_args.init_embeddings_model:
            input_ids_for_glm = input_ids.view((-1, input_ids.size(-1)))  # (bs * num_sent, len)
@@ -428,8 +444,6 @@ class BertForCL(BertPreTrainedModel):
                return_dict=return_dict,
                mlm_input_ids=mlm_input_ids,
                mlm_labels=mlm_labels,
-                left_emb=left_emb,
-                right_emb=right_emb,
            )


@@ -467,8 +481,6 @@ class RobertaForCL(RobertaPreTrainedModel):
        sent_emb=False,
        mlm_input_ids=None,
        mlm_labels=None,
-        left_emb=None,
-        right_emb=None,
    ):

        if self.model_args.init_embeddings_model and not sent_emb:
@@ -518,7 +530,5 @@ class RobertaForCL(RobertaPreTrainedModel):
                return_dict=return_dict,
                mlm_input_ids=mlm_input_ids,
                mlm_labels=mlm_labels,
-                left_emb=left_emb,
-                right_emb=right_emb,
            )

 