florian-hoenicke committed
Commit d2b8f89
1 Parent(s): a7fc441

feat: push custom model

README.md CHANGED
@@ -12,14 +12,14 @@ tags:
  - sentence-similarity
  - mteb
  - Ubuntu
- - Linux
- - Software
- - OperatingSystem
  - Technical
+ - Support
+ - Linux
+ - Community
  ---
- This model is a fine-tuned version of [**jinaai/jina-embeddings-v2-base-code**](https://huggingface.co/jinaai/jina-embeddings-v2-base-code) designed for the following use case:
+ This model is a fine-tuned version of [**jinaai/jina-embeddings-v2-base-en**](https://huggingface.co/jinaai/jina-embeddings-v2-base-en) designed for the following use case:
  
- technical support search for Ubuntu
+ technical support for Ubuntu
  
  ## How to Use
  This model can be easily integrated into your NLP pipeline for tasks such as text classification, sentiment analysis, entity recognition, and more. Here's a simple example to get you started:
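The hunk ends before the README's actual usage snippet, so here is a minimal sketch of how an embedding model with this config is typically used. The repo id is a placeholder (the real repository name is not shown in this diff), and `trust_remote_code=True` is needed because `auto_map` in config.json points at custom `JinaBert*` classes.

```python
# Minimal sketch; "your-username/your-finetuned-model" is a placeholder repo id.
import torch
from transformers import AutoModel, AutoTokenizer

repo_id = "your-username/your-finetuned-model"  # placeholder, not the real repo name
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)  # loads custom JinaBert code

sentences = [
    "How do I fix a broken apt dependency on Ubuntu 22.04?",
    "Ubuntu package manager reports unmet dependencies",
]
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    token_embeddings = model(**inputs).last_hidden_state  # (batch, seq, hidden)

# Mean pooling over non-padding tokens, matching "emb_pooler": "mean" in config.json.
mask = inputs["attention_mask"].unsqueeze(-1).float()
embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)

similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1], dim=0)
print(f"cosine similarity: {similarity.item():.4f}")
```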
config.json CHANGED
@@ -8,15 +8,15 @@
    "auto_map": {
      "AutoConfig": "configuration_bert.JinaBertConfig",
      "AutoModel": "modeling_bert.JinaBertModel",
-     "AutoModelForMaskedLM": "jinaai/jina-bert-v2-qk-post-norm--modeling_bert.JinaBertForMaskedLM",
-     "AutoModelForSequenceClassification": "jinaai/jina-bert-v2-qk-post-norm--modeling_bert.JinaBertForSequenceClassification"
+     "AutoModelForMaskedLM": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForMaskedLM",
+     "AutoModelForSequenceClassification": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForSequenceClassification"
    },
    "classifier_dropout": null,
    "emb_pooler": "mean",
    "feed_forward_type": "geglu",
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.0,
+   "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
@@ -32,5 +32,5 @@
    "transformers_version": "4.40.2",
    "type_vocab_size": 2,
    "use_cache": true,
-   "vocab_size": 61056
+   "vocab_size": 30528
  }
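The `auto_map` entries now point at the shared `jinaai/jina-bert-implementation` code, and the vocabulary shrinks from 61056 to 30528 entries, which only lines up with the BERT-style tokenizer introduced further down. A hedged sanity check, reusing the placeholder repo id from above:

```python
# Sketch of a config/tokenizer consistency check; repo id is a placeholder.
from transformers import AutoConfig, AutoTokenizer

repo_id = "your-username/your-finetuned-model"  # placeholder
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

print(config.vocab_size)           # expected: 30528 after this commit
print(config.hidden_dropout_prob)  # expected: 0.1 after this commit
print(len(tokenizer))              # tokenizer vocab must fit inside the model vocab
assert len(tokenizer) <= config.vocab_size
```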
configuration_bert.py CHANGED
@@ -17,18 +17,11 @@
  """ BERT model configuration"""
  from collections import OrderedDict
  from typing import Mapping
- import warnings
  
  from transformers.configuration_utils import PretrainedConfig
+ from transformers.onnx import OnnxConfig
  from transformers.utils import logging
  
- try:
-     from optimum.exporters.onnx.model_configs import BertOnnxConfig
-     OPTIMUM_INSTALLED = True
- except ImportError:
-     warnings.warn("optimum is not installed. To use OnnxConfig and BertOnnxConfig, make sure that `optimum` package is installed")
-     OPTIMUM_INSTALLED = False
- 
  
  logger = logging.get_logger(__name__)
  
@@ -135,7 +128,7 @@ class JinaBertConfig(PretrainedConfig):
          classifier_dropout=None,
          feed_forward_type="original",
          emb_pooler=None,
-         attn_implementation=None,
+         attn_implementation='torch',
          **kwargs,
      ):
          super().__init__(pad_token_id=pad_token_id, **kwargs)
@@ -159,19 +152,17 @@
          self.emb_pooler = emb_pooler
          self.attn_implementation = attn_implementation
  
- if OPTIMUM_INSTALLED:
- 
-     class JinaBertOnnxConfig(BertOnnxConfig):
- 
-         @property
-         def inputs(self) -> Mapping[str, Mapping[int, str]]:
-             if self.task == "multiple-choice":
-                 dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
-             else:
-                 dynamic_axis = {0: "batch", 1: "sequence"}
-             return OrderedDict(
-                 [
-                     ("input_ids", dynamic_axis),
-                     ("attention_mask", dynamic_axis),
-                 ]
-             )
+ class JinaBertOnnxConfig(OnnxConfig):
+     @property
+     def inputs(self) -> Mapping[str, Mapping[int, str]]:
+         if self.task == "multiple-choice":
+             dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
+         else:
+             dynamic_axis = {0: "batch", 1: "sequence"}
+         return OrderedDict(
+             [
+                 ("input_ids", dynamic_axis),
+                 ("attention_mask", dynamic_axis),
+                 ("token_type_ids", dynamic_axis),
+             ]
+         )
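`JinaBertOnnxConfig` now derives directly from `transformers.onnx.OnnxConfig` instead of optimum's `BertOnnxConfig`, and declares `token_type_ids` as a third dynamic input. A hedged sketch of how the declared axes can be inspected; the demo class below mirrors the diff but uses a stock `BertConfig` because the fine-tuned repo is not named here.

```python
# Sketch: inspect the dynamic axes an OnnxConfig subclass advertises for ONNX export.
from collections import OrderedDict
from typing import Mapping

from transformers import BertConfig
from transformers.onnx import OnnxConfig


class DemoOnnxConfig(OnnxConfig):
    # Mirrors JinaBertOnnxConfig.inputs from the diff above.
    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        dynamic_axis = {0: "batch", 1: "sequence"}
        return OrderedDict(
            [
                ("input_ids", dynamic_axis),
                ("attention_mask", dynamic_axis),
                ("token_type_ids", dynamic_axis),
            ]
        )


onnx_config = DemoOnnxConfig(BertConfig(), task="default")
print(onnx_config.inputs)  # batch and sequence axes are dynamic for all three inputs
```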
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c9ced9b0b4c8eeee5d75c690d9f5aaa4272b844efa8b9805eed9737badda8d9
- size 643505600
+ oid sha256:e5cbde1a065989fc5e605ac6d44f15ee212a5bbe0e7af7c9a3a045d1ada6de5f
+ size 549493968
modeling_bert.py CHANGED
@@ -280,10 +280,9 @@ class JinaBertSelfAttention(nn.Module):
          self.query = nn.Linear(config.hidden_size, self.all_head_size)
          self.key = nn.Linear(config.hidden_size, self.all_head_size)
          self.value = nn.Linear(config.hidden_size, self.all_head_size)
-         self.layer_norm_q = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-         self.layer_norm_k = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
  
-         self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+         self.dropout_p = config.attention_probs_dropout_prob
+         self.dropout = nn.Dropout(self.dropout_p)
          self.position_embedding_type = position_embedding_type or getattr(
              config, "position_embedding_type", "absolute"
          )
@@ -317,7 +316,7 @@
          output_attentions: Optional[bool] = False,
          bias: Optional[torch.FloatTensor] = None,
      ) -> Tuple[torch.Tensor]:
-         mixed_query_layer = self.layer_norm_q(self.query(hidden_states))
+         mixed_query_layer = self.query(hidden_states)
  
          # If this is instantiated as a cross-attention module, the keys
          # and values come from an encoder; the attention mask needs to be
@@ -330,16 +329,16 @@
              value_layer = past_key_value[1]
              attention_mask = encoder_attention_mask
          elif is_cross_attention:
-             key_layer = self.transpose_for_scores(self.layer_norm_k(self.key(encoder_hidden_states)))
+             key_layer = self.transpose_for_scores(self.key(encoder_hidden_states))
              value_layer = self.transpose_for_scores(self.value(encoder_hidden_states))
              attention_mask = encoder_attention_mask
          elif past_key_value is not None:
-             key_layer = self.transpose_for_scores(self.layer_norm_k(self.key(hidden_states)))
+             key_layer = self.transpose_for_scores(self.key(hidden_states))
              value_layer = self.transpose_for_scores(self.value(hidden_states))
              key_layer = torch.cat([past_key_value[0], key_layer], dim=2)
              value_layer = torch.cat([past_key_value[1], value_layer], dim=2)
          else:
-             key_layer = self.transpose_for_scores(self.layer_norm_k(self.key(hidden_states)))
+             key_layer = self.transpose_for_scores(self.key(hidden_states))
              value_layer = self.transpose_for_scores(self.value(hidden_states))
  
          query_layer = self.transpose_for_scores(mixed_query_layer)
@@ -358,7 +357,8 @@ class JinaBertSelfAttention(nn.Module):
          if self.attn_implementation == 'torch' and scaled_dot_product_attention is not None:
              b, _, s, _ = query_layer.shape
              new_bias = attention_mask + bias
-             attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias)
+             dropout_p = self.dropout_p if self.training else 0.0
+             attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias, dropout_p=dropout_p)
              attn = attn.permute(0, 2, 1, 3).contiguous()
              return (attn.view(b, s, self.all_head_size),)
  
@@ -431,7 +431,7 @@ class JinaBertSelfAttention(nn.Module):
          context_layer = context_layer.view(new_context_layer_shape)
  
          outputs = (
-             (context_layer, attention_scores) if output_attentions else (context_layer,)
+             (context_layer, attention_probs) if output_attentions else (context_layer,)
          )
  
          if self.is_decoder:
@@ -515,29 +515,44 @@ class JinaBertAttention(nn.Module):
          return outputs
  
  
- class JinaBertMLP(nn.Module):
+ class JinaBertIntermediate(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
+         if isinstance(config.hidden_act, str):
+             self.intermediate_act_fn = ACT2FN[config.hidden_act]
+         else:
+             self.intermediate_act_fn = config.hidden_act
+ 
+     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+         hidden_states = self.dense(hidden_states)
+         hidden_states = self.intermediate_act_fn(hidden_states)
+         return hidden_states
+ 
+ 
+ class JinaBertOutput(nn.Module):
      def __init__(self, config: JinaBertConfig):
          super().__init__()
-         self.config = config
-         self.act = ACT2FN[config.hidden_act]
-         self.up_layer = nn.Linear(
-             config.hidden_size, config.intermediate_size, bias=False
-         )
-         self.down_layer = nn.Linear(config.intermediate_size, config.hidden_size)
+         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
+         self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
          self.dropout = nn.Dropout(config.hidden_dropout_prob)
  
-     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-         # Up
-         hidden_mlp_states = self.act(self.up_layer(hidden_states))
-         hidden_mlp_states = self.dropout(hidden_mlp_states)
-         # Down
-         return self.down_layer(hidden_mlp_states)
+     def forward(
+         self, hidden_states: torch.Tensor, input_tensor: torch.Tensor
+     ) -> torch.Tensor:
+         hidden_states = self.dense(hidden_states)
+         hidden_states = self.dropout(hidden_states)
+         hidden_states = self.LayerNorm(hidden_states + input_tensor)
+         return hidden_states
  
  
  class JinaBertGLUMLP(nn.Module):
      def __init__(self, config: JinaBertConfig):
          super().__init__()
          self.config = config
+         self.gated_layers = nn.Linear(
+             config.hidden_size, config.intermediate_size * 2, bias=False
+         )
          if config.feed_forward_type == 'reglu':
              self.act = nn.ReLU()
          elif config.feed_forward_type == 'geglu':
@@ -546,21 +561,23 @@ class JinaBertGLUMLP(nn.Module):
              raise ValueError(
                  f"feed_forward_type {config.feed_forward_type} not supported"
              )
-         self.up_gated_layer = nn.Linear(
-             config.hidden_size, config.intermediate_size * 2, bias=False
-         )
-         self.down_layer = nn.Linear(config.intermediate_size, config.hidden_size)
+         self.wo = nn.Linear(config.intermediate_size, config.hidden_size)
          self.dropout = nn.Dropout(config.hidden_dropout_prob)
+         self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
  
      def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-         # Up with gate
-         hidden_mlp_states = self.up_gated_layer(hidden_states)
-         up = hidden_mlp_states[:, :, :self.config.intermediate_size]
-         gated = hidden_mlp_states[:, :, self.config.intermediate_size:]
-         hidden_mlp_states = up * self.act(gated)
-         hidden_mlp_states = self.dropout(hidden_mlp_states)
-         # Down
-         return self.down_layer(hidden_mlp_states)
+         residual_connection = hidden_states
+         # compute the activation
+         hidden_states = self.gated_layers(hidden_states)
+         gated = hidden_states[:, :, : self.config.intermediate_size]
+         non_gated = hidden_states[:, :, self.config.intermediate_size :]
+         hidden_states = self.act(gated) * non_gated
+         hidden_states = self.dropout(hidden_states)
+         # multiply by the second matrix
+         hidden_states = self.wo(hidden_states)
+         # add the residual connection and post-LN
+         hidden_states = self.layernorm(hidden_states + residual_connection)
+         return hidden_states
  
  
  class JinaBertLayer(nn.Module):
@@ -572,8 +589,6 @@ class JinaBertLayer(nn.Module):
          self.is_decoder = config.is_decoder
          self.add_cross_attention = config.add_cross_attention
          self.feed_forward_type = config.feed_forward_type
-         self.layer_norm_1 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
-         self.layer_norm_2 = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
          if self.add_cross_attention:
              if not self.is_decoder:
                  raise ValueError(
@@ -585,7 +600,8 @@
          if self.feed_forward_type.endswith('glu'):
              self.mlp = JinaBertGLUMLP(config)
          else:
-             self.mlp = JinaBertMLP(config)
+             self.intermediate = JinaBertIntermediate(config)
+             self.output = JinaBertOutput(config)
  
      def forward(
          self,
@@ -598,9 +614,6 @@
          past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
          output_attentions: Optional[bool] = False,
      ) -> Tuple[torch.Tensor]:
-         # Pre-Norm
-         residual = hidden_states
- 
          # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
          self_attn_past_key_value = (
              past_key_value[:2] if past_key_value is not None else None
@@ -654,9 +667,15 @@
              cross_attn_present_key_value = cross_attention_outputs[-1]
              present_key_value = present_key_value + cross_attn_present_key_value
  
-         residual = self.layer_norm_1(residual + attention_output)
-         mlp_output = self.mlp(residual)
-         layer_output = self.layer_norm_2(residual + mlp_output)
+         if self.feed_forward_type.endswith('glu'):
+             layer_output = self.mlp(attention_output)
+         else:
+             layer_output = apply_chunking_to_forward(
+                 self.feed_forward_chunk,
+                 self.chunk_size_feed_forward,
+                 self.seq_len_dim,
+                 attention_output,
+             )
          outputs = (layer_output,) + outputs
  
          # if decoder, return the attn key/values as the last output
@@ -665,6 +684,11 @@
  
          return outputs
  
+     def feed_forward_chunk(self, attention_output):
+         intermediate_output = self.intermediate(attention_output)
+         layer_output = self.output(intermediate_output, attention_output)
+         return layer_output
+ 
  
  class JinaBertEncoder(nn.Module):
      def __init__(self, config: JinaBertConfig):
@@ -675,6 +699,11 @@ class JinaBertEncoder(nn.Module):
          )
          self.gradient_checkpointing = False
          self.num_attention_heads = config.num_attention_heads
+         self.register_buffer(
+             "alibi",
+             self.rebuild_alibi_tensor(size=config.max_position_embeddings),
+             persistent=False,
+         )
  
      def rebuild_alibi_tensor(
          self, size: int, device: Optional[Union[torch.device, str]] = None
@@ -742,7 +771,23 @@
  
          # Add alibi matrix to extended_attention_mask
          _, seqlen, _ = hidden_states.size()
-         alibi_bias = self.rebuild_alibi_tensor(size=seqlen, device=hidden_states.device).to(hidden_states.dtype)
+         if self._current_alibi_size < seqlen:
+             # Rebuild the alibi tensor when needed
+             warnings.warn(
+                 f'Increasing alibi size from {self._current_alibi_size} to {seqlen}.'
+             )
+             self.register_buffer(
+                 "alibi",
+                 self.rebuild_alibi_tensor(size=seqlen, device=hidden_states.device).to(
+                     hidden_states.dtype
+                 ),
+                 persistent=False,
+             )
+         elif self.alibi.device != hidden_states.device:
+             # Device catch-up
+             self.alibi = self.alibi.to(hidden_states.device)
+ 
+         alibi_bias = self.alibi[:, :, :seqlen, :seqlen]
          if self.gradient_checkpointing and self.training:
              if use_cache:
                  logger.warning_once(
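The attention hunks above drop the pre-norm q/k LayerNorms, route through `torch.nn.functional.scaled_dot_product_attention` with the ALiBi bias folded into the additive mask, and apply attention dropout only during training. A small standalone sketch of that call pattern; shapes and bias values are made up for illustration and are not the model's actual ALiBi slopes.

```python
# Standalone sketch of the SDPA call pattern used in the new attention path.
import torch
import torch.nn.functional as F

batch, heads, seq, head_dim = 2, 12, 16, 64
q = torch.randn(batch, heads, seq, head_dim)
k = torch.randn(batch, heads, seq, head_dim)
v = torch.randn(batch, heads, seq, head_dim)

# Additive biases: zeros keep positions, large negatives would mask padding;
# the "ALiBi" term here is only an illustrative distance penalty per key position.
attention_mask = torch.zeros(batch, 1, 1, seq)
alibi_bias = -torch.arange(seq).float().view(1, 1, 1, seq).expand(1, heads, seq, seq) * 0.1
new_bias = attention_mask + alibi_bias  # broadcasts to (batch, heads, seq, seq)

training = False
dropout_p = 0.1 if training else 0.0  # dropout only while training, as in the diff
attn = F.scaled_dot_product_attention(q, k, v, attn_mask=new_bias, dropout_p=dropout_p)

# Back to (batch, seq, hidden), exactly as the diff reshapes the output.
out = attn.permute(0, 2, 1, 3).contiguous().view(batch, seq, heads * head_dim)
print(out.shape)  # torch.Size([2, 16, 768])
```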
special_tokens_map.json CHANGED
@@ -1,48 +1,34 @@
  {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
    "cls_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
+     "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    "mask_token": {
-     "content": "<mask>",
-     "lstrip": true,
+     "content": "[MASK]",
+     "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    "pad_token": {
-     "content": "<pad>",
+     "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    "sep_token": {
-     "content": "</s>",
+     "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    "unk_token": {
-     "content": "<unk>",
+     "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,57 +1,57 @@
  {
-   "add_prefix_space": false,
    "added_tokens_decoder": {
      "0": {
-       "content": "<s>",
+       "content": "[PAD]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false,
        "special": true
      },
-     "1": {
-       "content": "<pad>",
+     "100": {
+       "content": "[UNK]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false,
        "special": true
      },
-     "2": {
-       "content": "</s>",
+     "101": {
+       "content": "[CLS]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false,
        "special": true
      },
-     "3": {
-       "content": "<unk>",
+     "102": {
+       "content": "[SEP]",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false,
        "special": true
      },
-     "4": {
-       "content": "<mask>",
-       "lstrip": true,
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false,
        "special": true
      }
    },
-   "bos_token": "<s>",
    "clean_up_tokenization_spaces": true,
-   "cls_token": "<s>",
-   "eos_token": "</s>",
-   "errors": "replace",
-   "mask_token": "<mask>",
-   "model_max_length": 8192,
-   "pad_token": "<pad>",
-   "sep_token": "</s>",
-   "tokenizer_class": "RobertaTokenizer",
-   "trim_offsets": true,
-   "unk_token": "<unk>"
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 2147483648,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
  }
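The tokenizer switches from a RoBERTa-style `<s>`/`</s>` vocabulary to a lower-cased `BertTokenizer` with `[CLS]`/`[SEP]` framing and the ids listed in `added_tokens_decoder` (0, 100, 101, 102, 103). A quick hedged illustration using the stock `bert-base-uncased` tokenizer, which shares that convention; the fine-tuned repo itself is not named in this diff.

```python
# Sketch: BERT-style special-token framing, matching the ids in added_tokens_decoder.
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")  # same token convention
ids = tokenizer("my ubuntu install won't boot")["input_ids"]

print(ids[0], ids[-1])                       # 101 102  -> [CLS] ... [SEP]
print(tokenizer.convert_ids_to_tokens(ids))  # ['[CLS]', 'my', 'ubuntu', ...]
```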
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66f6961d30fddaac6b19d888eb33f3dfa57407a889e89c6202660d4a2dc271fb
+ oid sha256:db9c2a1f1e15a402ec8b4ea591e6d667a5f19b4e63a681ac8eff6f8a74adf67b
  size 4719
vocab.txt ADDED
The diff for this file is too large to render. See raw diff