liuqi6777 committed on
Commit
92fd218
1 Parent(s): 1171283

Delete configuration_bert.py

Browse files
Files changed (1) hide show
  1. configuration_bert.py +0 -168
configuration_bert.py DELETED
@@ -1,168 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
3
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4
- # Copyright (c) 2023 Jina AI GmbH. All rights reserved.
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- """ BERT model configuration"""
18
- from collections import OrderedDict
19
- from typing import Mapping
20
-
21
- from transformers.configuration_utils import PretrainedConfig
22
- from transformers.onnx import OnnxConfig
23
- from transformers.utils import logging
24
-
25
-
26
- logger = logging.get_logger(__name__)  # module-level logger named after this module; not referenced elsewhere in the visible file
27
-
28
-
29
class JinaBertConfig(PretrainedConfig):
    r"""
    Configuration holder for a [`JinaBertModel`].

    The values stored on an instance describe the architecture of the model built from it. Per the original notes,
    the defaults give a configuration similar to the
    [bert-base-uncased](https://huggingface.co/bert-base-uncased) architecture.

    The class derives from [`PretrainedConfig`]; refer to that class's documentation for the options it handles
    itself. `pad_token_id` and every keyword argument that is not named in the signature are handed to
    `PretrainedConfig.__init__`; all other arguments are stored as attributes of the same name.

    Args:
        vocab_size (`int`, *optional*, defaults to 30522):
            Size of the token vocabulary, i.e. how many different token ids can appear in the `input_ids` given to
            the model.
        hidden_size (`int`, *optional*, defaults to 768):
            Width of the encoder layers and of the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            How many hidden layers the Transformer encoder has.
        num_attention_heads (`int`, *optional*, defaults to 12):
            How many attention heads each attention layer of the encoder has.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Width of the "intermediate" (feed-forward) layer of the encoder.
        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
            Non-linear activation used in the encoder and pooler, given as a function or as a string. The strings
            `"gelu"`, `"relu"`, `"silu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            Dropout probability for every fully connected layer in the embeddings, encoder and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            Dropout ratio applied to the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            Longest sequence length the model might ever be used with. Usually chosen generously
            (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            Vocabulary size of the `token_type_ids` given to the model.
        initializer_range (`float`, *optional*, defaults to 0.02):
            Standard deviation of the truncated_normal_initializer used to initialize all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            Epsilon used by the layer normalization layers.
        pad_token_id (`int`, *optional*, defaults to 0):
            Passed straight to `PretrainedConfig.__init__` as `pad_token_id`.
        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
            Kind of position embedding: one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. Use
            `"absolute"` for positional embeddings. `"relative_key"` is described in
            [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155);
            `"relative_key_query"` is *Method 4* in [Improve Transformer Models with Better Relative Position
            Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether the model should return the last key/values attentions (not used by all models). Only relevant
            if `config.is_decoder=True`.
        classifier_dropout (`float`, *optional*):
            Dropout ratio for the classification head.
        feed_forward_type (`str`, *optional*, defaults to `"original"`):
            Kind of feed forward layer used in the bert layers. Can be one of the GLU variants, e.g. `"reglu"`,
            `"geglu"`.
        emb_pooler (`str`, *optional*, defaults to `None`):
            Function used to pool the last layer embeddings into sentence embeddings. Should be one of `None`,
            `"mean"`.
        attn_implementation (`str`, *optional*, defaults to `"torch"`):
            Implementation of the self-attention layer. Can be one of:
            - `None` for the original implementation,
            - `torch` for the PyTorch SDPA implementation.
        kwargs:
            Remaining keyword arguments, forwarded unchanged to `PretrainedConfig.__init__`. This is how
            `is_decoder` (`bool`, whether the model is used as a decoder rather than an encoder) reaches the
            configuration, since it is not a named parameter here.

    Examples:

    ```python
    >>> # NOTE(review): assumes `JinaBertConfig` and `JinaBertModel` are already imported from wherever this
    >>> # repository exposes them - confirm the import path.

    >>> # Initializing a JinaBert configuration
    >>> configuration = JinaBertConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = JinaBertModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config

    >>> # Encode text inputs
    >>> embeddings = model.encode(text_inputs)
    ```"""

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        feed_forward_type="original",
        emb_pooler=None,
        attn_implementation="torch",
        **kwargs,
    ):
        # The base class receives the padding id and whatever extra options the caller supplied.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Everything else is stored verbatim under its own name. The pairs are listed in the
        # order the attributes have always been set, so the instance's attribute dict keeps
        # the same layout.
        stored_settings = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("max_position_embeddings", max_position_embeddings),
            ("type_vocab_size", type_vocab_size),
            ("initializer_range", initializer_range),
            ("layer_norm_eps", layer_norm_eps),
            ("position_embedding_type", position_embedding_type),
            ("use_cache", use_cache),
            ("classifier_dropout", classifier_dropout),
            ("feed_forward_type", feed_forward_type),
            ("emb_pooler", emb_pooler),
            ("attn_implementation", attn_implementation),
        )
        for attr_name, attr_value in stored_settings:
            setattr(self, attr_name, attr_value)
154
-
155
class JinaBertOnnxConfig(OnnxConfig):
    """ONNX export description for the Jina BERT model, declaring its inputs and their dynamic axes."""

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """
        Map each model input name to its dynamic axes.

        Returns:
            An `OrderedDict` with the keys `"input_ids"`, `"attention_mask"` and `"token_type_ids"`, in that
            order. All three keys refer to the same axis mapping: batch / choice / sequence for the
            `"multiple-choice"` task, batch / sequence for any other task.
        """
        axes = (
            {0: "batch", 1: "choice", 2: "sequence"}
            if self.task == "multiple-choice"
            else {0: "batch", 1: "sequence"}
        )
        input_names = ("input_ids", "attention_mask", "token_type_ids")
        # One axes dict is shared by every entry, exactly as a single variable was reused before.
        return OrderedDict((input_name, axes) for input_name in input_names)