Spaces:

riccorl
/

relik-entity-linking

Sleeping

App Files Files Community

riccorl committed on Oct 19, 2023

Commit

91e262c

•

1 Parent(s): 28d9162

Upload models

Browse files

Files changed (24) hide show

.gitattributes +1 -0
app.py +3 -3
models/relik-reader-aida-deberta-small/.gitattributes +35 -0
models/relik-reader-aida-deberta-small/added_tokens.json +108 -0
models/relik-reader-aida-deberta-small/config.json +18 -0
models/relik-reader-aida-deberta-small/configuration_relik.py +33 -0
models/relik-reader-aida-deberta-small/modeling_relik.py +983 -0
models/relik-reader-aida-deberta-small/pytorch_model.bin +3 -0
models/relik-reader-aida-deberta-small/special_tokens_map.json +112 -0
models/relik-reader-aida-deberta-small/spm.model +3 -0
models/relik-reader-aida-deberta-small/tokenizer.json +0 -0
models/relik-reader-aida-deberta-small/tokenizer_config.json +970 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/config.yaml +8 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/documents.json +3 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/embeddings.pt +3 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/added_tokens.json +7 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/config.json +28 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/hf.py +88 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/pytorch_model.bin +3 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/special_tokens_map.json +7 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/tokenizer.json +0 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/tokenizer_config.json +56 -0
models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/vocab.txt +0 -0
scripts/setup.sh +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/documents.json filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -180,9 +180,9 @@ def run_client():
     # submit = st.button("Run")
     relik = Relik(
-        question_encoder="riccorl/relik-retriever-small-aida-blink-pretrain-omniencoder",
-        document_index="riccorl/index-relik-retriever-small-aida-blink-pretrain-omniencoder",
-        reader="riccorl/relik-reader-aida-deberta-small",
         top_k=100,
         window_size=32,
         window_stride=16,

     # submit = st.button("Run")
     relik = Relik(
+        question_encoder=Path(__file__).parent / "models" / "relik-retriever-small-aida-blink-pretrain-omniencoder" / "question_encoder",
+        document_index=Path(__file__).parent / "models" / "relik-retriever-small-aida-blink-pretrain-omniencoder" / "document_index",
+        reader=Path(__file__).parent / "models" /"relik-reader-aida-deberta-small",
         top_k=100,
         window_size=32,
         window_stride=16,

models/relik-reader-aida-deberta-small/.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

models/relik-reader-aida-deberta-small/added_tokens.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "--NME--": 128001,
+  "[CLS]": 1,
+  "[E-0]": 128002,
+  "[E-10]": 128012,
+  "[E-11]": 128013,
+  "[E-12]": 128014,
+  "[E-13]": 128015,
+  "[E-14]": 128016,
+  "[E-15]": 128017,
+  "[E-16]": 128018,
+  "[E-17]": 128019,
+  "[E-18]": 128020,
+  "[E-19]": 128021,
+  "[E-1]": 128003,
+  "[E-20]": 128022,
+  "[E-21]": 128023,
+  "[E-22]": 128024,
+  "[E-23]": 128025,
+  "[E-24]": 128026,
+  "[E-25]": 128027,
+  "[E-26]": 128028,
+  "[E-27]": 128029,
+  "[E-28]": 128030,
+  "[E-29]": 128031,
+  "[E-2]": 128004,
+  "[E-30]": 128032,
+  "[E-31]": 128033,
+  "[E-32]": 128034,
+  "[E-33]": 128035,
+  "[E-34]": 128036,
+  "[E-35]": 128037,
+  "[E-36]": 128038,
+  "[E-37]": 128039,
+  "[E-38]": 128040,
+  "[E-39]": 128041,
+  "[E-3]": 128005,
+  "[E-40]": 128042,
+  "[E-41]": 128043,
+  "[E-42]": 128044,
+  "[E-43]": 128045,
+  "[E-44]": 128046,
+  "[E-45]": 128047,
+  "[E-46]": 128048,
+  "[E-47]": 128049,
+  "[E-48]": 128050,
+  "[E-49]": 128051,
+  "[E-4]": 128006,
+  "[E-50]": 128052,
+  "[E-51]": 128053,
+  "[E-52]": 128054,
+  "[E-53]": 128055,
+  "[E-54]": 128056,
+  "[E-55]": 128057,
+  "[E-56]": 128058,
+  "[E-57]": 128059,
+  "[E-58]": 128060,
+  "[E-59]": 128061,
+  "[E-5]": 128007,
+  "[E-60]": 128062,
+  "[E-61]": 128063,
+  "[E-62]": 128064,
+  "[E-63]": 128065,
+  "[E-64]": 128066,
+  "[E-65]": 128067,
+  "[E-66]": 128068,
+  "[E-67]": 128069,
+  "[E-68]": 128070,
+  "[E-69]": 128071,
+  "[E-6]": 128008,
+  "[E-70]": 128072,
+  "[E-71]": 128073,
+  "[E-72]": 128074,
+  "[E-73]": 128075,
+  "[E-74]": 128076,
+  "[E-75]": 128077,
+  "[E-76]": 128078,
+  "[E-77]": 128079,
+  "[E-78]": 128080,
+  "[E-79]": 128081,
+  "[E-7]": 128009,
+  "[E-80]": 128082,
+  "[E-81]": 128083,
+  "[E-82]": 128084,
+  "[E-83]": 128085,
+  "[E-84]": 128086,
+  "[E-85]": 128087,
+  "[E-86]": 128088,
+  "[E-87]": 128089,
+  "[E-88]": 128090,
+  "[E-89]": 128091,
+  "[E-8]": 128010,
+  "[E-90]": 128092,
+  "[E-91]": 128093,
+  "[E-92]": 128094,
+  "[E-93]": 128095,
+  "[E-94]": 128096,
+  "[E-95]": 128097,
+  "[E-96]": 128098,
+  "[E-97]": 128099,
+  "[E-98]": 128100,
+  "[E-99]": 128101,
+  "[E-9]": 128011,
+  "[MASK]": 128000,
+  "[PAD]": 0,
+  "[SEP]": 2,
+  "[UNK]": 3
+}

models/relik-reader-aida-deberta-small/config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "activation": "gelu",
+  "additional_special_symbols": 101,
+  "architectures": [
+    "RelikReaderELModel"
+  ],
+  "auto_map": {
+    "AutoModel": "modeling_relik.RelikReaderELModel"
+  },
+  "linears_hidden_size": 512,
+  "model_type": "relik-reader",
+  "num_layers": null,
+  "torch_dtype": "float32",
+  "training": false,
+  "transformer_model": "microsoft/deberta-v3-small",
+  "transformers_version": "4.34.0",
+  "use_last_k_layers": 1
+}

models/relik-reader-aida-deberta-small/configuration_relik.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from typing import Optional
+from transformers import AutoConfig
+from transformers.configuration_utils import PretrainedConfig
+class RelikReaderConfig(PretrainedConfig):
+    model_type = "relik-reader"
+    def __init__(
+        self,
+        transformer_model: str = "microsoft/deberta-v3-base",
+        additional_special_symbols: int = 101,
+        num_layers: Optional[int] = None,
+        activation: str = "gelu",
+        linears_hidden_size: Optional[int] = 512,
+        use_last_k_layers: int = 1,
+        training: bool = False,
+        default_reader_class: Optional[str] = None,
+        **kwargs
+    ) -> None:
+        self.transformer_model = transformer_model
+        self.additional_special_symbols = additional_special_symbols
+        self.num_layers = num_layers
+        self.activation = activation
+        self.linears_hidden_size = linears_hidden_size
+        self.use_last_k_layers = use_last_k_layers
+        self.training = training
+        self.default_reader_class = default_reader_class
+        super().__init__(**kwargs)
+AutoConfig.register("relik-reader", RelikReaderConfig)

models/relik-reader-aida-deberta-small/modeling_relik.py ADDED Viewed

	@@ -0,0 +1,983 @@

+from typing import Optional, Dict, Any
+import torch
+from transformers import AutoModel, PreTrainedModel
+from transformers.activations import GELUActivation, ClippedGELUActivation
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_utils import PoolerEndLogits
+from .configuration_relik import RelikReaderConfig
+class RelikReaderSample:
+    def __init__(self, **kwargs):
+        super().__setattr__("_d", {})
+        self._d = kwargs
+    def __getattribute__(self, item):
+        return super(RelikReaderSample, self).__getattribute__(item)
+    def __getattr__(self, item):
+        if item.startswith("__") and item.endswith("__"):
+            # this is likely some python library-specific variable (such as __deepcopy__ for copy)
+            # better follow standard behavior here
+            raise AttributeError(item)
+        elif item in self._d:
+            return self._d[item]
+        else:
+            return None
+    def __setattr__(self, key, value):
+        if key in self._d:
+            self._d[key] = value
+        else:
+            super().__setattr__(key, value)
+# Lookup table from an activation name to the module placed inside the
+# projection heads. Each value is one instance created at import time, so
+# every head built with the same name holds the same module object.
+activation2functions = {
+    "relu": torch.nn.ReLU(),
+    "gelu": GELUActivation(),
+    "gelu_10": ClippedGELUActivation(-10, 10),
+}
+class PoolerEndLogitsBi(PoolerEndLogits):
+    def __init__(self, config: PretrainedConfig):
+        super().__init__(config)
+        self.dense_1 = torch.nn.Linear(config.hidden_size, 2)
+    def forward(
+        self,
+        hidden_states: torch.FloatTensor,
+        start_states: Optional[torch.FloatTensor] = None,
+        start_positions: Optional[torch.LongTensor] = None,
+        p_mask: Optional[torch.FloatTensor] = None,
+    ) -> torch.FloatTensor:
+        if p_mask is not None:
+            p_mask = p_mask.unsqueeze(-1)
+        logits = super().forward(
+            hidden_states,
+            start_states,
+            start_positions,
+            p_mask,
+        )
+        return logits
+class RelikReaderSpanModel(PreTrainedModel):
+    config_class = RelikReaderConfig
+    def __init__(self, config: RelikReaderConfig, *args, **kwargs):
+        super().__init__(config)
+        # Transformer model declaration
+        self.config = config
+        self.transformer_model = (
+            AutoModel.from_pretrained(self.config.transformer_model)
+            if self.config.num_layers is None
+            else AutoModel.from_pretrained(
+                self.config.transformer_model, num_hidden_layers=self.config.num_layers
+            )
+        )
+        self.transformer_model.resize_token_embeddings(
+            self.transformer_model.config.vocab_size
+            + self.config.additional_special_symbols
+        )
+        self.activation = self.config.activation
+        self.linears_hidden_size = self.config.linears_hidden_size
+        self.use_last_k_layers = self.config.use_last_k_layers
+        # named entity detection layers
+        self.ned_start_classifier = self._get_projection_layer(
+            self.activation, last_hidden=2, layer_norm=False
+        )
+        self.ned_end_classifier = PoolerEndLogits(self.transformer_model.config)
+        # END entity disambiguation layer
+        self.ed_start_projector = self._get_projection_layer(self.activation)
+        self.ed_end_projector = self._get_projection_layer(self.activation)
+        self.training = self.config.training
+        # criterion
+        self.criterion = torch.nn.CrossEntropyLoss()
+    def _get_projection_layer(
+        self,
+        activation: str,
+        last_hidden: Optional[int] = None,
+        input_hidden=None,
+        layer_norm: bool = True,
+    ) -> torch.nn.Sequential:
+        head_components = [
+            torch.nn.Dropout(0.1),
+            torch.nn.Linear(
+                self.transformer_model.config.hidden_size * self.use_last_k_layers
+                if input_hidden is None
+                else input_hidden,
+                self.linears_hidden_size,
+            ),
+            activation2functions[activation],
+            torch.nn.Dropout(0.1),
+            torch.nn.Linear(
+                self.linears_hidden_size,
+                self.linears_hidden_size if last_hidden is None else last_hidden,
+            ),
+        ]
+        if layer_norm:
+            head_components.append(
+                torch.nn.LayerNorm(
+                    self.linears_hidden_size if last_hidden is None else last_hidden,
+                    self.transformer_model.config.layer_norm_eps,
+                )
+            )
+        return torch.nn.Sequential(*head_components)
+    def _mask_logits(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+        mask = mask.unsqueeze(-1)
+        if next(self.parameters()).dtype == torch.float16:
+            logits = logits * (1 - mask) - 65500 * mask
+        else:
+            logits = logits * (1 - mask) - 1e30 * mask
+        return logits
+    def _get_model_features(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: torch.Tensor,
+        token_type_ids: Optional[torch.Tensor],
+    ):
+        model_input = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "output_hidden_states": self.use_last_k_layers > 1,
+        }
+        if token_type_ids is not None:
+            model_input["token_type_ids"] = token_type_ids
+        model_output = self.transformer_model(**model_input)
+        if self.use_last_k_layers > 1:
+            model_features = torch.cat(
+                model_output[1][-self.use_last_k_layers :], dim=-1
+            )
+        else:
+            model_features = model_output[0]
+        return model_features
+    def compute_ned_end_logits(
+        self,
+        start_predictions,
+        start_labels,
+        model_features,
+        prediction_mask,
+        batch_size,
+    ) -> Optional[torch.Tensor]:
+        # todo: maybe when constraining on the spans,
+        #  we should not use a prediction_mask for the end tokens.
+        #  at least we should not during training imo
+        start_positions = start_labels if self.training else start_predictions
+        start_positions_indices = (
+            torch.arange(start_positions.size(1), device=start_positions.device)
+            .unsqueeze(0)
+            .expand(batch_size, -1)[start_positions > 0]
+        ).to(start_positions.device)
+        if len(start_positions_indices) > 0:
+            expanded_features = torch.cat(
+                [
+                    model_features[i].unsqueeze(0).expand(x, -1, -1)
+                    for i, x in enumerate(torch.sum(start_positions > 0, dim=-1))
+                    if x > 0
+                ],
+                dim=0,
+            ).to(start_positions_indices.device)
+            expanded_prediction_mask = torch.cat(
+                [
+                    prediction_mask[i].unsqueeze(0).expand(x, -1)
+                    for i, x in enumerate(torch.sum(start_positions > 0, dim=-1))
+                    if x > 0
+                ],
+                dim=0,
+            ).to(expanded_features.device)
+            end_logits = self.ned_end_classifier(
+                hidden_states=expanded_features,
+                start_positions=start_positions_indices,
+                p_mask=expanded_prediction_mask,
+            )
+            return end_logits
+        return None
+    def compute_classification_logits(
+        self,
+        model_features,
+        special_symbols_mask,
+        prediction_mask,
+        batch_size,
+        start_positions=None,
+        end_positions=None,
+    ) -> torch.Tensor:
+        if start_positions is None or end_positions is None:
+            start_positions = torch.zeros_like(prediction_mask)
+            end_positions = torch.zeros_like(prediction_mask)
+        model_start_features = self.ed_start_projector(model_features)
+        model_end_features = self.ed_end_projector(model_features)
+        model_end_features[start_positions > 0] = model_end_features[end_positions > 0]
+        model_ed_features = torch.cat(
+            [model_start_features, model_end_features], dim=-1
+        )
+        # computing ed features
+        classes_representations = torch.sum(special_symbols_mask, dim=1)[0].item()
+        special_symbols_representation = model_ed_features[special_symbols_mask].view(
+            batch_size, classes_representations, -1
+        )
+        logits = torch.bmm(
+            model_ed_features,
+            torch.permute(special_symbols_representation, (0, 2, 1)),
+        )
+        logits = self._mask_logits(logits, prediction_mask)
+        return logits
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: torch.Tensor,
+        token_type_ids: Optional[torch.Tensor] = None,
+        prediction_mask: Optional[torch.Tensor] = None,
+        special_symbols_mask: Optional[torch.Tensor] = None,
+        start_labels: Optional[torch.Tensor] = None,
+        end_labels: Optional[torch.Tensor] = None,
+        use_predefined_spans: bool = False,
+        *args,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Detect span boundaries, score them against the candidates, and
+        compute the losses when labels are given in training mode.
+        Args:
+            input_ids, attention_mask, token_type_ids: encoder inputs;
+                ``input_ids`` must be 2-D (it is unpacked into batch size and
+                sequence length).
+            prediction_mask: passed to ``_mask_logits`` and to the end
+                classifier. NOTE(review): defaults to ``None`` but is used
+                unconditionally (``_mask_logits`` calls ``.unsqueeze`` on it),
+                so callers apparently must always supply it -- confirm.
+            special_symbols_mask: selects the special-symbol positions used as
+                classes in ``compute_classification_logits``.
+            start_labels, end_labels: optional gold labels; entries equal to
+                -100 are propagated as -100 into the boundary labels.
+            use_predefined_spans: when true no boundary is predicted and the
+                (binarised) labels are used as spans instead.
+        Returns:
+            A dict with ``batch_size`` and the ``ned_start_*``, ``ned_end_*``
+            and ``ed_*`` logits, probabilities and predictions. When both
+            label tensors are given and ``self.training`` is set it also
+            contains ``ned_start_loss``, ``ned_end_loss``, ``ed_loss`` and
+            their sum under ``loss``.
+        """
+        batch_size, seq_len = input_ids.shape
+        model_features = self._get_model_features(
+            input_ids, attention_mask, token_type_ids
+        )
+        ned_start_labels = None
+        # named entity detection if required
+        if use_predefined_spans:  # no need to compute spans
+            # Labels (or zeros when absent) are cloned so that the
+            # binarisation below does not touch the caller's tensors.
+            ned_start_logits, ned_start_probabilities, ned_start_predictions = (
+                None,
+                None,
+                torch.clone(start_labels)
+                if start_labels is not None
+                else torch.zeros_like(input_ids),
+            )
+            ned_end_logits, ned_end_probabilities, ned_end_predictions = (
+                None,
+                None,
+                torch.clone(end_labels)
+                if end_labels is not None
+                else torch.zeros_like(input_ids),
+            )
+            ned_start_predictions[ned_start_predictions > 0] = 1
+            ned_end_predictions[ned_end_predictions > 0] = 1
+        else:  # compute spans
+            # start boundary prediction
+            ned_start_logits = self.ned_start_classifier(model_features)
+            ned_start_logits = self._mask_logits(ned_start_logits, prediction_mask)
+            ned_start_probabilities = torch.softmax(ned_start_logits, dim=-1)
+            ned_start_predictions = ned_start_probabilities.argmax(dim=-1)
+            # end boundary prediction
+            # Binary start labels: 1 where start_labels > 0, -100 kept, else 0.
+            ned_start_labels = (
+                torch.zeros_like(start_labels) if start_labels is not None else None
+            )
+            if ned_start_labels is not None:
+                ned_start_labels[start_labels == -100] = -100
+                ned_start_labels[start_labels > 0] = 1
+            ned_end_logits = self.compute_ned_end_logits(
+                ned_start_predictions,
+                ned_start_labels,
+                model_features,
+                prediction_mask,
+                batch_size,
+            )
+            if ned_end_logits is not None:
+                ned_end_probabilities = torch.softmax(ned_end_logits, dim=-1)
+                ned_end_predictions = torch.argmax(ned_end_probabilities, dim=-1)
+            else:
+                # No start position was found: there is nothing to score.
+                ned_end_logits, ned_end_probabilities = None, None
+                ned_end_predictions = ned_start_predictions.new_zeros(batch_size)
+            # flattening end predictions
+            #   (flattening can happen only if the
+            #   end boundaries were not predicted using the gold labels)
+            if not self.training:
+                # Same shape as the start predictions, reset to all zeros.
+                flattened_end_predictions = torch.clone(ned_start_predictions)
+                flattened_end_predictions[flattened_end_predictions > 0] = 0
+                # Per sample, the token indices predicted as starts.
+                batch_start_predictions = list()
+                for elem_idx in range(batch_size):
+                    batch_start_predictions.append(
+                        torch.where(ned_start_predictions[elem_idx] > 0)[0].tolist()
+                    )
+                # check that the total number of start predictions
+                # is equal to the end predictions
+                total_start_predictions = sum(map(len, batch_start_predictions))
+                total_end_predictions = len(ned_end_predictions)
+                assert (
+                    total_start_predictions == 0
+                    or total_start_predictions == total_end_predictions
+                ), (
+                    f"Total number of start predictions = {total_start_predictions}. "
+                    f"Total number of end predictions = {total_end_predictions}"
+                )
+                # End predictions are consumed in the same order in which the
+                # starts were enumerated above.
+                curr_end_pred_num = 0
+                for elem_idx, bsp in enumerate(batch_start_predictions):
+                    for sp in bsp:
+                        ep = ned_end_predictions[curr_end_pred_num].item()
+                        # An end before its start is clamped to the start.
+                        if ep < sp:
+                            ep = sp
+                        # if we already set this span throw it (no overlap)
+                        if flattened_end_predictions[elem_idx, ep] == 1:
+                            ned_start_predictions[elem_idx, sp] = 0
+                        else:
+                            flattened_end_predictions[elem_idx, ep] = 1
+                        curr_end_pred_num += 1
+                ned_end_predictions = flattened_end_predictions
+        # Gold spans drive disambiguation in training, predictions otherwise.
+        start_position, end_position = (
+            (start_labels, end_labels)
+            if self.training
+            else (ned_start_predictions, ned_end_predictions)
+        )
+        # Entity disambiguation
+        ed_logits = self.compute_classification_logits(
+            model_features,
+            special_symbols_mask,
+            prediction_mask,
+            batch_size,
+            start_position,
+            end_position,
+        )
+        ed_probabilities = torch.softmax(ed_logits, dim=-1)
+        ed_predictions = torch.argmax(ed_probabilities, dim=-1)
+        # output build
+        output_dict = dict(
+            batch_size=batch_size,
+            ned_start_logits=ned_start_logits,
+            ned_start_probabilities=ned_start_probabilities,
+            ned_start_predictions=ned_start_predictions,
+            ned_end_logits=ned_end_logits,
+            ned_end_probabilities=ned_end_probabilities,
+            ned_end_predictions=ned_end_predictions,
+            ed_logits=ed_logits,
+            ed_probabilities=ed_probabilities,
+            ed_predictions=ed_predictions,
+        )
+        # compute loss if labels
+        if start_labels is not None and end_labels is not None and self.training:
+            # named entity detection loss
+            # start
+            if ned_start_logits is not None:
+                ned_start_loss = self.criterion(
+                    ned_start_logits.view(-1, ned_start_logits.shape[-1]),
+                    ned_start_labels.view(-1),
+                )
+            else:
+                ned_start_loss = 0
+            # end
+            if ned_end_logits is not None:
+                ned_end_labels = torch.zeros_like(end_labels)
+                ned_end_labels[end_labels == -100] = -100
+                ned_end_labels[end_labels > 0] = 1
+                # The target of each end-logit row is the column index of the
+                # corresponding gold end position.
+                ned_end_loss = self.criterion(
+                    ned_end_logits,
+                    (
+                        torch.arange(
+                            ned_end_labels.size(1), device=ned_end_labels.device
+                        )
+                        .unsqueeze(0)
+                        .expand(batch_size, -1)[ned_end_labels > 0]
+                    ).to(ned_end_labels.device),
+                )
+            else:
+                ned_end_loss = 0
+            # entity disambiguation loss
+            # NOTE(review): this writes into the caller's start_labels tensor.
+            # With use_predefined_spans=True ned_start_labels is still None
+            # here, so the index expression is the plain boolean `None != 1`
+            # -- confirm that path is intended.
+            start_labels[ned_start_labels != 1] = -100
+            ed_labels = torch.clone(start_labels)
+            ed_labels[end_labels > 0] = end_labels[end_labels > 0]
+            ed_loss = self.criterion(
+                ed_logits.view(-1, ed_logits.shape[-1]),
+                ed_labels.view(-1),
+            )
+            output_dict["ned_start_loss"] = ned_start_loss
+            output_dict["ned_end_loss"] = ned_end_loss
+            output_dict["ed_loss"] = ed_loss
+            output_dict["loss"] = ned_start_loss + ned_end_loss + ed_loss
+        return output_dict
+class RelikReaderREModel(PreTrainedModel):
+    config_class = RelikReaderConfig
+    def __init__(self, config, *args, **kwargs):
+        super().__init__(config)
+        # Transformer model declaration
+        # self.transformer_model_name = transformer_model
+        self.config = config
+        self.transformer_model = (
+            AutoModel.from_pretrained(config.transformer_model)
+            if config.num_layers is None
+            else AutoModel.from_pretrained(
+                config.transformer_model, num_hidden_layers=config.num_layers
+            )
+        )
+        self.transformer_model.resize_token_embeddings(
+            self.transformer_model.config.vocab_size + config.additional_special_symbols
+        )
+        # named entity detection layers
+        self.ned_start_classifier = self._get_projection_layer(
+            config.activation, last_hidden=2, layer_norm=False
+        )
+        self.ned_end_classifier = PoolerEndLogitsBi(self.transformer_model.config)
+        self.entity_type_loss = (
+            config.entity_type_loss if hasattr(config, "entity_type_loss") else False
+        )
+        self.relation_disambiguation_loss = (
+            config.relation_disambiguation_loss
+            if hasattr(config, "relation_disambiguation_loss")
+            else False
+        )
+        input_hidden_ents = 2 * self.transformer_model.config.hidden_size
+        self.re_subject_projector = self._get_projection_layer(
+            config.activation, input_hidden=input_hidden_ents
+        )
+        self.re_object_projector = self._get_projection_layer(
+            config.activation, input_hidden=input_hidden_ents
+        )
+        self.re_relation_projector = self._get_projection_layer(config.activation)
+        if self.entity_type_loss or self.relation_disambiguation_loss:
+            self.re_entities_projector = self._get_projection_layer(
+                config.activation,
+                input_hidden=2 * self.transformer_model.config.hidden_size,
+            )
+            self.re_definition_projector = self._get_projection_layer(
+                config.activation,
+            )
+        self.re_classifier = self._get_projection_layer(
+            config.activation,
+            input_hidden=config.linears_hidden_size,
+            last_hidden=2,
+            layer_norm=False,
+        )
+        if self.entity_type_loss or self.relation_disambiguation_loss:
+            self.re_ed_classifier = self._get_projection_layer(
+                config.activation,
+                input_hidden=config.linears_hidden_size,
+                last_hidden=2,
+                layer_norm=False,
+            )
+        self.training = config.training
+        # criterion
+        self.criterion = torch.nn.CrossEntropyLoss()
+    def _get_projection_layer(
+        self,
+        activation: str,
+        last_hidden: Optional[int] = None,
+        input_hidden=None,
+        layer_norm: bool = True,
+    ) -> torch.nn.Sequential:
+        head_components = [
+            torch.nn.Dropout(0.1),
+            torch.nn.Linear(
+                self.transformer_model.config.hidden_size
+                * self.config.use_last_k_layers
+                if input_hidden is None
+                else input_hidden,
+                self.config.linears_hidden_size,
+            ),
+            activation2functions[activation],
+            torch.nn.Dropout(0.1),
+            torch.nn.Linear(
+                self.config.linears_hidden_size,
+                self.config.linears_hidden_size if last_hidden is None else last_hidden,
+            ),
+        ]
+        if layer_norm:
+            head_components.append(
+                torch.nn.LayerNorm(
+                    self.config.linears_hidden_size
+                    if last_hidden is None
+                    else last_hidden,
+                    self.transformer_model.config.layer_norm_eps,
+                )
+            )
+        return torch.nn.Sequential(*head_components)
+    def _mask_logits(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+        mask = mask.unsqueeze(-1)
+        if next(self.parameters()).dtype == torch.float16:
+            logits = logits * (1 - mask) - 65500 * mask
+        else:
+            logits = logits * (1 - mask) - 1e30 * mask
+        return logits
+    def _get_model_features(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: torch.Tensor,
+        token_type_ids: Optional[torch.Tensor],
+    ):
+        model_input = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "output_hidden_states": self.config.use_last_k_layers > 1,
+        }
+        if token_type_ids is not None:
+            model_input["token_type_ids"] = token_type_ids
+        model_output = self.transformer_model(**model_input)
+        if self.config.use_last_k_layers > 1:
+            model_features = torch.cat(
+                model_output[1][-self.config.use_last_k_layers :], dim=-1
+            )
+        else:
+            model_features = model_output[0]
+        return model_features
+    def compute_ned_end_logits(
+        self,
+        start_predictions,
+        start_labels,
+        model_features,
+        prediction_mask,
+        batch_size,
+    ) -> Optional[torch.Tensor]:
+        """Compute end-boundary logits, one row per positive start position.
+
+        Args:
+            start_predictions: Start positions used when the module is not in
+                training mode.
+            start_labels: Start positions used when ``self.training`` is true.
+            model_features: Per-sample features; indexed by sample and expanded
+                once per start position found in that sample.
+            prediction_mask: Per-sample mask; expanded like the features and
+                extended with ones before each start index.
+            batch_size: Number of samples, used to expand the index grid.
+
+        Returns:
+            The output of ``self.ned_end_classifier``, or ``None`` when no
+            position satisfies ``start_positions > 0``.
+        """
+        # todo: maybe when constraining on the spans,
+        #  we should not use a prediction_mask for the end tokens.
+        #  at least we should not during training imo
+        start_positions = start_labels if self.training else start_predictions
+        # Column index of every position with start_positions > 0; boolean
+        # indexing flattens them sample by sample.
+        start_positions_indices = (
+            torch.arange(start_positions.size(1), device=start_positions.device)
+            .unsqueeze(0)
+            .expand(batch_size, -1)[start_positions > 0]
+        ).to(start_positions.device)
+        if len(start_positions_indices) > 0:
+            # Repeat each sample's features once per start found in that sample,
+            # so row k lines up with start_positions_indices[k].
+            expanded_features = torch.cat(
+                [
+                    model_features[i].unsqueeze(0).expand(x, -1, -1)
+                    for i, x in enumerate(torch.sum(start_positions > 0, dim=-1))
+                    if x > 0
+                ],
+                dim=0,
+            ).to(start_positions_indices.device)
+            # Same expansion for the prediction mask.
+            expanded_prediction_mask = torch.cat(
+                [
+                    prediction_mask[i].unsqueeze(0).expand(x, -1)
+                    for i, x in enumerate(torch.sum(start_positions > 0, dim=-1))
+                    if x > 0
+                ],
+                dim=0,
+            ).to(expanded_features.device)
+            # mask all tokens before start_positions_indices ie, mask all tokens with
+            # indices < start_positions_indices with 1, ie. [range(x) for x in start_positions_indices]
+            # NOTE(review): the `if x > 0` filter below drops the row of any start
+            # located at index 0, which would leave fewer mask rows than feature
+            # rows -- presumably index 0 is never a valid start; confirm.
+            expanded_prediction_mask = torch.stack(
+                [
+                    torch.cat(
+                        [
+                            torch.ones(x, device=expanded_features.device),
+                            expanded_prediction_mask[i, x:],
+                        ]
+                    )
+                    for i, x in enumerate(start_positions_indices)
+                    if x > 0
+                ],
+                dim=0,
+            ).to(expanded_features.device)
+            end_logits = self.ned_end_classifier(
+                hidden_states=expanded_features,
+                start_positions=start_positions_indices,
+                p_mask=expanded_prediction_mask,
+            )
+            return end_logits
+        # No start position at all: there is nothing to score.
+        return None
+    def compute_relation_logits(
+        self,
+        model_entity_features,
+        special_symbols_features,
+    ) -> torch.Tensor:
+        model_subject_features = self.re_subject_projector(model_entity_features)
+        model_object_features = self.re_object_projector(model_entity_features)
+        special_symbols_start_representation = self.re_relation_projector(
+            special_symbols_features
+        )
+        re_logits = torch.einsum(
+            "bse,bde,bfe->bsdfe",
+            model_subject_features,
+            model_object_features,
+            special_symbols_start_representation,
+        )
+        re_logits = self.re_classifier(re_logits)
+        return re_logits
+    def compute_entity_logits(
+        self,
+        model_entity_features,
+        special_symbols_features,
+    ) -> torch.Tensor:
+        model_ed_features = self.re_entities_projector(model_entity_features)
+        special_symbols_ed_representation = self.re_definition_projector(
+            special_symbols_features
+        )
+        logits = torch.einsum(
+            "bce,bde->bcde",
+            model_ed_features,
+            special_symbols_ed_representation,
+        )
+        logits = self.re_ed_classifier(logits)
+        start_logits = self._mask_logits(
+            logits,
+            (model_entity_features == -100)
+            .all(2)
+            .long()
+            .unsqueeze(2)
+            .repeat(1, 1, torch.sum(model_entity_features, dim=1)[0].item()),
+        )
+        return logits
+    def compute_loss(self, logits, labels, mask=None):
+        logits = logits.view(-1, logits.shape[-1])
+        labels = labels.view(-1).long()
+        if mask is not None:
+            return self.criterion(logits[mask], labels[mask])
+        return self.criterion(logits, labels)
+    def compute_ned_end_loss(self, ned_end_logits, end_labels):
+        if ned_end_logits is None:
+            return 0
+        ned_end_labels = torch.zeros_like(end_labels)
+        ned_end_labels[end_labels == -100] = -100
+        ned_end_labels[end_labels > 0] = 1
+        return self.compute_loss(ned_end_logits, ned_end_labels)
+    def compute_ned_type_loss(
+        self,
+        disambiguation_labels,
+        re_ned_entities_logits,
+        ned_type_logits,
+        re_entities_logits,
+        entity_types,
+    ):
+        if self.entity_type_loss and self.relation_disambiguation_loss:
+            return self.compute_loss(disambiguation_labels, re_ned_entities_logits)
+        if self.entity_type_loss:
+            return self.compute_loss(
+                disambiguation_labels[:, :, :entity_types], ned_type_logits
+            )
+        if self.relation_disambiguation_loss:
+            return self.compute_loss(disambiguation_labels, re_entities_logits)
+        return 0
+    def compute_relation_loss(self, relation_labels, re_logits):
+        return self.compute_loss(
+            re_logits, relation_labels, relation_labels.view(-1) != -100
+        )
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        attention_mask: torch.Tensor,
+        token_type_ids: torch.Tensor,
+        prediction_mask: Optional[torch.Tensor] = None,
+        special_symbols_mask: Optional[torch.Tensor] = None,
+        special_symbols_mask_entities: Optional[torch.Tensor] = None,
+        start_labels: Optional[torch.Tensor] = None,
+        end_labels: Optional[torch.Tensor] = None,
+        disambiguation_labels: Optional[torch.Tensor] = None,
+        relation_labels: Optional[torch.Tensor] = None,
+        is_validation: bool = False,
+        is_prediction: bool = False,
+        *args,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Detect entity spans, optionally type/disambiguate them, and score relations.
+
+        Args:
+            input_ids: Passed to ``_get_model_features``; its first dimension
+                is taken as the batch size.
+            attention_mask: Passed to ``_get_model_features``.
+            token_type_ids: Passed to ``_get_model_features``.
+            prediction_mask: Mask applied to the start logits and handed to
+                ``compute_ned_end_logits``.
+            special_symbols_mask: Mask selecting the special-symbol features
+                used for relation scoring. Although annotated as optional it is
+                indexed unconditionally.
+            special_symbols_mask_entities: Mask OR-ed with
+                ``special_symbols_mask`` for entity scoring; only read when an
+                entity objective is enabled.
+            start_labels: Gold start labels; positive values mark starts.
+            end_labels: Gold end labels; rows that are entirely -100 along
+                axis 2 are dropped.
+            disambiguation_labels: Targets for the entity typing loss.
+            relation_labels: Targets for the relation loss.
+            is_validation: Use predicted (not gold) spans for relation scoring.
+            is_prediction: Same as ``is_validation``; when ``start_labels`` is
+                also given, the gold spans are used as the span predictions.
+
+        Returns:
+            A dict with ``batch_size`` and the logits, probabilities and
+            predictions of each head. When start, end and relation labels are
+            all given it also holds ``loss`` and the individual losses.
+        """
+        batch_size = input_ids.shape[0]
+        model_features = self._get_model_features(
+            input_ids, attention_mask, token_type_ids
+        )
+        # named entity detection
+        if is_prediction and start_labels is not None:
+            # Gold spans supplied at prediction time: skip the span classifiers
+            # and turn the labels into binary predictions.
+            ned_start_logits, ned_start_probabilities, ned_start_predictions = (
+                None,
+                None,
+                torch.zeros_like(start_labels),
+            )
+            ned_end_logits, ned_end_probabilities, ned_end_predictions = (
+                None,
+                None,
+                torch.zeros_like(end_labels),
+            )
+            ned_start_predictions[start_labels > 0] = 1
+            ned_end_predictions[end_labels > 0] = 1
+            # Drop the rows whose end labels are entirely -100.
+            ned_end_predictions = ned_end_predictions[~(end_labels == -100).all(2)]
+        else:
+            # start boundary prediction
+            ned_start_logits = self.ned_start_classifier(model_features)
+            ned_start_logits = self._mask_logits(
+                ned_start_logits, prediction_mask
+            )  # why?
+            ned_start_probabilities = torch.softmax(ned_start_logits, dim=-1)
+            ned_start_predictions = ned_start_probabilities.argmax(dim=-1)
+            # end boundary prediction
+            ned_start_labels = (
+                torch.zeros_like(start_labels) if start_labels is not None else None
+            )
+            # start_labels contain entity id at their position, we just need 1 for start of entity
+            if ned_start_labels is not None:
+                ned_start_labels[start_labels > 0] = 1
+            # compute end logits only if there are any start predictions.
+            # For each start prediction, n end predictions are made
+            ned_end_logits = self.compute_ned_end_logits(
+                ned_start_predictions,
+                ned_start_labels,
+                model_features,
+                prediction_mask,
+                batch_size,
+            )
+            # For each start prediction, n end predictions are made based on
+            # binary classification ie. argmax at each position.
+            # NOTE(review): compute_ned_end_logits returns None when there is no
+            # start position; softmax would then fail on None -- confirm whether
+            # that case can occur here.
+            ned_end_probabilities = torch.softmax(ned_end_logits, dim=-1)
+            ned_end_predictions = ned_end_probabilities.argmax(dim=-1)
+            if is_prediction or is_validation:
+                end_preds_count = ned_end_predictions.sum(1)
+                # If there are no end predictions for a start prediction, remove the start prediction
+                ned_start_predictions[ned_start_predictions == 1] = (
+                    end_preds_count != 0
+                ).long()
+                ned_end_predictions = ned_end_predictions[end_preds_count != 0]
+        if end_labels is not None:
+            # Drop the rows whose end labels are entirely -100.
+            end_labels = end_labels[~(end_labels == -100).all(2)]
+        # Gold spans drive the relation step during training, predicted spans
+        # during validation and prediction.
+        start_position, end_position = (
+            (start_labels, end_labels)
+            if (not is_prediction and not is_validation)
+            else (ned_start_predictions, ned_end_predictions)
+        )
+        # Number of starts per sample; used to regroup the flat per-start rows.
+        start_counts = (start_position > 0).sum(1)
+        ned_end_predictions = ned_end_predictions.split(start_counts.tolist())
+        # We can only predict relations if we have start and end predictions
+        if (end_position > 0).sum() > 0:
+            # Number of ends per start row.
+            ends_count = (end_position > 0).sum(1)
+            # One row per (start, end) pair: start features concatenated with
+            # end features along the last axis.
+            model_subject_features = torch.cat(
+                [
+                    torch.repeat_interleave(
+                        model_features[start_position > 0], ends_count, dim=0
+                    ),  # start position features
+                    torch.repeat_interleave(model_features, start_counts, dim=0)[
+                        end_position > 0
+                    ],  # end position features
+                ],
+                dim=-1,
+            )
+            # Number of (start, end) pairs per sample.
+            ents_count = torch.nn.utils.rnn.pad_sequence(
+                torch.split(ends_count, start_counts.tolist()),
+                batch_first=True,
+                padding_value=0,
+            ).sum(1)
+            # Regroup the pairs per sample and pad with -100 to a common length.
+            model_subject_features = torch.nn.utils.rnn.pad_sequence(
+                torch.split(model_subject_features, ents_count.tolist()),
+                batch_first=True,
+                padding_value=-100,
+            )
+            if is_validation or is_prediction:
+                # Keep at most the first 30 pairs per sample outside training.
+                model_subject_features = model_subject_features[:, :30, :]
+            # entity disambiguation. Here relation_disambiguation_loss would only be useful to
+            # reduce the number of candidate relations for the next step, but currently unused.
+            if self.entity_type_loss or self.relation_disambiguation_loss:
+                (re_ned_entities_logits) = self.compute_entity_logits(
+                    model_subject_features,
+                    model_features[
+                        special_symbols_mask | special_symbols_mask_entities
+                    ].view(batch_size, -1, model_features.shape[-1]),
+                )
+                # Count taken from the first sample only -- presumably identical
+                # across the batch; confirm.
+                entity_types = torch.sum(special_symbols_mask_entities, dim=1)[0].item()
+                ned_type_logits = re_ned_entities_logits[:, :, :entity_types]
+                re_entities_logits = re_ned_entities_logits[:, :, entity_types:]
+                if self.entity_type_loss:
+                    ned_type_probabilities = torch.softmax(ned_type_logits, dim=-1)
+                    ned_type_predictions = ned_type_probabilities.argmax(dim=-1)
+                    # NOTE(review): argmax is applied a second time here; also,
+                    # ned_type_probabilities / ned_type_predictions stay unbound
+                    # when only relation_disambiguation_loss is enabled -- confirm.
+                    ned_type_predictions = ned_type_predictions.argmax(dim=-1)
+                re_entities_probabilities = torch.softmax(re_entities_logits, dim=-1)
+                re_entities_predictions = re_entities_probabilities.argmax(dim=-1)
+            else:
+                # No entity objective: emit empty placeholders.
+                (
+                    ned_type_logits,
+                    ned_type_probabilities,
+                    re_entities_logits,
+                    re_entities_probabilities,
+                ) = (None, None, None, None)
+                ned_type_predictions, re_entities_predictions = (
+                    torch.zeros([batch_size, 1], dtype=torch.long).to(input_ids.device),
+                    torch.zeros([batch_size, 1], dtype=torch.long).to(input_ids.device),
+                )
+            # Compute relation logits
+            re_logits = self.compute_relation_logits(
+                model_subject_features,
+                model_features[special_symbols_mask].view(
+                    batch_size, -1, model_features.shape[-1]
+                ),
+            )
+            re_probabilities = torch.softmax(re_logits, dim=-1)
+            # we use a threshold instead of argmax in case it needs to be tweaked
+            re_predictions = re_probabilities[:, :, :, :, 1] > 0.5
+            # re_predictions = re_probabilities.argmax(dim=-1)
+            re_probabilities = re_probabilities[:, :, :, :, 1]
+        else:
+            # No end position at all: return zero-filled placeholders for the
+            # entity and relation outputs.
+            (
+                ned_type_logits,
+                ned_type_probabilities,
+                re_entities_logits,
+                re_entities_probabilities,
+            ) = (None, None, None, None)
+            ned_type_predictions, re_entities_predictions = (
+                torch.zeros([batch_size, 1], dtype=torch.long).to(input_ids.device),
+                torch.zeros([batch_size, 1], dtype=torch.long).to(input_ids.device),
+            )
+            re_logits, re_probabilities, re_predictions = (
+                torch.zeros(
+                    [batch_size, 1, 1, special_symbols_mask.sum(1)[0]], dtype=torch.long
+                ).to(input_ids.device),
+                torch.zeros(
+                    [batch_size, 1, 1, special_symbols_mask.sum(1)[0]], dtype=torch.long
+                ).to(input_ids.device),
+                torch.zeros(
+                    [batch_size, 1, 1, special_symbols_mask.sum(1)[0]], dtype=torch.long
+                ).to(input_ids.device),
+            )
+        # output build
+        output_dict = dict(
+            batch_size=batch_size,
+            ned_start_logits=ned_start_logits,
+            ned_start_probabilities=ned_start_probabilities,
+            ned_start_predictions=ned_start_predictions,
+            ned_end_logits=ned_end_logits,
+            ned_end_probabilities=ned_end_probabilities,
+            ned_end_predictions=ned_end_predictions,
+            ned_type_logits=ned_type_logits,
+            ned_type_probabilities=ned_type_probabilities,
+            ned_type_predictions=ned_type_predictions,
+            re_entities_logits=re_entities_logits,
+            re_entities_probabilities=re_entities_probabilities,
+            re_entities_predictions=re_entities_predictions,
+            re_logits=re_logits,
+            re_probabilities=re_probabilities,
+            re_predictions=re_predictions,
+        )
+        if (
+            start_labels is not None
+            and end_labels is not None
+            and relation_labels is not None
+        ):
+            # NOTE(review): ned_start_labels is only assigned in the non-gold
+            # branch above, and re_ned_entities_logits / entity_types only when
+            # relations were scored with an entity objective enabled; reaching
+            # this point through the other paths would fail -- confirm callers
+            # never combine those inputs.
+            ned_start_loss = self.compute_loss(ned_start_logits, ned_start_labels)
+            ned_end_loss = self.compute_ned_end_loss(ned_end_logits, end_labels)
+            if self.entity_type_loss or self.relation_disambiguation_loss:
+                ned_type_loss = self.compute_ned_type_loss(
+                    disambiguation_labels,
+                    re_ned_entities_logits,
+                    ned_type_logits,
+                    re_entities_logits,
+                    entity_types,
+                )
+            relation_loss = self.compute_relation_loss(relation_labels, re_logits)
+            # compute loss. We can skip the relation loss if we are in the first epochs (optional)
+            # The total is the plain average of the active losses.
+            if self.entity_type_loss or self.relation_disambiguation_loss:
+                output_dict["loss"] = (
+                    ned_start_loss + ned_end_loss + relation_loss + ned_type_loss
+                ) / 4
+                output_dict["ned_type_loss"] = ned_type_loss
+            else:
+                output_dict["loss"] = (
+                    ned_start_loss + ned_end_loss + relation_loss
+                ) / 3
+            output_dict["ned_start_loss"] = ned_start_loss
+            output_dict["ned_end_loss"] = ned_end_loss
+            output_dict["re_loss"] = relation_loss
+        return output_dict

models/relik-reader-aida-deberta-small/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06ecdbcc11050fe88db21ad7b1e032ff2f28a5a819cb7ed6b6b3a62937c67637
+size 577138490

models/relik-reader-aida-deberta-small/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "additional_special_tokens": [
+    "--NME--",
+    "[E-0]",
+    "[E-1]",
+    "[E-2]",
+    "[E-3]",
+    "[E-4]",
+    "[E-5]",
+    "[E-6]",
+    "[E-7]",
+    "[E-8]",
+    "[E-9]",
+    "[E-10]",
+    "[E-11]",
+    "[E-12]",
+    "[E-13]",
+    "[E-14]",
+    "[E-15]",
+    "[E-16]",
+    "[E-17]",
+    "[E-18]",
+    "[E-19]",
+    "[E-20]",
+    "[E-21]",
+    "[E-22]",
+    "[E-23]",
+    "[E-24]",
+    "[E-25]",
+    "[E-26]",
+    "[E-27]",
+    "[E-28]",
+    "[E-29]",
+    "[E-30]",
+    "[E-31]",
+    "[E-32]",
+    "[E-33]",
+    "[E-34]",
+    "[E-35]",
+    "[E-36]",
+    "[E-37]",
+    "[E-38]",
+    "[E-39]",
+    "[E-40]",
+    "[E-41]",
+    "[E-42]",
+    "[E-43]",
+    "[E-44]",
+    "[E-45]",
+    "[E-46]",
+    "[E-47]",
+    "[E-48]",
+    "[E-49]",
+    "[E-50]",
+    "[E-51]",
+    "[E-52]",
+    "[E-53]",
+    "[E-54]",
+    "[E-55]",
+    "[E-56]",
+    "[E-57]",
+    "[E-58]",
+    "[E-59]",
+    "[E-60]",
+    "[E-61]",
+    "[E-62]",
+    "[E-63]",
+    "[E-64]",
+    "[E-65]",
+    "[E-66]",
+    "[E-67]",
+    "[E-68]",
+    "[E-69]",
+    "[E-70]",
+    "[E-71]",
+    "[E-72]",
+    "[E-73]",
+    "[E-74]",
+    "[E-75]",
+    "[E-76]",
+    "[E-77]",
+    "[E-78]",
+    "[E-79]",
+    "[E-80]",
+    "[E-81]",
+    "[E-82]",
+    "[E-83]",
+    "[E-84]",
+    "[E-85]",
+    "[E-86]",
+    "[E-87]",
+    "[E-88]",
+    "[E-89]",
+    "[E-90]",
+    "[E-91]",
+    "[E-92]",
+    "[E-93]",
+    "[E-94]",
+    "[E-95]",
+    "[E-96]",
+    "[E-97]",
+    "[E-98]",
+    "[E-99]"
+  ],
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

models/relik-reader-aida-deberta-small/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

models/relik-reader-aida-deberta-small/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/relik-reader-aida-deberta-small/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,970 @@

+{
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "--NME--",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "[E-0]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "[E-1]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "[E-2]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "[E-3]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "[E-4]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "[E-5]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "[E-6]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "[E-7]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "[E-8]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128011": {
+      "content": "[E-9]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128012": {
+      "content": "[E-10]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128013": {
+      "content": "[E-11]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128014": {
+      "content": "[E-12]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128015": {
+      "content": "[E-13]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128016": {
+      "content": "[E-14]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128017": {
+      "content": "[E-15]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128018": {
+      "content": "[E-16]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128019": {
+      "content": "[E-17]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128020": {
+      "content": "[E-18]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128021": {
+      "content": "[E-19]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128022": {
+      "content": "[E-20]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128023": {
+      "content": "[E-21]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128024": {
+      "content": "[E-22]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128025": {
+      "content": "[E-23]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128026": {
+      "content": "[E-24]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128027": {
+      "content": "[E-25]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128028": {
+      "content": "[E-26]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128029": {
+      "content": "[E-27]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128030": {
+      "content": "[E-28]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128031": {
+      "content": "[E-29]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128032": {
+      "content": "[E-30]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128033": {
+      "content": "[E-31]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128034": {
+      "content": "[E-32]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128035": {
+      "content": "[E-33]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128036": {
+      "content": "[E-34]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128037": {
+      "content": "[E-35]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128038": {
+      "content": "[E-36]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128039": {
+      "content": "[E-37]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128040": {
+      "content": "[E-38]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128041": {
+      "content": "[E-39]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128042": {
+      "content": "[E-40]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128043": {
+      "content": "[E-41]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128044": {
+      "content": "[E-42]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128045": {
+      "content": "[E-43]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128046": {
+      "content": "[E-44]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128047": {
+      "content": "[E-45]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128048": {
+      "content": "[E-46]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128049": {
+      "content": "[E-47]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128050": {
+      "content": "[E-48]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128051": {
+      "content": "[E-49]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128052": {
+      "content": "[E-50]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128053": {
+      "content": "[E-51]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128054": {
+      "content": "[E-52]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128055": {
+      "content": "[E-53]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128056": {
+      "content": "[E-54]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128057": {
+      "content": "[E-55]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128058": {
+      "content": "[E-56]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128059": {
+      "content": "[E-57]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128060": {
+      "content": "[E-58]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128061": {
+      "content": "[E-59]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128062": {
+      "content": "[E-60]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128063": {
+      "content": "[E-61]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128064": {
+      "content": "[E-62]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128065": {
+      "content": "[E-63]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128066": {
+      "content": "[E-64]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128067": {
+      "content": "[E-65]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128068": {
+      "content": "[E-66]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128069": {
+      "content": "[E-67]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128070": {
+      "content": "[E-68]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128071": {
+      "content": "[E-69]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128072": {
+      "content": "[E-70]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128073": {
+      "content": "[E-71]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128074": {
+      "content": "[E-72]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128075": {
+      "content": "[E-73]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128076": {
+      "content": "[E-74]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128077": {
+      "content": "[E-75]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128078": {
+      "content": "[E-76]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128079": {
+      "content": "[E-77]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128080": {
+      "content": "[E-78]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128081": {
+      "content": "[E-79]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128082": {
+      "content": "[E-80]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128083": {
+      "content": "[E-81]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128084": {
+      "content": "[E-82]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128085": {
+      "content": "[E-83]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128086": {
+      "content": "[E-84]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128087": {
+      "content": "[E-85]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128088": {
+      "content": "[E-86]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128089": {
+      "content": "[E-87]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128090": {
+      "content": "[E-88]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128091": {
+      "content": "[E-89]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128092": {
+      "content": "[E-90]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128093": {
+      "content": "[E-91]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128094": {
+      "content": "[E-92]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128095": {
+      "content": "[E-93]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128096": {
+      "content": "[E-94]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128097": {
+      "content": "[E-95]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128098": {
+      "content": "[E-96]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128099": {
+      "content": "[E-97]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128100": {
+      "content": "[E-98]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "128101": {
+      "content": "[E-99]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "--NME--",
+    "[E-0]",
+    "[E-1]",
+    "[E-2]",
+    "[E-3]",
+    "[E-4]",
+    "[E-5]",
+    "[E-6]",
+    "[E-7]",
+    "[E-8]",
+    "[E-9]",
+    "[E-10]",
+    "[E-11]",
+    "[E-12]",
+    "[E-13]",
+    "[E-14]",
+    "[E-15]",
+    "[E-16]",
+    "[E-17]",
+    "[E-18]",
+    "[E-19]",
+    "[E-20]",
+    "[E-21]",
+    "[E-22]",
+    "[E-23]",
+    "[E-24]",
+    "[E-25]",
+    "[E-26]",
+    "[E-27]",
+    "[E-28]",
+    "[E-29]",
+    "[E-30]",
+    "[E-31]",
+    "[E-32]",
+    "[E-33]",
+    "[E-34]",
+    "[E-35]",
+    "[E-36]",
+    "[E-37]",
+    "[E-38]",
+    "[E-39]",
+    "[E-40]",
+    "[E-41]",
+    "[E-42]",
+    "[E-43]",
+    "[E-44]",
+    "[E-45]",
+    "[E-46]",
+    "[E-47]",
+    "[E-48]",
+    "[E-49]",
+    "[E-50]",
+    "[E-51]",
+    "[E-52]",
+    "[E-53]",
+    "[E-54]",
+    "[E-55]",
+    "[E-56]",
+    "[E-57]",
+    "[E-58]",
+    "[E-59]",
+    "[E-60]",
+    "[E-61]",
+    "[E-62]",
+    "[E-63]",
+    "[E-64]",
+    "[E-65]",
+    "[E-66]",
+    "[E-67]",
+    "[E-68]",
+    "[E-69]",
+    "[E-70]",
+    "[E-71]",
+    "[E-72]",
+    "[E-73]",
+    "[E-74]",
+    "[E-75]",
+    "[E-76]",
+    "[E-77]",
+    "[E-78]",
+    "[E-79]",
+    "[E-80]",
+    "[E-81]",
+    "[E-82]",
+    "[E-83]",
+    "[E-84]",
+    "[E-85]",
+    "[E-86]",
+    "[E-87]",
+    "[E-88]",
+    "[E-89]",
+    "[E-90]",
+    "[E-91]",
+    "[E-92]",
+    "[E-93]",
+    "[E-94]",
+    "[E-95]",
+    "[E-96]",
+    "[E-97]",
+    "[E-98]",
+    "[E-99]"
+  ],
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/config.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+_target_: relik.retriever.indexers.inmemory.InMemoryDocumentIndex
+documents:
+  _target_: relik.retriever.data.labels.Labels
+embeddings:
+  _target_: torch.Tensor
+name_or_dir: /media/data/EL/models/experiments/e5-small-15hard-400inbatch-64maxlen-32words-topics/2023-06-04/07-22-35/wandb/run-20230604_072319-3ql9q8oa/files/retriever/index
+device: cpu
+precision: null

models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/documents.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d367a0db7f8959d0d23f78d0af229856929a552d0195079422bf8afaaad2d70
+size 2813615153

models/relik-retriever-small-aida-blink-pretrain-omniencoder/document_index/embeddings.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fde55d5649350819a04dcbc242114486ccb31030df10f64b6b7213a983eecc0a
+size 4533909983

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/added_tokens.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "[CLS]": 101,
+  "[MASK]": 103,
+  "[PAD]": 0,
+  "[SEP]": 102,
+  "[UNK]": 100
+}

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_name_or_path": "intfloat/e5-small-v2",
+  "architectures": [
+    "GoldenRetrieverModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "auto_map": {
+    "AutoModel": "hf.GoldenRetrieverModel"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.34.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/hf.py ADDED Viewed

	@@ -0,0 +1,88 @@

+from typing import Tuple, Union
+import torch
+from transformers import PretrainedConfig
+from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
+from transformers.models.bert.modeling_bert import BertModel
+class GoldenRetrieverConfig(PretrainedConfig):
+    model_type = "bert"
+    def __init__(
+        self,
+        vocab_size=30522,
+        hidden_size=768,
+        num_hidden_layers=12,
+        num_attention_heads=12,
+        intermediate_size=3072,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=2,
+        initializer_range=0.02,
+        layer_norm_eps=1e-12,
+        pad_token_id=0,
+        position_embedding_type="absolute",
+        use_cache=True,
+        classifier_dropout=None,
+        **kwargs,
+    ):
+        super().__init__(pad_token_id=pad_token_id, **kwargs)
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
+        self.position_embedding_type = position_embedding_type
+        self.use_cache = use_cache
+        self.classifier_dropout = classifier_dropout
+class GoldenRetrieverModel(BertModel):
+    config_class = GoldenRetrieverConfig
+    def __init__(self, config, *args, **kwargs):
+        super().__init__(config)
+        self.layer_norm_layer = torch.nn.LayerNorm(
+            config.hidden_size, eps=config.layer_norm_eps
+        )
+    def forward(
+        self, **kwargs
+    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+        attention_mask = kwargs.get("attention_mask", None)
+        model_outputs = super().forward(**kwargs)
+        if attention_mask is None:
+            pooler_output = model_outputs.pooler_output
+        else:
+            token_embeddings = model_outputs.last_hidden_state
+            input_mask_expanded = (
+                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+            )
+            pooler_output = torch.sum(
+                token_embeddings * input_mask_expanded, 1
+            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+            pooler_output = self.layer_norm_layer(pooler_output)
+        if not kwargs.get("return_dict", True):
+            return (model_outputs[0], pooler_output) + model_outputs[2:]
+        return BaseModelOutputWithPoolingAndCrossAttentions(
+            last_hidden_state=model_outputs.last_hidden_state,
+            pooler_output=pooler_output,
+            past_key_values=model_outputs.past_key_values,
+            hidden_states=model_outputs.hidden_states,
+            attentions=model_outputs.attentions,
+            cross_attentions=model_outputs.cross_attentions,
+        )

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:201092855fe86eff5afb1b68ea9cdaf0af98579fbb7191ad87d9726bb95e5d1f
+size 133508078

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

models/relik-retriever-small-aida-blink-pretrain-omniencoder/question_encoder/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

scripts/setup.sh CHANGED Viewed

File without changes