Upload model and tool

Browse files

Files changed (9) hide show

__init__.py +0 -0
config.json +17 -14
pair_classification.py +0 -33
pair_classification_tool.py +42 -0
pytorch_model.bin +2 -2
tokenizer.json +0 -0
tokenizer_config.json +2 -4
tool_config.json +3 -0
vocab.txt +0 -0

__init__.py ADDED Viewed

File without changes

config.json CHANGED Viewed

@@ -1,33 +1,36 @@
 {
   "architectures": [
     "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
-  "custom_pipelines": {
-    "pair-classification": {
-      "impl": "pair_classification.PairClassificationPipeline",
-      "pt": [
-        "AutoModelForSequenceClassification"
-      ],
-      "tf": []
-    }
-  },
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 32,
   "initializer_range": 0.02,
-  "intermediate_size": 37,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
-  "num_attention_heads": 4,
-  "num_hidden_layers": 5,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.29.0.dev0",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 99
 }

 {
+  "_name_or_path": "sgugger/bert-finetuned-mrpc",
   "architectures": [
     "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
+  "finetuning_task": "mrpc",
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "not_equivalent",
+    "1": "equivalent"
+  },
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "equivalent": 1,
+    "not_equivalent": 0
+  },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.29.0.dev0",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 28996
 }

pair_classification.py DELETED Viewed

@@ -1,33 +0,0 @@
-import numpy as np
-from transformers import Pipeline
-def softmax(outputs):  # Softmax of `outputs` taken over its last axis.
-    maxes = np.max(outputs, axis=-1, keepdims=True)  # Maximum along the last axis; keepdims lets it broadcast below.
-    shifted_exp = np.exp(outputs - maxes)  # Every exponent is <= 0 after the shift, so np.exp cannot overflow.
-    return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)  # Normalise so the last axis sums to 1.
-class PairClassificationPipeline(Pipeline):  # Pipeline that classifies `text` together with an optional `second_text`.
-    def _sanitize_parameters(self, **kwargs):  # Only `second_text` is picked out of the call kwargs.
-        preprocess_kwargs = {}
-        if "second_text" in kwargs:
-            preprocess_kwargs["second_text"] = kwargs["second_text"]
-        return preprocess_kwargs, {}, {}  # Preprocess kwargs first; the other two dicts are always empty.
-    def preprocess(self, text, second_text=None):  # Tokenize `text`, passing `second_text` as its pair.
-        return self.tokenizer(text, text_pair=second_text, return_tensors=self.framework)
-    def _forward(self, model_inputs):  # Call the model with the tokenizer output unpacked as keyword arguments.
-        return self.model(**model_inputs)
-    def postprocess(self, model_outputs):  # Build a dict with the keys "label", "score" and "logits".
-        logits = model_outputs.logits[0].numpy()  # Only index 0 of the logits is used.
-        probabilities = softmax(logits)
-        best_class = np.argmax(probabilities)  # Index of the highest probability.
-        label = self.model.config.id2label[best_class]
-        score = probabilities[best_class].item()  # .item() converts the NumPy scalar to a plain Python number.
-        logits = logits.tolist()  # Returned as a plain list rather than an ndarray.
-        return {"label": label, "score": score, "logits": logits}

pair_classification_tool.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import torch
+from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
+from transformers.tools import PipelineTool
+class TextPairClassificationTool(PipelineTool):  # Tool that predicts a label and score for a pair of texts.
+    default_checkpoint = "sgugger/bert-finetuned-mrpc"  # presumably used when no model is supplied; confirm against PipelineTool.
+    pre_processor_class = AutoTokenizer
+    model_class = AutoModelForSequenceClassification
+    description = (  # Template text: post_init() replaces the {labels} placeholder.
+        "classifies if two texts in English are similar or not using the labels {labels}. It takes two inputs named "
+        "`text` and `second_text` which should be in English and returns a dictionary with two keys named 'label' "
+        "(the predicted label ) and 'score' (the probability associated to it)."  # NOTE(review): stray space before ")" in "(the predicted label )".
+    )
+    def post_init(self):  # Fill {labels} in the description with the label names from the model config.
+        if isinstance(self.model, str):  # self.model is still a checkpoint name here, so load its config by name.
+            config = AutoConfig.from_pretrained(self.model)
+        else:
+            config = self.model.config
+        labels = list(config.label2id.keys())
+        if len(labels) > 1:
+            labels = [f"'{label}'" for label in labels]  # Wrap each label name in single quotes.
+            labels_string = ", ".join(labels[:-1])
+            labels_string += f", and {labels[-1]}"  # NOTE(review): with exactly two labels this reads "'a', and 'b'".
+        else:
+            raise ValueError("Not enough labels.")  # Raised when the config lists fewer than two labels.
+        self.description = self.description.replace("{labels}", labels_string)
+    def encode(self, text, second_text):  # Pass both texts to the pre-processor, requesting PyTorch tensors.
+        return self.pre_processor(text, second_text, return_tensors="pt")
+    def decode(self, outputs):  # Return a dict with the keys "label" and "score", read from index 0 of the logits.
+        logits = outputs.logits
+        scores = torch.nn.functional.softmax(logits, dim=-1)  # Probabilities over the last dimension.
+        label_id = torch.argmax(logits[0]).item()  # Softmax is monotonic, so the argmax of the logits is also the top score.
+        label = self.model.config.id2label[label_id]
+        return {"label": label, "score": scores[0][label_id].item()}

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:882ec9af8732f10b0b2a63bcff2d0b6d245e542dbf9f89143322149fbfd2562e
-size 251775

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d51a9228c2bfe086be5020b9627e5693324d9f65e7e99bfdb5a1952d213cafa
+size 433320053

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,11 +1,9 @@
 {
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
-  "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,

 {
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_lower_case": false,
   "mask_token": "[MASK]",
+  "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,

tool_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "custom_tools": {"text-pair-classification": "pair_classification_tool.TextPairClassificationTool"}
+}

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff