krplatz
/

gemma-med3-9b

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

krplatz committed on Sep 3

Commit

7d810d6

•

1 Parent(s): 91f01f3

Update handler.py

Files changed (1) hide show

handler.py +30 -9

handler.py CHANGED Viewed

@@ -1,9 +1,30 @@
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-# Example of loading a model with the ignore_mismatched_sizes flag
-model = AutoModelForSequenceClassification.from_pretrained(
-    "krplatz/gemma-med3-9b",
-    ignore_mismatched_sizes=True
-)
-tokenizer = AutoTokenizer.from_pretrained("krplatz/gemma-med3-9b")

+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+import torch
+class Gemma2Handler:
+    """Callable wrapper that runs a ``text-classification`` pipeline.
+
+    The tokenizer and an ``AutoModelForSequenceClassification`` model are
+    loaded once in ``__init__``; each call forwards its inputs to the
+    pipeline and returns the pipeline's predictions.
+    """
+
+    def __init__(self, model_dir):
+        """Load the tokenizer and model from ``model_dir`` and build the pipeline.
+
+        Args:
+            model_dir: Location handed to both ``from_pretrained`` calls.
+        """
+        # Load the tokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+        # Load the model with the `ignore_mismatched_sizes` flag.
+        # NOTE(review): with this flag, checkpoint weights whose shapes do not
+        # match the classification model are skipped instead of raising, so a
+        # mismatched head would presumably be freshly initialised -- confirm
+        # the checkpoint really contains a trained classification head.
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            model_dir,
+            ignore_mismatched_sizes=True,
+        )
+        # Initialize the pipeline
+        self.pipeline = pipeline(
+            "text-classification",
+            model=self.model,
+            tokenizer=self.tokenizer,
+            device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
+        )
+
+    def __call__(self, inputs):
+        """Run inference and return the pipeline's predictions.
+
+        Args:
+            inputs: Either raw pipeline input (a string or list of strings),
+                or a request payload dict of the form
+                ``{"inputs": ..., "parameters": {...}}``. For a payload dict,
+                the value under ``"inputs"`` is classified and the optional
+                ``"parameters"`` mapping is forwarded as keyword arguments.
+
+        Returns:
+            Whatever ``self.pipeline`` returns for the given inputs.
+        """
+        call_kwargs = {}
+        # A request payload wraps the text under "inputs"; handing the wrapper
+        # itself to the pipeline would fail, so unwrap it first. Any other
+        # value is passed through unchanged, exactly as before.
+        if isinstance(inputs, dict) and "inputs" in inputs:
+            call_kwargs = inputs.get("parameters") or {}
+            inputs = inputs["inputs"]
+        return self.pipeline(inputs, **call_kwargs)
+
+
+# Custom handlers are looked up by the class name `EndpointHandler`; expose the
+# handler under that name as well so the existing name keeps working.
+EndpointHandler = Gemma2Handler
+# Factory hook that builds the handler for a model directory.
+# NOTE(review): the original comment says the Hugging Face Inference Toolkit
+# calls this function; confirm -- custom handlers are normally discovered via
+# an `EndpointHandler` class in handler.py.
+def get_pipeline(model_dir):
+    """Return a ``Gemma2Handler`` constructed from ``model_dir``."""
+    handler = Gemma2Handler(model_dir)
+    return handler