binaryaaron committed on
Commit
ef30bfe
1 Parent(s): e521731

Add files to make sealion HF Endpoints / TGI compatible

Browse files

Hi! you can see the version that we hosted for the event on Nov 5th [here](https://huggingface.co/humane-intelligence/gemma2-9b-cpt-sealionv3-instruct-endpoint/tree/main) - this contribution should let others deploy this quickly via HF Endpoints.

Files changed (2) hide show
  1. handler.py +34 -0
  2. requirements.txt +1 -0
handler.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any, Optional
2
+ import transformers
3
+ import torch
4
+
5
# Default generation cap (in new tokens) used when a request supplies no
# overriding pipeline parameters.
MAX_TOKENS = 4096
6
+
7
class EndpointHandler:
    """Custom Hugging Face Inference Endpoints handler wrapping a
    text-generation pipeline for the SEA-LION instruct model.

    The model is loaded once at startup; each request is a dict of the form
    ``{"inputs": ..., "parameters": {...}}``.
    """

    def __init__(self, path: str = "") -> None:
        """Build the text-generation pipeline.

        :param path: local model directory supplied by the Endpoints runtime.
            NOTE(review): currently ignored — the model id is hard-coded below,
            so weights are pulled from the Hub regardless; confirm intended.
        """
        self.pipeline: transformers.Pipeline = transformers.pipeline(
            "text-generation",
            model="ai-singapore/gemma2-9b-cpt-sealionv3-instruct",
            # bfloat16 halves memory vs fp32; device_map="auto" lets
            # accelerate place the weights across available devices.
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run one generation request.

        :param data: request payload with
            - ``"inputs"``: prompt text or chat-message list (required);
            - ``"parameters"``: optional dict of pipeline keyword arguments.
        :return: the pipeline output, a list of ``{"generated_text": ...}``
            dicts (string-valued, hence ``Dict[str, Any]`` rather than the
            previously annotated float values).
        :raises KeyError: if ``"inputs"`` is missing from the payload.
        """
        inputs = data.pop("inputs")
        # Copy so we never mutate the caller's parameters dict.
        parameters: Dict[str, Any] = dict(data.pop("parameters", None) or {})
        # Always cap generation length unless the caller overrides it
        # explicitly; previously the MAX_TOKENS cap was dropped entirely
        # whenever any parameters were supplied.
        parameters.setdefault("max_new_tokens", MAX_TOKENS)
        return self.pipeline(inputs, **parameters)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ transformers[sklearn,sentencepiece,audio,vision]==4.46.1