Spaces:
Sleeping
Sleeping
Chris Hoge
committed on
Commit
•
bfa1717
1
Parent(s):
9dad835
Initial commit
Browse files- Dockerfile +13 -0
- README.md +80 -11
- _wsgi.py +126 -0
- data/cnn.pt +3 -0
- data/vocab_obj.pt +3 -0
- docker-compose.yml +29 -0
- requirements.txt +6 -0
- sentiment_api.py +52 -0
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.8-slim

# Log straight to stdout (no buffering); the app listens on port 7860,
# the default port expected by Hugging Face Spaces.
ENV PYTHONUNBUFFERED=True \
    PORT=7860

WORKDIR /app
# Copy only the requirements first so the pip layer is cached
# between builds when the application code changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

COPY . ./

# Serve the Label Studio ML backend via gunicorn; --preload loads the model
# once in the master process before forking.
CMD exec gunicorn --preload --bind :$PORT --workers 1 --threads 8 --timeout 0 _wsgi:app
README.md
CHANGED
@@ -1,11 +1,80 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Quickstart
|
2 |
+
|
3 |
+
Build and start Machine Learning backend on `http://localhost:9090`
|
4 |
+
|
5 |
+
```bash
|
6 |
+
docker-compose up
|
7 |
+
```
|
8 |
+
|
9 |
+
Check if it works:
|
10 |
+
|
11 |
+
```bash
|
12 |
+
$ curl http://localhost:9090/health
|
13 |
+
{"status":"UP"}
|
14 |
+
```
|
15 |
+
|
16 |
+
Then connect running backend to Label Studio using Machine Learning settings.
|
17 |
+
|
18 |
+
|
19 |
+
## Writing your own model
|
20 |
+
1. Place your scripts for model training & inference inside the root directory. Follow the [API guidelines](#api-guidelines) described below. You can put everything in a single file, or create two separate ones, say `my_training_module.py` and `my_inference_module.py`
|
21 |
+
|
22 |
+
2. Write down your python dependencies in `requirements.txt`
|
23 |
+
|
24 |
+
3. Open `_wsgi.py` and make your configurations under `init_model_server` arguments:
|
25 |
+
```python
|
26 |
+
from my_training_module import training_script
|
27 |
+
from my_inference_module import InferenceModel
|
28 |
+
|
29 |
+
init_model_server(
|
30 |
+
create_model_func=InferenceModel,
|
31 |
+
train_script=training_script,
|
32 |
+
...
|
33 |
+
```
|
34 |
+
|
35 |
+
4. Make sure you have docker & docker-compose installed on your system, then run
|
36 |
+
```bash
|
37 |
+
docker-compose up --build
|
38 |
+
```
|
39 |
+
|
40 |
+
## API guidelines
|
41 |
+
|
42 |
+
|
43 |
+
#### Inference module
|
44 |
+
In order to create module for inference, you have to declare the following class:
|
45 |
+
|
46 |
+
```python
|
47 |
+
from htx.base_model import BaseModel
|
48 |
+
|
49 |
+
# use BaseModel inheritance provided by pyheartex SDK
|
50 |
+
class MyModel(BaseModel):
|
51 |
+
|
52 |
+
# Describe input types (Label Studio object tags names)
|
53 |
+
INPUT_TYPES = ('Image',)
|
54 |
+
|
55 |
+
# Describe output types (Label Studio control tags names)
|
56 |
+
    OUTPUT_TYPES = ('Choices',)
|
57 |
+
|
58 |
+
def load(self, resources, **kwargs):
|
59 |
+
"""Here you load the model into the memory. resources is a dict returned by training script"""
|
60 |
+
self.model_path = resources["model_path"]
|
61 |
+
self.labels = resources["labels"]
|
62 |
+
|
63 |
+
def predict(self, tasks, **kwargs):
|
64 |
+
"""Here you create list of model results with Label Studio's prediction format, task by task"""
|
65 |
+
predictions = []
|
66 |
+
for task in tasks:
|
67 |
+
# do inference...
|
68 |
+
predictions.append(task_prediction)
|
69 |
+
return predictions
|
70 |
+
```
|
71 |
+
|
72 |
+
#### Training module
|
73 |
+
Training could be made in a separate environment. The only one convention is that data iterator and working directory are specified as input arguments for training function which outputs JSON-serializable resources consumed later by `load()` function in inference module.
|
74 |
+
|
75 |
+
```python
|
76 |
+
def train(input_iterator, working_dir, **kwargs):
|
77 |
+
"""Here you gather input examples and output labels and train your model"""
|
78 |
+
resources = {"model_path": "some/model/path", "labels": ["aaa", "bbb", "ccc"]}
|
79 |
+
return resources
|
80 |
+
```
|
_wsgi.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import json  # FIX: was missing, yet json.load() is used in get_kwargs_from_config()
import argparse
import logging
import logging.config

# Send every log record to stdout; the root logger stays at ERROR unless
# overridden by --log-level below.
logging.config.dictConfig({
    "version": 1,
    "formatters": {
        "standard": {
            "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": "DEBUG",
            "stream": "ext://sys.stdout",
            "formatter": "standard"
        }
    },
    "root": {
        "level": "ERROR",
        "handlers": [
            "console"
        ],
        "propagate": True
    }
})

from label_studio_ml.api import init_app
from sentiment_api import SentimentModel


# Optional JSON file with extra model-init kwargs, located next to this script.
_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')


def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
    """Return model-initialization kwargs loaded from *config_path*.

    Returns an empty dict when the file does not exist; asserts that the
    parsed JSON is an object (dict) rather than a list/scalar.
    """
    if not os.path.exists(config_path):
        return dict()
    with open(config_path) as f:
        config = json.load(f)
    assert isinstance(config, dict)
    return config


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Label studio')
    parser.add_argument(
        '-p', '--port', dest='port', type=int, default=9090,
        help='Server port')
    parser.add_argument(
        '--host', dest='host', type=str, default='0.0.0.0',
        help='Server host')
    parser.add_argument(
        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
        help='Additional LabelStudioMLBase model initialization kwargs')
    parser.add_argument(
        '-d', '--debug', dest='debug', action='store_true',
        help='Switch debug mode')
    parser.add_argument(
        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
        help='Logging level')
    parser.add_argument(
        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
        help='Directory where models are stored (relative to the project directory)')
    parser.add_argument(
        '--check', dest='check', action='store_true',
        help='Validate model instance before launching server')

    args = parser.parse_args()

    # setup logging level
    if args.log_level:
        logging.root.setLevel(args.log_level)

    def isfloat(value):
        """Return True when *value* parses as a float."""
        try:
            float(value)
            return True
        except ValueError:
            return False

    def parse_kwargs():
        """Coerce --kwargs KEY=VAL pairs to int/bool/float where possible."""
        param = dict()
        for k, v in args.kwargs:
            if v.isdigit():
                param[k] = int(v)
            elif v == 'True' or v == 'true':
                param[k] = True
            # FIX: original compared against 'False' twice, so a lowercase
            # 'false' was never recognized as a boolean and stayed a string.
            elif v == 'False' or v == 'false':
                param[k] = False
            elif isfloat(v):
                param[k] = float(v)
            else:
                param[k] = v
        return param

    kwargs = get_kwargs_from_config()

    if args.kwargs:
        kwargs.update(parse_kwargs())

    if args.check:
        print('Check "' + SentimentModel.__name__ + '" instance creation..')
        model = SentimentModel(**kwargs)

    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', args.model_dir),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379),
        **kwargs
    )

    app.run(host=args.host, port=args.port, debug=args.debug)

else:
    # for uWSGI use
    app = init_app(
        model_class=SentimentModel,
        model_dir=os.environ.get('MODEL_DIR', os.path.dirname(__file__)),
        redis_queue=os.environ.get('RQ_QUEUE_NAME', 'default'),
        redis_host=os.environ.get('REDIS_HOST', 'localhost'),
        redis_port=os.environ.get('REDIS_PORT', 6379)
    )
data/cnn.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c72c9a96e9a22f97d53dc3403397b5f7790cd83b0ebadcf0766378902c62713
|
3 |
+
size 27607519
|
data/vocab_obj.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b28acfc849558893989d00a107e6bb776e81e321f89c080b8734dd6e439558d8
|
3 |
+
size 367155
|
docker-compose.yml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
version: "3.8"

services:
  # Redis backs the RQ job queue the ML backend uses for training tasks.
  redis:
    image: redis:alpine
    container_name: redis
    hostname: redis
    volumes:
      - "./data/redis:/data"
    expose:
      - 6379
  # The Label Studio ML backend itself, built from the local Dockerfile.
  server:
    container_name: server
    build: .
    environment:
      - MODEL_DIR=/data/models
      - RQ_QUEUE_NAME=default
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - LABEL_STUDIO_USE_REDIS=true
    ports:
      - 9090:9090
    depends_on:
      - redis
    links:
      - redis
    volumes:
      - "./data/server:/data"
      - "./logs:/tmp"
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
torchtext
|
3 |
+
label-studio-ml
|
4 |
+
redis
|
5 |
+
rq
|
6 |
+
gunicorn
|
sentiment_api.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from label_studio_ml.model import LabelStudioMLBase
from sentiment_cnn import SentimentCNN

import torch
import torch.nn as nn
import torchtext


class SentimentModel(LabelStudioMLBase):
    """Label Studio ML backend that pre-annotates text with CNN sentiment scores."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Pre-trained CNN weights plus the vocabulary built at training time.
        self.sentiment_model = SentimentCNN(
            state_dict='data/cnn.pt',
            vocab='data/vocab_obj.pt')

        # Map the classifier's output index onto the labeling-config choices.
        self.label_map = {
            1: "Positive",
            0: "Negative"}

    def predict(self, tasks, **kwargs):
        """Return one Label Studio "choices" pre-annotation per task."""
        # Get annotation tag first, and extract from_name/to_name keys from the
        # labeling config to make predictions
        from_name, schema = next(iter(self.parsed_label_config.items()))
        to_name = schema['to_name'][0]
        data_name = schema['inputs'][0]['value']

        results = []
        for task in tasks:
            # load the data and make a prediction with the model
            text = task['data'][data_name]
            predicted_class, predicted_prob = self.sentiment_model.predict_sentiment(text)
            print("%s\nprediction: %s probability: %s" % (text, predicted_class, predicted_prob))

            choice = self.label_map[predicted_class]

            # for each task, return classification results in the form of
            # "choices" pre-annotations
            results.append({
                'score': float(predicted_prob),
                'result': [{
                    'from_name': from_name,
                    'to_name': to_name,
                    'type': 'choices',
                    'value': {
                        'choices': [
                            choice
                        ]
                    },
                }]
            })
        return results