File size: 6,865 Bytes
6c570a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import sys
import os
import pdb
import numpy as np
import random
import json
import shutil
import time
from scipy.stats import pearsonr
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import xgboost as xgb
from tqdm import tqdm
random.seed(42)


import gzip
import numpy as np
import pandas as pd
import requests
from io import BytesIO
from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer

from concrete.ml.sklearn import DecisionTreeClassifier as DecisionTreeClassifierZAMA

from concrete.ml.sklearn import LinearSVC as LinearSVCZAMA

from sklearn.svm import LinearSVR as LinearSVR
import time
from shutil import copyfile
from tempfile import TemporaryDirectory
import pickle
import os
import time
import numpy as np


def convert_numpy(obj):
    """Convert a NumPy scalar or array into its plain-Python counterpart.

    Args:
        obj: Any object.

    Returns:
        ``int`` for NumPy integer scalars, ``float`` for NumPy floating
        scalars, a (possibly nested) ``list`` for ``np.ndarray``, and *obj*
        itself, untouched, for everything else.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Anything that is not a recognised NumPy type is passed through as-is.
    return obj


class OnDiskNetwork:
    """Simulate a network on disk.

    Three temporary directories stand in for the three parties of a
    deployment: the server, the client and the developer ("dev").  "Sending"
    something means writing or copying a file into the receiver's directory.

    The object can be used as a context manager so that the temporary
    directories are removed even when an exception interrupts the exchange::

        with OnDiskNetwork() as network:
            ...

    Calling :meth:`cleanup` explicitly still works as before.
    """

    # File names used for the artefacts exchanged between the parties.
    _EVALUATION_KEYS_FILE = "serialized_evaluation_keys.ekl"
    _PREDICTION_FILE = "encrypted_prediction.enc"
    _SERVER_ARCHIVE = "server.zip"
    _CLIENT_ARCHIVE = "client.zip"

    def __init__(self):
        # Create 3 temporary folders for server, client and dev with tempfile
        self.server_dir = TemporaryDirectory()
        self.client_dir = TemporaryDirectory()
        self.dev_dir = TemporaryDirectory()

    def __enter__(self):
        """Return the network itself when entering a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Remove the temporary directories; exceptions are not suppressed."""
        self.cleanup()

    def client_send_evaluation_key_to_server(self, serialized_evaluation_keys):
        """Store the client's serialized evaluation keys in the server folder.

        Args:
            serialized_evaluation_keys: Bytes written verbatim to disk.
        """
        keys_path = os.path.join(self.server_dir.name, self._EVALUATION_KEYS_FILE)
        with open(keys_path, "wb") as f:
            f.write(serialized_evaluation_keys)

    def client_send_input_to_server_for_prediction(self, encrypted_input):
        """Send the input to the server and execute on the server in FHE.

        The evaluation keys previously stored by
        :meth:`client_send_evaluation_key_to_server` are read back, the server
        model is run on *encrypted_input*, and the encrypted result is written
        to the server folder.

        Args:
            encrypted_input: Serialized encrypted input passed to
                ``FHEModelServer.run``.

        Returns:
            Duration in seconds of the ``FHEModelServer`` construction and
            ``run`` call (file I/O is excluded).
        """
        keys_path = os.path.join(self.server_dir.name, self._EVALUATION_KEYS_FILE)
        with open(keys_path, "rb") as f:
            serialized_evaluation_keys = f.read()

        # perf_counter is monotonic, so the measured interval cannot be skewed
        # (or turn negative) when the system clock is adjusted mid-run.
        time_begin = time.perf_counter()
        encrypted_prediction = FHEModelServer(self.server_dir.name).run(
            encrypted_input, serialized_evaluation_keys
        )
        time_end = time.perf_counter()

        prediction_path = os.path.join(self.server_dir.name, self._PREDICTION_FILE)
        with open(prediction_path, "wb") as f:
            f.write(encrypted_prediction)
        return time_end - time_begin

    def dev_send_model_to_server(self):
        """Copy ``server.zip`` from the dev folder to the server folder."""
        copyfile(
            os.path.join(self.dev_dir.name, self._SERVER_ARCHIVE),
            os.path.join(self.server_dir.name, self._SERVER_ARCHIVE),
        )

    def server_send_encrypted_prediction_to_client(self):
        """Return the bytes of the encrypted prediction stored by the server."""
        prediction_path = os.path.join(self.server_dir.name, self._PREDICTION_FILE)
        with open(prediction_path, "rb") as f:
            return f.read()

    def dev_send_clientspecs_and_modelspecs_to_client(self):
        """Copy ``client.zip`` from the dev folder to the client folder."""
        copyfile(
            os.path.join(self.dev_dir.name, self._CLIENT_ARCHIVE),
            os.path.join(self.client_dir.name, self._CLIENT_ARCHIVE),
        )

    def cleanup(self):
        """Clean up the temporary folders."""
        self.server_dir.cleanup()
        self.client_dir.cleanup()
        self.dev_dir.cleanup()


def generate_fingerprint(smiles, radius=2, bits=512):
    """Build a Morgan fingerprint bit array for a SMILES string.

    Args:
        smiles: SMILES string handed to ``Chem.MolFromSmiles``.
        radius: Forwarded as ``radius`` to the fingerprint call.
        bits: Forwarded as ``nBits`` to the fingerprint call.

    Returns:
        ``np.array`` built from the fingerprint bit vector, or ``np.nan`` when
        ``Chem.MolFromSmiles`` returns ``None`` for *smiles*.
    """
    # NOTE(review): `Chem` and `AllChem` are not imported in the visible part
    # of this file -- presumably `from rdkit import Chem` and
    # `from rdkit.Chem import AllChem` are expected; confirm before use.
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is not None:
        bit_vector = AllChem.GetMorganFingerprintAsBitVect(
            molecule, radius=radius, nBits=bits
        )
        return np.array(bit_vector)
    return np.nan


def train_xgb_regressor(X_train, y_train, param_grid=None, verbose=10):
    """Grid-search an XGBoost regressor with 5-fold cross-validation.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training targets passed to ``GridSearchCV.fit``.
        param_grid: Mapping of ``XGBRegressor`` parameter names to candidate
            values.  When ``None`` a small built-in grid is used.
        verbose: Verbosity level forwarded to ``GridSearchCV``.

    Returns:
        A tuple ``(best_params, best_score, best_estimator)`` taken from the
        fitted search (``best_params_``, ``best_score_``, ``best_estimator_``).
    """
    # Imported locally because the module-level imports only bring in KFold;
    # without this the reference to GridSearchCV raised NameError on every
    # call.
    from sklearn.model_selection import GridSearchCV

    if param_grid is None:
        param_grid = {
            "max_depth": [3, 6],
            "learning_rate": [0.01, 0.1, 0.2],
            "n_estimators": [20],
            "colsample_bytree": [0.3, 0.7],
        }

    xgb_regressor = xgb.XGBRegressor(objective="reg:squarederror")

    # Shuffled folds with a fixed seed keep the search reproducible.
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    grid_search = GridSearchCV(
        estimator=xgb_regressor,
        param_grid=param_grid,
        cv=kfold,
        verbose=verbose,
        n_jobs=-1,  # use every available core
    )

    grid_search.fit(X_train, y_train)
    return (
        grid_search.best_params_,
        grid_search.best_score_,
        grid_search.best_estimator_,
    )


def evaluate_model(model, X_test, y_test):
    """Return the Pearson correlation between targets and model predictions.

    Args:
        model: Object exposing ``predict(X_test)``.
        X_test: Features handed to ``model.predict``.
        y_test: Reference targets compared against the predictions.

    Returns:
        The ``statistic`` attribute of ``scipy.stats.pearsonr(y_test, y_pred)``.
    """
    correlation = pearsonr(y_test, model.predict(X_test))
    return correlation.statistic


def setup_network(model_dev):
    """Create an on-disk network and save the model into its dev folder.

    Args:
        model_dev: Model handed to ``FHEModelDev`` together with the dev
            directory of the freshly created network.

    Returns:
        A tuple ``(network, fhemodel_dev)``: the new ``OnDiskNetwork`` and the
        ``FHEModelDev`` instance after ``save(via_mlir=True)`` was called.
    """
    network = OnDiskNetwork()
    dev_folder = network.dev_dir.name
    fhemodel_dev = FHEModelDev(dev_folder, model_dev)
    fhemodel_dev.save(via_mlir=True)
    return network, fhemodel_dev


def copy_directory(source, destination="deployment"):
    """Copy the contents of *source* into *destination*.

    The destination is created when missing.  Sub-directories are copied
    recursively (merging into existing ones); every other entry is copied with
    ``shutil.copy2``.

    Args:
        source: Path of the directory whose entries are copied.
        destination: Target directory; defaults to ``"deployment"``.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` when the source
        does not exist or any exception occurs while copying.  Exceptions are
        reported through the return value rather than raised.
    """
    try:
        # Nothing to do if there is no source to read from.
        if not os.path.exists(source):
            return False, "Source directory does not exist."

        # Make sure the target exists before copying into it.
        if not os.path.exists(destination):
            os.makedirs(destination)

        for entry_name in os.listdir(source):
            src_path = os.path.join(source, entry_name)
            dst_path = os.path.join(destination, entry_name)
            if not os.path.isdir(src_path):
                shutil.copy2(src_path, dst_path)
                continue
            # dirs_exist_ok is available from Python 3.8
            shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
    except Exception as error:
        return False, str(error)
    else:
        return True, None


def client_server_interaction(network, fhemodel_client, X_client):
    """Run the encrypted round trip for every row of *X_client*.

    Each row is encrypted by the client, sent to the server through
    *network*, and the returned encrypted prediction is decrypted again.

    Args:
        network: Object providing
            ``client_send_input_to_server_for_prediction`` and
            ``server_send_encrypted_prediction_to_client``.
        fhemodel_client: Object providing ``quantize_encrypt_serialize`` and
            ``deserialize_decrypt_dequantize``.
        X_client: 2-D array indexed as ``X_client[[i], :]``; one row is
            processed per iteration.

    Returns:
        A tuple ``(decrypted_predictions, execution_time)`` of two lists with
        one entry per row: the first element of each decrypted result and the
        value returned by the server call for that row.
    """
    predictions = []
    timings = []
    row_count = X_client.shape[0]
    for row in tqdm(range(row_count)):
        # Keep the row two-dimensional (shape (1, n_features)).
        sample = X_client[[row], :]
        ciphertext = fhemodel_client.quantize_encrypt_serialize(sample)

        elapsed = network.client_send_input_to_server_for_prediction(ciphertext)
        timings.append(elapsed)

        encrypted_result = network.server_send_encrypted_prediction_to_client()
        decrypted = fhemodel_client.deserialize_decrypt_dequantize(encrypted_result)
        predictions.append(decrypted[0])
    return predictions, timings


def train_zama(X_train, y_train):
    """Fit and compile a Concrete ML ``LinearSVC`` model.

    Progress messages are printed before fitting, before compiling and once
    both steps are finished.

    Args:
        X_train: Training features; also passed to ``compile``.
        y_train: Training labels.

    Returns:
        The fitted and compiled ``LinearSVCZAMA`` instance.
    """
    # DecisionTreeClassifierZAMA is imported by this module as an alternative.
    classifier = LinearSVCZAMA()

    print("Training Zama model...")
    classifier.fit(X_train, y_train)

    print("compiling model...")
    classifier.compile(X_train)

    print("done")
    return classifier


def time_prediction(model, X_sample):
    """Measure how long ``model.predict(X_sample, fhe="execute")`` takes.

    Args:
        model: Object whose ``predict`` accepts an ``fhe`` keyword argument.
        X_sample: Input handed to ``predict``.

    Returns:
        Elapsed time of the ``predict`` call in seconds.  The prediction
        itself is discarded.
    """
    # perf_counter is monotonic and high-resolution, unlike the wall clock,
    # so the interval stays valid even if the system time is adjusted.
    time_begin = time.perf_counter()
    model.predict(X_sample, fhe="execute")
    return time.perf_counter() - time_begin


def setup_client(network, key_dir):
    """Create an FHE client for *network* and generate its keys.

    Args:
        network: Object whose ``client_dir.name`` is the client directory
            passed to ``FHEModelClient``.
        key_dir: Forwarded to ``FHEModelClient`` as ``key_dir``.

    Returns:
        A tuple ``(fhemodel_client, serialized_evaluation_keys)``: the client
        after ``generate_private_and_evaluation_keys()`` and the value
        returned by its ``get_serialized_evaluation_keys()``.
    """
    client_folder = network.client_dir.name
    client = FHEModelClient(client_folder, key_dir=key_dir)
    client.generate_private_and_evaluation_keys()
    return client, client.get_serialized_evaluation_keys()