Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 6,865 Bytes
6c570a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
import sys
import os
import pdb
import numpy as np
import random
import json
import shutil
import time
from scipy.stats import pearsonr
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import xgboost as xgb
from tqdm import tqdm
random.seed(42)
import gzip
import numpy as np
import pandas as pd
import requests
from io import BytesIO
from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer
from concrete.ml.sklearn import DecisionTreeClassifier as DecisionTreeClassifierZAMA
from concrete.ml.sklearn import LinearSVC as LinearSVCZAMA
from sklearn.svm import LinearSVR as LinearSVR
import time
from shutil import copyfile
from tempfile import TemporaryDirectory
import pickle
import os
import time
import numpy as np
def convert_numpy(obj):
    """Convert a NumPy value into its plain-Python counterpart.

    NumPy integer scalars become ``int``, NumPy floating scalars become
    ``float`` and arrays become (possibly nested) lists via ``tolist()``.
    Any other object is handed back unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    return obj
class OnDiskNetwork:
    """Simulate a network on disk.

    Three temporary directories stand in for the server, the client and the
    dev machine; "sending" something means writing or copying a file into
    the receiving party's directory.
    """

    def __init__(self):
        # Create 3 temporary folders for server, client and dev with tempfile
        self.server_dir = TemporaryDirectory()
        self.client_dir = TemporaryDirectory()
        self.dev_dir = TemporaryDirectory()

    def client_send_evaluation_key_to_server(self, serialized_evaluation_keys):
        """Send the public key to the server.

        Writes the given bytes to ``serialized_evaluation_keys.ekl`` in the
        server directory.
        """
        key_path = os.path.join(
            self.server_dir.name, "serialized_evaluation_keys.ekl"
        )
        with open(key_path, "wb") as f:
            f.write(serialized_evaluation_keys)

    def client_send_input_to_server_for_prediction(self, encrypted_input):
        """Send the input to the server and execute on the server in FHE.

        Reads the evaluation keys previously stored in the server directory,
        runs the server model on ``encrypted_input`` and stores the result
        in ``encrypted_prediction.enc`` in the server directory.

        Returns:
            The elapsed time of the server run, in seconds.
        """
        key_path = os.path.join(
            self.server_dir.name, "serialized_evaluation_keys.ekl"
        )
        with open(key_path, "rb") as f:
            serialized_evaluation_keys = f.read()

        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        # NOTE: the timed region includes constructing FHEModelServer.
        time_begin = time.perf_counter()
        encrypted_prediction = FHEModelServer(self.server_dir.name).run(
            encrypted_input, serialized_evaluation_keys
        )
        time_end = time.perf_counter()

        prediction_path = os.path.join(
            self.server_dir.name, "encrypted_prediction.enc"
        )
        with open(prediction_path, "wb") as f:
            f.write(encrypted_prediction)
        return time_end - time_begin

    def dev_send_model_to_server(self):
        """Send the model to the server.

        Copies ``server.zip`` from the dev directory to the server directory.
        """
        copyfile(
            os.path.join(self.dev_dir.name, "server.zip"),
            os.path.join(self.server_dir.name, "server.zip"),
        )

    def server_send_encrypted_prediction_to_client(self):
        """Send the encrypted prediction to the client.

        Returns:
            The bytes stored in ``encrypted_prediction.enc`` in the server
            directory.
        """
        prediction_path = os.path.join(
            self.server_dir.name, "encrypted_prediction.enc"
        )
        with open(prediction_path, "rb") as f:
            return f.read()

    def dev_send_clientspecs_and_modelspecs_to_client(self):
        """Send the clientspecs and evaluation key to the client.

        Copies ``client.zip`` from the dev directory to the client directory.
        """
        copyfile(
            os.path.join(self.dev_dir.name, "client.zip"),
            os.path.join(self.client_dir.name, "client.zip"),
        )

    def cleanup(self):
        """Clean up the temporary folders."""
        self.server_dir.cleanup()
        self.client_dir.cleanup()
        self.dev_dir.cleanup()
def generate_fingerprint(smiles, radius=2, bits=512):
    """Return the Morgan fingerprint bit vector of ``smiles`` as a NumPy array.

    Args:
        smiles: String passed to ``Chem.MolFromSmiles``.
        radius: Forwarded as ``radius`` to the fingerprint call.
        bits: Forwarded as ``nBits`` to the fingerprint call.

    Returns:
        ``np.array`` built from the fingerprint bit vector, or ``np.nan``
        when ``Chem.MolFromSmiles`` returns ``None``.
    """
    # NOTE(review): `Chem` and `AllChem` are not imported in the visible part
    # of this file -- presumably RDKit's `rdkit.Chem` and `rdkit.Chem.AllChem`;
    # confirm they are brought into scope elsewhere.
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is not None:
        bit_vector = AllChem.GetMorganFingerprintAsBitVect(
            molecule, radius=radius, nBits=bits
        )
        return np.array(bit_vector)
    return np.nan
def train_xgb_regressor(X_train, y_train, param_grid=None, verbose=10):
    """Grid-search an XGBoost regressor with 5-fold cross-validation.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training targets passed to ``GridSearchCV.fit``.
        param_grid: Hyper-parameter grid to search. When ``None``, a small
            default grid over ``max_depth``, ``learning_rate``,
            ``n_estimators`` and ``colsample_bytree`` is used.
        verbose: Verbosity level forwarded to ``GridSearchCV``.

    Returns:
        A tuple ``(best_params, best_score, best_estimator)`` taken from the
        fitted grid search.
    """
    # GridSearchCV is not imported at module level; import it here so the
    # function does not fail with a NameError.
    from sklearn.model_selection import GridSearchCV

    if param_grid is None:
        param_grid = {
            "max_depth": [3, 6],
            "learning_rate": [0.01, 0.1, 0.2],
            "n_estimators": [20],
            "colsample_bytree": [0.3, 0.7],
        }

    xgb_regressor = xgb.XGBRegressor(objective="reg:squarederror")
    # Fixed random_state keeps the fold assignment reproducible.
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    grid_search = GridSearchCV(
        estimator=xgb_regressor,
        param_grid=param_grid,
        cv=kfold,
        verbose=verbose,
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    return (
        grid_search.best_params_,
        grid_search.best_score_,
        grid_search.best_estimator_,
    )
def evaluate_model(model, X_test, y_test):
    """Score ``model`` by the Pearson correlation of its predictions.

    Calls ``model.predict(X_test)`` and returns the ``statistic`` attribute
    of ``pearsonr(y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    correlation = pearsonr(y_test, predictions)
    return correlation.statistic
def setup_network(model_dev):
    """Create an on-disk network and save the dev model into its dev folder.

    Builds a fresh ``OnDiskNetwork``, wraps ``model_dev`` in an
    ``FHEModelDev`` pointed at the network's dev directory and calls its
    ``save(via_mlir=True)``.

    Returns:
        A tuple ``(network, fhemodel_dev)``.
    """
    on_disk_network = OnDiskNetwork()
    dev_directory = on_disk_network.dev_dir.name
    dev_side_model = FHEModelDev(dev_directory, model_dev)
    dev_side_model.save(via_mlir=True)
    return on_disk_network, dev_side_model
def copy_directory(source, destination="deployment"):
    """Copy the contents of ``source`` into ``destination``.

    The destination is created when it does not exist. Sub-directories are
    copied recursively (merging into existing ones); other entries are
    copied with ``shutil.copy2``.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` when the source
        is missing or any exception is raised while copying.
    """
    try:
        # Nothing to do without a source.
        if not os.path.exists(source):
            return False, "Source directory does not exist."

        # Make sure there is somewhere to copy into.
        if not os.path.exists(destination):
            os.makedirs(destination)

        for entry_name in os.listdir(source):
            src_path = os.path.join(source, entry_name)
            dst_path = os.path.join(destination, entry_name)
            if not os.path.isdir(src_path):
                shutil.copy2(src_path, dst_path)
                continue
            # dirs_exist_ok requires Python 3.8+
            shutil.copytree(src_path, dst_path, dirs_exist_ok=True)
    except Exception as error:
        return False, str(error)
    else:
        return True, None
def client_server_interaction(network, fhemodel_client, X_client):
    """Run every row of ``X_client`` through the simulated client/server loop.

    For each row: the client quantizes, encrypts and serializes the row, the
    network runs the server prediction on it, and the client deserializes,
    decrypts and dequantizes the returned prediction.

    Returns:
        A tuple ``(decrypted_predictions, execution_time)``: a list holding
        the first element of each decrypted result, and a list holding the
        value returned by the network for each server run.
    """
    predictions, timings = [], []
    n_samples = X_client.shape[0]
    for row in tqdm(range(n_samples)):
        # Index with a list to keep the sample two-dimensional.
        sample = X_client[[row], :]
        ciphertext = fhemodel_client.quantize_encrypt_serialize(sample)

        elapsed = network.client_send_input_to_server_for_prediction(ciphertext)
        timings.append(elapsed)

        encrypted_result = network.server_send_encrypted_prediction_to_client()
        result = fhemodel_client.deserialize_decrypt_dequantize(encrypted_result)
        predictions.append(result[0])
    return predictions, timings
def train_zama(X_train, y_train):
    """Fit and compile a Concrete ML ``LinearSVC`` on the training data.

    The model is fitted on ``(X_train, y_train)`` and then compiled with
    ``X_train``; progress messages are printed along the way.

    Returns:
        The fitted and compiled model.
    """
    # Alternative estimator imported by this file: DecisionTreeClassifierZAMA()
    zama_model = LinearSVCZAMA()

    print("Training Zama model...")
    zama_model.fit(X_train, y_train)

    print("compiling model...")
    zama_model.compile(X_train)

    print("done")
    return zama_model
def time_prediction(model, X_sample):
    """Measure how long one ``model.predict(X_sample, fhe="execute")`` takes.

    Args:
        model: Object exposing ``predict(X, fhe=...)``.
        X_sample: Input forwarded to ``predict``.

    Returns:
        The elapsed time of the call, in seconds. The prediction itself is
        discarded.
    """
    # perf_counter() is monotonic, so the interval cannot be distorted (or
    # turn negative) because of system clock adjustments.
    time_begin = time.perf_counter()
    model.predict(X_sample, fhe="execute")
    return time.perf_counter() - time_begin
def setup_client(network, key_dir):
    """Create the FHE client and produce its serialized evaluation keys.

    Builds an ``FHEModelClient`` from the network's client directory (with
    keys kept under ``key_dir``), generates the private and evaluation keys
    and fetches the serialized evaluation keys.

    Returns:
        A tuple ``(fhemodel_client, serialized_evaluation_keys)``.
    """
    client_directory = network.client_dir.name
    client = FHEModelClient(client_directory, key_dir=key_dir)
    client.generate_private_and_evaluation_keys()
    return client, client.get_serialized_evaluation_keys()
|