# binoua's picture
# chore: better accuracy
# 55714b8
import shutil
import sys
from pathlib import Path
from concrete.ml.deployment import FHEModelDev
from concrete.ml.deployment import FHEModelClient
def compile_and_make_it_deployable(model_dev, X_train):
    """Compile `model_dev` to FHE, save deployment artifacts, and report key size.

    NOTE(review): this function reads the module-level globals `x_test`,
    `y_test` and `average_precision_score` for the accuracy check — it only
    works when called from this script after those names are defined.

    Args:
        model_dev: a fitted concrete-ml model (e.g. ConcreteDecisionTreeClassifier).
        X_train: training inputs, used as the compilation input-set.
    """
    path_to_model = Path("compiled_model")
    # Compile into FHE (p_error is the tolerated error probability passed to concrete-ml)
    model_dev.compile(X_train, p_error=0.01)
    # Saving the model — remove any stale artifacts from a previous run first
    shutil.rmtree(path_to_model, ignore_errors=True)
    fhemodel_dev = FHEModelDev(path_to_model, model_dev)
    fhemodel_dev.save(via_mlir=True)
    # To see the size of the key
    fhemodel_client = FHEModelClient(path_to_model)
    # Generate the keys
    fhemodel_client.generate_private_and_evaluation_keys()
    evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()
    # FIX: sys.getsizeof() measures the Python object (payload plus object
    # header), not the serialized payload; len() is the actual byte count.
    print(f"Your keys will be {len(evaluation_keys) / 1024 / 1024}-megabytes long")
    # Check accuracy with p_error, using fast FHE simulation (no real encryption)
    y_pred_concrete = model_dev.predict_proba(x_test, fhe="simulate")[:, 1]
    concrete_average_precision = average_precision_score(y_test, y_pred_concrete)
    print(f"Concrete average precision score (simulate): {concrete_average_precision:0.2f}")
# Spam-classifier dataset, taken from https://github.com/zama-ai/concrete-ml/blob/main/docs/advanced_examples/DecisionTreeClassifier.ipynb
import numpy
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch the OpenML spam dataset (data_id=44) as plain numpy arrays.
features, classes = fetch_openml(data_id=44, as_frame=False, cache=True, return_X_y=True)
# Labels arrive as strings; cast them to integer class ids.
classes = classes.astype(numpy.int64)

# Hold out 15% of the samples for testing, seeded for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(features, classes, test_size=0.15, random_state=42)
# Find best hyper parameters with cross validation
from sklearn.model_selection import GridSearchCV
from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier

# Candidate hyper parameters to evaluate (single value per key here, so this
# is effectively a 10-fold CV assessment of one configuration).
param_grid = {"max_features": [None], "min_samples_leaf": [10], "min_samples_split": [100], "max_depth": [None]}

estimator = ConcreteDecisionTreeClassifier()
grid_search = GridSearchCV(estimator, param_grid, cv=10, scoring="average_precision", error_score="raise", n_jobs=1)
gs_results = grid_search.fit(x_train, y_train)
print("Best hyper parameters:", gs_results.best_params_)
print("Best score:", gs_results.best_score_)
# Build the model with best hyper parameters and retrain on the full train set.
best = gs_results.best_params_
model_dev = ConcreteDecisionTreeClassifier(
    max_features=best["max_features"],
    min_samples_leaf=best["min_samples_leaf"],
    min_samples_split=best["min_samples_split"],
    max_depth=best["max_depth"],
    n_bits=6,
).fit(x_train, y_train)

# Compute average precision on test
from sklearn.metrics import average_precision_score

# pylint: disable=no-member
y_pred_concrete = model_dev.predict_proba(x_test)[:, 1]
concrete_average_precision = average_precision_score(y_test, y_pred_concrete)
print(f"Concrete average precision score: {concrete_average_precision:0.2f}")

# Compile to FHE and write the deployment artifacts to disk.
compile_and_make_it_deployable(model_dev, x_train)
print("Your model is ready to be deployable.")