|
''' |
|
ART Gradio Example App [Evasion] |
|
|
|
To run: |
|
- clone the repository |
|
- execute: gradio examples/gradio_app.py or python examples/gradio_app.py |
|
- navigate to local URL e.g. http://127.0.0.1:7860 |
|
''' |
|
|
|
import gradio as gr |
|
import numpy as np |
|
from carbon_theme import Carbon |
|
|
|
import numpy as np |
|
import torch |
|
import transformers |
|
|
|
from art.estimators.classification.hugging_face import HuggingFaceClassifierPyTorch |
|
from art.attacks.evasion import ProjectedGradientDescentPyTorch, AdversarialPatchPyTorch |
|
from art.utils import load_dataset |
|
|
|
from art.attacks.poisoning import PoisoningAttackBackdoor |
|
from art.attacks.poisoning.perturbations import insert_image |
|
|
|
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Extra stylesheet handed to gr.Blocks(css=...). The ".svelte-*" selectors
# target class names generated by the Gradio front end.
css = """

.input-image { margin: auto !important }
.plot-padding { padding: 20px; }
.eta-bar.svelte-1occ011.svelte-1occ011 {
    background: #ccccff !important;
}
.center-text { text-align: center !important }
.larger-gap { gap: 100px !important; }
.symbols { text-align: center !important; margin: auto !important; }

div.svelte-15lo0d8>*, div.svelte-15lo0d8>.form > * {
    min-width: 0px !important;
}
"""
|
|
|
def sample_CIFAR10():
    '''
    Build the gallery content for the CIFAR-10 preview.

    Loads CIFAR-10 through ``art.utils.load_dataset`` and returns a list of
    ``(image, label_name)`` tuples for the first ten training samples.
    '''
    class_names = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]

    (images, one_hot_labels), (_, _), _, _ = load_dataset('cifar10')
    images = np.transpose(images, (0, 3, 1, 2)).astype(np.float32)
    targets = np.argmax(one_hot_labels, axis=1)

    # transpose(1, 2, 0) undoes the axis move above, so each gallery image has
    # the same axis order load_dataset delivered it in.
    return [
        (image.transpose(1, 2, 0), class_names[targets[idx]])
        for idx, image in enumerate(images[:10])
    ]
|
|
|
def clf_evasion_evaluate(*args):
    '''
    Run a classification task evaluation

    Builds the DeiT classifier, loads its weights from
    ``./state_dicts/deit_cifar_base_model.pt``, picks one CIFAR-10 training
    image per class, attacks those images with the selected evasion attack and
    reports predictions and accuracy before and after the attack.

    :param args: positional values, in this order:
        ``attack`` ("PGD" or "Adversarial Patch"), ``max_iter``, ``eps``,
        ``eps_step``, ``x_location``, ``y_location``, ``patch_height``,
        ``patch_width``. ``eps``/``eps_step`` are only read by the PGD attack;
        the location and patch size values are only read by the patch attack,
        so callers may pass placeholders for the values their attack ignores.
    :return: ``(benign_gallery, adversarial_gallery, delta_gallery, clean_acc, adv_acc)``
        where the first two are lists of ``(image, predicted_label_name)``
        tuples, ``delta_gallery`` is an array of perturbation (PGD) or patch
        images, and the last two are accuracies on the selected images.
    :raises ValueError: if ``attack`` is not one of the supported attack names.
    '''
    supported_attacks = ("PGD", "Adversarial Patch")

    attack = args[0]
    # UI sliders can deliver non-integral numbers; iteration counts have to be
    # whole numbers, so truncate here.
    attack_max_iter = int(args[1])
    attack_eps = args[2]
    attack_eps_steps = args[3]
    x_location = args[4]
    y_location = args[5]
    patch_height = args[6]
    patch_width = args[7]

    # Fail fast, before the model and dataset are loaded. Previously an unknown
    # attack name only surfaced as an UnboundLocalError at the return statement.
    if attack not in supported_attacks:
        raise ValueError(
            f"Unsupported attack {attack!r}; expected one of {supported_attacks}"
        )

    model = transformers.AutoModelForImageClassification.from_pretrained(
        'facebook/deit-tiny-distilled-patch16-224',
        ignore_mismatched_sizes=True,
        num_labels=10
    )
    # 32x32 inputs scaled by 7 give the 224x224 resolution named in the checkpoint.
    upsampler = torch.nn.Upsample(scale_factor=7, mode='nearest')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_fn = torch.nn.CrossEntropyLoss()

    hf_model = HuggingFaceClassifierPyTorch(
        model=model,
        loss=loss_fn,
        optimizer=optimizer,
        input_shape=(3, 32, 32),
        nb_classes=10,
        clip_values=(0, 1),
        processor=upsampler
    )
    model_checkpoint_path = './state_dicts/deit_cifar_base_model.pt'
    hf_model.model.load_state_dict(torch.load(model_checkpoint_path, map_location=device))

    (x_train, y_train), (_, _), _, _ = load_dataset('cifar10')
    x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
    y_train = np.argmax(y_train, axis=1)

    # Keep the first `samples_per_class` training images of every class.
    classes = np.unique(y_train)
    samples_per_class = 1
    x_subset = np.concatenate([x_train[y_train == c][:samples_per_class] for c in classes])
    y_subset = np.concatenate([y_train[y_train == c][:samples_per_class] for c in classes])

    label_names = [
        'airplane',
        'automobile',
        'bird',
        'cat',
        'deer',
        'dog',
        'frog',
        'horse',
        'ship',
        'truck',
    ]

    # Predictions on the unmodified images.
    outputs = hf_model.predict(x_subset)
    clean_preds = np.argmax(outputs, axis=1)
    clean_acc = np.mean(clean_preds == y_subset)
    benign_gallery_out = [
        (im.transpose(1, 2, 0), label_names[pred])
        for im, pred in zip(x_subset, clean_preds)
    ]

    if attack == "PGD":
        attacker = ProjectedGradientDescentPyTorch(hf_model, max_iter=attack_max_iter,
                                                   eps=attack_eps, eps_step=attack_eps_steps)
        x_adv = attacker.generate(x_subset)

        # Shift and amplify the perturbation so that it is visible as an image.
        delta = ((x_subset - x_adv) + attack_eps) * 10
        if delta.max() > 1:
            low, high = np.min(delta), np.max(delta)
            if high > low:
                delta = (delta - low) / (high - low)
            else:
                # Constant perturbation: min-max scaling would divide by zero.
                delta = np.zeros_like(delta)
        delta = np.clip(delta, 0, 1)
        delta_gallery_out = delta.transpose(0, 2, 3, 1)
    else:  # attack == "Adversarial Patch"
        scale_min = 0.3
        scale_max = 1.0
        rotation_max = 0
        learning_rate = 5000.
        # Location and patch size are pixel counts; coerce the slider values
        # to int before handing them to the attack.
        attacker = AdversarialPatchPyTorch(hf_model, scale_max=scale_max,
                                           scale_min=scale_min,
                                           rotation_max=rotation_max,
                                           learning_rate=learning_rate,
                                           max_iter=attack_max_iter, patch_type='square',
                                           patch_location=(int(x_location), int(y_location)),
                                           patch_shape=(3, int(patch_height), int(patch_width)))
        patch, _ = attacker.generate(x_subset)
        x_adv = attacker.apply_patch(x_subset, scale=0.3)

        delta_gallery_out = np.expand_dims(patch, 0).transpose(0, 2, 3, 1)

    # Predictions on the attacked images (shared by both attacks).
    outputs = hf_model.predict(x_adv)
    adv_preds = np.argmax(outputs, axis=1)
    adv_acc = np.mean(adv_preds == y_subset)
    adv_gallery_out = [
        (im.transpose(1, 2, 0), label_names[pred])
        for im, pred in zip(x_adv, adv_preds)
    ]

    return benign_gallery_out, adv_gallery_out, delta_gallery_out, clean_acc, adv_acc
|
|
|
def show_params(type):
    '''
    Return a Column update that is visible for every model type except "Example".
    '''
    # The parameter name shadows the builtin but is part of the call signature,
    # so it is kept as is.
    return gr.Column(visible=(type != "Example"))
|
|
|
|
|
|
|
# NOTE(review): carbon_theme is instantiated but the Blocks below use a theme
# referenced by name; kept so the module-level name stays available.
carbon_theme = Carbon()

# Page layout: header, introduction, CIFAR-10 preview, then one accordion per
# attack. Each accordion has its own parameter widgets, result galleries and an
# "Evaluate" button wired to clf_evasion_evaluate.
with gr.Blocks(css=css, theme='Tshackelton/IBMPlex-DenseReadable') as demo:
    import art
    text = art.__version__

    with gr.Row(elem_classes="custom-text"):
        with gr.Column(scale=1,):
            gr.Image(value="./art_lfai.png", show_label=False, show_download_button=False, width=100, show_share_button=False)
        with gr.Column(scale=2):
            gr.Markdown("<h1>⚔️ Red-teaming HuggingFace with ART [Evasion]</h1>", elem_classes="plot-padding")

    gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Red-teaming in AI is an activity where we masquerade
    as evil attackers 😈 and attempt to find vulnerabilities in our AI models. Identifying scenarios where
    our AI models do not work as expected, or fail, is important as it helps us better understand
    its limitations and vulnerability when deployed in the real world 🧐</p>''')
    gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ By attacking our AI models ourselves, we can better understand the risks associated with use
    in the real world and implement mechanisms which can mitigate and protect our model. The example below demonstrates a
    common red-team workflow to assess model vulnerability to evasion attacks ⚔️</p>''')

    gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Check out the full suite of features provided by ART <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
    target="_blank">here</a>. To dive further into evasion attacks with Hugging Face and ART, check out our
    <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox/blob/main/notebooks/hugging_face_evasion.ipynb"
    target="_blank">notebook</a>. Also feel free to contribute and give our repo a ⭐.</i></p>''')

    gr.Markdown('''<hr/>''')

    with gr.Row(elem_classes=["larger-gap", "custom-text"]):
        with gr.Column(scale=1):
            gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ First lets set the scene. You have a dataset of images, such as CIFAR-10.</p>''')
            gr.Markdown('''<p style="font-size: 18px; text-align: justify"><i>Note: CIFAR-10 images are low resolution images which span 10 different categories as shown.</i></p>''')
            gr.Markdown('''<p style="font-size: 20px; text-align: justify">ℹ️ Your goal is to have an AI model capable of classifying these images. So you
            train a model on this dataset, or use a pre-trained model from Hugging Face,
            such as Meta's Distilled Data-efficient Image Transformer.</p>''')
        with gr.Column(scale=1):
            gr.Markdown('''
            <p style="font-size: 20px;"><b>Hugging Face dataset:</b>
            <a href="https://huggingface.co/datasets/cifar10" target="_blank">CIFAR-10</a></p>
            <p style="font-size: 18px; padding-left: 20px;"><i>CIFAR-10 labels:</i>
            <i>{airplane, automobile, bird, cat, deer, dog,
            frog, horse, ship, truck}</i>
            </p>
            <p style="font-size: 20px;"><b>Hugging Face model:</b><br/>
            <a href="https://huggingface.co/facebook/deit-tiny-distilled-patch16-224"
            target="_blank">facebook/deit-tiny-distilled-patch16-224</a></p>
            <br/>
            <p style="font-size: 20px;">👀 take a look at the sample images from the CIFAR-10 dataset and their respective labels.</p>
            ''')
        with gr.Column(scale=1):
            gr.Gallery(label="CIFAR-10", preview=True, value=sample_CIFAR10(), height=420)

    gr.Markdown('''<hr/>''')

    gr.Markdown('''<p style="text-align: justify; font-size: 18px">ℹ️ Now as a responsible AI expert, you wish to assert that your model is not vulnerable to
    attacks which might manipulate the prediction. For instance, ships become classified as birds. To do this, you will deploy
    adversarial attacks against your own model and assess its performance.</p>''')

    gr.Markdown('''<p style="text-align: justify; font-size: 18px">ℹ️ Below are two common types of evasion attack. Both create adversarial images, which at first glance, seem the same as the original images,
    however they contain subtle changes which cause the AI model to make incorrect predictions.</p><br/>''')

    # ---- Projected Gradient Descent panel ----
    with gr.Accordion("Projected Gradient Descent", open=False, elem_classes="custom-text"):
        gr.Markdown('''This attack uses the PGD optimization algorithm to identify the optimal perturbations
        to add to an image (i.e. changing pixel values) to cause the model to misclassify images. See more
        <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
        target="_blank">here</a>.''')

        with gr.Row():

            with gr.Column(scale=1):
                pgd_attack = gr.Textbox(visible=True, value="PGD", label="Attack", interactive=False)
                # step=1 keeps the iteration count a whole number.
                pgd_max_iter = gr.Slider(minimum=1, maximum=10, step=1, label="Max iterations", value=4)
                pgd_eps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon", value=0.3)
                pgd_eps_steps = gr.Slider(minimum=0.0001, maximum=1, label="Epsilon steps", value=0.03)
                bt_eval_pgd = gr.Button("Evaluate")

            with gr.Column(scale=5):
                with gr.Row(elem_classes='symbols'):
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br>''')
                        pgd_original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
                        benign_output = gr.Label(num_top_classes=3, visible=False)
                        pgd_clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)
                    with gr.Column(scale=1, min_width=0, elem_classes='symbols'):
                        gr.Markdown('''➕''')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the calculated perturbations for attacking the model (black pixels indicate little to no perturbation).</i></p>''')
                        pgd_delta_gallery = gr.Gallery(label="Added perturbation", preview=False, show_download_button=True)
                    with gr.Column(scale=1, min_width=0):
                        gr.Markdown('''🟰''', elem_classes='symbols')
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
                        pgd_adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
                        adversarial_output = gr.Label(num_top_classes=3, visible=False)
                        pgd_robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)

        # clf_evasion_evaluate reads eight positional inputs. The last four
        # (patch location and size) are not used by PGD, so the attack textbox
        # is passed as a placeholder for each of them.
        bt_eval_pgd.click(clf_evasion_evaluate,
                          inputs=[pgd_attack, pgd_max_iter, pgd_eps, pgd_eps_steps,
                                  pgd_attack, pgd_attack, pgd_attack, pgd_attack],
                          outputs=[pgd_original_gallery, pgd_adversarial_gallery, pgd_delta_gallery,
                                   pgd_clean_accuracy, pgd_robust_accuracy])

    gr.Markdown('''<br/>''')

    # ---- Adversarial Patch panel ----
    with gr.Accordion("Adversarial Patch", open=False, elem_classes="custom-text"):
        gr.Markdown('''This attack optimizes pixels in a patch which can be overlayed on an image, causing a model to misclassify. See more
        <a href="https://github.com/Trusted-AI/adversarial-robustness-toolbox"
        target="_blank">here</a>.''')

        with gr.Row():

            with gr.Column(scale=1):
                patch_attack = gr.Textbox(visible=True, value="Adversarial Patch", label="Attack", interactive=False)
                # step=1 keeps iteration count, location and size whole numbers.
                patch_max_iter = gr.Slider(minimum=1, maximum=1000, step=1, label="Max iterations", value=10)
                x_location = gr.Slider(minimum=1, maximum=32, step=1, label="Location (x)", value=1)
                y_location = gr.Slider(minimum=1, maximum=32, step=1, label="Location (y)", value=1)
                patch_height = gr.Slider(minimum=1, maximum=32, step=1, label="Patch height", value=12)
                patch_width = gr.Slider(minimum=1, maximum=32, step=1, label="Patch width", value=12)
                eval_btn_patch = gr.Button("Evaluate")

            with gr.Column(scale=3):
                with gr.Row(elem_classes='symbols'):
                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The unmodified, original CIFAR-10 images, with model predictions.</i></p><br><br>''')
                        patch_original_gallery = gr.Gallery(label="Original", preview=False, show_download_button=True)
                        patch_clean_accuracy = gr.Number(label="Clean Accuracy", precision=2)

                    with gr.Column(scale=1, min_width=0, elem_classes='symbols'):
                        gr.Markdown('''➕''')

                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>Visual representation of the optimized patch for attacking the model.</i></p><br><br>''')
                        patch_delta_gallery = gr.Gallery(label="Patches", preview=True, show_download_button=True)

                    with gr.Column(scale=1, min_width=0):
                        gr.Markdown('''🟰''', elem_classes='symbols')

                    with gr.Column(scale=10):
                        gr.Markdown('''<p style="font-size: 18px"><i>The original image (with optimized perturbations applied) gives us an adversarial image which fools the model.</i></p>''')
                        patch_adversarial_gallery = gr.Gallery(label="Adversarial", preview=False, show_download_button=True)
                        patch_robust_accuracy = gr.Number(label="Robust Accuracy", precision=2)

        # eps / eps_step are not used by the patch attack; the PGD sliders are
        # passed only to fill those two positional slots.
        eval_btn_patch.click(clf_evasion_evaluate,
                             inputs=[patch_attack, patch_max_iter, pgd_eps, pgd_eps_steps,
                                     x_location, y_location, patch_height, patch_width],
                             outputs=[patch_original_gallery, patch_adversarial_gallery, patch_delta_gallery,
                                      patch_clean_accuracy, patch_robust_accuracy])

    gr.Markdown('''<br/>''')
|
|
|
if __name__ == "__main__":
    # Alternative launch configuration, kept for reference (binds to all
    # interfaces on port 7777):
    # demo.launch(show_api=False, debug=True, share=False,
    #             server_name="0.0.0.0",
    #             server_port=7777,
    #             ssl_verify=False,
    #             max_threads=20)

    # Start the app with Gradio's default settings.
    demo.launch()