{ "cells": [ { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from torch.utils.data import Dataset, DataLoader, Subset\n", "from torchvision import transforms, datasets\n", "import os\n", "from PIL import Image\n", "from tqdm.auto import tqdm\n", "import torch.nn.functional as F" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "\n", "class CNN(nn.Module):\n", " def __init__(self):\n", " super(CNN, self).__init__()\n", " # Convolutional layers\n", " self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)\n", " self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)\n", " self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)\n", " \n", " # Pooling layer\n", " self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)\n", " \n", " # Fully connected layers\n", " self.fc1 = nn.Linear(64 * (224 // 8) * (224 // 8), 64) # Adjusted based on pooling layers\n", " self.fc2 = nn.Linear(64, 2) # 2 classes for binary classification\n", " \n", " def forward(self, x):\n", " # Convolutional layers with relu activation and pooling\n", " x = self.pool(F.relu(self.conv1(x)))\n", " x = self.pool(F.relu(self.conv2(x)))\n", " x = self.pool(F.relu(self.conv3(x)))\n", " \n", " # Flatten for fully connected layers\n", " x = torch.flatten(x, 1)\n", " \n", " # Fully connected layers with relu activation\n", " x = F.relu(self.fc1(x))\n", " x = self.fc2(x)\n", " \n", " return x" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "transform = transforms.Compose([\n", " transforms.Resize((224, 224)), # Resize to 224x224\n", " transforms.ToTensor(), # Convert to tensor\n", " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize\n", "])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = CNN()\n", "\n", "model.load_state_dict(torch.load(\"trained-cnn-concrete-crack.model\", map_location=torch.device(\"cpu\")))" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "magmaify = colormaps['magma']\n", "\n", "\n", "\n", "def compute_gradcam(img_tensor, layer_idx):\n", " target_layers = [[model.conv1], [model.conv2], [model.conv3]]\n", "\n", " cam = GradCAM(model=model, target_layers=target_layers[layer_idx-1])\n", "\n", " grayscale_cam = cam(input_tensor=img_tensor, targets=None)\n", "\n", " return magmaify(grayscale_cam.reshape(224, 224))" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "\n", "def predict_and_gradcam(model, img, layer_idx):\n", " # Preprocess the image\n", " img = Image.fromarray(img.astype('uint8'), 'RGB') if isinstance(img, np.ndarray) else img\n", " img_tensor = transform(img).unsqueeze(0)\n", "\n", " # Get predicted class index\n", " with torch.no_grad():\n", " output = model(img_tensor)\n", " _, predicted = torch.max(output.data, 1)\n", " predicted_label = str(predicted.item())\n", "\n", " # Compute GradCAM\n", " gradcam = compute_gradcam(img_tensor, layer_idx)\n", "\n", " return predicted_label, gradcam" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/irsh/miniconda3/envs/speaker_verification/lib/python3.9/site-packages/gradio/interface.py:382: UserWarning: The `allow_flagging` parameter in `Interface` nowtakes a string value ('auto', 'manual', or 'never'), not a boolean. Setting parameter to: 'never'.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7871\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "idx_to_lbl = {\"0\": \"Cracked\", \"1\":\"Uncracked\"}\n", "\n", "# Define a function to be used in Gradio app\n", "def classify_image(image, layer_idx):\n", " \n", " # Predict label and get GradCAM\n", " label, gradcam_img = predict_and_gradcam(model, image, layer_idx)\n", "\n", " return idx_to_lbl[label], gradcam_img\n", "\n", "# Define Gradio interface\n", "iface = gr.Interface(\n", " fn=classify_image,\n", " inputs=[gr.Image(), gr.Slider(minimum=1, maximum=3, step=1, value=1)],\n", " outputs=[gr.Textbox(label=\"Predicted Label\"), gr.Image(label=\"GradCAM Heatmap\")],\n", " title=\"Concrete Crack Detection with GradCAM\",\n", " description=\"Upload an image of concrete and get the predicted label along with the GradCAM heatmap.\",\n", " allow_flagging=False\n", ")\n", "\n", "# Launch the interface\n", "iface.launch()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python-3.12", "language": "python", "name": "python-3.12" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }