DanielXu0208 committed on
Commit 9e56473
1 Parent(s): ca68817

Update run_gradio.py

Files changed (1)
  1. run_gradio.py +215 -233
run_gradio.py CHANGED
@@ -1,233 +1,215 @@
- import gradio as gr
- import torch
- import torchvision
- import pandas as pd
- import os
- from PIL import Image
- from utils.experiment_utils import get_model
-
-
- # Custom flagging logic to save flagged data to a CSV file
- class CustomFlagging(gr.FlaggingCallback):
-     def __init__(self, dir_name="flagged_data"):
-         self.dir = dir_name
-         self.image_dir = os.path.join(self.dir, "uploaded_images")
-         if not os.path.exists(self.dir):
-             os.makedirs(self.dir)
-         if not os.path.exists(self.image_dir):
-             os.makedirs(self.image_dir)
-
-     # Define setup as a no-op to fulfill abstract class requirement
-     def setup(self, *args, **kwargs):
-         pass
-
-     def flag(self, flag_data, flag_option=None, flag_index=None, username=None):
-         # Extract data
-         classification_mode, image, sensing_modality, predicted_class, correct_class = flag_data
-
-         # Save the uploaded image in the "uploaded_images" folder
-         image_filename = os.path.join(self.image_dir,
-                                       f"flagged_image_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.png")
-         image.save(image_filename)  # Save image in PNG format
-
-         # Columns: Classification, Image Path, Sensing Modality, Predicted Class, Correct Class
-         data = {
-             "Classification Mode": classification_mode,
-             "Image Path": image_filename,  # Save path to image in CSV
-             "Sensing Modality": sensing_modality,
-             "Predicted Class": predicted_class,
-             "Correct Class": correct_class,
-         }
-
-         df = pd.DataFrame([data])
-         csv_file = os.path.join(self.dir, "flagged_data.csv")
-
-         # Append to CSV, or create if it doesn't exist
-         if os.path.exists(csv_file):
-             df.to_csv(csv_file, mode='a', header=False, index=False)
-         else:
-             df.to_csv(csv_file, mode='w', header=True, index=False)
-
-
- # Function to load the appropriate model based on the user's selection
- def load_model(modality, mode):
-     # For Few-Shot classification, always use the DINOv2 model
-     if mode == "Few-Shot":
-         class Args:
-             model = 'DINOv2'
-             pretrained = 'pretrained'
-             frozen = 'unfrozen'
-
-         args = Args()
-         model = get_model(args)  # Load DINOv2 model for Few-Shot classification
-     else:
-         # For Fully-Supervised classification, choose model based on the sensing modality
-         if modality == "Texture":
-             class Args:
-                 model = 'DINOv2'
-                 pretrained = 'pretrained'
-                 frozen = 'unfrozen'
-
-             args = Args()
-             model = get_model(args)  # Load DINOv2 model for Texture modality
-         elif modality == "Heightmap":
-             class Args:
-                 model = 'ResNet152'
-                 pretrained = 'pretrained'
-                 frozen = 'unfrozen'
-
-             args = Args()
-             model = get_model(args)  # Load ResNet152 model for Heightmap modality
-         else:
-             raise ValueError("Invalid modality selected!")
-
-     model.eval()  # Set the model to evaluation mode
-     return model
-
-
- # Prediction function that processes the image and returns the prediction results
- def predict(image, modality, mode):
-     # Load the appropriate model based on the user's selections
-     model = load_model(modality, mode)
-
-     # Print the selected mode and modality for debugging purposes
-     print(f"User selected Mode: {mode}, Modality: {modality}")
-
-     # Preprocess the image
-     transform = torchvision.transforms.Compose([
-         torchvision.transforms.Resize((224, 224)),
-         torchvision.transforms.ToTensor(),
-         torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-     ])
-
-     image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
-     with torch.no_grad():
-         output = model(image_tensor)  # Get model predictions
-         probabilities = torch.nn.functional.softmax(output, dim=1).squeeze().tolist()
-
-     # Class names for the predictions
-     class_names = ["ANTLER", "BEECHWOOD", "BEFOREUSE", "BONE", "IVORY", "SPRUCEWOOD"]
-
-     # Pair class names with their corresponding probabilities
-     predicted_class = class_names[probabilities.index(max(probabilities))]  # Get the predicted class
-     results = {class_names[i]: probabilities[i] for i in range(len(class_names))}
-
-     return predicted_class, results  # Return predicted class and probabilities
-
-
- # Create the Gradio interface using gr.Blocks
- def create_interface():
-     with gr.Blocks() as interface:
-         # Title at the top of the interface (centered and larger)
-         gr.Markdown("<h1 style='text-align: center; font-size: 36px;'>LUWA Dataset Image Classification</h1>")
-
-         # Add description for the interface
-         description = """
-         ### Image Classification Options
-         - **Fully-Supervised Classification**: Choose this for common or well-known materials with plenty of data (e.g., bone, wood).
-         - **Few-Shot Classification**: Choose this for rare or newly discovered materials where only a few examples exist.
-         ### **Don't forget to choose the Sensing Modality based on your uploaded images.**
-         ### **Please help us to flag the correct class for your uploaded image if you know it, it will help us to further develop our dataset. If you cannot find the correct class in the option, please click on the option 'Other' and type the correct class for us!**
-         """
-         gr.Markdown(description)
-
-         # Top-level selector for Fully-Supervised vs. Few-Shot classification
-         mode_selector = gr.Radio(choices=["Fully Supervised", "Few-Shot"], label="Classification Mode",
-                                  value="Fully Supervised")
-
-         # Sensing modality selector
-         modality_selector = gr.Radio(choices=["Texture", "Heightmap"], label="Sensing Modality", value="Texture")
-
-         # Image upload input
-         image_input = gr.Image(type="pil", label="Image")
-
-         # Predicted classification output and class probabilities
-         with gr.Row():
-             predicted_output = gr.Label(num_top_classes=1, label="Predicted Classification")
-             probabilities_output = gr.Label(label="Prediction Probabilities")
-
-         # Add the "Run Prediction" button under the Prediction Probabilities
-         predict_button = gr.Button("Run Prediction")
-
-         # Dropdown for user to select the correct class if the model prediction is wrong
-         correct_class_selector = gr.Radio(
-             choices=["ANTLER", "BEECHWOOD", "BEFOREUSE", "BONE", "IVORY", "SPRUCEWOOD", "Other"],
-             label="Select Correct Class"
-         )
-
-         # Text box for user to type the correct class if "Other" is selected
-         other_class_input = gr.Textbox(label="If Other, enter the correct class", visible=False)
-
-         # Logic to dynamically update visibility of the "Other" class text box
-         def update_visibility(selected_class):
-             return gr.update(visible=selected_class == "Other")
-
-         correct_class_selector.change(fn=update_visibility, inputs=correct_class_selector, outputs=other_class_input)
-
-
-         # Create a flagging instance
-         flagging_instance = CustomFlagging(dir_name="flagged_data")
-
-         # Define function for the confirmation pop-up
-         def confirm_flag_selection(correct_class, other_class):
-             # Generate confirmation message
-             if correct_class == "Other":
-                 message = f"Are you sure the class you selected is '{other_class}' for this picture?"
-             else:
-                 message = f"Are you sure the class you selected is '{correct_class}' for this picture?"
-
-             return message, gr.update(visible=True), gr.update(visible=True)
-
-         # Final flag submission function
-         def flag_data_save(correct_class, other_class, mode, image, modality, predicted_class, confirmed):
-             if confirmed == "Yes":
-                 # Save the flagged data
-                 correct_class_final = correct_class if correct_class != "Other" else other_class
-                 flagging_instance.flag([mode, image, modality, predicted_class, correct_class_final])
-                 return "Flagged successfully!"
-             else:
-                 return "No flag submitted, please select again."
-
-         # Flagging button
-         flag_button = gr.Button("Flag")
-
-         # Confirmation box for user input and confirmation flag
-         confirmation_text = gr.Textbox(visible=False)
-         yes_no_choice = gr.Radio(choices=["Yes", "No"], label="Are you sure?", visible=False)
-         confirmation_button = gr.Button("Confirm Flag", visible=False)
-
-         # Prediction action
-         predict_button.click(
-             fn=predict,
-             inputs=[image_input, modality_selector, mode_selector],
-             outputs=[predicted_output, probabilities_output]
-         )
-
-         # Flagging action with confirmation
-         flag_button.click(
-             fn=confirm_flag_selection,
-             inputs=[correct_class_selector, other_class_input],
-             outputs=[confirmation_text, yes_no_choice, confirmation_button]
-         )
-
-         # Final flag submission after confirmation
-         confirmation_button.click(
-             fn=flag_data_save,
-             inputs=[correct_class_selector, other_class_input, mode_selector, image_input, modality_selector,
-                     predicted_output, yes_no_choice],
-             outputs=gr.Textbox(label="Flagging Status")
-         )
-
-     return interface
-
-
- if __name__ == "__main__":
-     interface = create_interface()
-     interface.launch(share=True)
-
-
-
-
-
-
-
 
+ import matplotlib.pyplot as plt
+ import torch
+ from torchvision.transforms.functional import resize, normalize, to_pil_image
+ from torchvision.io.image import read_image
+ from torchvision.models import resnet50
+ from sklearn.cluster import KMeans
+ import numpy as np
+ import os
+ import logging
+ from torchcam.utils import overlay_mask
+ from PIL import Image
+ from collections import Counter
+ from scipy.spatial.distance import cdist
+
+ # Initialize logger to monitor progress
+ logging.basicConfig(level=logging.INFO)
+
+ # Path to dataset and model
+ dataset_path = "archive"
+ model_path = "resnet50_finetuned_miniimagenet.pth"  # Update to your fine-tuned MiniImagenet weights
+ n_clusters = 100  # Number of clusters for feature channels (can be adjusted)
+ top_k_prototypes = 5  # Top k most similar examples to select as prototypes for each cluster
+ batch_size = 8  # Reduce batch size to limit memory usage
+ output_folder = "examples"  # Folder to save the images and heatmaps
+
+ # Limit to 100 images per class
+ images_per_class = 100
+
+ # Create the output folder if it doesn't exist
+ if not os.path.exists(output_folder):
+     os.makedirs(output_folder)
+
+ # Set device to GPU if available, otherwise CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Load the fine-tuned model weights from MiniImagenet
+ model = resnet50(pretrained=False)
+
+ # Modify the fully connected layer to match the number of classes in MiniImagenet (100 classes)
+ model.fc = torch.nn.Linear(model.fc.in_features, 100)
+
+ # Load the fine-tuned state_dict
+ checkpoint = torch.load(model_path, weights_only=True)  # Updated to MiniImagenet weights
+ model.load_state_dict(checkpoint)
+ model.eval()
+
+ # Move the model to GPU
+ model.to(device)
+
+ # Hook to capture activations from 'layer4'
+ activation = {}
+
+
+ def get_activation(name):
+     def hook(model, input, output):
+         activation[name] = output.detach()
+
+     return hook
+
+
+ model.layer4.register_forward_hook(get_activation('layer4'))
+
+ # Collecting activations of all feature channels across multiple images
+ all_activations = []
+ image_paths = []  # To keep track of the image paths
+ image_labels = []  # To store the class labels of each image
+
+ # Traverse through the dataset, accessing each class folder and collecting images and labels
+ for class_folder in os.listdir(dataset_path):
+     class_folder_path = os.path.join(dataset_path, class_folder)
+
+     # Ensure we are looking at a directory (class folder)
+     if os.path.isdir(class_folder_path):
+         class_label = class_folder  # Use the folder name as the class label
+
+         # Get only up to 'images_per_class' images from each class folder
+         class_images = os.listdir(class_folder_path)[:images_per_class]
+
+         for img_name in class_images:
+             img_path = os.path.join(class_folder_path, img_name)
+             image_paths.append(img_path)  # Store image path for later use
+             image_labels.append(class_label)  # Store the corresponding class label
+
+ # Log how many images we collected
+ logging.info(f"Collected {len(image_paths)} images across {len(set(image_labels))} classes.")
+
+ # Process the images for clustering and Grad-CAM calculation in batches
+ for batch_idx in range(0, len(image_paths), batch_size):
+     batch_image_paths = image_paths[batch_idx: batch_idx + batch_size]
+
+     with torch.no_grad():  # Disable gradient calculations
+         for img_path in batch_image_paths:
+             # Read and preprocess the image
+             img = read_image(img_path)
+
+             # Ensure the image has 3 channels (convert grayscale or 4-channel images to RGB)
+             if img.shape[0] == 1:  # If the image is grayscale (1 channel), repeat the single channel to make it RGB
+                 img = img.repeat(3, 1, 1)  # Convert it to 3-channel by repeating the single channel
+             elif img.shape[0] == 4:  # If the image has 4 channels (e.g., RGBA), drop the alpha channel
+                 img = img[:3, :, :]  # Keep only the first 3 channels (RGB)
+
+             # Resize and normalize the image
+             input_tensor = normalize(resize(img, (224, 224)) / 255., [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+
+             # Move the input tensor to the GPU
+             input_tensor = input_tensor.to(device)
+
+             # Feed the preprocessed image to the model
+             out = model(input_tensor.unsqueeze(0))
+
+             # Get the activations from layer4 (with 2,048 feature channels)
+             layer4_activations = activation['layer4'].cpu().numpy()
+
+             # For each image, store the activation values across all channels (2048 channels)
+             all_activations.append(layer4_activations.squeeze())
+
+     # Log progress
+     logging.info(f"Processed batch {batch_idx // batch_size + 1}/{len(image_paths) // batch_size + 1}")
+
+ # Convert the collected activations into a numpy array of shape (n_images, 2048, H, W)
+ all_activations = np.array(all_activations)
+
+ # Now we average the spatial dimensions (H*W) to get the activation vector for each channel
+ # This gives us an array of shape (n_images, 2048), where each value is the averaged activation for that channel
+ avg_activations_per_image = np.mean(all_activations, axis=(-2, -1))  # Average over spatial dimensions
+
+ # Now we want to transpose the array to get activations for each channel across all images
+ # Shape will be (2048, n_images), where each row is the activation of a channel across all images
+ channel_activation_vectors = avg_activations_per_image.T
+
+ # Perform KMeans clustering on the feature channels
+ kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(channel_activation_vectors)
+
+ # Get cluster assignments for feature channels
+ channel_clusters = kmeans.labels_
+
+ # Find prototypes for each cluster based on channel activation similarities
+ prototypes = {}
+
+ for cluster_id in range(n_clusters):
+     cluster_indices = np.where(channel_clusters == cluster_id)[0]  # Get the feature channels in this cluster
+
+     if len(cluster_indices) == 0:
+         continue  # Skip empty clusters
+
+     # Find the majority class for the images that activate the feature channels in this cluster
+     cluster_activation_vectors = channel_activation_vectors[cluster_indices]  # Activation vectors for this cluster
+
+     # Use the majority class of the images for this cluster
+     majority_class = Counter([image_labels[i] for i in range(len(image_paths))]).most_common(1)[0][0]
+
+     # Filter the images by the majority class before selecting prototypes
+     majority_class_indices = [i for i, label in enumerate(image_labels) if label == majority_class]
+     filtered_cluster_activation_vectors = cluster_activation_vectors[:, majority_class_indices]  # Filtered activations
+
+     # Compute pairwise distances between the activation vectors of the feature channels
+     distances = cdist(filtered_cluster_activation_vectors.T, filtered_cluster_activation_vectors.T, 'euclidean')
+
+     # Sum the distances for each image (to find the closest/most representative sample)
+     distance_sums = distances.sum(axis=1)
+
+     # Get the indices of the top-5 closest images from the filtered list
+     top_k_indices = np.argsort(distance_sums)[:top_k_prototypes]
+
+     # Store the prototypes for this cluster (top-k most representative images of the majority class)
+     prototypes[cluster_id] = [image_paths[majority_class_indices[i]] for i in top_k_indices]
+
+     # Print the top 5 image paths for this cluster
+     logging.info(f"Cluster {cluster_id} Prototypes: {prototypes[cluster_id]}")
+
+     # Now download and save the images and their corresponding Grad-CAM heatmaps
+     for idx, img_path in enumerate(prototypes[cluster_id]):
+         # Read and preprocess the image
+         img = read_image(img_path)
+
+         # Ensure the image has 3 channels
+         if img.shape[0] == 1:
+             img = img.repeat(3, 1, 1)
+         elif img.shape[0] == 4:
+             img = img[:3, :, :]
+
+         # Resize and normalize the image
+         input_tensor = normalize(resize(img, (224, 224)) / 255., [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+
+         # Move the input tensor to the GPU
+         input_tensor = input_tensor.to(device)
+
+         # Feed the preprocessed image to the model
+         out = model(input_tensor.unsqueeze(0))
+
+         # Manually calculate Grad-CAM by averaging only the channels in the cluster
+         cam_activations = activation['layer4'].squeeze().cpu().numpy()
+         cluster_activations = cam_activations[cluster_indices]
+         averaged_cluster_activation = np.mean(cluster_activations, axis=0)
+
+         # Normalize the activation map
+         averaged_cluster_activation = (averaged_cluster_activation - averaged_cluster_activation.min()) / (
+                 averaged_cluster_activation.max() - averaged_cluster_activation.min())
+
+         # Overlay the CAM on the original image
+         overlayed_img = overlay_mask(to_pil_image(img.cpu()), to_pil_image(averaged_cluster_activation, mode='F'),
+                                      alpha=0.5)
+
+         # Save the original image and the heatmap overlay
+         img_name = f"cluster_{cluster_id}_prototype_{idx + 1}.png"
+         heatmap_name = f"cluster_{cluster_id}_prototype_{idx + 1}_heatmap.png"
+
+         # Save original image
+         Image.fromarray(img.permute(1, 2, 0).cpu().numpy().astype(np.uint8)).save(os.path.join(output_folder, img_name))
+
+         # Save the heatmap overlay
+         overlayed_img.save(os.path.join(output_folder, heatmap_name))
+
+ # Done
+ logging.info("Saved all representative images and their corresponding heatmaps.")