from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
import torch
from torch import nn
import numpy as np
from palette import ade_palette
from PIL import Image
import gradio as gr

# Load once at import time rather than on every request. Note: "nvidia/mit-b5" is an
# encoder-only checkpoint (its decode head would be randomly initialized), so we use
# the ADE20K-fine-tuned checkpoint that matches the ade_palette below.
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640")

def seg(image):
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    
    # The model outputs logits of shape (batch_size, num_labels, height/4, width/4).
    # We first rescale the logits to match the original size of the image using
    # bilinear interpolation. Next, we perform an argmax on the class dimension
    # and create a color map which we draw over the image.

    # First, rescale logits to original image size
    logits = nn.functional.interpolate(outputs.logits.detach().cpu(),
                size=image.size[::-1], # (height, width)
                mode='bilinear',
                align_corners=False)
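    # logits now has shape (batch_size, num_labels, height, width)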
    # Second, apply argmax on the class dimension
    seg = logits.argmax(dim=1)[0]
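    # seg holds one predicted ADE20K class index per pixel, shape (height, width)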
    color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) # height, width, 3
    palette = np.array(ade_palette())
    for label, color in enumerate(palette):
        color_seg[seg == label, :] = color
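    # color_seg now maps every pixel to the RGB palette color of its class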
    # Keep RGB channel order: the result is returned through PIL, which
    # expects RGB, so no BGR conversion is needed here.
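    # Overlay the color map on the original image with a 50/50 alpha blend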
    img = np.array(image) * 0.5 + color_seg * 0.5
    img = img.astype(np.uint8)
    img = Image.fromarray(img)
    return img


iface = gr.Interface(fn=seg, inputs=gr.Image(type="pil"), outputs=gr.Image(type="pil"))
iface.launch()
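
# A minimal sketch of exercising seg() without the Gradio UI
# ("example.jpg" is a hypothetical local test image):
#   from PIL import Image
#   seg(Image.open("example.jpg").convert("RGB")).save("overlay.png")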