分割图像的着色与相似度匹配

2023-08-08
Author AsanoSaki
~11.10K words

1. 分割图像着色
2. 分割图相似度匹配

介绍图像分割后产生的 Mask 灰度图的着色以及相似度匹配计算。

1. 分割图像着色

以 SAM 分割为例，分割产生的 masks 是一个 List，其长度等于分割出来的目标（Mask）数量；List 中的每个元素是一个 Dict，记录了该分割目标的面积、边界框等信息，其中的 segmentation 字段是该目标的二值图，宽高与原图一致，目标像素点为 True，其余像素点为 False。

下面实现两种方式：一种直接对分割得到的 masks 进行合并与上色，另一种对保存下来的若干张分割图像进行合并与上色。灰度图像和伪彩色图像都对应一个索引表，这个索引表又叫调色板。图像的像素值就是索引，灰度图的索引表为：

像素值	R	G	B
0	0	0	0
1	1	1	1
2	2	2	2
3	3	3	3
...	...	...	...
255	255	255	255

索引表中每个像素值（索引）对应的 RGB 值就是该像素显示的颜色，灰度图像索引表中的 RGB 值都与像素值相同。同理，只要修改这些 RGB 数值，就可以显示伪彩色图像了。注意调色板的索引范围为 0~255，因此需要为全部 256 个索引都设置对应的 RGB 值。

代码如下：

import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image


# Colour palette: `color_num` pseudo-colour entries followed by grey levels,
# so that every index 0..255 has an RGB value.
color_num = 100  # number of distinct pseudo-colours
_base_colors = ((255, 50, 100), (100, 50, 255), (33, 133, 233), (68, 218, 138))
# Each step shifts all four base colours by 8 per channel (wrapping at 256).
_palette = [
    [(channel + step * 8) % 256 for channel in base]
    for step in range(color_num // 4)
    for base in _base_colors
]
# Fill the remaining indices with their own grey level.
_palette.extend([gray, gray, gray] for gray in range(color_num, 256))
color_palette = np.array(_palette, dtype='uint8').reshape(-1, 3)
# print(color_palette.shape)  # (256, 3)


def colorize_mask(mask):
    """Turn a label array into a palette-mode PIL image using ``color_palette``.

    Arguments:
        mask (ndarray): label map; it is cast to ``uint8`` before conversion.

    Returns:
        PIL image in mode ``'P'`` with the module-level ``color_palette``
        attached. Call ``.convert(mode='RGB')`` on the result if a true-colour
        image is needed.
    """
    indexed = Image.fromarray(mask.astype(np.uint8)).convert(mode='P')
    indexed.putpalette(color_palette)
    return indexed


def save_colorize_mask(mask, output_dir, file_name):
    """Colourise ``mask`` with ``colorize_mask`` and save it to ``output_dir/file_name``."""
    colored = colorize_mask(mask)
    destination = os.path.join(output_dir, file_name)
    colored.save(destination)


def show_origin_masks(masks):
    """Display SAM masks as a random-colour overlay with matplotlib.

    Arguments:
        masks (list[dict]): mask records; each one is read for its ``'area'``
            value and its boolean ``'segmentation'`` array.

    Nothing is drawn when ``masks`` is empty.
    """
    if len(masks) == 0:
        return

    # Largest masks are painted first; later (smaller) masks overwrite them
    # wherever they overlap.
    by_area_desc = sorted(masks, key=lambda record: record['area'], reverse=True)

    seg_shape = by_area_desc[0]['segmentation'].shape
    overlay = np.ones((seg_shape[0], seg_shape[1], 4))  # RGBA canvas
    overlay[..., 3] = 0  # alpha 0 everywhere until a mask is painted

    for record in by_area_desc:
        region = record['segmentation']  # boolean array selecting the mask pixels
        rgba = np.append(np.random.random(3), 0.35)  # random RGB, 4th value is alpha
        overlay[region] = rgba

    # To draw on top of an already displayed image, use plt.gca() with
    # autoscaling disabled and call ax.imshow(overlay) instead.
    plt.axis('off')
    plt.imshow(overlay)
    plt.show()


def show_img_masks(masks_dir, height, width, threshold):
    """Merge the mask images stored in ``masks_dir`` into one label map and show it.

    Every file in ``masks_dir`` is opened as an image and cast to a boolean
    array. Masks with fewer than ``threshold`` True pixels are skipped; the
    others are written into the label map with the value ``position + 1``,
    where ``position`` is the file's index in the sorted directory listing.
    Where masks overlap, the later file wins. The label map is shown twice:
    as-is and colourised with ``colorize_mask``.

    Arguments:
        masks_dir (str): directory holding one image file per mask.
        height (int): height of the merged label map.
        width (int): width of the merged label map.
        threshold (int): minimum number of True pixels for a mask to be kept.
    """
    label_map = np.zeros((height, width))

    # os.listdir() returns names in arbitrary order; sorting makes the label
    # (and therefore the colour) assigned to each mask reproducible.
    for idx, img_mask_name in enumerate(sorted(os.listdir(masks_dir))):
        with Image.open(os.path.join(masks_dir, img_mask_name)) as img_mask:
            # NOTE(review): assumes each mask is a single-channel image of
            # size height x width -- confirm against the saved masks.
            foreground = np.asarray(img_mask, dtype=np.bool_)
        if np.sum(foreground) < threshold:
            continue
        label_map[foreground] = idx + 1

    Image.fromarray(label_map).show()  # label map before colouring
    colorize_mask(label_map).show()  # label map after colouring


# Load and display the original image
image = Image.open('../images/people.jpg')
width, height = image.size
image.show()


# Read the SAM mask images from a directory, merge them, colourise and display
masks_dir = '../images/people_sam/'
threshold = 200
show_img_masks(masks_dir, height, width, threshold)


# Show a single mask before and after colouring
test_mask_path = '../images/test_mask.png'
test_mask = Image.open(test_mask_path)
test_mask.show()
test_mask = np.array(test_mask, dtype=np.uint8)
print(test_mask.shape)  # author's run printed (1080, 1920)

mask_gray = Image.fromarray(test_mask, 'P')
mask_gray.show()
mask_color = colorize_mask(test_mask)
mask_color.show()
print(list(np.unique(mask_color)))  # author's run printed [0, 1, 2, 3, 4, 5, 6, ..., 44]; the values can be read as class ids

2. 分割图相似度匹配

区域相似度（Region Similarity）：为了衡量基于区域的分割相似度（即错误分类像素的多少），我们使用 Jaccard 指数 𝒥。𝒥 定义为预测的分割输出 Mask 和真值 Mask 之间的交并比 IoU（Intersection over Union），即 $\mathcal{J} = \frac{|M \cap G|}{|M \cup G|}$，其中 $M$ 为预测 Mask，$G$ 为真值 Mask。Jaccard 指数提供了关于错误分类像素的直观信息。

边沿精度（Contour Accuracy）：边沿精度通过 F-score 来衡量，F-score 评估的是预测 Mask 的边界是否与真值 Mask 的边界相对应。首先提取预测 Mask 和真值 Mask 的边界元素坐标，将边界上的元素置为 True，非边界的元素置为 False。F-score 定义为精度 $P$ 和召回率 $R$ 的调和平均数：$F = \frac{2PR}{P + R}$。

精度（Precision，P，也称查准率）：分母是预测 Mask 的边界元素总数，分子是预测 Mask 的边界元素中确实位于真值边界上的元素个数。例如，预测 Mask 有100个边界元素，但其中只有70个出现在真值图的对应位置上，即这70个真值的正样本被正确（True）预测为 Positive，属于 True Positive（TP），此时的查准率为70%；剩下的30个元素被错误（False）预测为 Positive，属于 False Positive（FP）。

召回率（Recall，R，也称查全率）：分母是真值 Mask 的边界元素总数，分子是其中被预测出来的真实正样本个数。例如真值 Mask 的边界有140个元素，而预测 Mask 只将其中70个正确（True）预测为 Positive（TP），另外70个被错误（False）预测为 Negative，属于 False Negative（FN），那么此时的 Recall 为50%。

J & F 指标的计算代码如下：

import os
import math
import numpy as np
from PIL import Image
from skimage.morphology import binary_dilation, disk


def db_eval_iou(annotation, segmentation):
    """
    Region similarity between two masks, measured as the Jaccard index
    (intersection over union).

    Arguments:
        annotation   (ndarray): binary annotation   map.
        segmentation (ndarray): binary segmentation map.

    Return:
        jaccard (float): region similarity; 1 when both maps are empty.
    """
    gt = annotation.astype(np.bool_)
    pred = segmentation.astype(np.bool_)

    # Two empty masks are treated as a perfect match (avoids 0 / 0).
    both_empty = np.isclose(np.sum(gt), 0) and np.isclose(np.sum(pred), 0)
    if both_empty:
        return 1

    intersection = np.sum(gt & pred)
    union = np.sum(gt | pred, dtype=np.float32)
    return intersection / union


def db_eval_boundary(foreground_mask, gt_mask, bound_th=0.008):
    """
    Compute the boundary F-measure between foreground_mask and gt_mask.
    A boundary pixel of one mask counts as matched when it lies inside the
    dilated boundary of the other mask (morphological operators are used
    instead of an explicit pixel-to-pixel matching).

    Arguments:
        foreground_mask (ndarray): binary segmentation image.
        gt_mask         (ndarray): binary annotated image.
        bound_th        (float):   matching tolerance. Values >= 1 are used
                                   directly as the dilation radius; smaller
                                   values are multiplied by the L2 norm of
                                   foreground_mask.shape and rounded up.

    Returns:
        F (float): boundaries F-measure
    """
    assert np.atleast_3d(foreground_mask).shape[2] == 1

    if bound_th >= 1:
        bound_pix = bound_th
    else:
        # np.linalg.norm defaults to the L2 norm of the shape tuple
        bound_pix = np.ceil(bound_th * np.linalg.norm(foreground_mask.shape))

    # Boundary maps of both masks (boundary pixels are True)
    fg_boundary = seg2bmap(foreground_mask)
    gt_boundary = seg2bmap(gt_mask)

    # Dilate each boundary by the tolerance radius
    tolerance_disk = disk(bound_pix)
    fg_dil = binary_dilation(fg_boundary, tolerance_disk)
    gt_dil = binary_dilation(gt_boundary, tolerance_disk)

    # Boundary pixels of one mask that fall within the other's dilated boundary
    gt_match = gt_boundary * fg_dil
    fg_match = fg_boundary * gt_dil

    # Number of boundary pixels in each mask
    n_fg = np.sum(fg_boundary)
    n_gt = np.sum(gt_boundary)

    # Precision / recall, with fixed values when a boundary is empty
    if n_fg == 0 and n_gt == 0:
        precision, recall = 1, 1
    elif n_fg == 0:
        precision, recall = 1, 0
    elif n_gt == 0:
        precision, recall = 0, 1
    else:
        precision = np.sum(fg_match) / float(n_fg)
        recall = np.sum(gt_match) / float(n_gt)

    # Harmonic mean of precision and recall
    denominator = precision + recall
    if denominator == 0:
        return 0
    return 2 * precision * recall / denominator


def seg2bmap(seg, width=None, height=None):
    """
    From a segmentation, compute a binary boundary map with 1 pixel wide
    boundaries.  The boundary pixels are offset by 1/2 pixel towards the
    origin from the actual segment boundary.

    Arguments:
        seg    (ndarray): Segmentation; every non-zero element is foreground.
        width  (int):     Width of desired bmap  <= seg.shape[1]
                          (defaults to seg.shape[1]).
        height (int):     Height of desired bmap <= seg.shape[0]
                          (defaults to seg.shape[0]).

    Returns:
        bmap (ndarray): Boolean boundary map. It has the shape of ``seg`` when
                        no resizing is requested, otherwise (height, width).

    Raises:
        AssertionError: if ``seg`` has more than one channel, if the requested
                        size is larger than ``seg``, or if the aspect ratios
                        differ by more than 0.01.
    """
    # astype() returns a copy, so the caller's array is never modified.
    seg = seg.astype(np.bool_)

    assert np.atleast_3d(seg).shape[2] == 1

    width = seg.shape[1] if width is None else width
    height = seg.shape[0] if height is None else height

    h, w = seg.shape[:2]

    ar1 = float(width) / float(height)
    ar2 = float(w) / float(h)

    # Logical `or` is required here: `|` binds tighter than the comparisons
    # and would turn the whole expression into a chained comparison.
    assert not (width > w or height > h or abs(ar1 - ar2) > 0.01), \
        "Can't convert %dx%d seg to %dx%d bmap." % (w, h, width, height)

    # Neighbours to the east, south and south-east (zero beyond the border).
    e = np.zeros_like(seg)
    s = np.zeros_like(seg)
    se = np.zeros_like(seg)

    e[:, :-1] = seg[:, 1:]
    s[:-1, :] = seg[1:, :]
    se[:-1, :-1] = seg[1:, 1:]

    # A pixel is on the boundary when it differs from any of those neighbours.
    b = (seg ^ e) | (seg ^ s) | (seg ^ se)
    # The last row / column only have an east / south neighbour respectively.
    b[-1, :] = seg[-1, :] ^ e[-1, :]
    b[:, -1] = seg[:, -1] ^ s[:, -1]
    b[-1, -1] = 0

    if w == width and h == height:
        return b

    # Down-scale: map each boundary pixel (y, x) to
    # (floor(y * height / h), floor(x * width / w)) in the smaller map.
    bmap = np.zeros((height, width), dtype=np.bool_)
    ys, xs = np.nonzero(b.reshape(h, w))
    bmap[(ys * height) // h, (xs * width) // w] = True
    return bmap


# Match every test mask against all database masks and report, per test mask,
# the database entry with the best IoU and the one with the best boundary F-score.
database_path = '../data/mask_database/'
test_path = '../data/mask_test/'
database_img_name_list = os.listdir(database_path)
test_img_name_list = os.listdir(test_path)

for test_img_name in test_img_name_list:
    with Image.open(os.path.join(test_path, test_img_name)) as test_img_file:
        # PIL reports size as (width, height), which is also the order resize() expects.
        test_width, test_height = test_img_file.size
        test_img = np.asarray(test_img_file)

    best_iou, best_F, best_iou_dbname, best_F_dbname = 0.0, 0.0, None, None
    for database_img_name in database_img_name_list:
        with Image.open(os.path.join(database_path, database_img_name)) as database_img_file:
            # Nearest-neighbour resampling keeps the mask binary; an interpolating
            # filter creates intermediate edge values that become foreground once
            # the mask is cast to bool.
            database_img = np.asarray(
                database_img_file.resize((test_width, test_height), resample=Image.NEAREST)
            )

        iou = db_eval_iou(test_img, database_img)
        F_score = db_eval_boundary(test_img, database_img)

        if iou > best_iou:
            best_iou = iou
            best_iou_dbname = database_img_name
        if F_score > best_F:
            best_F = F_score
            best_F_dbname = database_img_name

    print(f'[{test_img_name}] best iou: {best_iou:.4f} ({best_iou_dbname}), best F: {best_F:.4f} ({best_F_dbname})')
    # Sample output reported by the author for their dataset:
    # [0_70.png] best iou: 0.7433 (0_9_156_197_151.png), best F: 0.2681 (0_2_152_212_77.png)
    # [0_71.png] best iou: 0.9399 (0_3_140_238_157.png), best F: 0.7001 (0_3_140_238_157.png)
    # [0_72.png] best iou: 0.7066 (0_10_190_185_95.png), best F: 0.2735 (0_7_93_244_223.png)
    # [0_75.png] best iou: 0.9089 (0_5_241_130_227.png), best F: 0.5160 (0_5_241_130_227.png)
    # [0_77.png] best iou: 0.5177 (0_18_252_79_113.png), best F: 0.2171 (0_1_245_116_182.png)
    # [0_78.png] best iou: 0.7393 (0_4_251_231_252.png), best F: 0.2872 (0_9_156_197_151.png)

AsanoSaki

1. 分割图像着色

2. 分割图相似度匹配