YOLO v3 中关于 anchor 的 k-means 聚类代码

1. k-means 聚类代码我使用的代码是：https://github.com/lars76/kmeans-anchor-boxes另一个代码是：https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py输入是存放 xml标签文件的文件夹：只需要更改 example.py 中的一行代码：ANNOTATIONS_PATH =...

文章共4,360字 · 阅读需要大约15分钟

一键AI生成摘要，助你高效阅读

问答

轮子去哪儿了

5385人浏览 · 2019-09-06 10:05:26

轮子去哪儿了 · 2019-09-06 10:05:26 发布

文章目录

1. k-means 聚类代码

我使用的代码是：https://github.com/lars76/kmeans-anchor-boxes
其他的k-means 代码（没用过）是：

https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
https://github.com/AlexeyAB/darknet/blob/master/scripts/gen_anchors.py
输入是存放 xml标签文件的文件夹：
只需要更改 example.py 中的一行代码：

ANNOTATIONS_PATH = "xmlLabel/train"  # 更改自己的路径（存放训练标签 xml 的文件路径）

运行 example.py 计算当前数据集的需要设置的 anchor 的大小（相对于416输入而言）

在我的数据集上的输出结果如下：

rows =  8607  #  我的 label 目标的数量
[[0.01416016 0.015625  ]  # 每一个 anchor的宽/图像的宽 ，高/高
 [0.00830078 0.00927734]
 [0.06542969 0.06982422]
 [0.03417969 0.03662109]
 [0.01123047 0.01220703]
 [0.02685547 0.02832031]
 [0.01757812 0.01953125]
 [0.04443359 0.04833984]
 [0.02148438 0.0234375 ]]
Accuracy: 83.41%
Boxes:
 [ 5.890625  3.453125 27.21875  14.21875   4.671875 11.171875  7.3125   18.484375  8.9375  ]-  # 每个 anchor 的宽
 [ 6.5       3.859375 29.046875 15.234375  5.078125 11.78125   8.125    20.109375  9.75    ]  # # 每个 anchor 的高
Ratios:
 [0.89, 0.9, 0.91, 0.92, 0.92, 0.92, 0.93, 0.94, 0.95]  # 每个 anchor 的 宽/高

对输入anchor 进行排序后的结果是（取整数是为了好看）：

[3, 4, 5, 7, 8, 11, 14, 18, 27]
[3, 5, 6, 8, 9, 11, 15, 20, 29]
anchor_416 = 3, 3, 4, 5, 5, 6,   7, 8, 8, 9, 11, 11,   14, 15, 18, 20, 27, 29

anchor_416_2 = 6, 7, 9, 10, 11, 13,   14, 16, 17, 19, 22, 23,   28, 30, 36, 40, 54, 58
anchor_416_3 = 10, 11, 14, 15, 17, 19,   21, 24, 26, 29, 33, 35,  42, 45, 55, 60, 81, 87
anchor_416_4 = 13, 15, 18, 20, 23, 26,   29,32, 35,39, 44,47,  56,60, 73,80, 108,116
anchor_416_5 = 17, 19, 23, 25, 29, 32,   36, 40, 44, 48, 55, 58,   71, 76, 92, 100, 136, 145

将 anchor 排序的代码如下（自己写的）：

import numpy as np 

# anchors = [10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326]
# for i in range(0, len(anchors), 2):
#   print(anchors[i] * anchors[i + 1])


# Anchor widths (x) and heights (y), copied from the "Boxes:" output above.
x = [5.890625,  3.453125, 27.21875,  14.21875,   4.671875, 11.171875,  7.3125,   18.484375,  8.9375]
y = [6.5,       3.859375, 29.046875, 15.234375,  5.078125, 11.78125,   8.125,    20.109375,  9.75]

# Area of every anchor; the anchors are ordered by this value.
area = [w * h for w, h in zip(x, y)]

# Indices that sort the anchors by ascending area. Computed once: the order
# does not change while the sorted lists are being built.
order = np.argsort(area)

print(area)
print(order)

# int() truncates toward zero, so e.g. 5.89 becomes 5 (it does not round).
new_x = [int(x[i]) for i in order]
new_y = [int(y[i]) for i in order]

# Interleave as w0, h0, w1, h1, ... -- the flat layout used by the
# "anchors = ..." lines shown in this article.
anchors = []
for w, h in zip(new_x, new_y):
    anchors.extend((w, h))

print(anchors)

# Areas of the truncated anchors, one per line, smallest first.
for w, h in zip(new_x, new_y):
    print(w * h)

2. YOLOv3 中默认的 anchor

anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326

一共有 18个数字，9个anchor，每一个anchor的大小，面积依次是：

130, 480,759,     1830, 2790, 7021,     10440, 30888, 121598

3. github 上的代码复制如下：

example.py

import glob
import xml.etree.ElementTree as ET

import numpy as np

from kmeans import kmeans, avg_iou

# Directory searched for annotation files; passed to load_dataset() below.
ANNOTATIONS_PATH = "Annotations"
# Number of clusters requested from kmeans() (its `k` argument).
CLUSTERS = 5

def load_dataset(path):
    """
    Collects the relative size of every annotated box found under a directory.

    Every file matching ``<path>/*xml`` is parsed as XML. The image size is
    read from ``./size/height`` and ``./size/width``; for each ``object``
    element the four ``bndbox`` coordinates are truncated to integers and
    divided by the image width (x) or height (y).

    :param path: directory containing the annotation files
    :return: numpy array with one ``[width, height]`` row per object, both
        expressed as a fraction of the image size; an empty array when no
        object is found
    """
    def _fraction(node, tag, extent):
        # Coordinates may be written as "12.0"; go through float before int.
        return int(float(node.findtext(tag))) / extent

    sizes = []
    pattern = "{}/*xml".format(path)
    for annotation in glob.glob(pattern):
        document = ET.parse(annotation)

        img_h = int(document.findtext("./size/height"))
        img_w = int(document.findtext("./size/width"))

        for node in document.iter("object"):
            left = _fraction(node, "bndbox/xmin", img_w)
            top = _fraction(node, "bndbox/ymin", img_h)
            right = _fraction(node, "bndbox/xmax", img_w)
            bottom = _fraction(node, "bndbox/ymax", img_h)

            sizes.append([right - left, bottom - top])

    return np.array(sizes)


# Load the box sizes and cluster them into CLUSTERS anchors.
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Mean best-IoU between every box and the anchors, shown as a percentage.
accuracy_percent = avg_iou(data, out) * 100
print("Accuracy: {:.2f}%".format(accuracy_percent))
print("Boxes:\n {}".format(out))

# Width/height ratio of every anchor, rounded to two decimals, ascending.
anchor_widths = out[:, 0]
anchor_heights = out[:, 1]
ratios = np.around(anchor_widths / anchor_heights, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

kmeans.py

import numpy as np


def iou(box, clusters):
    """
    Computes the Intersection over Union (IoU) of one box with each of k clusters.

    Box and clusters are compared by size only, as if they shared a corner:
    the intersection is the product of the element-wise minimum widths and
    heights.

    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) with the IoU of the box against each cluster
    :raises ValueError: if any minimum width or height is exactly 0
    """
    overlap_w = np.minimum(clusters[:, 0], box[0])
    overlap_h = np.minimum(clusters[:, 1], box[1])
    if np.any(overlap_w == 0) or np.any(overlap_h == 0):
        raise ValueError("Box has no area")

    shared = overlap_w * overlap_h
    # union = area(box) + area(cluster) - area(intersection)
    union = box[0] * box[1] + clusters[:, 0] * clusters[:, 1] - shared

    return shared / union


def avg_iou(boxes, clusters):
    """
    Averages, over all boxes, the best IoU each box reaches against the k clusters.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    best_overlaps = []
    for index in range(boxes.shape[0]):
        # Keep only the IoU with the best-matching cluster for this box.
        overlaps = iou(boxes[index], clusters)
        best_overlaps.append(np.max(overlaps))
    return np.mean(best_overlaps)


def translate_boxes(boxes):
    """
    Translates all the boxes to the origin, keeping only their sizes.

    Columns 0-1 and 2-3 of each row are treated as two corner points
    (presumably x1, y1, x2, y2). The result holds the absolute differences
    ``|col2 - col0|`` and ``|col3 - col1|``. The input array is not modified.

    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    # Whole-column arithmetic replaces the per-row Python loop; the values
    # written are the same.
    new_boxes[:, 2] = np.abs(new_boxes[:, 2] - new_boxes[:, 0])
    new_boxes[:, 3] = np.abs(new_boxes[:, 3] - new_boxes[:, 1])
    # Drop the first corner, leaving only the two size columns.
    return np.delete(new_boxes, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    The distance between a box and a centroid is ``1 - IoU``. Initial
    centroids are ``k`` distinct rows of ``boxes`` picked at random (Forgy
    method). Iteration stops when no box changes cluster between two
    consecutive passes.

    The returned array has the dtype of ``boxes``, so pass floating-point
    boxes if centroids must not be truncated.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function that reduces the members of a cluster to its new
        centroid; called as ``dist(members, axis=0)``
    :return: numpy array of shape (k, 2)
    :raises ValueError: from ``iou`` when a box or centroid has a zero width
        or height, or from ``np.random.choice`` when ``k`` exceeds the
        number of rows
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the first pass can never look
    # "converged". With an all-zero start the loop would exit before any
    # centroid update whenever every box is first assigned to cluster 0
    # (always the case for k == 1).
    last_clusters = np.full((rows,), -1)

    # Re-seeds NumPy's global RNG without an explicit seed, so the initial
    # centroids differ from run to run.
    np.random.seed()

    # Forgy initialisation: k distinct row indices. Distinct indices may still
    # hold identical boxes, which leaves some clusters without members; the
    # update step below copes with that.
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # A cluster without members keeps its previous centroid: reducing
            # an empty selection (e.g. with np.median) yields NaN, which would
            # corrupt every later IoU and argmin.
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters