yolov5训练自己的数据集(docker)

yolov5官方代码: 选择 tag v5.0
https://github.com/ultralytics/yolov5

训练方式可参照另一篇博客
https://blog.csdn.net/m0_46825740/article/details/119956658?spm=1001.2014.3001.5501

数据放置路径
在这里插入图片描述

1. 生成docker镜像

docker build -t yolov5:5.0 .

镜像成功生成
在这里插入图片描述

2. 生成容器

nvidia-docker run -it -p 2224:22 -p 6006:6006 --ipc=host -v /home/slifeai/project_object/num_2/yolov5-5.0:/usr/src/app --name yolov5_train yolov5:5.0 /bin/bash

容器成功生成
在这里插入图片描述

3. 将训练数据拷贝进容器中

docker cp litter/ 8ac6770f1edb:/usr/src/app
  • 拷贝前
    在这里插入图片描述
  • 拷贝后
    在这里插入图片描述

4. 开始训练

python train.py --data data/mydata.yaml --cfg models/yolov5s.yaml --weights 'yolov5s.pt' --batch-size 64

在这里插入图片描述在这里插入图片描述

出现bug
  File "train.py", line 543, in <module>
    train(hyp, opt, device, tb_writer)
  File "train.py", line 87, in train
    ckpt = torch.load(weights, map_location=device)  # load checkpoint
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 592, in load
    return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
  File "/opt/conda/lib/python3.8/site-packages/torch/serialization.py", line 851, in _load
    result = unpickler.load()
AttributeError: Can't get attribute 'SPPF' on <module 'models.common' from '/usr/src/app/models/common.py'>

说明 models/common.py 里没有 SPPF 这个类:加载的 yolov5s.pt 是较新版本的权重,其模型结构用到了 SPPF,而 v5.0 的代码中还没有这个类。我把 yolov5-master 的 models/common.py 里的 SPPF 类拷贝到 v5.0 的 models/common.py 中,就能成功运行了

yolov5生成的pt权重转换

参考代码:https://github.com/soloIife/yolov5_for_rknn

数据放置路径
在这里插入图片描述

1. 生成docker镜像

docker build -t yolov5_for_rknn:master .

镜像成功生成
在这里插入图片描述

2. 生成容器

nvidia-docker run -it -p 2225:22  --ipc=host -v /home/slifeai/project_object/num_3/yolov5_for_rknn-master:/usr/src/app --name yolov5_convert_weight yolov5_for_rknn:master /bin/bash

容器成功生成
在这里插入图片描述

3.pt转onnx

用这个容器映射到本机的这个脚本将pt转换成onnx
D:/rknn/yolov5_for_rknn-master/yolov5_original/export_no_focus.py

4.onnx转rknn

D:/rknn/rknn_convert/onnx2rknn.py ---->同D:/rknn/yolov5_for_rknn-master/yolov5_original/onnx2rknn.py

import argparse
import os
from rknn.api import RKNN

if __name__ == '__main__':
    # Command-line entry point: load an ONNX model, build a quantized RKNN
    # model from it and write the result to disk.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", '--onnx', type=str, default='weights/litter_10.26.onnx', help='weights path')  # from yolov5/models/
    parser.add_argument('--rknn', type=str, default='weights/litter_10.26.rknn', help='保存路径')
    parser.add_argument("-p", '--precompile', action="store_true", help='是否是预编译模型')
    parser.add_argument("-o", '--original', action="store_true", help='是否是yolov5原生的模型')
    parser.add_argument("-bs", '--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    ONNX_MODEL = opt.onnx
    # An empty --rknn value falls back to "<onnx path without extension>.rknn".
    RKNN_MODEL = opt.rknn or "%s.rknn" % os.path.splitext(ONNX_MODEL)[0]

    rknn = RKNN()
    try:
        print('--> config model')
        # One mean/std entry per input channel. There are 12 of them —
        # presumably 4 sub-sampled slices x 3 colour channels, as produced by
        # the "no focus" export; TODO confirm against the exported ONNX input.
        rknn.config(mean_values=[[0.0] * 12],
                    std_values=[[255.0] * 12],
                    batch_size=opt.batch_size, reorder_channel='0 1 2')

        # Load the ONNX model
        print('--> Loading model')
        ret = rknn.load_onnx(model=ONNX_MODEL)
        # Explicit checks instead of assert: asserts are stripped under -O.
        if ret != 0:
            raise SystemExit("Load onnx failed!")

        # Build (quantize) the model
        print('--> Building model')
        if opt.precompile:
            ret = rknn.build(do_quantization=True, dataset='./data/dataset1.txt', pre_compile=True)
        else:
            ret = rknn.build(do_quantization=True, dataset='./data/dataset1.txt')
        if ret != 0:
            raise SystemExit("Build onnx failed!")

        # Export the RKNN model
        print('--> Export RKNN model')
        ret = rknn.export_rknn(RKNN_MODEL)
        if ret != 0:
            # Report the path that was actually written to (the original
            # message appended a second ".rknn" to opt.rknn).
            raise SystemExit("Export %s failed!" % RKNN_MODEL)
    finally:
        # Always free the toolkit resources, including on failure.
        rknn.release()
    print('done')

5.rknn检测

D:/rknn/rknn_convert/rknn_detect.py

import cv2
import time
import random
import numpy as np
from rknn.api import RKNN

"""
yolov5 预测脚本 for rknn
"""


def get_max_scale(img, max_w, max_h):
    """Return the largest scale (capped at 1) that fits ``img`` in ``max_w`` x ``max_h``.

    Only ``img.shape[:2]`` is read, interpreted as (height, width).
    """
    height, width = img.shape[:2]
    width_ratio = max_w / width
    height_ratio = max_h / height
    # The trailing 1 means the image is never scaled up.
    return min(width_ratio, height_ratio, 1)


def get_new_size(img, scale):
    """Return the (width, height) of ``img`` after multiplying by ``scale``.

    Each component is truncated to a Python int.
    """
    # shape[:2] is (height, width); reverse it to get (width, height).
    width_height = np.array(img.shape[:2][::-1]) * scale
    return tuple(int(side) for side in width_height)


def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    decay = np.exp(-x)
    return 1 / (1 + decay)


def filter_boxes(boxes, box_confidences, box_class_probs, conf_thres):
    """Keep the candidates whose best class score reaches ``conf_thres``.

    The per-class score is ``box_confidences * box_class_probs``. For every
    candidate the highest-scoring class is taken along the last axis.

    Returns:
        (boxes, classes, scores) restricted to candidates whose best score
        is >= ``conf_thres``.
    """
    # Objectness multiplied by the class probabilities.
    class_scores = box_confidences * box_class_probs
    best_class = np.argmax(class_scores, axis=-1)
    best_score = np.max(class_scores, axis=-1)
    # A boolean mask selects the same elements, in the same order, as
    # indexing with np.where(mask).
    selected = best_score >= conf_thres
    return boxes[selected], best_class[selected], best_score[selected]


def nms_boxes(boxes, scores, iou_thres):
    """Greedy non-maximum suppression over boxes given as (x, y, w, h).

    Args:
        boxes: 2-D array; columns 0..3 are read as x, y, width, height.
        scores: 1-D array with one score per row of ``boxes``.
        iou_thres: a candidate is dropped when its IoU with an already kept
            box is greater than this value.

    Returns:
        Integer index array of the kept rows, ordered by descending score.
        For empty input the result is an empty *integer* array, so it can
        still be used to index ``boxes`` (the default float dtype of an empty
        array would raise IndexError).
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2]
    h = boxes[:, 3]

    areas = w * h
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(i)

        # Intersection rectangle between box i and every remaining box.
        xx1 = np.maximum(x[i], x[rest])
        yy1 = np.maximum(y[i], y[rest])
        xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
        yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])

        # The small epsilon is kept from the original implementation.
        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[rest] - inter)
        # Continue only with boxes that do not overlap box i too much.
        order = rest[ovr <= iou_thres]
    return np.array(keep, dtype=np.intp)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one rectangle, and optionally a label, onto ``img`` in place.

    Args:
        x: sequence whose first four items are the corner coordinates
            (x1, y1, x2, y2); each is converted with ``int``.
        img: image array that is drawn on.
        color: colour passed to OpenCV; a random 3-component colour is
            generated when falsy.
        label: optional text drawn on a filled background above the corner
            (x1, y1).
        line_thickness: line thickness; derived from the image size when falsy.
    """
    if line_thickness:
        tl = line_thickness
    else:
        # Roughly 0.2% of the mean image side, plus one.
        tl = round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=tl, lineType=cv2.LINE_AA)
    if not label:
        return

    font_thickness = max(tl - 1, 1)
    font_scale = tl / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Filled background just above the box corner, sized to the text.
    label_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    cv2.rectangle(img, top_left, label_corner, color, -1, cv2.LINE_AA)
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)


def auto_resize(img, max_w, max_h):
    """Shrink ``img`` to fit within ``max_w`` x ``max_h``, keeping aspect ratio.

    The image is never enlarged (the scale is capped at 1).

    Returns:
        (resized image, scale that was applied).
    """
    # Reuse this file's helpers instead of repeating their expressions inline.
    scale = get_max_scale(img, max_w, max_h)
    target_size = get_new_size(img, scale)  # (width, height), as cv2.resize expects
    return cv2.resize(img, target_size), scale


def letterbox(img, new_wh=(416, 416), color=(114, 114, 114)):
    """Fit ``img`` into a ``new_wh`` canvas, padding the bottom and right edges.

    The image is first shrunk with ``auto_resize`` (never enlarged), then
    padded with the constant ``color`` up to ``new_wh`` = (width, height).

    Returns:
        (padded image, (new_wh[0] / scale, new_wh[1] / scale)), where
        ``scale`` is the factor applied by ``auto_resize``.
    """
    target_w, target_h = new_wh
    resized, scale = auto_resize(img, target_w, target_h)
    resized_h, resized_w = resized.shape[:2]
    # Padding goes only on the bottom and right, so the image stays anchored
    # at the top-left corner.
    padded = cv2.copyMakeBorder(
        resized,
        0, target_h - resized_h,
        0, target_w - resized_w,
        cv2.BORDER_CONSTANT,
        value=color,
    )
    gain = (target_w / scale, target_h / scale)
    return padded, gain


def load_model(model_path, npu_id):
    """Load an ``.rknn`` model and initialise the RKNN runtime.

    Args:
        model_path: path of the ``.rknn`` file.
        npu_id: accepted for interface compatibility; it is not used, and the
            runtime is initialised with default arguments.

    Returns:
        The initialised ``RKNN`` object.

    Raises:
        SystemExit: with the toolkit's return code when loading the model or
            initialising the runtime fails.
    """
    rknn = RKNN()
    # Kept from the original flow. The result is not used to choose a target;
    # the original built a device mapping from it that was never read.
    rknn.list_devices()
    print('-->loading model : ' + model_path)
    ret = rknn.load_rknn(model_path)
    if ret != 0:
        # Previously ignored; a bad model path only surfaced later, at
        # runtime initialisation or inference.
        print('Load RKNN model failed')
        raise SystemExit(ret)
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed')
        # SystemExit instead of exit(): the latter is injected by the site
        # module and is not guaranteed to exist.
        raise SystemExit(ret)
    print('done')
    return rknn


#
# def load_model(path, platform):
#     rknn = RKNN()
#     print('-->loading model')
#     rknn.load_rknn(path)
#     print('loading model done')
#     print('--> Init runtime environment')
#     # ret = rknn.init_runtime(target='rk1808', target_sub_class='AICS')
#     ret = rknn.init_runtime(target=platform)
#     if ret != 0:
#         print('Init runtime environment failed')
#         exit(ret)
#     print('done')
#     return rknn


class Detector:
    """YOLOv5 detector that runs inference through an RKNN model.

    Can be used as a context manager; ``close()`` releases the RKNN object
    and is safe to call more than once.
    """

    def __init__(self, opt):
        """Build a detector from a configuration mapping.

        Args:
            opt: mapping with a single ``'opt'`` entry holding the keys
                ``model`` (path to an ``.rknn`` file or an already loaded
                model object), ``size`` (network input width/height),
                ``masks`` (per output, the indices into ``anchors``),
                ``anchors``, ``names`` (list of class names, or path to a
                whitespace-separated text file of names), ``conf_thres``,
                ``iou_thres`` and ``platform`` (forwarded to ``load_model``).
        """
        # Set first so close()/__del__ are safe if anything below raises.
        self._rknn = None

        opt = opt['opt']
        self.opt = opt
        print(opt)

        model = opt['model']
        wh = opt['size']
        masks = opt['masks']
        anchors = opt['anchors']
        names = opt['names']
        conf_thres = opt['conf_thres']
        iou_thres = opt['iou_thres']
        platform = opt['platform']

        self.wh = wh
        self.size = wh
        self._masks = masks
        self._anchors = anchors
        if isinstance(names, str):
            # str.split() with no argument already strips whitespace and drops
            # empty entries; the file is closed by the with-block.
            with open(names, "r") as names_file:
                self.names = names_file.read().split()
        else:
            self.names = names
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        if isinstance(model, str):
            model = load_model(model, platform)
        self._rknn = model
        self.draw_box = False

    def _predict(self, img_src, img, gain):
        """Run the model on a pre-processed image and decode the detections.

        Args:
            img_src: original image; used for clipping boxes to its size and,
                when ``draw_box`` is set, drawn on in place.
            img: pre-processed image at network input size.
            gain: (x, y) factors that map the normalised box coordinates back
                to ``img_src`` pixels.

        Returns:
            ``(labels, boxes)``: a list of class indices and an array of
            ``(x1, y1, x2, y2)`` rows. ``([], [])`` when nothing is detected.
        """
        src_h, src_w = img_src.shape[:2]

        # Reverse the channel axis (presumably BGR -> RGB for images read by
        # OpenCV — confirm against the training pipeline).
        img = img[..., ::-1]
        # Stack the four 2x2-strided sub-images along the channel axis.
        # NOTE(review): looks like the Focus slicing done outside the model
        # for the "no focus" export — confirm.
        img = np.concatenate([img[::2, ::2], img[1::2, ::2], img[::2, 1::2], img[1::2, 1::2]], 2)

        t0 = time.time()
        pred_onx = self._rknn.inference(inputs=[img])
        print("inference time:\t", time.time() - t0)
        boxes, classes, scores = [], [], []
        # The first three model outputs are decoded, one per mask entry.
        for t in range(3):
            input0_data = sigmoid(pred_onx[t][0])
            input0_data = np.transpose(input0_data, (1, 2, 0, 3))
            grid_h, grid_w, channel_n, predict_n = input0_data.shape
            anchors = [self._anchors[i] for i in self._masks[t]]
            box_confidence = input0_data[..., 4]
            box_confidence = np.expand_dims(box_confidence, axis=-1)
            box_class_probs = input0_data[..., 5:]
            box_xy = input0_data[..., :2]
            box_wh = input0_data[..., 2:4]
            # Per-cell (column, row) offsets, repeated for every anchor.
            col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
            row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
            col = col.reshape((grid_h, grid_w, 1, 1)).repeat(channel_n, axis=-2)
            row = row.reshape((grid_h, grid_w, 1, 1)).repeat(channel_n, axis=-2)
            grid = np.concatenate((col, row), axis=-1)
            box_xy = box_xy * 2 - 0.5 + grid
            box_wh = (box_wh * 2) ** 2 * anchors
            box_xy /= (grid_w, grid_h)  # centre, normalised by the grid size
            box_wh /= self.wh  # width/height, normalised by the input size
            box_xy -= (box_wh / 2.)  # centre -> top-left corner
            box = np.concatenate((box_xy, box_wh), axis=-1)
            res = filter_boxes(box, box_confidence, box_class_probs, self.conf_thres)
            boxes.append(res[0])
            classes.append(res[1])
            scores.append(res[2])
        boxes, classes, scores = np.concatenate(boxes), np.concatenate(classes), np.concatenate(scores)

        # Non-maximum suppression is applied separately per class.
        nboxes, nclasses, nscores = [], [], []
        for class_id in set(classes):
            inds = np.where(classes == class_id)
            b = boxes[inds]
            c = classes[inds]
            s = scores[inds]
            keep = nms_boxes(b, s, self.iou_thres)
            nboxes.append(b[keep])
            nclasses.append(c[keep])
            nscores.append(s[keep])
        if len(nboxes) < 1:
            return [], []
        boxes = np.concatenate(nboxes)
        classes = np.concatenate(nclasses)
        scores = np.concatenate(nscores)

        label_list = []
        box_list = []
        score_list = []
        for (x, y, w, h), score, cl in zip(boxes, scores, classes):
            # Map back to source-image pixels and clip to the image bounds.
            x *= gain[0]
            y *= gain[1]
            w *= gain[0]
            h *= gain[1]
            x1 = max(0, np.floor(x).astype(int))
            y1 = max(0, np.floor(y).astype(int))
            x2 = min(src_w, np.floor(x + w + 0.5).astype(int))
            y2 = min(src_h, np.floor(y + h + 0.5).astype(int))
            # The class index is returned, not the class name.
            label_list.append(cl)
            score = round(score, 3)
            score_list.append(score)
            box_list.append((x1, y1, x2, y2))
            if self.draw_box:
                plot_one_box((x1, y1, x2, y2), img_src, label=self.names[cl])
        print("label_list", label_list)
        print("score_list", score_list)
        print("box_list", box_list)
        return label_list, np.array(box_list)

    def detect_resize(self, img_src):
        """Detect on one image, pre-processing with a plain resize.

        The image is stretched to the network input size, so the aspect ratio
        is not preserved.

        Returns:
            ``(labels, boxes)`` as described in ``_predict``.
        """
        _img = cv2.resize(img_src, self.wh)
        gain = img_src.shape[:2][::-1]
        return self._predict(img_src, _img, gain)

    def detect(self, img_src):
        """Detect on one image, pre-processing with ``letterbox``.

        The aspect ratio is preserved and the remainder is padded.

        Returns:
            ``(labels, boxes)`` as described in ``_predict``.
        """
        _img, gain = letterbox(img_src, self.wh)
        return self._predict(img_src, _img, gain)

    def close(self):
        """Release the RKNN object; later calls are no-ops."""
        rknn = getattr(self, "_rknn", None)
        if rknn is None:
            return
        # Clear the reference first so __exit__ followed by __del__ does not
        # release the same runtime twice.
        self._rknn = None
        rknn.release()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __del__(self):
        self.close()


# def test_video(det, video_path):
#     reader = cv2.VideoCapture()
#     reader.open(video_path)
#     while True:
#         ret, frame = reader.read()
#         if not ret:
#             break
#         t0 = time.time()
#         det.detect(frame)
#         print("total time", time.time() - t0)
#         cv2.imshow("res", auto_resize(frame, 1200, 600)[0])
#         cv2.waitKey(1)


if __name__ == '__main__':
    # Demo: run the detector once on a sample image using the YAML config.
    import yaml
    import cv2

    image_path = "img/0625_Bin_046.jpg"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; fail here with a clear message.
        raise SystemExit("Failed to read image: " + image_path)

    # The config file only needs to stay open while it is parsed.
    # NOTE: yaml.load should only be given trusted configuration files.
    with open("yolov5_rknn_640x640.yaml", "rb") as f:
        cfg = yaml.load(f, yaml.FullLoader)

    d = Detector(cfg)
    d.draw_box = True
    d.detect(image)
    # cv2.imshow("res", image)
    # cv2.waitKey()
    # cv2.destroyAllWindows()
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐