【1】batchimageprocess.py #批量图片处理、改名字、改类型

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   batchimageprocess.py
@Time    :   2021/06/03 11:19:16
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib

'''
该程序功能:完成图像批量处理、改名字、图片类型、图像增强
'''
from numpy.core.fromnumeric import resize
import tensorflow as tf
import os
import numpy as np
import  cv2

 
#从文件夹载入多种图片,处理后将结果保存到一个文件夹
#处理范围包括:图片类型的转换,名字转换,文件名导出
def read__image(open_path,save_path):
    """Resize every ``.jpg`` in ``open_path`` to 500x500 and save it as ``.png``.

    Args:
        open_path: Folder that is scanned (non-recursively) for ``*.jpg`` files.
        save_path: Folder that receives ``<name>.png`` for every converted image.

    Prints the number of images that were actually written.
    """
    nums = 0
    for dir_image in os.listdir(open_path):
        if not dir_image.endswith('.jpg'):
            continue
        full_path = os.path.abspath(os.path.join(open_path, dir_image))
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread returns None (it does not raise) when a file cannot
            # be decoded; skip it instead of crashing inside cv2.resize.
            print("Skip unreadable image: %s" % full_path)
            continue
        n = os.path.splitext(dir_image)[0]
        image = cv2.resize(image, (500, 500), interpolation=cv2.INTER_CUBIC)
        # The extension is mandatory: cv2.imwrite picks the encoder from it.
        # os.path.join also works when save_path has no trailing separator.
        image_path = os.path.join(save_path, '%s.png' % n)
        if cv2.imwrite(image_path, image):
            nums += 1
        else:
            print("Failed to write image: %s" % image_path)
    print("Output over!The total of image are %s ."%nums)

#导出图片的名字,写入到文件夹中
def Outputimgname(imgpath):
    """Append the base name of every ``.jpg`` in ``imgpath`` to ``testdata.txt``.

    One name (without extension) is written per line. The file is opened in
    append mode in the current working directory and is only touched when at
    least one ``.jpg`` is found.

    Args:
        imgpath: Folder that is scanned (non-recursively) for ``*.jpg`` files.
    """
    # Only the file names are needed, so the images are not decoded.
    names = [os.path.splitext(dir_image)[0]
             for dir_image in os.listdir(imgpath)
             if dir_image.endswith('.jpg')]
    if names:
        # Open the list file once instead of once per image.
        with open("testdata.txt", "a") as txt_file:
            txt_file.writelines(name + '\n' for name in names)
    print("Output over!The total of image are %s ."%len(names))

#载入图片,处理后保存到一个列表中
def GetImg(open_path):
    """Load every ``.jpg`` in ``open_path`` and return them resized to 227x227.

    Args:
        open_path: Folder that is scanned (non-recursively) for ``*.jpg`` files.

    Returns:
        A list with one resized image per readable ``.jpg`` file.
    """
    patch = []
    for dir_image in os.listdir(open_path):
        if not dir_image.endswith('.jpg'):
            continue
        full_path = os.path.abspath(os.path.join(open_path, dir_image))
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread returns None when decoding fails; cv2.resize would
            # otherwise raise an opaque assertion error on it.
            print("Skip unreadable image: %s" % full_path)
            continue
        patch.append(cv2.resize(image, (227, 227)))
    return patch
 
#输入一张图片,产生旋转,缩放,长宽调整等形式
def DateArgutation(images):
    """Build nine augmented variants of one image.

    The variants are, in order: three rotations (each also scaled by 0.5),
    three uniform rescalings and three fixed-size resizes.

    Args:
        images: A single image array; ``shape[0]`` is used as the height and
            ``shape[1]`` as the width.

    Returns:
        A list of the nine augmented images.
    """
    img = images
    height, width = img.shape[0], img.shape[1]
    agutationimg = []

    # Three rotations. OpenCV takes the rotation centre as (x, y) and the
    # output size as (width, height); the previous (height, width) order put
    # the centre and canvas in the wrong place for non-square images.
    # NOTE(review): 280 is kept from the original; 270 may have been intended.
    center = (width * 0.5, height * 0.5)
    for a in [90, 180, 280]:
        matRotate = cv2.getRotationMatrix2D(center, a, 0.5)
        agutationimg.append(cv2.warpAffine(img, matRotate, (width, height)))

    # Three uniform rescalings.
    for s in [0.5, 2, 4]:
        dstHeight = int(height * s)
        dstWidth = int(width * s)
        agutationimg.append(cv2.resize(img, (dstWidth, dstHeight)))

    # Three fixed sizes. cv2.resize takes dsize as (width, height), so each
    # pair is (output width, output height).
    for dst_width, dst_height in [[40, 50], [60, 50], [60, 40]]:
        agutationimg.append(cv2.resize(img, (dst_width, dst_height)))

    print('success!')
    return agutationimg


if __name__=='__main__':
    # Input folder that holds the source .jpg images
    openpath1="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/01mylittletestdata/testdata01/"
    # Output folder that receives the converted .png images
    savepath1="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/01mylittletestdata/testdatalabel01/"
    read__image(openpath1,savepath1)

【2】dataimagetest.py #多种图片测试(此处是8张图片)

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   dataimagetest.py
@Time    :   2021/06/02 20:41:02
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib

from dateset import *
from segmentmodel import *
from torch.types import Device
from segmentmodelaccuracy import *
import cv2


'''
【1】--------------------------------------------------------------------------------------------------
测试多张图片的分割效果,注意要点如下:
[1] 需要测试图像的大小必须大于( high,width = 320,480),可以对整体图片进行缩放差值调整分辨率的大小为(500,500);
[2] 数据的运行流程:
    (1)需要测试的数据、标签、txt文档---->(2)调用数据预处理类。数据预处理、随机剪裁等---->
    (3)返回图像、标签---->(4)可视化显示标签和图像---->(5)计算pixelaccuary
[3] 计算整体模型的分割效果,分割精度。

【2】--------------------------------------------------------------------------------------------------
未解决要点:
[1] 测试图像理论上不需要调用数据预处理类,因为会造成roi区域的损失。
[2] 测试图像为jpg格式的、label则为png格式的。

【3】--------------------------------------------------------------------------------------------------
程序运行需要更改的地方:
[1] 路径,txt、image、label的路径
[2] 可以修改图像的大小
'''

# Make matplotlib render Chinese text: use the SimHei font and disable the
# unicode minus sign
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


if __name__=='__main__':
    # Imported here so this script does not depend on names that merely leak
    # through the star-imports above.
    import os
    import numpy as np

    print("The test is starting!")

    #------------------------------------------ settings to change -----------------------------------------#
    # Crop size fed to the network
    high,width = 320,480
    # Text file listing the names of the images to test
    path="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/01mylittletestdata/testdata.txt"
    # Folder that holds the test images
    imagepath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/01mylittletestdata/testdata01/"
    # Folder that holds the label images.
    # Note: labels are not strictly needed for testing, but the dataset class
    # loads image/label pairs, so label files are assumed to exist.
    labelpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/01mylittletestdata/testsegdatalabel01/"
    #-------------------------------------------------------------------------------------------------------#

    voc_test = MyDataset(path,imagepath,labelpath,high,width, img_transforms,colormap)
    # Data loader: each batch holds 8 images
    val_loader = Data.DataLoader(voc_test, batch_size=8,shuffle=False,num_workers=1,pin_memory=True)

    # Load the trained model. The file holds the whole pickled model, so no
    # fresh FCN8s has to be constructed first; map_location lets a model saved
    # on a GPU be loaded on a CPU-only machine.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load("savemodel/fcn8s.pth", map_location=device)
    model = model.to(device)
    model.eval()

    ## Take exactly the first batch (a for/break loop would leave b_x/b_y
    ## bound to the second batch whenever one exists)
    b_x, b_y = next(iter(val_loader))
    print(b_x.size())

    ## Predict the batch; gradients are not needed for inference
    b_x = b_x.float().to(device)
    b_y = b_y.long().to(device)
    with torch.no_grad():
        out = model(b_x)
        out = F.log_softmax(out,dim=1)
        pre_lab = torch.argmax(out,1)

    ## Visualise images, labels and predictions of the batch
    b_x_numpy = b_x.cpu().data.numpy()
    b_x_numpy = b_x_numpy.transpose(0,2,3,1)
    b_y_numpy = b_y.cpu().data.numpy()
    pre_lab_numpy = pre_lab.cpu().data.numpy()

    # cv2.imwrite does not create missing folders
    os.makedirs("./segmentresult/", exist_ok=True)

    plt.figure(figsize=(16,9))
    # Show at most 4 samples; the batch may hold fewer
    for ii in range(min(4, len(b_x_numpy))):
        plt.subplot(3,4,ii+1)
        print(ii,b_x_numpy[ii].size)
        plt.imshow(inv_normalize_image(b_x_numpy[ii]))
        plt.axis("off")
        plt.subplot(3,4,ii+5)
        plt.imshow(label2image(b_y_numpy[ii],colormap))
        plt.axis("off")
        plt.subplot(3,4,ii+9)
        plt.imshow(label2image(pre_lab_numpy[ii],colormap))
        plt.axis("off")
        # Save the predicted class-index map. argmax yields int64, which
        # cv2.imwrite cannot encode; class ids fit into uint8.
        img_R = pre_lab_numpy[ii].astype(np.uint8)
        name="./segmentresult/"+str(ii)+".png"
        cv2.imwrite(name,img_R)
    plt.subplots_adjust(wspace=0.05, hspace=0.05)
    plt.show()
    print("test was over!")

【3】dateset.py #图像预处理和加载

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   dateset.py
@Time    :   2021/05/31 10:34:20
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import glob
from time import time
import os
from skimage.io import imread
import copy
import time

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import transforms
from torchvision.models import vgg19
from torchsummary import summary

'''
此文件包含前期图像数据的处理,导入图像数据并对图像数据处理的相关函数
'''

## Compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## 定义一个读取图像的函数,返回的是训练图像和训练图像的标签train_testpath,train_imagepath,train_labelpath
def read_image(testpath,imagepath,labelpath):
    """Load all images and labels named in a list file.

    Args:
        testpath: Text file with one image name (without extension) per line.
        imagepath: Prefix prepended to ``<name>.jpg`` (normally a folder path
            ending with a separator).
        labelpath: Prefix prepended to ``<name>.png``.

    Returns:
        ``(data, label)``: two lists holding the loaded images and the loaded
        label images, in the order of the list file.
    """
    # ndmin=1 keeps a one-line list file from collapsing into a 0-d array,
    # which has no len() and cannot be iterated.
    names = np.loadtxt(testpath, dtype=str, ndmin=1)
    data, label = [], []
    for fname in names:
        data.append(imread(imagepath + "%s.jpg" % (fname)))
        label.append(imread(labelpath + "%s.png" % (fname)))
    return data, label

## Class names; the position in the list is the class index
classes = ['background','aeroplane','bicycle','bird','boat',
           'bottle','bus','car','cat','chair','cow','diningtable',
           'dog','horse','motorbike','person','potted plant',
           'sheep','sofa','train','tv/monitor']
# RGB value of each class, in the same order as `classes`
colormap = [[0,0,0],[128,0,0],[0,128,0], [128,128,0], [0,0,128],
            [128,0,128],[0,128,128],[128,128,128],[64,0,0],[192,0,0],
            [64,128,0],[192,128,0],[64,0,128],[192,0,128],
            [64,128,128],[192,128,128],[0,64,0],[128,64,0],
            [0,192,0],[128,192,0],[0,64,128]]

## 给定一个标号图片,将像素值对应的物体找出来
def image2label(image,colormap):
    """Convert an RGB label image into a 2-D map of class indices.

    Args:
        image: Array-like of shape (H, W, 3+) holding RGB values.
        colormap: Sequence of ``[r, g, b]`` colours; the position of a colour
            is the class index it maps to.

    Returns:
        A float64 array of shape (H, W). Pixels whose colour is not in
        ``colormap`` get index 0.
    """
    image = np.array(image, dtype="int64")
    # Pack each RGB triple into one integer: (r*256 + g)*256 + b. The previous
    # expression (r*256 + g*256 + b) missed a parenthesis, so the pixel codes
    # never matched the codes computed for the colormap entries.
    ix = (image[:,:,0]*256 + image[:,:,1])*256 + image[:,:,2]
    # Compare against each colour directly instead of allocating a
    # 256**3-entry lookup table on every call.
    label = np.zeros(ix.shape)
    for i, cm in enumerate(colormap):
        label[ix == (cm[0]*256 + cm[1])*256 + cm[2]] = i
    return label

#中心检测
def center_crop(data, label, height, width):
    """Centre-crop an image and its label to ``(height, width)``.

    Both inputs are passed through ``transforms.CenterCrop`` with the same
    target size and returned as ``(data, label)``.
    """
    target_size = (height, width)
    cropped_data = transforms.CenterCrop(target_size)(data)
    cropped_label = transforms.CenterCrop(target_size)(label)
    return cropped_data, cropped_label

## 随机裁剪图像数据
def rand_crop(data,label,high,width):
    """Cut the same random ``width`` x ``high`` window out of image and label.

    Args:
        data: Image object exposing ``size`` as ``(width, height)`` and
            ``crop((left, top, right, bottom))``.
        label: Label image with the same interface; cropped with the same box.
        high: Height of the crop window.
        width: Width of the crop window.

    Returns:
        ``(data, label)`` cropped to the same window.

    Raises:
        ValueError: From ``np.random.randint`` when the image is smaller than
            the requested window.
    """
    im_width, im_high = data.size
    # randint excludes its upper bound, so "+ 1" makes the last valid offset
    # reachable and allows an image that is exactly the crop size.
    left = np.random.randint(0, im_width - width + 1)
    top = np.random.randint(0, im_high - high + 1)
    box = (left, top, left + width, top + high)
    return data.crop(box), label.crop(box)

## 单个图像的转换操作,随机剪裁、预处理、正则化
def img_transforms(data, label, high,width,colormap):
    """Randomly crop an image/label pair and turn both into tensors.

    The image goes through ``ToTensor`` followed by ``Normalize`` with the
    mean/std constants below; the label is converted to class indices with
    ``image2label`` and wrapped with ``torch.from_numpy``.
    """
    data, label = rand_crop(data, label, high,width)
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    image_tensor = normalize(to_tensor(data))
    label_tensor = torch.from_numpy(image2label(label,colormap))
    return image_tensor, label_tensor

## 定义一列出需要读取的数据路径的函数,返回的是图像的地址
def read_image_path(testpath,imagepath,labelpath):
    """Build the image and label file paths for every name in a list file.

    Args:
        testpath: Text file with one image name (without extension) per line.
        imagepath: Prefix prepended to ``<name>.jpg`` (normally a folder path
            ending with a separator).
        labelpath: Prefix prepended to ``<name>.png``.

    Returns:
        ``(data, label)``: two lists of path strings in list-file order.
    """
    # ndmin=1 keeps a one-line list file from collapsing into a 0-d array,
    # which has no len() and cannot be iterated.
    names = np.loadtxt(testpath, dtype=str, ndmin=1)
    data = [imagepath + "%s.jpg" % (fname) for fname in names]
    label = [labelpath + "%s.png" % (fname) for fname in names]
    return data, label

## 最后我们定义一个 MyDataset 继承于torch.utils.data.Dataset构成我们自定的训练集
class MyDataset(Data.Dataset):
    """Dataset of (image, label) pairs read from disk and transformed on access.

    Args:
        data_root: Text file listing the image names, one per line.
        imagepath: Prefix of the ``.jpg`` image files.
        labelpath: Prefix of the ``.png`` label files.
        high: Crop height; files not taller than this are dropped.
        width: Crop width; files not wider than this are dropped.
        imtransform: Callable ``(img, label, high, width, colormap)`` returning
            the transformed ``(img, label)`` pair.
        colormap: Class colours forwarded to ``imtransform``.
    """
    def __init__(self, data_root,imagepath,labelpath,high,width, imtransform,colormap):
        self.data_root = data_root
        self.high = high
        self.width = width
        self.imtransform = imtransform
        self.colormap = colormap
        data_list, label_list = read_image_path(testpath=data_root,imagepath=imagepath,labelpath=labelpath)
        # Filter image and label together: filtering the two lists separately
        # shifts every later pair as soon as only one file of a pair is dropped.
        kept = [(im, lb) for im, lb in zip(data_list, label_list)
                if self._is_large_enough(im) and self._is_large_enough(lb)]
        self.data_list = [im for im, _ in kept]
        self.label_list = [lb for _, lb in kept]

    def _is_large_enough(self, path):
        """Return True when the file at ``path`` is strictly larger than the crop."""
        # Open once and close the handle right away.
        with Image.open(path) as im:
            im_width, im_high = im.size
        return im_high > self.high and im_width > self.width

    # Private helper: drop the files that are not larger than (high, width)
    def _filter(self, images):
        return [im for im in images if self._is_large_enough(im)]

    def __getitem__(self, idx):
        img = Image.open(self.data_list[idx])
        # Labels are converted to RGB before being handed to the transform
        label = Image.open(self.label_list[idx]).convert('RGB')
        img, label = self.imtransform(img, label, self.high,
                                      self.width,self.colormap)
        return img, label

    def __len__(self):
        return len(self.data_list)


## 将标准化后的图像转化为0~1的区间
def inv_normalize_image(data):
    """Undo the mean/std normalisation and clip the result to [0, 1].

    ``data`` is cast to float32, multiplied by the per-channel std and shifted
    by the per-channel mean; the channels are expected on the last axis.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    restored = np.add(np.multiply(data.astype('float32'), channel_std), channel_mean)
    return np.clip(restored, 0, 1)

## 从预测的标签转化为图像的操作
def label2image(prelabel,colormap):
    """Colour a 2-D map of class indices.

    Args:
        prelabel: Array of shape (H, W) holding class indices.
        colormap: Sequence of ``[r, g, b]`` colours indexed by class.

    Returns:
        An int32 array of shape (H, W, 3). Indices without an entry in
        ``colormap`` stay black.
    """
    h, w = prelabel.shape
    image = np.zeros((h, w, 3), dtype="int32")
    for ii, color in enumerate(colormap):
        # A boolean mask touches only the matching pixels. The previous
        # image[np.where(...), :] form also used the column indices (all
        # zeros) as row indices, so pixel 0 was overwritten by every class
        # present in the map.
        image[prelabel == ii] = color
    return image

## 可视化一个batch的图像,检查数据预处理 是否正确


if __name__=='__main__':

    # [0] Show the number of classes and colours
    print("The total of class is :",len(classes), len(colormap))

    # [1] ------------------------------ check that the dataset loads correctly ------------------------------
    print('-'*60)
    ## Training data: name list file, image folder, label folder
    train_testpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/train.txt"
    train_imagepath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/JPEGImages/"
    train_labelpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/SegmentationClass/"
    traindata,trainlabel = read_image(train_testpath,train_imagepath,train_labelpath)

    ## Validation data: name list file, image folder, label folder
    val_testpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/val.txt"
    val_imagepath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/JPEGImages/"
    val_labelpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/SegmentationClass/"

    valdata,vallabel = read_image(val_testpath,val_imagepath,val_labelpath)
    print("The lenth of date:", len(traindata),len(valdata))

    ## Show two training images next to their labels
    plt.figure(figsize=(12,8))
    plt.subplot(2,2,1)
    plt.imshow(traindata[0])
    plt.subplot(2,2,2)
    plt.imshow(trainlabel[0])
    plt.subplot(2,2,3)
    plt.imshow(traindata[10])
    plt.subplot(2,2,4)
    plt.imshow(trainlabel[10])
    plt.show()

    # [2] ------------------------------ visualise the data fed to training/testing ------------------------------
    print('-'*60)

    ## Crop size
    high,width = 320,480
    # Name list of the training images
    train_path="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/train.txt"
    # Name list of the validation images
    val_path="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/val.txt"
    # Folder with all images (JPEGImages)
    imagepath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/JPEGImages/"
    # Folder with all labels (SegmentationClass)
    labelpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/SegmentationClass/"

    voc_train = MyDataset(train_path,imagepath,labelpath,high,width, img_transforms,colormap)
    voc_val   = MyDataset(val_path,imagepath,labelpath,high,width, img_transforms,colormap)

    # Data loaders: each batch holds 4 images
    train_loader = Data.DataLoader(voc_train, batch_size=4,shuffle=True,
                                num_workers=2,pin_memory=True)
    val_loader = Data.DataLoader(voc_val, batch_size=4,shuffle=True,
                                num_workers=2,pin_memory=True)

    # Take exactly one batch and check its dimensions. A for/break loop would
    # load a second batch and leave b_x/b_y bound to it.
    b_x, b_y = next(iter(train_loader))
    print("b_x.shape:",b_x.shape)
    print("b_y.shape:",b_y.shape)
    print("b_x.dtype:",b_x.dtype)
    print("b_y.dtype:",b_y.dtype)

    # Visualise the batch: images on the top row, labels below
    b_x_numpy = b_x.data.numpy()
    b_x_numpy = b_x_numpy.transpose(0,2,3,1)
    b_y_numpy = b_y.data.numpy()
    plt.figure(figsize=(16,6))
    # Show at most 4 samples; the batch may hold fewer
    for ii in range(min(4, len(b_x_numpy))):
        plt.subplot(2,4,ii+1)
        plt.imshow(inv_normalize_image(b_x_numpy[ii]))
        plt.axis("off")
        plt.subplot(2,4,ii+5)
        plt.imshow(label2image(b_y_numpy[ii],colormap))
        plt.axis("off")
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()
    print("show success!")

【4】oneimagetest.py #单张图片测试

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   oneimagetest.py
@Time    :   2021/06/01 14:21:52
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib
'''
分割模型测试
测试单张图片,读入的维度存在问题

参考解决方案
【1】 https://blog.csdn.net/qq_38284961/article/details/100144258?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-3.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-3.control

'''

## 导入本章所需要的模块
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import torch
from torchvision import transforms
import torchvision
from segmentmodel import *



# [1] Read the picture. Force 3 channels because Normalize below is given
# three means/stds.
image = PIL.Image.open("./data/04.jpg").convert("RGB")
# Pre-process: scale to 0-1, then standardise per channel
image_transf = transforms.Compose([transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225])
])
image_tensor = image_transf(image).unsqueeze(0)

# [2] Load the model. The file holds the whole pickled model, so no fresh
# FCN8s has to be constructed first; map_location lets a model saved on a GPU
# be loaded on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load("savemodel/fcn8s.pth", map_location=device)
model = model.to(device)
model.eval()

# NOTE(review): segmentmodel.py states that the input size should be a
# multiple of 32; an image of another size may fail in the skip additions.
print(image_tensor.shape)
with torch.no_grad():
    output = model(image_tensor.to(device))
# FCN8s.forward returns the score tensor itself; only dict-returning models
# (torchvision style) need the "out" entry.
if isinstance(output, dict):
    output = output["out"]
print(output.shape)

# Move to the CPU before converting to NumPy (required for CUDA tensors)
outputarg = torch.argmax(output.squeeze(), dim=0).cpu().numpy()

## 对得到的输出结果进行编码
def decode_segmaps(image,label_colors, nc=21):
    """Colour a 2-D class-index map.

    Every class ``0 .. nc-1`` is painted with its row of ``label_colors``;
    other values stay black. The result is a uint8 array with the three
    colour planes stacked on axis 2.
    """
    planes = [np.zeros_like(image).astype(np.uint8) for _ in range(3)]
    for cla in range(nc):
        mask = image == cla
        for channel, plane in enumerate(planes):
            plane[mask] = label_colors[cla, channel]
    return np.stack(planes, axis=2)


label_colors = np.array([(0, 0, 0),  # 0=background
               # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
               (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
               # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
               (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
               # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
               (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
               # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
               (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])

# Colour the predicted class map and show it next to the input picture
outputrgb = decode_segmaps(outputarg,label_colors)
plt.figure(figsize=(20,8))
# Left: input picture
plt.subplot(1,2,1)
plt.imshow(image)
plt.axis("off")
# Right: colour-coded segmentation
plt.subplot(1,2,2)
plt.imshow(outputrgb)
plt.axis("off")
plt.subplots_adjust(wspace=0.05)
plt.show()

【5】segmentmodel.py #分割模型搭建

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   segmentmodel.py
@Time    :   2021/06/01 13:53:00
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import glob
from time import time
import os
from skimage.io import imread
import copy
import time
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data as Data
from torchvision import transforms
from torchvision.models import vgg19
from torchsummary import summary
import hiddenlayer as h1

from visualize import  make_dot   


'''
定义分割模型的网络结构
'''


## 定义FCN语义分割网络
class FCN8s(nn.Module):
    """FCN semantic segmentation network built on the VGG19 feature extractor.

    The forward pass runs the VGG19 ``features`` stack, keeps the outputs of
    its max-pool layers, and then upsamples the last one with five transposed
    convolutions. The outputs of ``maxpool_4`` and ``maxpool_3`` are added in
    as skip connections after the first and second upsampling step. A final
    1x1 convolution produces one score map per class.
    """
    def __init__(self, num_classes):
        super().__init__()
        # num_classes: number of classes in the training data
        self.num_classes = num_classes
        model_vgg19 = vgg19(pretrained=True)
        ## Only the `features` part of VGG19 is used; its trailing
        ## AdaptiveAvgPool2d and Linear layers are dropped
        self.base_model = model_vgg19.features
        ## Layers used by the decoder; transposed convolutions upsample the
        ## feature maps
        self.relu    = nn.ReLU(inplace=True)
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2,
                                          padding=1, dilation=1, output_padding=1)
        self.bn1     = nn.BatchNorm2d(512)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 3, 2, 1, 1, 1)
        self.bn2     = nn.BatchNorm2d(256)
        self.deconv3 = nn.ConvTranspose2d(256, 128, 3, 2, 1, 1, 1)
        self.bn3     = nn.BatchNorm2d(128)
        self.deconv4 = nn.ConvTranspose2d(128, 64, 3, 2, 1, 1, 1)
        self.bn4     = nn.BatchNorm2d(64)
        self.deconv5 = nn.ConvTranspose2d(64, 32, 3, 2, 1, 1, 1)
        self.bn5     = nn.BatchNorm2d(32)
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)
    
        ## Names (indices inside `features`) of the MaxPool2d layers of VGG19
        self.layers = {"4": "maxpool_1","9": "maxpool_2", 
                       "18": "maxpool_3", "27": "maxpool_4", 
                       "36": "maxpool_5"}

    def forward(self, x):
        # Collects the max-pool outputs, keyed by the names in self.layers
        output = {}
        for name, layer in self.base_model._modules.items():
            ## Run the feature layers one by one, starting from the first
            x = layer(x)
            ## Keep the output when this layer is listed in self.layers
            if name in self.layers:
                output[self.layers[name]] = x
        x5 = output["maxpool_5"]  # size=(N, 512, x.H/32, x.W/32)
        x4 = output["maxpool_4"]  # size=(N, 512, x.H/16, x.W/16)
        x3 = output["maxpool_3"]  # size=(N, 256, x.H/8,  x.W/8)
        ## Apply the transposed convolutions, growing the maps step by step
        ## back to the input size
        # size=(N, 512, x.H/16, x.W/16)
        score = self.relu(self.deconv1(x5))
        # element-wise sum with the skip connection, size=(N, 512, x.H/16, x.W/16)
        score = self.bn1(score + x4)  
        # size=(N, 256, x.H/8, x.W/8)
        score = self.relu(self.deconv2(score)) 
        # element-wise sum with the skip connection, size=(N, 256, x.H/8, x.W/8)
        score = self.bn2(score + x3)  
        # size=(N, 128, x.H/4, x.W/4)
        score = self.bn3(self.relu(self.deconv3(score))) 
        # size=(N, 64, x.H/2, x.W/2)
        score = self.bn4(self.relu(self.deconv4(score)))
        # size=(N, 32, x.H, x.W)
        score = self.bn5(self.relu(self.deconv5(score)))  
        score = self.classifier(score)                    

        return score  # size=(N, n_class, x.H/1, x.W/1)

if __name__=='__main__':
    ## Compute device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    high,width = 320,480
    ## Note: the input size should be an integer multiple of 32

    # Print the parameters of every layer of the network
    fcn8s = FCN8s(21).to(device)
    summary(fcn8s,input_size=(3, high,width))

    # Render the network graph. The dummy input is created on the same device
    # as the model; a CPU tensor fails when the model lives on the GPU.
    y = fcn8s(torch.zeros([1,3,high,width], device=device))
    g = make_dot(y)
    g.view()

【6】segmentmodelaccuracy.py #分割精度测试

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   segmentmodelaccuracy.py
@Time    :   2021/06/04 16:21:23
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib
'''
此文档完成图像分割结果精度的评估
参考博客
https://geekzw.blog.csdn.net/article/details/80408465?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-2.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-2.control
'''


import cv2
import os
import numpy as np
from matplotlib import pyplot as plt
 
 
# 计算DICE系数,即DSI
def calDSI(binary_GT,binary_R):
    """Return the Dice coefficient of two binary masks.

    A pixel counts as foreground when its value is 255. The result is
    ``2 * |GT and R| / (|GT| + |R|)``.

    Args:
        binary_GT: 2-D ground-truth mask.
        binary_R: 2-D segmentation mask; only the part covered by the shape
            of ``binary_GT`` is used.

    Raises:
        ZeroDivisionError: When neither mask has a foreground pixel.
    """
    row, col = binary_GT.shape  # rows and columns of the mask
    gt_mask = np.asarray(binary_GT) == 255
    seg_mask = np.asarray(binary_R)[:row, :col] == 255
    # Vectorised counts replace the per-pixel Python loops
    DSI_s = int(np.count_nonzero(gt_mask & seg_mask))
    DSI_t = int(np.count_nonzero(gt_mask)) + int(np.count_nonzero(seg_mask))
    DSI = 2*DSI_s/DSI_t
    return DSI
 
# 计算VOE系数,即VOE
def calVOE(binary_GT,binary_R):
    """Return ``2 * (|R| - |GT|) / (|R| + |GT|)`` for two binary masks.

    A pixel counts as foreground when its value is 255.
    NOTE(review): this is a signed, symmetric size difference rather than an
    overlap-based measure; the formula is kept exactly as it was.

    Args:
        binary_GT: 2-D ground-truth mask.
        binary_R: 2-D segmentation mask; only the part covered by the shape
            of ``binary_GT`` is used.

    Raises:
        ZeroDivisionError: When neither mask has a foreground pixel.
    """
    row, col = binary_GT.shape  # rows and columns of the mask
    # Vectorised counts replace the per-pixel Python loops
    VOE_s = int(np.count_nonzero(np.asarray(binary_GT) == 255))
    VOE_t = int(np.count_nonzero(np.asarray(binary_R)[:row, :col] == 255))
    VOE = 2*(VOE_t - VOE_s)/(VOE_t + VOE_s)
    return VOE
 
# 计算RVD系数,即RVD
def calRVD(binary_GT,binary_R):
    """Return the relative size difference ``|R| / |GT| - 1`` of two masks.

    A pixel counts as foreground when its value is 255.

    Args:
        binary_GT: 2-D ground-truth mask.
        binary_R: 2-D segmentation mask; only the part covered by the shape
            of ``binary_GT`` is used.

    Raises:
        ZeroDivisionError: When the ground truth has no foreground pixel.
    """
    row, col = binary_GT.shape  # rows and columns of the mask
    # Vectorised counts replace the per-pixel Python loops
    RVD_s = int(np.count_nonzero(np.asarray(binary_GT) == 255))
    RVD_t = int(np.count_nonzero(np.asarray(binary_R)[:row, :col] == 255))
    RVD = RVD_t/RVD_s - 1
    return RVD
 
# 计算Prevision系数,即Precison
def calPrecision(binary_GT,binary_R):
    """Return the precision ``|GT and R| / |R|`` of a segmentation mask.

    A pixel counts as foreground when its value is 255.

    Args:
        binary_GT: 2-D ground-truth mask.
        binary_R: 2-D segmentation mask; only the part covered by the shape
            of ``binary_GT`` is used.

    Raises:
        ZeroDivisionError: When the segmentation has no foreground pixel.
    """
    row, col = binary_GT.shape  # rows and columns of the mask
    gt_mask = np.asarray(binary_GT) == 255
    seg_mask = np.asarray(binary_R)[:row, :col] == 255
    # Vectorised counts replace the per-pixel Python loops
    P_s = int(np.count_nonzero(gt_mask & seg_mask))
    P_t = int(np.count_nonzero(seg_mask))

    Precision = P_s/P_t
    return Precision
 
# 计算Recall系数,即Recall
def calRecall(binary_GT,binary_R):
    """Return the recall ``|GT and R| / |GT|`` of a segmentation mask.

    A pixel counts as foreground when its value is 255.

    Args:
        binary_GT: 2-D ground-truth mask.
        binary_R: 2-D segmentation mask; only the part covered by the shape
            of ``binary_GT`` is used.

    Raises:
        ZeroDivisionError: When the ground truth has no foreground pixel.
    """
    row, col = binary_GT.shape  # rows and columns of the mask
    gt_mask = np.asarray(binary_GT) == 255
    seg_mask = np.asarray(binary_R)[:row, :col] == 255
    # Vectorised counts replace the per-pixel Python loops
    R_s = int(np.count_nonzero(gt_mask & seg_mask))
    R_t = int(np.count_nonzero(gt_mask))

    Recall = R_s/R_t
    return Recall
 
def segmentaccuracy(img_GT,img_R):
    """Binarise both images, show them side by side and print five metrics.

    Both inputs are thresholded with Otsu's method (the threshold argument 0
    is ignored in that mode), displayed with matplotlib, and then scored with
    calDSI, calVOE, calRVD, calPrecision and calRecall.
    """
    # step 1: binarise with Otsu's global threshold
    otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    binary_GT = cv2.threshold(img_GT, 0, 255, otsu_flags)[1]
    binary_R = cv2.threshold(img_R, 0, 255, otsu_flags)[1]

    # step 2: show the binarised ground truth and segmentation
    plt.figure()
    panels = ((121, binary_GT, 'Really image'), (122, binary_R, 'Segment image'))
    for position, mask, caption in panels:
        plt.subplot(position)
        plt.imshow(mask)
        plt.title(caption)
        plt.axis('off')
    plt.show()

    # steps 3-7: compute and print each metric with four significant digits
    report = (
        ('(1)DICE计算结果,      DSI       = {0:.4}', calDSI),
        ('(2)VOE计算结果,       VOE       = {0:.4}', calVOE),
        ('(3)RVD计算结果,       RVD       = {0:.4}', calRVD),
        ('(4)Precision计算结果, Precision = {0:.4}', calPrecision),
        ('(5)Recall计算结果,    Recall    = {0:.4}', calRecall),
    )
    for template, metric in report:
        print(template.format(metric(binary_GT,binary_R)))


#导出图片的名字,写入到文件夹中
def Outputimgname(imgpath,txtfilename):
    """Append the base name of every ``.jpg`` in ``imgpath`` to ``txtfilename``.

    One name (without extension) is written per line. The file is opened in
    append mode and is only touched when at least one ``.jpg`` is found.

    Args:
        imgpath: Folder that is scanned (non-recursively) for ``*.jpg`` files.
        txtfilename: Path of the text file that receives the names.
    """
    # Only the file names are needed, so the images are not decoded.
    names = [os.path.splitext(dir_image)[0]
             for dir_image in os.listdir(imgpath)
             if dir_image.endswith('.jpg')]
    if names:
        # Open the list file once instead of once per image.
        with open(txtfilename, "a") as txt_file:
            txt_file.writelines(name + '\n' for name in names)
    print("Output over!The total of image are %s! "%len(names))

#载入图片
# rimgpath:真实目标图片路径;simgpath:分割结果图片路径;imgefilename:图片名字
def GetImg(rimgpath,simgpath,imgefilename):
    """Load the real and the segmented image for every name in a list file.

    Args:
        rimgpath: Prefix prepended to ``<name>.jpg`` (real images).
        simgpath: Prefix prepended to ``<name>.png`` (segmentation results).
        imgefilename: Text file with one image name per line.

    Returns:
        ``(rimg, simg)``: two lists of images as returned by ``cv2.imread``;
        an entry is None when ``cv2.imread`` could not read the file.
    """
    # ndmin=1 keeps a one-line list file from collapsing into a 0-d array,
    # which has no len() and cannot be iterated.
    names = np.loadtxt(imgefilename, dtype=str, ndmin=1)
    rimg, simg = [], []
    for fname in names:
        rimg.append(cv2.imread(rimgpath + "%s.jpg" % (fname)))
        simg.append(cv2.imread(simgpath + "%s.png" % (fname)))
    return rimg, simg

#从文件夹载入多种图片,处理后将结果保存到一个文件夹
#处理范围包括:图片类型的转换,名字转换,文件名导出
def read__image(open_path,save_path):
    """Re-save every ``.jpg`` in ``open_path`` as ``.png`` in ``save_path``.

    Args:
        open_path: Folder that is scanned (non-recursively) for ``*.jpg`` files.
        save_path: Folder that receives ``<name>.png`` for every converted image.

    Prints the number of images that were actually written.
    """
    nums = 0
    for dir_image in os.listdir(open_path):
        if not dir_image.endswith('.jpg'):
            continue
        full_path = os.path.abspath(os.path.join(open_path, dir_image))
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread returns None (it does not raise) when a file cannot
            # be decoded; skip it instead of failing inside cv2.imwrite.
            print("Skip unreadable image: %s" % full_path)
            continue
        n = os.path.splitext(dir_image)[0]
        # The extension is mandatory: cv2.imwrite picks the encoder from it.
        # os.path.join also works when save_path has no trailing separator.
        image_path = os.path.join(save_path, '%s.png' % n)
        if cv2.imwrite(image_path, image):
            nums += 1
        else:
            print("Failed to write image: %s" % image_path)
    print("Translate over!The total of image are %s !"%nums)


if __name__ == '__main__':

    # Single-image usage:
    # img_GT = cv2.imread('accuracylable/2007_000033.jpg',0)
    # img_R  = cv2.imread('accuracylable/2007_000033.png',0)
    # segmentaccuracy(img_GT,img_R)
    # print("calculate end!")

    # Batch test
    # [1] Collect the names of all images into a text file
    imgpath="F:\\pytorchdeeplearnbook\\program\\programs\\data\\VOC2012\\03segauracytest\\rimg\\"   
    txtfilename="testdata.txt"
    Outputimgname(imgpath,txtfilename)

    # [2] Convert the jpg images to png
    rimgpath="F:\\pytorchdeeplearnbook\\program\\programs\\data\\VOC2012\\03segauracytest\\rimg\\"  
    simgpath="F:\\pytorchdeeplearnbook\\program\\programs\\data\\VOC2012\\03segauracytest\\simg\\"  
    read__image(rimgpath,simgpath)

    # [3] Load the original and the segmented images and score each pair
    rimg,simg=GetImg(rimgpath,simgpath,txtfilename)
    for r_img, s_img in zip(rimg, simg):
        # cv2.imread returns BGR data, so the grayscale conversion must use
        # COLOR_BGR2GRAY; COLOR_RGB2GRAY swaps the red and blue weights.
        gr_img = cv2.cvtColor(r_img,cv2.COLOR_BGR2GRAY)
        gs_img = cv2.cvtColor(s_img,cv2.COLOR_BGR2GRAY)
        segmentaccuracy(gr_img,gs_img)

【7】trainmodel.py #训练模型

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   trainmodel.py
@Time    :   2021/06/01 14:14:19
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib
'''
[1]训练fcns8网络模型,并保存最优的模型。

[2]测试fcns8网络模型的训练结果,并可视化结果。
'''

import numpy as np
from dateset import *
from segmentmodel import *
from torch.types import Device
from visdom import Visdom
import time

#创建一个不存在的文件夹
def makefilepath(folder_path):
    """Create ``folder_path`` (including parents) unless the path already exists."""
    if os.path.exists(folder_path):
        return
    os.makedirs(folder_path)

# Training routine for the network
def train_model(model, criterion, optimizer,traindataloader,
                valdataloader, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch performs one optimisation pass over ``traindataloader``
    followed by one evaluation pass over ``valdataloader``. The state dict
    of the epoch with the lowest mean validation loss is restored into the
    model before it is returned.

    Args:
        model: network to train; batches are moved to the device of its
            first parameter.
        criterion: loss function, called with the log-softmax (over dim 1)
            of the model output and the label batch.
        optimizer: optimizer that updates the parameters of ``model``.
        traindataloader: iterable yielding ``(inputs, labels)`` batches
            used for training.
        valdataloader: iterable yielding ``(inputs, labels)`` batches used
            for validation.
        num_epochs: number of passes over the training data.

    Returns:
        A tuple ``(model, train_process)`` where ``train_process`` is a
        pandas DataFrame with the columns ``epoch``, ``train_loss_all``
        and ``val_loss_all`` (mean loss per sample for each epoch).
    """
    # Take the device from the model itself rather than from a module-level
    # ``device`` global, which only exists when this file runs as a script.
    device = next(model.parameters()).device
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10
    train_loss_all = []
    val_loss_all = []
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        train_loss = 0.0
        train_num = 0
        val_loss = 0.0
        val_num = 0

        # Each epoch has a training phase and a validation phase.
        model.train()  # switch to training mode
        for step, (b_x, b_y) in enumerate(traindataloader):
            print(step)
            optimizer.zero_grad()
            b_x = b_x.float().to(device)
            b_y = b_y.long().to(device)
            out = F.log_softmax(model(b_x), dim=1)
            loss = criterion(out, b_y)
            loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size so that the epoch
            # value is a per-sample mean even with a smaller last batch.
            train_loss += loss.item() * len(b_y)
            train_num += len(b_y)

        # Mean training loss of this epoch.
        train_loss_all.append(train_loss / train_num)
        print('{} Train Loss: {:.4f}'.format(epoch, train_loss_all[-1]))

        # Mean validation loss after this epoch of training.
        model.eval()  # switch to evaluation mode
        # No gradients are needed for validation; disabling autograd
        # avoids building a graph for every validation batch.
        with torch.no_grad():
            for b_x, b_y in valdataloader:
                b_x = b_x.float().to(device)
                b_y = b_y.long().to(device)
                out = F.log_softmax(model(b_x), dim=1)
                loss = criterion(out, b_y)
                val_loss += loss.item() * len(b_y)
                val_num += len(b_y)
        val_loss_all.append(val_loss / val_num)
        print('{} Val Loss: {:.4f}'.format(epoch, val_loss_all[-1]))

        # Keep a copy of the weights with the lowest validation loss so far.
        if val_loss_all[-1] < best_loss:
            best_loss = val_loss_all[-1]
            best_model_wts = copy.deepcopy(model.state_dict())

        # Elapsed time since training started.
        time_use = time.time() - since
        print("Train and val complete in {:.0f}m {:.0f}s".format(
            time_use // 60, time_use % 60))
    train_process = pd.DataFrame(data={"epoch":range(num_epochs), "train_loss_all":train_loss_all,"val_loss_all":val_loss_all})
    # Return the model loaded with the best weights seen during training.
    model.load_state_dict(best_model_wts)
    return model,train_process


if __name__=='__main__':

    # Target size used when loading the data.
    high,width = 320,480

    # Text file listing the names of the training images.
    train_path="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/train.txt"
    # Text file listing the names of the validation images.
    val_path="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/ImageSets/Segmentation/val.txt"
    # Root folder of all images (JPEGImages holds the pictures).
    imagepath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/JPEGImages/"
    # Root folder of all labels (SegmentationClass holds the label images).
    labelpath="F:/pytorchdeeplearnbook/program/programs/data/VOC2012/SegmentationClass/"

    voc_train = MyDataset(train_path,imagepath,labelpath,high,width, img_transforms,colormap)
    voc_val   = MyDataset(val_path,  imagepath,labelpath,high,width, img_transforms,colormap)

    # Data loaders; every batch holds 6 images.
    train_loader = Data.DataLoader(voc_train, batch_size=6,shuffle=True,
                                num_workers=2,pin_memory=True)
    val_loader = Data.DataLoader(voc_val, batch_size=6,shuffle=True,
                                num_workers=2,pin_memory=True)

    # Loss function and optimizer.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    LR = 0.0003
    criterion = nn.NLLLoss()
    fcn8s=FCN8s(21).to(device)
    optimizer = optim.Adam(fcn8s.parameters(), lr=LR,weight_decay=1e-4)
    # Train the model for the given number of epochs over the whole data set.
    fcn8s,train_process = train_model(fcn8s,criterion, optimizer,train_loader,val_loader,num_epochs=4)
    # Save the trained fcn8s network.
    makefilepath("./0604savemodel")
    # Printing net.state_dict (without calling it) shows the network
    # structure, whereas net.state_dict() returns the parameters.
    # Here the whole module object is saved, not just its state dict.
    torch.save(fcn8s,"./0604savemodel/fcn8s.pth")
    # Plot the training and validation loss per epoch.
    plt.figure(figsize=(10,6))
    plt.plot(train_process.epoch,train_process.train_loss_all,
            "ro-",label = "Train loss")
    plt.plot(train_process.epoch,train_process.val_loss_all,
            "bs-",label = "Val loss")
    plt.legend()
    plt.xlabel("epoch")
    plt.ylabel("Loss")
    plt.show()

    # Fetch a single batch from the validation set. next(iter(...)) loads
    # exactly one batch instead of looping until the second one arrives.
    b_x, b_y = next(iter(val_loader))
    # Predict on that batch and visualise the result.
    fcn8s.eval()
    b_x  =b_x.float().to(device)
    b_y  =b_y.long().to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out = fcn8s(b_x)
        out = F.log_softmax(out,dim=1)
        pre_lab = torch.argmax(out,1)
    # Visualise the batch to check that the preprocessing is correct.
    b_x_numpy = b_x.cpu().data.numpy()
    # Move the channel axis last for plotting.
    b_x_numpy = b_x_numpy.transpose(0,2,3,1)
    b_y_numpy = b_y.cpu().data.numpy()
    pre_lab_numpy = pre_lab.cpu().data.numpy()
    plt.figure(figsize=(16,9))
    # 3 rows x 4 columns: input image, ground-truth label, prediction.
    # Cap at the batch length so a short batch does not raise IndexError.
    for ii in range(min(4, len(b_x_numpy))):
        plt.subplot(3,4,ii+1)
        plt.imshow(inv_normalize_image(b_x_numpy[ii]))
        plt.axis("off")
        plt.subplot(3,4,ii+5)
        plt.imshow(label2image(b_y_numpy[ii],colormap))
        plt.axis("off")
        plt.subplot(3,4,ii+9)
        plt.imshow(label2image(pre_lab_numpy[ii],colormap))
        plt.axis("off")
    plt.subplots_adjust(wspace=0.05, hspace=0.05)
    plt.show()

【8】visualize.py #模型pdf转换的文件

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   visualize.py
@Time    :   2021/06/04 09:45:49
@Author  :   Jian Song 
@Contact :   1248975661@qq.com
@Desc    :   None
'''

# here put the import lib
'''
该文档用于网络模型可视化显示
'''

from graphviz import Digraph
import torch
from torch.autograd import Variable
 
 
def make_dot(var, params=None):
    """Produce a Graphviz representation of a PyTorch autograd graph.

    Blue nodes are the Variables that require grad, orange nodes are
    Tensors saved for backward in torch.autograd.Function. Every other
    graph node is labelled with the type name of the node object.

    Args:
        var: output Variable whose ``grad_fn`` is the root of the graph.
        params: optional dict of (name, Variable) used to label the nodes
            that require grad. Leaves not present in the dict get an
            empty name.

    Returns:
        A ``graphviz.Digraph`` describing the graph.

    Raises:
        AssertionError: if ``params`` contains a value that is not a
            Variable.
    """
    param_map = {}
    if params is not None:
        # dict.values() is a view on Python 3 and cannot be indexed, so
        # iterate over it instead of using params.values()[0].
        assert all(isinstance(p, Variable) for p in params.values())
        param_map = {id(v): k for k, v in params.items()}

    node_attr = dict(style='filled',
                     shape='box',
                     align='left',
                     fontsize='12',
                     ranksep='0.1',
                     height='0.2')
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="12,12"))
    seen = set()

    def size_to_str(size):
        # Render a size as "(d0, d1, ...)".
        return '('+(', ').join(['%d' % v for v in size])+')'

    def add_nodes(var):
        # Depth-first walk; ``seen`` keeps shared nodes from being added twice.
        if var not in seen:
            if torch.is_tensor(var):
                dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange')
            elif hasattr(var, 'variable'):
                u = var.variable
                # Leaves that were not passed in ``params`` stay unnamed
                # instead of raising KeyError.
                name = param_map.get(id(u), '')
                node_name = '%s\n %s' % (name, size_to_str(u.size()))
                dot.node(str(id(var)), node_name, fillcolor='lightblue')
            else:
                dot.node(str(id(var)), str(type(var).__name__))
            seen.add(var)
            if hasattr(var, 'next_functions'):
                for u in var.next_functions:
                    if u[0] is not None:
                        dot.edge(str(id(u[0])), str(id(var)))
                        add_nodes(u[0])
            if hasattr(var, 'saved_tensors'):
                for t in var.saved_tensors:
                    dot.edge(str(id(t)), str(id(var)))
                    add_nodes(t)
    add_nodes(var.grad_fn)
    return dot

【9】其他文件

knowdege.md #相关知识的补充


此文件记录了编写模型过程中遇到的知识盲点。

[1]---------------------文件路径切分----------------------------------------------

import os.path

# Three commonly used helpers: split a path into directory and file name,
# split off the drive (Windows), and split off the file extension.
# Adjust the path below to match your own machine.
# The literal must not carry leading/trailing spaces: they would end up in
# the directory and extension parts and defeat drive detection.
spath = "D:/download/repository.7z"

# case 1: directory and file name
p, f = os.path.split(spath)
print(" dir is: " + p)
print(" file is: " + f)

# case 2: drive and the remainder of the path
drv, left = os.path.splitdrive(spath)
print(" driver is: " + drv)
print(" left is: " + left)

# case 3: path without extension, and the extension
f, ext = os.path.splitext(spath)
print(" f is: " + f)
print(" ext is: " + ext)

# Takeaway: all three functions return a 2-tuple.
#   * case 1 separates the directory from the file name
#   * case 2 separates the drive from the rest of the path
#   * case 3 separates the file from its extension

总结:5个函数
os.walk(spath)
os.path.split(spath)
os.path.splitdrive(spath)
os.path.splitext(spath)
os.path.join(path1,path2)


[2]---------------------网络模型pdf显示----------------------------------------------

参考博客如下:

(1)pytorch 模型可视化
https://blog.csdn.net/weixin_42445501/article/details/81221362?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-11.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-11.control

(2)问题:graphviz.backend.ExecutableNotFound: failed to execute ['dot', '-Tpdf', '-O', 'Digraph.gv']问题解决
#解决方法:https://blog.csdn.net/qq_41997920/article/details/100928729

(3)可视化参考方法:
https://blog.csdn.net/qq_27825451/article/details/96856217?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-4.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-4.control


[3]---------------------visdom训练过程可视化显示----------------------------------------------

pytorch学习(十九)—Visdom可视化训练过程
https://www.jianshu.com/p/eba610995ca7


[4]---------------------机器学习&图像分割——模型评价总结----------------------------------------------

(1)机器学习&图像分割——模型评价总结(含完整代码)
https://geekzw.blog.csdn.net/article/details/80408465?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-2.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromMachineLearnPai2%7Edefault-2.control


[5]-------------------=======-文件夹文件归档建立文件树------=----------------------------------------
打开命令窗口输入:tree /f > structure.txt
参考文献:https://www.cnblogs.com/woshimrf/p/tree.html

【10】完整代码下载

链接:https://pan.baidu.com/s/1vcclju-raaseSf9i-cuiMQ
提取码:ml1c

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐