FCNs代码学习
原文:Fully Convolutional Networks for Semantic Segmentation (FCNs) http://arxiv.org/pdf/1605.06211v1.pdf ;源码:https://github.com/shekkizh/FCN.tensorflow 。在vgg网络的基础上,将最后三层全连接改为cnn,即全都为卷积层了(fully convolutional)。
原文:Fully Convolutional Networks for Semantic Segmentation (FCNs) http://arxiv.org/pdf/1605.06211v1.pdf
源码:https://github.com/shekkizh/FCN.tensorflow
参考:用自己数据集的:https://www.freesion.com/article/8700112083/
测试部分参考这里改的:https://zhuanlan.zhihu.com/p/33419692
https://zhuanlan.zhihu.com/p/32053317
在vgg网络的基础上,将最后三层全连接改为cnn,即全都为卷积层了,fully convolutional networks,然后再反卷积,使输出与原始图像一致,这样就能画出每个像素点属于哪个类了,即实现了分割。
注:下文张量shape中出现的"?"表示batch size的大小(该维度在建图时尚未确定)。
参考:https://blog.csdn.net/qq_16949707/article/details/56011484
在FCN 项目部分代码学习的注释基础上进一步添加注释
from __future__ import print_function
import tensorflow as tf
import numpy as np
import TensorflowUtils as utils
import read_MITSceneParsingData as scene_parsing
import datetime
import BatchDatsetReader as dataset
# six.moves 是用来处理那些在2 和 3里面函数的位置有变化的,直接用six.moves就可以屏蔽掉这些变化
# xrange 用来处理数据类型切换
from six.moves import xrange
# Command-line flags. tf.app.run() parses them before main() is invoked, so
# each value can either be left at the default declared here or overridden on
# the command line (e.g. --mode=visualize).
# Defaults are given as correctly typed literals rather than strings.
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer("batch_size", 2, "batch size for training")
tf.flags.DEFINE_string("logs_dir", "logs/", "path to logs directory")
tf.flags.DEFINE_string("data_dir", "Data_zoo/MIT_SceneParsing/", "path to dataset")
tf.flags.DEFINE_float("learning_rate", 1e-4, "Learning rate for Adam Optimizer")
tf.flags.DEFINE_string("model_dir", "Model_zoo/", "Path to vgg model mat")
tf.flags.DEFINE_bool('debug', False, "Debug mode: True/ False")
tf.flags.DEFINE_string('mode', "train", "Mode train/ test/ visualize")

# URL of the pre-trained VGG-19 weights; it is handed to utils.get_model_data,
# which presumably downloads the file when it is not found in model_dir.
MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat'

MAX_ITERATION = int(1e5 + 1)  # number of training iterations
NUM_OF_CLASSESS = 151  # number of segmentation classes
IMAGE_SIZE = 224  # height and width of the network input
def vgg_net(weights, image):
    """Build the VGG-19 convolutional trunk from pre-trained weights.

    The layer at position ``i`` in the name table below takes its parameters
    from ``weights[i]``, so the table must list conv, relu and pool entries in
    the same order as the pre-trained model stores them.

    :param weights: per-layer entries of the pre-trained model; for a conv
        layer, ``weights[i][0][0][0][0]`` unpacks into ``(kernels, bias)``.
    :param image: input tensor for the first layer (the caller passes the
        mean-subtracted image).
    :return: dict mapping each layer name to that layer's output tensor.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    outputs = {}
    tensor = image
    for index, layer_name in enumerate(layer_names):
        if layer_name.startswith('conv'):
            kernels, bias = weights[index][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            # so the first two axes are swapped before creating the variable.
            kernel_var = utils.get_variable(
                np.transpose(kernels, (1, 0, 2, 3)), name=layer_name + "_w")
            # reshape(-1) flattens the stored bias into a 1-D vector.
            bias_var = utils.get_variable(bias.reshape(-1), name=layer_name + "_b")
            tensor = utils.conv2d_basic(tensor, kernel_var, bias_var)
        elif layer_name.startswith('relu'):
            tensor = tf.nn.relu(tensor, name=layer_name)
            if FLAGS.debug:
                utils.add_activation_summary(tensor)
        elif layer_name.startswith('pool'):
            # pool1..pool4 are handled here with average pooling; the fifth
            # pooling step is not in the table and is applied by the caller.
            tensor = utils.avg_pool_2x2(tensor)
        # Keep every intermediate result so callers can tap any layer.
        outputs[layer_name] = tensor

    return outputs
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    Builds the pre-trained VGG-19 trunk, adds three convolutional layers in
    place of VGG's fully connected head, and then upsamples the class scores
    back to the input resolution with three transposed convolutions, adding
    the pool4 and pool3 feature maps as skip connections on the way.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability used by both dropout layers
    :return: tuple ``(prediction, logits)``; ``logits`` is the output of the
        last transposed convolution with NUM_OF_CLASSESS channels, and
        ``prediction`` is the argmax of ``logits`` over the channel axis with
        a trailing axis of size 1 added back.
    """
    print("setting up vgg initialized conv layers ...")
    # Pre-trained VGG-19 model data (imagenet-vgg-verydeep-19.mat).
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    # Per-channel mean pixel, averaged over the stored mean image.
    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    # Normalise the input with the mean pixel of the pre-trained model.
    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        # The head is attached to conv5_3; the fifth pooling step (max
        # pooling, unlike the average pooling inside vgg_net) is applied here.
        conv_final_layer = image_net["conv5_3"]
        pool5 = utils.max_pool_2x2(conv_final_layer)

        # Layer 6: 7x7 convolution, 512 -> 4096 channels, ReLU, dropout.
        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        # Layer 7: 1x1 convolution, 4096 -> 4096 channels, ReLU, dropout.
        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # Layer 8: 1x1 convolution producing one channel per class.
        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # now to upscale to actual image size
        # Static channel counts are read with as_list() instead of
        # Dimension.value, which is not available when TensorFlow 2 shape
        # semantics are active (dimensions are then plain ints).
        pool4 = image_net["pool4"]
        pool4_channels = pool4.get_shape().as_list()[3]
        W_t1 = utils.weight_variable([4, 4, pool4_channels, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([pool4_channels], name="b_t1")
        # Upsample the class scores to pool4's shape (and channel count) so
        # the two tensors can be added element-wise.
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(pool4))
        fuse_1 = tf.add(conv_t1, pool4, name="fuse_1")

        pool3 = image_net["pool3"]
        pool3_channels = pool3.get_shape().as_list()[3]
        W_t2 = utils.weight_variable([4, 4, pool3_channels, pool4_channels], name="W_t2")
        b_t2 = utils.bias_variable([pool3_channels], name="b_t2")
        # Same again, this time up to pool3's shape.
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(pool3))
        fuse_2 = tf.add(conv_t2, pool3, name="fuse_2")

        # Final target shape: batch, height and width of the input image,
        # with one channel per class.
        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, pool3_channels], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # For every pixel, the channel with the largest score is the
        # predicted class.
        annotation_pred = tf.argmax(conv_t3, axis=3, name="prediction")

    return tf.expand_dims(annotation_pred, axis=3), conv_t3
def train(loss_val, var_list):
    """Create the op that applies one Adam update to ``var_list``.

    :param loss_val: scalar loss tensor to minimise.
    :param var_list: variables whose gradients are computed and applied.
    :return: the op returned by ``apply_gradients``.
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)
    if FLAGS.debug:
        # In debug mode, record a summary for every (gradient, variable) pair.
        for gradient, variable in grads_and_vars:
            utils.add_gradient_summary(gradient, variable)
    return adam.apply_gradients(grads_and_vars)
def main(argv=None):
    """Build the graph, then train or visualize depending on ``FLAGS.mode``.

    In "train" mode the network is optimised for MAX_ITERATION steps; the
    training loss is logged every 10 steps, and every 500 steps a validation
    batch is evaluated and a checkpoint is written to ``FLAGS.logs_dir``.
    In "visualize" mode one random validation batch is predicted and the
    input, ground truth and prediction images are saved to ``FLAGS.logs_dir``.

    :param argv: unused; present because tf.app.run() passes the command-line
        arguments to main().
    """
    import os  # local import: only used here to build log/checkpoint paths

    # Placeholders for the dropout keep probability, the input images and
    # their per-pixel labels; the batch dimension is left open (None).
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    pred_annotation, logits = inference(image, keep_probability)

    # Image summaries so input, ground truth and prediction can be inspected
    # in TensorBoard.
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)

    # Mean per-pixel cross entropy between the logits and the labels; the
    # labels' trailing axis of size 1 is removed first.
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, axis=[3]),
        name="entropy"))
    loss_summary = tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    # NOTE: the merged op is created but the loops below only run
    # loss_summary.
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    # The training reader is only needed in train mode; the validation
    # reader is used by both the train and the visualize branches.
    train_dataset_reader = None
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()
    train_writer = None
    validation_writer = None
    try:
        print("Setting up Saver...")
        saver = tf.train.Saver()

        # create two summary writers to show training loss and validation loss in the same graph
        # (sub-directories 'train' and 'validation' inside FLAGS.logs_dir)
        train_writer = tf.summary.FileWriter(os.path.join(FLAGS.logs_dir, 'train'), sess.graph)
        validation_writer = tf.summary.FileWriter(os.path.join(FLAGS.logs_dir, 'validation'))

        sess.run(tf.global_variables_initializer())
        # Resume from the most recent checkpoint in logs_dir, if there is one.
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

        if FLAGS.mode == "train":
            for itr in xrange(MAX_ITERATION):
                train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
                feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}

                sess.run(train_op, feed_dict=feed_dict)

                # Every 10 steps: report and log the training loss.
                if itr % 10 == 0:
                    train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
                    print("Step: %d, Train_loss:%g" % (itr, train_loss))
                    train_writer.add_summary(summary_str, itr)

                # Every 500 steps: evaluate one validation batch with dropout
                # disabled (keep probability 1.0) and save a checkpoint.
                if itr % 500 == 0:
                    valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
                    valid_loss, summary_sva = sess.run(
                        [loss, loss_summary],
                        feed_dict={image: valid_images, annotation: valid_annotations,
                                   keep_probability: 1.0})
                    print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))

                    # add validation loss to TensorBoard
                    validation_writer.add_summary(summary_sva, itr)
                    saver.save(sess, os.path.join(FLAGS.logs_dir, "model.ckpt"), itr)

        elif FLAGS.mode == "visualize":
            valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
            pred = sess.run(pred_annotation, feed_dict={image: valid_images, annotation: valid_annotations,
                                                        keep_probability: 1.0})
            # Drop the trailing axis of size 1 so each item is a 2-D label map.
            valid_annotations = np.squeeze(valid_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)

            # File names are numbered from 5; change the offset to renumber.
            for itr in range(FLAGS.batch_size):
                utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
                utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr))
                utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(5+itr))
                print("Saved image: %d" % itr)
    finally:
        # Flush pending summaries and release the session even on failure.
        for writer in (train_writer, validation_writer):
            if writer is not None:
                writer.close()
        sess.close()


# Script entry point: tf.app.run() parses the flags and then calls main().
if __name__ == "__main__":
    tf.app.run()
git上的tensorflow版本较低,我安装了2.0.0版本之后,需要有一些改动
1. FLAGS异常:导入tensorflow的地方改成向下兼容V1版的
FCN.py TensorflowUtils.py 中
# import tensorflow as tf # 版本问题
import tensorflow.compat.v1 as tf
2.AttributeError: 'module' object has no attribute 'imread' 解决办法:安装pillow,scipy降级
1.pip install pillow
2.pip install scipy==1.2.1
3.shape()的value属性去掉了
deconv_shape1 = image_net["pool4"].get_shape()
# W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
W_t1 = utils.weight_variable([4, 4, deconv_shape1[3], NUM_OF_CLASSESS], name="W_t1")
# b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
b_t1 = utils.bias_variable([deconv_shape1[3]], name="b_t1")
4.改得最心虚的地方:axis=3这里报错了。查了一下,axis从0开始计数;标注图经_transform后应为二维数组(H, W),np.expand_dims能插入新维度的位置最大只能是2,所以改成axis=2(结果形状为(H, W, 1))之后没再报错,先记下来回头再确认
def _read_images(self):
    """Load every image and annotation listed in ``self.files`` into arrays.

    Stores the results in ``self.images`` and ``self.annotations`` and prints
    both array shapes.
    """
    # The flag is switched on while images are transformed and off for the
    # annotations; presumably _transform reads it — verify in _transform.
    self.__channels = True
    image_arrays = []
    for record in self.files:
        image_arrays.append(self._transform(record['image']))
    self.images = np.array(image_arrays)

    self.__channels = False
    annotation_arrays = []
    for record in self.files:
        # The new axis goes at position 2 (axis=3 was rejected by NumPy here).
        annotation_arrays.append(
            np.expand_dims(self._transform(record['annotation']), axis=2))
    self.annotations = np.array(annotation_arrays)

    print(self.images.shape)
    print(self.annotations.shape)
5.版本原因,placeholder之前要加tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_eager_execution() # 版本问题
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
6 测试部分,用ADEChallengeData2016的测试集跑了一下,遇到黑白图的问题,分开处理了,测试集跑完打包提交到官网发现提交不上去,不知道咋解决,之后就没有再处理了
# 新加的test集路径
tf.flags.DEFINE_string('test_dir', "/2t/zsy/data/ADEChallengeData2016/release_test/testing", "path to test data dataset")
tf.flags.DEFINE_string('testResult_dir', "/2t/zsy/FCN.tensorflow/testResult/", "path to test Result")
elif FLAGS.mode == "test":
list = os.listdir(FLAGS.test_dir) # 遍历测试集
for i in range(0, len(list)):
path = os.path.join(FLAGS.test_dir, list[i])
if os.path.isfile(path):
print("dealingImg: %s" % os.path.basename(path))
test_image = misc.imread(path)
print(test_image.shape)
resize_image = misc.imresize(test_image, [224, 224], interp='nearest')
print(len(resize_image.shape))
if len(resize_image.shape) == 3:
print("彩色图像")
a = np.expand_dims(resize_image, axis=0)
a = np.array(a)
pred = sess.run(pred_annotation, feed_dict={image: a, keep_probability: 0.85})
pred = np.squeeze(pred, axis=3)
utils.save_image(pred[0].astype(np.uint8), FLAGS.testResult_dir,
name=os.path.basename(path).split('.')[0])
print("Saved image: succeed %s" % os.path.split('.')[0])
elif len(resize_image.shape) == 2:
print("黑白图像")
tmp_image = np.expand_dims(resize_image, axis=2) # 加一维,变成1通道的
print(tmp_image.shape)
a = np.expand_dims(tmp_image, axis=0)
a = np.concatenate((a, a, a), axis=-1) # 一通道的变三通道的
a = np.array(a)
print(a.shape)
pred = sess.run(pred_annotation, feed_dict={image: a, keep_probability: 0.85})
pred = np.squeeze(pred, axis=3)
utils.save_image(pred[0].astype(np.uint8), FLAGS.testResult_dir,
name=os.path.basename(path).split('.')[0])
print("Saved image: succeed %s" % os.path.split('.')[0])
更多推荐
所有评论(0)