1.pytorch环境配置(docker)

  • docker环境--配置过程(配置成功--能使用nvidia-docker命令)
  • 拉取pytorch镜像--参考网站(命令--docker pull nvcr.io/nvidia/pytorch:19.01-py3)
  • 启动docker容器--参考网站(命令--sudo nvidia-docker run -it --rm -v local_dir:container_dir nvcr.io/nvidia/pytorch:<xx.xx>-py3)

2. pytorch中tensor的基本操作

参考网站:https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py

2.1 pytorch测试

# coding=utf-8
# Practice script: create tensors in several ways and inspect their dtype and size.
from __future__ import print_function
import torch
x = torch.rand(5,3)
print("x={}".format(x)) # 5 rows x 3 columns of random numbers
print(torch.empty(5,3)) # 5x3 tensor created without filling in values
print(torch.zeros(5,3,dtype=torch.long)) # 5x3 tensor of zeros with dtype long
print(torch.tensor([5.5,3])) # tensor built directly from a Python list

x=x.new_ones(5,3,dtype=torch.double) # 5x3 tensor of ones, created from x with the dtype overridden to double
print("x={}".format(x))
x=torch.rand_like(x,dtype=torch.float) # same size as x, filled with random numbers from a uniform distribution on [0, 1); torch.randn_like is the normal-distribution (mean 0, variance 1) variant
print("x={}".format(x))
print(x.dtype)
print(x.size())

2.2 tensor加法

# Tensor addition in PyTorch (uses x from the previous snippet)
y=(torch.rand(5,3))
print("y={}".format(y))
print("x+y={}".format(x+y)) # addition is element-wise; torch.add(x,y) does the same
print("torch.add(x,y)={}".format(torch.add(x,y)))

result=torch.empty(5,3)
torch.add(x,y,out=result) # store the sum in the pre-allocated tensor `result`
print("result={}".format(result)) # addition with an explicit output; per the author's note, result must have the same type as x and y

2.3  in-place方法

# In-place methods: the result is written back into the tensor itself, no extra variable is created
y.add_(x)
print("y={}".format(y)) # y=y+x
# In PyTorch an operation gets its in-place form by appending "_", e.g. x.copy_(y), x.t_()
print("x={}".format(x))
x.copy_(y) # overwrite the contents of x with those of y
print("x={}".format(x))
x.t_()
print("x={}".format(x)) # x now holds its own transpose

# tensor resize/reshape(torch.view)
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16) # flatten the 4x4 tensor into 16 elements
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # a negative index normally counts from the right (last element is -1), but here -1 means: the size -1 is inferred from other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check that -1 is derived from the other dimension: other dim 8 -> -1 becomes 2; other dim 4 -> -1 becomes 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))

2.4 tensor resize/reshape 

# tensor resize/reshape(torch.view): reinterpret the 16 elements of a 4x4 tensor in other shapes
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16) # flatten the 4x4 tensor into 16 elements
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # a negative index normally counts from the right (last element is -1), but here -1 means: the size -1 is inferred from other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check that -1 is derived from the other dimension: other dim 8 -> -1 becomes 2; other dim 4 -> -1 becomes 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))

2.5 获取tensor某一element的值

# Read the value of a single element (x is the 4x4 tensor from the previous snippet)
print("x[1][1]={}".format(x[1][1])) # indexing alone still prints a tensor
print("x[1][1].item()={}".format(x[1][1].item())) # .item() extracts the plain Python number

x=torch.randn(1) # one-element tensor
print("x={}".format(x))
print("x.item()={}".format(x.item()))
print("x[0].item()={}".format(x[0].item()))

2.6 转换Torch tensor到numpy

# Convert a Torch tensor to a NumPy array
a = torch.ones(6)
print("a={},type is {}".format(a,type(a)))
b = a.numpy()
print("b=a.numpy()={},type is {}".format(b,type(b)))

# Changing the NumPy values: it is enough to change the corresponding Torch tensor in place
#b.add_(2)#AttributeError: 'numpy.ndarray' object has no attribute 'add_'
print("b+2={}".format(b+2)) # b+2 is only an expression; b is not reassigned here
a.add_(3) # in-place update of the tensor
print("a={},type is {}".format(a,type(a)))
print("b=a.numpy()={},type is {}".format(b,type(b))) # b is printed again without being reassigned, to show the effect of a.add_(3) on it

2.7 转换numpy到Torch tensor

# Convert a NumPy array to a Torch tensor
import numpy as np
a = np.ones(2)
print("a={},type is {}".format(a,type(a)))
b = torch.from_numpy(a)
print("b=torch.from_numpy(a)={},type is {}".format(b,type(b)))

# Does changing the NumPy array change the Torch tensor? Verified by the author: yes, it does
np.add(a,1,out=a) # out --- A location into which the result is stored
print("a={},type is {}".format(a,type(a)))
print("b={},type is {}".format(b,type(b))) # b is printed without being reassigned, to show the change made through a

2.8 tensor传入GPU

# pytorch cuda tensors: runs only when a CUDA device is available (uses x from the earlier snippets)
if torch.cuda.is_available():
    device = torch.device("cuda")
    y = torch.ones_like(x,device=device) # create a tensor of the same size as x directly on the GPU
    print("\nx={},dtype is {}".format(x,x.dtype))
    print("y=torch.ones_like(x,device=device)={},dtype is {}".format(y,y.dtype))
    #z = x+y #RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor -- one operand is on the GPU and the other on the CPU, so they cannot be combined
    x = x.to(device) # move x to the GPU
    print("x=x.to(device)={},dtype is {}".format(x,x.dtype))
    z = x + y # both operands are now on the same device
    print("z=x+y={},dtype is {}".format(z,z.dtype))

3.pytorch反向传播

3.1 requires_grad和grad_fn

# coding=utf-8
# Show how results of operations on a requires_grad tensor carry a grad_fn.
import torch
x = torch.ones(2,2,requires_grad=True) # ask autograd to track operations on x
print("x={}".format(x))
y = x + 3
print("y={},y.grad_fn={}".format(y,y.grad_fn)) #y.grad_fn=<AddBackward0 object at 0x7fd67a0f9be0>
z = x * y * 4
print("z={},z.grad_fn={}".format(z,z.grad_fn)) #z.grad_fn=<MulBackward0 object at 0x7fd67a0f9be0>

3.2 反向传播backward(标量对向量求导)

要实现反向传播,求偏导的自变量(tensor) 必须满足条件:a.requires_grad=True(以下述snippet为例)

# coding=utf-8
# Counter-example: backward() cannot run when the input tensor has requires_grad=False.
import torch

a = torch.randn(2,2)
a = (a*3/(a-1))
print("a.requires_grad is {}".format(a.requires_grad))  # The input flag defaults to False if not given: Torch tensors do not require gradients by default, i.e. a.requires_grad=False

#a.requires_grad_(True) # uncomment to verify that the differentiated tensor needs requires_grad=True; without it back-propagation is impossible

b = a.sum()
print("b=a.sum()={},b.grad_fn is {}".format(b,b.grad_fn))
# Back-propagation via backward()
#b.backward()  # out.backward() is equivalent to out.backward(torch.tensor(1.)); with a.requires_grad=False it fails -- RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

正确例子如下:

c = \frac{1}{4}\sum a_{i},因此,\frac{\partial c}{\partial a}=\begin{bmatrix} \frac{\partial c}{\partial a_{0}}& \frac{\partial c}{\partial a_{1}}\\ \frac{\partial c}{\partial a_{2}}& \frac{\partial c}{\partial a_{3}} \end{bmatrix}=\begin{bmatrix} 0.25& 0.25\\ 0.25& 0.25 \end{bmatrix}

# coding=utf-8
# Working example: gradient of a scalar with respect to a tensor, and gradient accumulation.
import torch

a = torch.randn(2,2)
a = (a*3/(a-1))
print("a.requires_grad is {}".format(a.requires_grad))  # The input flag defaults to False if not given: Torch tensors do not require gradients by default, i.e. a.requires_grad=False

a.requires_grad_(True) # the differentiated tensor must have requires_grad=True, otherwise back-propagation is impossible
print("a.requires_grad is {}".format(a.requires_grad)) # now a.requires_grad=True
print("\na={}".format(a))

# Partial derivative of a scalar with respect to a tensor
c = a.mean()
print("c=a.mean()={}".format(c))
c.backward()  # back-propagate through mean(): every element receives 1/4
# The expected output is kept as a comment; a string literal placed after print(...) on the same line is a SyntaxError.
print("a.grad={}".format(a.grad))  # a.grad=tensor([[0.2500, 0.2500],[0.2500, 0.2500]])

b = a.sum()
b.backward()  # the gradient of sum() (1 per element) is added to the 0.25 already stored in a.grad
print("a.grad={}".format(a.grad))  # a.grad=tensor([[1.2500, 1.2500],[1.2500, 1.2500]]) -- accumulated!

如果多次使用backward(),则a.grad会被累加运算!

3.3 范数求解

# Computing a norm: keep doubling y until its norm reaches 10
x = torch.randn(3,requires_grad=True)
y = x * 2
while y.data.norm()<10: # norm() defaults to the 2-norm here; torch.norm(y,2) computes the same 2-norm
    y = y * 2
print("\ny={},y.data={},y.data.norm()={},torch.norm(y,2)={}".format(y,y.data,y.data.norm(),torch.norm(y,2)))

3.4 反向传播backward(向量对向量求导)

理论公式推导可参考矩阵求导

x=(x_{1},x_{2},x_{3}),y=(y_{1},y_{2},y_{3}),\frac{\partial y^{T}}{\partial x}=J^{T}=\bigl(\begin{smallmatrix} \frac{\partial y_{1}}{\partial x_{1}}& \frac{\partial y_{2}}{\partial x_{1}}& \frac{\partial y_{3}}{\partial x_{1}}\\ \frac{\partial y_{1}}{\partial x_{2}}& \frac{\partial y_{2}}{\partial x_{2}}&\frac{\partial y_{3}}{\partial x_{2}} \\ \frac{\partial y_{1}}{\partial x_{3}}& \frac{\partial y_{2}}{\partial x_{3}}& \frac{\partial y_{3}}{\partial x_{3}} \end{smallmatrix}\bigr)

'''
# 向量(vector)对向量(vector)求偏导,backward(),数学上1*3的vector对1*3的vector求导会得到一个3*3的矩阵(vector-Jacobian product),但这里需要加一个向量v
# 可参考https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
'''
# Vector-valued output: backward() needs a vector v of the same shape as y (x and y come from the norm snippet)
v = torch.tensor([1.2,0.4,0.003],dtype=torch.float)
#y.backward() # this is the form for a scalar derivative; here it fails -- RuntimeError: grad can be implicitly created only for scalar outputs
y.backward(v) # passing a 1x3 tensor amounts to multiplying the transposed Jacobian by the transposed v (vector-Jacobian product)
print("x.grad={}".format(x.grad))

上述程序得到y=x*8,求导所得\frac{\partial y^{T}}{\partial x}=J^{T}=\bigl(\begin{smallmatrix} 8& 0& 0\\ 0& 8&0\\ 0& 0& 8 \end{smallmatrix}\bigr)

x=tensor([ 1.3471, -0.0893, -0.7166], requires_grad=True)

y=tensor([10.7771, -0.7143, -5.7330], grad_fn=<MulBackward0>)

x.grad=tensor([9.6000, 3.2000, 0.0240])

3.5 停止自动求导

# 停止自动求导运算
print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # True
with torch.no_grad():
    print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # False
print("(x**2).requires_grad={}".format((x ** 2).requires_grad)) # True

4.神经网络

公式推导可以参考神经网络

4.1 基本要求

A typical training procedure for a neural network is as follows:

  • Define the neural network that has some learnable parameters (or weights)  定义神经网络(拥有一些可学习的参数)
  • Iterate over a dataset of inputs  在输入数据集上进行迭代
  • Process input through the network  通过网络处理输入
  • Compute the loss (how far is the output from being correct)  计算损失
  • Propagate gradients back into the network’s parameters  反向传播梯度给网络参数
  • Update the weights of the network, typically using a simple update rule: weight = weight -learning_rate * gradient 更新网络权重

4.2 实现步骤

  • 定义神经网络:
# coding=utf-8
'''
卷积、全连接在torch.nn,池化在torch.nn.functional
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small LeNet-style network: two 2x2 convolutions followed by three linear layers.

    Built for inputs of shape (batch, 1, 32, 32); the output has shape (batch, 3).
    """

    def __init__(self):
        super(Net,self).__init__()
        # in_channels=1, out_channels=3, kernel_size=2x2; the remaining arguments keep
        # their defaults (stride=1, padding=0, dilation=1, groups=1, bias=True)
        self.conv1 = nn.Conv2d(1,3,2)
        self.conv2 = nn.Conv2d(3,3,2)
        # Fully connected layers. For a 1x32x32 input the spatial size goes
        # 32 -> 31 (conv1) -> 15 (pool) -> 14 (conv2) -> 7 (pool), so with 3 channels
        # fc1 receives 3*7*7 values per sample.
        self.fc1 = nn.Linear(3*7*7,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        # Settings used by the official tutorial, kept for reference:
        #self.conv1 = nn.Conv2d(1,6,5) # in_channels=1, out_channels=6, kernel_size=5x5
        #self.conv2 = nn.Conv2d(6,16,5)
        #self.fc1 = nn.Linear(16*5*5,120) # 16 channels of 5x5 feature maps, i.e. 16*5*5 inputs
        #self.fc2 = nn.Linear(120,84)
        #self.fc3 = nn.Linear(84,3)

    def forward(self,x):
        """Run the network on a mini-batch and return the fc3 output."""
        #import pdb;pdb.set_trace()
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2)) # 1. convolution; 2. relu; 3. 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv2(x)),2) # a single number means a square window, i.e. (2,2)
        x = x.view(-1,self.num_flat_features(x)) # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        #import pdb;pdb.set_trace()
        x = self.fc3(x)
        # The result must be returned here; without it net(input) evaluates to None.
        return x

    def num_flat_features(self,x):
        """Return the number of values per sample, i.e. the product of all dimensions but the first."""
        size = x.size()[1:] # every dimension except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
  • 查看网络:
net = Net() # instantiate the network
print("net:{}".format(net)) # show the structure of the network
param = list(net.parameters()) # learnable parameters of the network as a list
print("length:{}\nparam[0].size()={}\nparam[1].size()={}\nparam[2].size()={}\nparam[3].size()={}\nparam[4].size()={}\nparam[5].size()={}\n".format(len(param),param[0].size(),param[1].size(),param[2].size(),param[3].size(),param[4].size(),param[5].size())) # length is 10 here because each of the 5 layers has a weight and a bias
  • 数据输入网络:
'''
# input--The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample
# 就是需要实现对齐,举个例子:a single sample 可能就是3*32*32(nChannels*Height*Width),a mini-batch of samples就是1*3*32*32(nSamples*nChannels*Height*Width))
# 如果输入是a single sample,则需要转换为a mini-batch of samples(虚构一个nSamples的维度)
'''
# One random sample shaped (nSamples, nChannels, Height, Width) = (1, 1, 32, 32).
# NOTE: the name `input` shadows the Python builtin; later snippets refer to it, so it is kept.
input = torch.randn(1,1,32,32)
print("input=torch.randn(1,1,32,32)={}".format(input))
out = net(input) # calling the module runs its forward function
print("out=net(input)={},size={}".format(out,out.size()))
  • 计算损失:
net.zero_grad() # Zero the gradient buffers of all parameters: sets gradients of all model parameters to zero.

#out.backward(torch.randn(1,3),retain_graph=True)

target = torch.tensor([0.8,0.1,0.1],dtype=torch.float)
print("target变换前:{},size={}".format(target,target.size()))
target = target.view(1,-1) # target has to be reshaped to the same dimensions as out
print("target变换后={},size={}".format(target,target.size()))

# loss function (mean-squared error) using the class nn.MSELoss
mse_loss = nn.MSELoss()    # 1/3*((y0-t0)^2+(y1-t1)^2+(y2-t2)^2), where y is the output of fc3 and t is the target label
#loss = mse_loss(target,out) #loss.grad_fn:<MeanBackward0 object at 0x7fa3aab3ee10> -- the argument order has to be (out, target)
loss = mse_loss(out,target) #loss.grad_fn:<MseLossBackward object at 0x7f89413e0780> -- the argument order has to be (out, target)
# Walk two steps back along the autograd graph starting from the loss.
print("loss={}\nloss.grad_fn:{}\nloss.grad_fn.next_functions[0][0]={}\nloss.grad_fn.next_functions[0][0].next_functions[0][0]={}".format(loss,loss.grad_fn,loss.grad_fn.next_functions[0][0],loss.grad_fn.next_functions[0][0].next_functions[0][0]))
  • 反向传播求梯度:
# backprop: print conv1's bias gradient before zeroing, after zeroing, and after backward()
print("\nbefore zero_grad---net.conv1.bias.grad={}".format(net.conv1.bias.grad))

net.zero_grad()
print("\nbefore backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
# The string below is the author's note (in Chinese): a graph built by PyTorch can be
# back-propagated only once unless the first backward() was called with retain_graph=True.
'''
# pytorch构建的一个graph中,只能进行一次backward,如果上述过程已经使用过一次,则会报错:RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
'''
loss.backward() # if backward was already used earlier, add retain_graph=True to that earlier call
print("\nafter backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
  • 两种迭代方式(更新weight和bias):
# Method 1: a simple hand-written update rule, repeated until the loss is small enough.
# Relies on net, input, target, mse_loss and an initial loss from the previous snippets.
learning_rate = 0.01
iter_count = 0
while loss>0.00001: # keep iterating until the loss drops to 1e-5 or below
    net.zero_grad() # clear the gradients left over from the previous iteration
    output = net(input)
    loss = mse_loss(output,target)
    loss.backward()
    for f in net.parameters():
        f.data.sub_(f.grad.data * learning_rate) # weight = weight - learning_rate * gradient
    iter_count = iter_count + 1
    if iter_count%30 == 0: # report every 30 iterations
        print("第{}次迭代,loss:{}".format(iter_count,loss))

'''
# torch.optim优化,试一下多次迭代!!!!             Method 2
iter_count = 0
import torch.optim as optim
while loss>0.000000001:
    opt = optim.SGD(net.parameters(),lr=0.01)
    opt.zero_grad() # 每一次迭代都需要将梯度缓存改为0,否则会导致梯度叠加问题
    output = net(input)
    loss = mse_loss(output,target)
    loss.backward()
    opt.step()
    iter_count = iter_count + 1
    if iter_count%30 == 0:
        print("第{}次迭代,loss:{}".format(iter_count,loss))
'''

5.分类网络(CIFAR10)

5.1 基本步骤

  • Load and normalizing the CIFAR10 training and test datasets using torchvision 下载并载入cifar10的数据
  • Define a Convolutional Neural Network  定义分类网络
  • Define a loss function   定义损失函数
  • Train the network on the training data  训练数据
  • Test the network on the test data  测试

5.2 实现方法

  • 下载cifar10数据:
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing pipeline: convert the image to a tensor, then normalize each of the 3 channels with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))]) 
'''
#Compose组合tensor到一起,transforms.ToTensor()--转换一个PIL图像到tensor,Convert a PIL Image or numpy.ndarray to tensor;
#transforms.Normalize(mean,std)--规范化一个tensor图像,input[channel] = (input[channel] - mean[channel]) / std[channel]
'''
traindatasets = torchvision.datasets.CIFAR10(root="./data",train=True,download=False,transform=transform) 
# Read from the root directory (download=True would download first); train=True selects the training set, otherwise the test set; samples are returned after applying `transform`
trainloader = torch.utils.data.DataLoader(traindatasets,batch_size=4,shuffle=True,num_workers=2) # num_workers -- number of subprocesses used for data loading

testdatasets = torchvision.datasets.CIFAR10(root="./data",train=False,download=False,transform=transform)
# NOTE: this loader is bound to `testLoader` (capital L); the later snippets use `testloader`.
testLoader = torch.utils.data.DataLoader(testdatasets,batch_size=4,shuffle=True,num_workers=2)

# Class names listed in label order.
classes = ("plane","car","bird","cat","deer","dog","frog","horse","ship","truck")

import matplotlib.pyplot as plt
import numpy as np
def imshow(img):
    """Undo the 0.5/0.5 normalisation of an image tensor, draw it and save the figure as 1.jpg.

    img is expected in channel-first layout; it is transposed to
    channel-last before being handed to matplotlib.
    """
    # Loading applied (input - mean) / std with mean = std = 0.5, so the inverse is input * std + mean.
    restored = img/2 +0.5
    channel_last = np.transpose(restored.numpy(),(1,2,0))
    plt.imshow(channel_last)
    plt.savefig("1.jpg")
#dataiter = iter(trainloader)
#images,labels = dataiter.next()
#
##imshow(torchvision.utils.make_grid(images)) # make a grid of images图像网格,images是一个tensor,所以imshow函数里面需要转换为numpy格式的
#print(' '.join("%5s"% classes[labels[i]] for i in range(4)))
  • 定义分类网络:
traindatasets = torchvision.datasets.CIFAR10(root="./data",train=True,download=False,transform=transform) 
# Read from the root directory (download=True would download first); train=True selects the training set, otherwise the test set; samples are returned after applying `transform`
trainloader = torch.utils.data.DataLoader(traindatasets,batch_size=1,shuffle=True,num_workers=2) # batch_size=1: one image per training step; num_workers -- number of subprocesses used for data loading

testdatasets = torchvision.datasets.CIFAR10(root="./data",train=False,download=False,transform=transform)
testloader = torch.utils.data.DataLoader(testdatasets,batch_size=4,shuffle=True,num_workers=2)

# Class names listed in label order.
classes = ("plane","car","bird","cat","deer","dog","frog","horse","ship","truck")
import torch.nn as nn
import torch.nn.functional as F

class ClassifyNet(nn.Module):
    """CIFAR10 classifier: two 3x3 convolutions, one 2x2 max-pool and three linear layers.

    For a 3x32x32 image the feature map is 6x30x30 after conv1, 10x28x28 after
    conv2 and 10x14x14 after pooling, which gives the 1960 inputs of fc1.
    The three linear layers are applied back to back with no activation in between.
    """

    def __init__(self):
        super(ClassifyNet,self).__init__()
        # Layers are created in the original order so the attribute names and
        # the order of parameter initialisation stay the same.
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=6,kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6,out_channels=10,kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2,stride=2)
        self.fc1 = nn.Linear(in_features=1960,out_features=120)
        self.fc2 = nn.Linear(in_features=120,out_features=60)
        self.fc3 = nn.Linear(in_features=60,out_features=10)

    def forward(self,x):
        """Return the 10 class scores for every image in the batch."""
        feature_map = self.pool(F.relu(self.conv2(F.relu(self.conv1(x)))))
        # Flatten the pooled feature maps before the linear layers.
        flat = feature_map.view(-1,1960)
        return self.fc3(self.fc2(self.fc1(flat)))

①这里重新载入数据集,采用batch_size=1,即训练过程保持单张图训练,速度慢。但学习阶段,需要慢慢搞懂每一步!

②fc1的定义需要计算图像计算到这一层的[batchsize,channels,height,width],然后设置当前层的in_features,即输入神经元个数。

③前向传播forward计算到fc1时,需要平铺卷积高维torch tensors。

  • 定义损失函数:

net = ClassifyNet() # instantiate the network
print(net)

# define loss
loss_cross = nn.CrossEntropyLoss()
import torch.optim as optim
opt = optim.SGD(net.parameters(),lr=0.001) # plain SGD over all parameters of the network
running_loss = 0 # accumulator used by the training loop for reporting
  • 训练分类网络(利用cifar10):
# train step
# One pass over every mini-batch in trainloader; enumerate(..., 0) numbers the batches from 0.
running_loss = 0
for i,data in enumerate(trainloader,0):
    inputs,labels = data
    opt.zero_grad() # gradients accumulate, so clear them before each step
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs,labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i%2000 == 1999:
        print("第{}次迭代,loss:{}".format(i+1,running_loss/2000))
        # Reset only after reporting, so the printed value is the mean loss of the
        # last 2000 iterations rather than a single loss divided by 2000.
        running_loss = 0

print("Finish Training")

训练图片50000张,迭代50000次:

……
第46000次迭代,loss:0.000776898443698883
第48000次迭代,loss:0.001786381721496582
第50000次迭代,loss:0.0005364646911621094

  • 测试分类网络:
# test step
testdataiter = iter(testloader)
# Use the builtin next(): Python 3 iterators have no .next() method.
images,labels = next(testdataiter)
imshow(torchvision.utils.make_grid(images)) # tile the batch into one image and save it
print("GT:",' '.join("%5s"% classes[labels[i]] for i in range(4)))

outputs = net(images) # the test loader uses batch size 4, so outputs holds four 10-dimensional score vectors
_, predicts = torch.max(outputs,1) # maximum along dimension 1, i.e. the index of the best score in each 1x10 row
print(predicts)
print("Predicts:",' '.join("%5s"% classes[predicts[i]] for i in range(4)))

这里仅仅测试了4张图,预测都是正确的。

GT:   car  bird  frog   dog

Predicts:   car  bird  frog   dog

在测试集上测试:(正确率0.4979)

# test on testdatasets: count correct predictions over the whole test loader
correct = 0
total = 0
with torch.no_grad(): # no gradients are needed for evaluation
    for i,data in enumerate(testloader,0):
        images,labels = data
        outputs = net(images)
        _, predicts = torch.max(outputs,1) # index of the highest score per sample
        total += labels.size(0)
        correct = correct + (predicts == labels).sum().item() # comparison is 1 where predicts equals labels; the sum is the number of correct predictions
print("The accuracy of classifyNet on {} test images:{}".format(total,correct/total))
  • 在GPU上训练:
# train step
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:{}".format(device))
net.to(device) # (1) move the network to the device once, before the loop, instead of on every iteration
running_loss = 0
# One pass over every mini-batch in trainloader; enumerate(..., 0) numbers the batches from 0.
for i,data in enumerate(trainloader,0):
    inputs,labels = data
    inputs,labels = inputs.to(device),labels.to(device) # (2) move the data to the same device as the network
    opt.zero_grad()
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs,labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i%2000 == 1999:
        print("第{}次迭代,loss:{}".format(i+1,running_loss/2000))
        # Reset only after reporting, so the printed value is the mean loss of the
        # last 2000 iterations rather than a single loss divided by 2000.
        running_loss = 0

print("Finish Training")

①将网络net放到GPU上;

②将需要训练的数据放到GPU上。

在GPU上训练所需时间:----real    4m8.954s----user    5m22.688s----sys    0m49.932s----

在CPU上训练所需时间:----real    2m15.048s----user    9m12.332s----sys    15m32.658s----

CPU更快!!!奇怪不奇怪!!!官网解释:Why dont I notice MASSIVE speedup compared to CPU? Because your network is realllly small.

 

6.分类网络(自定义数据)

6.1 数据构成

数据来源:ImageNet

数据类别:dog 和 cat

数据放置:train文件夹下放2个文件夹(cat 和 dog),每个文件夹分别放各自的图片。val文件夹做同样的操作。但是train和val中放置的图片一般不能有重复的图片。

6.2 数据读取

参考pytorch官网(github)给出的一个例子,数据会被很规范的读入,类似CIFAR10一样,train和val下面的文件夹名字自然会被分为0和1两类:

# Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
CIFAR10 与 myDatasets 的对比:
function:traindatasets = torchvision.datasets.CIFAR10();mytraindatasets = torchvision.datasets.ImageFolder()
pytorch structure:

Dataset CIFAR10
    Number of datapoints: 50000
    Split: train
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None

Dataset ImageFolder
    Number of datapoints: 1876
    Root Location: ./data/mydatasets/datasets/train
    Transforms (if any): Compose(
                             RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                         )
    Target Transforms (if any): None
DataLoader

torch.utils.data.DataLoader(mytraindatasets, batch_size=1, shuffle=True, num_workers=0)

官网说法:Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset.

获取其中一个数据traindatasets.__getitem__(0)

矩阵是图像,6是类别标签

(tensor([[[-0.5373, -0.6627, -0.6078,  ...,  0.2392,  0.1922,  0.1608],
         [-0.8745, -1.0000, -0.8588,  ..., -0.0353, -0.0667, -0.0431],
         [-0.8039, -0.8745, -0.6157,  ..., -0.0745, -0.0588, -0.1451],
         ...,
         [-0.2471, -0.7333, -0.7961,  ..., -0.4510, -0.9451, -0.8431],
         [-0.2471, -0.6706, -0.7647,  ..., -0.2627, -0.7333, -0.7333],
         [-0.0902, -0.2627, -0.3176,  ...,  0.0980, -0.3412, -0.4353]]]), 6)

7. pytorch网络可视化(docker下的tensorboard)

  • 安装自然很简单:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow tensorboard tensorboardX
  • TensorBoard运行在docker容器下:

因为TensorBoard默认运行端口在6006,如果在docker下直接运行,则使用浏览器访问TensorBoard时,无法访问docker容器下的TensorBoard服务器,只能访问主机的TensorBoard。因此需要把docker容器的6006端口映射到主机,进而访问主机的TensorBoard服务器时,间接访问docker容器的6006端口服务资源。(-p 6006:6006)

 sudo nvidia-docker run --rm -it -v /media/lab/873821cf-d234-44cf-bd63-4372eac823a1/pytorch/:/home/pytorch -p 6006:6006 pytorch:v0 bash
  • TensorBoard网络可视化构建(代码):
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
from visualization import visualize

import torch.nn as nn
import torch.nn.functional as F

class ClassifyNet(nn.Module):
    """Two-class network used for the graph visualisation: one 3x3 convolution, a 2x2 max-pool and two linear layers.

    For a 3x224x224 image the feature map is 4x222x222 after conv1 and
    4x111x111 after pooling, which gives the 49284 inputs of fc1.
    """

    def __init__(self):
        super(ClassifyNet,self).__init__()
        # Layers are created in the original order so the attribute names and
        # the order of parameter initialisation stay the same.
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=4,kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2,stride=2)
        self.fc1 = nn.Linear(in_features=49284,out_features=60)
        self.fc2 = nn.Linear(in_features=60,out_features=2)

    def forward(self,x):
        """Return the 2 class scores for every image in the batch."""
        pooled = self.pool(F.relu(self.conv1(x)))
        # Flatten the pooled feature maps before the linear layers.
        flat = pooled.view(-1,49284)
        return self.fc2(self.fc1(flat))

net = ClassifyNet() # instantiate the network to be visualised
print(net)

# The bare string below is the author's section marker: visualisation with tensorboardX.
'''
visualization
method:tensorbordX
'''

from tensorboardX import SummaryWriter
# Record the network graph using one random 1x3x224x224 input as the example input.
with SummaryWriter(comment="Net") as w:
    w.add_graph(net,(torch.rand(1,3,224,224),))

程序运行之后,当前程序所在目录下会生成一个runs目录 

  • 运行TensorBoard服务器资源:
tensorboard --logdir=runs/
  • 局域网下的浏览器访问TensorBoard(主机地址+端口):
host_addr:6006  

8. pytorch使用AlexNet训练MNIST

8.1 代码

#coding:utf-8
'''#直接使用AlexNet是不行的,minst数据集图像大小是28*28,如果使用AlexNet的卷积核设置,最终会导致:Given input size: (192x2x2). Calculated output size: (192x0x0). Output size is too small
#原始AlexNet位置:https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
Start Training!
[1, 60000] loss:0.4914
[2, 60000] loss:0.0042
[3, 60000] loss:0.0063
[4, 60000] loss:0.0038
[5, 60000] loss:0.0172
Finished Traning
Accuracy of the network on the 10000 test images:98%'''
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import time,pdb,random,os
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils import load_state_dict_from_url


__all__ = ['AlexNet', 'alexnet']


model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


class AlexNet(nn.Module):
    """AlexNet-style network with small 3x3 kernels for single-channel input.

    features (five convolutions, three max-pools) -> adaptive average pool to
    6x6 -> classifier (three linear layers with dropout) -> num_classes scores.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # The convolution stack is listed first so that parameters are created
        # in the same order, and under the same indices, as before.
        conv_stack = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        head = [
            nn.Dropout(),
            nn.Linear(256*6*6, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class scores for a batch of images."""
        pooled = self.avgpool(self.features(x))
        # Flatten the 256 pooled 6x6 maps into one row per sample.
        flat = pooled.view(-1, 256*6*6)
        return self.classifier(flat)


def alexnet(pretrained=False, progress=True, **kwargs):
    r"""Build an AlexNet model, optionally loading downloaded weights.

    AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, the state dict at ``model_urls['alexnet']``
            is downloaded and loaded into the model
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: forwarded unchanged to the ``AlexNet`` constructor

    NOTE(review): the ``AlexNet`` class in this file uses different layer
    shapes than the torchvision original, so the downloaded weights
    presumably do not fit it -- confirm before calling with ``pretrained=True``.
    """
    model = AlexNet(**kwargs)
    if not pretrained:
        return model
    weights = load_state_dict_from_url(model_urls['alexnet'],
                                       progress=progress)
    model.load_state_dict(weights)
    return model

# Cut a 3-pixel strip off one randomly chosen side of the image and re-attach it on the opposite side.
def crop_pad_image(src1):
    """Return a copy of ``src1`` shifted by 3 pixels in a random direction, with wrap-around.

    Args:
        src1: tensor of shape (channel, height, width).

    Returns:
        A tensor built with ``torch.cat`` from the two slices of ``src1``;
        the strip removed on one side is appended on the other.
    """
    shift = 3
    direction = random.randint(0,3)  # 0: up, 1: down, 2: left, 3: right
    channel,height,width = src1.shape
    if direction == 0:
        # Top strip moves to the bottom.
        pieces = (src1[:,shift:height,0:width], src1[:,0:shift,0:width])
        axis = 1
    elif direction == 1:
        # Bottom strip moves to the top.
        pieces = (src1[:,height-shift:height,0:width], src1[:,0:height-shift,0:width])
        axis = 1
    elif direction == 2:
        # Left strip moves to the right.
        pieces = (src1[:,0:height,shift:width], src1[:,0:height,0:shift])
        axis = 2
    else:
        # Right strip moves to the left.
        pieces = (src1[:,0:height,width-shift:width], src1[:,0:height,0:width-shift])
        axis = 2
    #pdb.set_trace()
    return torch.cat(pieces,axis)

from torchvision.utils import save_image
def save_img(img, save_path,name_index):
    """Save a normalised 28x28 image tensor as ``<save_path>/<name_index>.jpg``.

    Args:
        img: tensor whose values are mapped from [-1, 1] to [0, 1] and whose
            elements can be viewed as (-1, 1, 28, 28).
        save_path: output directory; created if it does not exist.
        name_index: value used (via ``str``) as the file name without extension.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(save_path, exist_ok=True)
    img = 0.5 * (img + 1)  # undo Normalize(mean=0.5, std=0.5)
    img = img.clamp(0, 1)
    img = img.view(-1, 1, 28, 28)
    save_image(img, os.path.join(save_path,str(name_index)+".jpg"))

if __name__=="__main__":
    # Preprocessing: ToTensor followed by Normalize(mean=0.5, std=0.5) for the single MNIST channel.
    # NOTE: this assignment rebinds the name `transforms` (until now the
    # torchvision.transforms module) to the composed pipeline.
    transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))]) 

    num_classes = 10  # MNIST digits 0-9

    #trainset = torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transforms)
    #trainloader = torch.utils.data.DataLoader(trainset, batch_size=10,shuffle=True,num_workers=0)
    testset = torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transforms)
    testloader = torch.utils.data.DataLoader(testset,batch_size=10,shuffle=False,num_workers=0)
    net = AlexNet()
    # Loss function: cross entropy
    criterion = nn.CrossEntropyLoss()   
    # Optimizer: SGD with momentum
    optimizer = optim.SGD(net.parameters(),lr=1e-3, momentum=0.9)
    #device : GPU or CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    #print("Start Training!")
    #num_epochs = 5 # number of training epochs
    #for epoch in range(num_epochs):
    #    running_loss = 0
    #    batch_size = 10
    #    for i, data in enumerate(trainloader):
    #        inputs, labels = data
    #        inputs, labels = inputs.to(device), labels.to(device)
    #
    #        outputs = net(inputs)
    #        #pdb.set_trace()
    #        loss = criterion(outputs, labels)
    #        optimizer.zero_grad()
    #        loss.backward()
    #        optimizer.step()
    #
    #    print('[%d, %5d] loss:%.4f'%(epoch+1, (i+1)*10, loss.item()))
    #
    #print("Finished Traning")
    #
    ## save the trained model
    #torch.save(net, 'MNIST.pkl')

    # Load the whole pickled model written by the (commented-out) training code above.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE(review): torch.load unpickles arbitrary objects -- only load files you trust.
    net = torch.load('MNIST.pkl', map_location=device)
    net.to(device)
    # Evaluation mode: without it the Dropout layers stay active and randomly
    # zero activations while the accuracy is being measured.
    net.eval()
    # Start recognition
    with torch.no_grad():
        # No gradients are tracked for the computations inside this block.
        correct = 0
        total = 0
        tplist = [0.] * num_classes     # per class: samples of the class predicted as that class (TP)
        fnlist = [0.] * num_classes     # per class: samples of the class predicted as another class (FN)
        tp_fplist = [0.] * num_classes  # per class: how often the class was predicted (TP + FP)
        tp_fnlist = [0.] * num_classes  # per class: how many samples carry the class label (TP + FN)
        totallist = [0.] * num_classes  # per class: number of samples seen

        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            # The commented code below shifts every MNIST digit by 3 pixels in a random
            # direction (up/down/left/right) to probe the CNN's translation invariance.
            #testbatchsize=images.size()[0]
            #for i in range(testbatchsize):
            #    # save the original image for inspection
            #    #save_img(images[i,:,:,:],"original_images",labels[i].item())
            #    temp = torch.empty(1,28,28)
            #    temp.copy_(images[i])
            #    images[i].copy_(crop_pad_image(images[i]))
            #    # save the shifted image for inspection; verified that the digit really moved
            #    #save_img(images[i,:,:,:],"move_images",labels[i].item())
            #    #pdb.set_trace()
            out = net(images)
            _, predicted = torch.max(out.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Update the per-class counters one sample at a time. Indexing by the label and
            # the prediction keeps the statistics correct for any batch size, instead of
            # relying on the batch size being equal to the number of classes.
            for label_single, predicted_single in zip(labels.tolist(), predicted.tolist()):
                hit = label_single == predicted_single
                tplist[label_single] += hit
                fnlist[label_single] += not hit
                tp_fplist[predicted_single] += 1
                tp_fnlist[label_single] += 1
                totallist[label_single] += 1
        #pdb.set_trace()
        print('Accuracy of the network on the 10000 test images:{}%'.format(100 * float(correct) / total)) # overall accuracy
        mRecall = 0
        mPrecision = 0
        for i in range(num_classes):
            # A class that never occurs (or is never predicted) would divide by zero; report 0 instead.
            recall = tplist[i]/tp_fnlist[i] if tp_fnlist[i] else 0.0
            precision = tplist[i]/tp_fplist[i] if tp_fplist[i] else 0.0
            print("数字{}的召回率(查全率)recall:{:.3f},精确率(查准率)precision:{:.3f}".format(i,recall,precision))
            mRecall += recall
            mPrecision += precision
        print("mRecall:{:.3f}".format(mRecall/num_classes))
        print("mPrecision:{:.3f}".format(mPrecision/num_classes))
  • 所有测试集数据随机上、下、左、右移动3个像素

 

  • 数据预处理Normalize 

参考网站:https://blog.csdn.net/xys430381_1/article/details/85724668?utm_source=distribute.pc_relevant.none-task 

(单通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,),std=(0.5,))])

(三通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))])

表示数据集图片预处理过程中,先通过“torchvision.transforms.ToTensor”将0-255的像素值转换到0-1之间,然后“transforms.Normalize(mean=(0.5,),std=(0.5,))”按照均值为0.5、标准差(std,不是方差)为0.5的方式,把0-1的数据归一化到-1到1。计算方法:output=(input-mean)/std,例如 (0-0.5)/0.5=-1,(1-0.5)/0.5=1。

  • tensor数据保存为图像

参考网站:https://pytorch.org/docs/master/_modules/torchvision/utils.html#save_image

网站中提供了将tensor数据直接保存为可视图像的方法,最重要的一点,使用ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()将0至1的数据转换为0-255(如果tensor像素值在-1到1之间,需要先用(x+1)/2还原到0至1,或者在调用save_image时设置normalize=True),以及调整了维度顺序。通常tensor的各维度信息为[batchsize,dim,height,width],而opencv或者pillow图像各维度为[height,width,dim],所以使用opencv或pillow保存图像需要将tensor转换过来。

  • 缩小图像

tensor.resize_()不能实现,这个函数只会截取原图像数据矩阵中一部分,因此目前我能找到的方法就是先将tensor转换为opencv可用的数据维度,包括调整-1至1为0-255,以及调整维度顺序,然后使用opencv中的可插值resize方法。以MNIST图像缩小一半为例:

def resize_image(src,transforms=None):
    """Shrink a normalised single-channel image to half size and centre it on a blank canvas.

    Args:
        src: tensor of shape (1, height, width); values are treated as lying in
            [-1, 1], i.e. as produced by Normalize(mean=0.5, std=0.5).
        transforms: callable applied to the resized (H, W, 3) uint8 array; it must
            return a tensor indexable as [channel, row, col]. Required in
            practice: the ``None`` default cannot be called.

    Returns:
        Tensor of shape (1, height, width) filled with -1 whose centre holds
        channel 0 of the transformed, shrunken image.

    Uses ``cv2``, which must be importable in the enclosing module.
    """
    channel,height,width = src.shape
    # Half-size target and the offsets that centre it (28x28 -> 14x14 placed at 7:21).
    new_h,new_w = height//2,width//2
    top,left = (height-new_h)//2,(width-new_w)//2
    temp = torch.empty(1,height,width)
    temp.copy_(src)
    # Map [-1, 1] to [0, 255] before handing the data to OpenCV. Multiplying the
    # normalised values by 255 directly would clamp every negative pixel to 0.
    temp = temp.add_(1).mul_(127.5).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    tempCV = cv2.resize(tempCV, (new_w,new_h), interpolation = cv2.INTER_AREA)  # dsize is (width, height)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    outTemp = -torch.ones(1,height,width)  # background value -1
    outTemp[0,top:top+new_h,left:left+new_w]=resizeTemp[0,:,:]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp

在应用时,将8.1代码中被注释掉的 images[i].copy_(crop_pad_image(images[i])) 这一行(原文代码第179行)修改为 images[i].copy_(resize_image(images[i],transforms)),并取消它所在for循环的注释,即可实现MNIST测试集图像缩小功能。压缩后测试结果如下:

Accuracy of the network on the 10000 test images:57.2%
数字0的召回率(查全率)recall:0.004,精确率(查准率)precision:1.000
数字1的召回率(查全率)recall:0.959,精确率(查准率)precision:0.639
数字2的召回率(查全率)recall:0.479,精确率(查准率)precision:0.653
数字3的召回率(查全率)recall:0.685,精确率(查准率)precision:0.504
数字4的召回率(查全率)recall:0.648,精确率(查准率)precision:0.747
数字5的召回率(查全率)recall:0.952,精确率(查准率)precision:0.357
数字6的召回率(查全率)recall:0.507,精确率(查准率)precision:0.452
数字7的召回率(查全率)recall:0.545,精确率(查准率)precision:0.930
数字8的召回率(查全率)recall:0.331,精确率(查准率)precision:0.782
数字9的召回率(查全率)recall:0.583,精确率(查准率)precision:0.696
mRecall:0.569
mPrecision:0.676

随机缩小为原图的0.5-0.7倍:

def resize_image(src,transforms=None):
    """Shrink a normalised single-channel image by a random factor and centre it on a blank canvas.

    The zoom factor is drawn from 0.5, 0.6 and 0.7 (``0.1*random.randint(5,7)``).

    Args:
        src: tensor of shape (1, height, width); values are treated as lying in
            [-1, 1], i.e. as produced by Normalize(mean=0.5, std=0.5).
        transforms: callable applied to the resized (H, W, 3) uint8 array; it must
            return a tensor indexable as [channel, row, col]. Required in
            practice: the ``None`` default cannot be called.

    Returns:
        Tensor of shape (1, height, width) filled with -1 whose centre holds
        channel 0 of the transformed, shrunken image.

    Uses ``cv2``, which must be importable in the enclosing module.
    """
    zoom = 0.1*random.randint(5,7)
    channel,height,width = src.shape
    # Size of the shrunken image and the offsets that centre it on the canvas.
    new_h,new_w = int(height*zoom),int(width*zoom)
    top = int(height*(1-zoom)*0.5)
    left = int(width*(1-zoom)*0.5)
    temp = torch.empty(1,height,width)
    temp.copy_(src)
    # Map [-1, 1] to [0, 255] before handing the data to OpenCV. Multiplying the
    # normalised values by 255 directly would clamp every negative pixel to 0.
    temp = temp.add_(1).mul_(127.5).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    # dsize is (width, height); the result then has new_h rows and new_w columns.
    tempCV = cv2.resize(tempCV, (new_w,new_h), interpolation = cv2.INTER_AREA)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    outTemp = -torch.ones(1,height,width)  # background value -1
    outTemp[0,top:top+new_h,left:left+new_w]=resizeTemp[0,:,:]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp

 

Logo

权威|前沿|技术|干货|国内首个API全生命周期开发者社区

更多推荐