pytorch学习
1.pytorch环境配置(docker)docker环境--配置过程(配置成功--能使用nvidia-docker命令)拉取pytorch镜像--参考网站(命令--docker pull nvcr.io/nvidia/pytorch:19.01-py3)启动docker容器--参考网站(命令--sudo nvidia-docker run -it --rm -v local_dir:...
1.pytorch环境配置(docker)
- docker环境--配置过程(配置成功--能使用nvidia-docker命令)
- 拉取pytorch镜像--参考网站(命令--docker pull nvcr.io/nvidia/pytorch:19.01-py3)
- 启动docker容器--参考网站(命令--sudo nvidia-docker run -it --rm -v local_dir:container_dir nvcr.io/nvidia/pytorch:<xx.xx>-py3)
2. pytorch中tensor的基本操作
2.1 pytorch测试
# coding=utf-8
# 这是一个用于练习的文档
from __future__ import print_function
import torch
x = torch.rand(5,3)
print("x={}".format(x)) # 5x3 tensor of uniform random numbers in [0, 1)
print(torch.empty(5,3)) # uninitialized tensor: the printed values are arbitrary
print(torch.zeros(5,3,dtype=torch.long))
print(torch.tensor([5.5,3])) # build a tensor directly from Python data
x=x.new_ones(5,3,dtype=torch.double) # new tensor of ones created from x, with the dtype overridden
print("x={}".format(x))
x=torch.rand_like(x,dtype=torch.float) # same size as x, filled with uniform random numbers from [0, 1); torch.randn_like is the standard-normal variant
print("x={}".format(x))
print(x.dtype)
print(x.size())
2.2 tensor加法
# Tensor addition (PyTorch)
y=(torch.rand(5,3))
print("y={}".format(y))
print("x+y={}".format(x+y)) # element-wise addition; torch.add(x,y) is equivalent
print("torch.add(x,y)={}".format(torch.add(x,y)))
result=torch.empty(5,3)
torch.add(x,y,out=result) # write the sum into the preallocated tensor
print("result={}".format(result)) # addition with an output argument; result must have the same dtype as x and y
2.3 in-place方法
# In-place methods: modify the tensor itself instead of creating a new one
y.add_(x)
print("y={}".format(y)) # y=y+x
# Any PyTorch operation with a trailing underscore works in place, e.g. x.copy_(y), x.t_()
print("x={}".format(x))
x.copy_(y)
print("x={}".format(x))
x.t_()
print("x={}".format(x)) # x now holds its own transpose
# Tensor resize/reshape (torch.view)
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16)
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # a negative index normally counts from the right (-1 is the last element), but in view() a size of -1 is inferred from the other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check: with the other dimension 8, -1 resolves to 2; with 4, -1 resolves to 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))
2.4 tensor resize/reshape
# Tensor resize/reshape (torch.view)
x = torch.rand(4,4)
print("x={}".format(x))
y = x.view(16) # flatten the 4x4 tensor into 16 elements
print("y=x.view(16)={}".format(y))
print("x.view(-1,8)={}".format(x.view(-1,8))) # a negative index normally counts from the right (-1 is the last element), but in view() a size of -1 is inferred from the other dimensions
print("x.view(-1,4)={}".format(x.view(-1,4))) # further check: with the other dimension 8, -1 resolves to 2; with 4, -1 resolves to 4
#print("x.view(-1,5)={}".format(x.view(-1,5)))#RuntimeError: shape '[-1, 5]' is invalid for input of size 16
print("x.view(2,8)={}".format(x.view(2,8)))
print("x.view(8,2)={}".format(x.view(8,2)))
2.5 获取tensor某一element的值
# Read the value of a single element
print("x[1][1]={}".format(x[1][1])) # indexing returns a 0-dim tensor
print("x[1][1].item()={}".format(x[1][1].item())) # .item() converts it to a plain Python number
x=torch.randn(1)
print("x={}".format(x))
print("x.item()={}".format(x.item())) # .item() also works on a one-element tensor
print("x[0].item()={}".format(x[0].item()))
2.6 转换Torch tensor到numpy
# Convert a Torch tensor to a NumPy array
a = torch.ones(6)
print("a={},type is {}".format(a,type(a)))
b = a.numpy()
print("b=a.numpy()={},type is {}".format(b,type(b)))
# Addition on the NumPy side: to change the array's values it is enough to change the tensor it came from
#b.add_(2)#AttributeError: 'numpy.ndarray' object has no attribute 'add_'
print("b+2={}".format(b+2)) # out-of-place: neither a nor b is modified
a.add_(3) # in-place update of the tensor
print("a={},type is {}".format(a,type(a)))
print("b=a.numpy()={},type is {}".format(b,type(b))) # b is printed again to show it follows a
2.7 转换numpy到Torch tensor
# 转换numpy到Torch tensor
import numpy as np
a = np.ones(2)
print("a={},type is {}".format(a,type(a)))
b = torch.from_numpy(a)
print("b=torch.from_numpy(a)={},type is {}".format(b,type(b)))
# Does changing the NumPy array change the tensor as well? Verified: yes
np.add(a,1,out=a) # out --- a location into which the result is stored (here: a itself)
print("a={},type is {}".format(a,type(a)))
print("b={},type is {}".format(b,type(b)))
2.8 tensor传入GPU
# PyTorch CUDA tensors
# Everything below runs only when a GPU is available (the body of the `if`
# must be indented; x is the tensor created in the previous snippet).
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Create a tensor of ones with the same size as x directly on the GPU.
    y = torch.ones_like(x, device=device)
    print("\nx={},dtype is {}".format(x, x.dtype))
    print("y=torch.ones_like(x,device=device)={},dtype is {}".format(y, y.dtype))
    # z = x + y  # RuntimeError: expected type torch.FloatTensor but got torch.cuda.FloatTensor
    #            # (one operand is on the CPU, the other on the GPU, so they cannot be combined)
    x = x.to(device)  # move x to the GPU
    print("x=x.to(device)={},dtype is {}".format(x, x.dtype))
    z = x + y
    print("z=x+y={},dtype is {}".format(z, z.dtype))
3.pytorch反向传播
3.1 requires_grad和grad_fn
# coding=utf-8
import torch
x = torch.ones(2,2,requires_grad=True) # track operations on x so gradients can be computed
print("x={}".format(x))
y = x + 3
print("y={},y.grad_fn={}".format(y,y.grad_fn)) # sample output: y.grad_fn=<AddBackward0 object at 0x7fd67a0f9be0>
z = x * y * 4
print("z={},z.grad_fn={}".format(z,z.grad_fn)) # sample output: z.grad_fn=<MulBackward0 object at 0x7fd67a0f9be0>
3.2 反向传播backward(标量对向量求导)
要实现反向传播,求偏导的自变量(tensor) 必须满足条件:a.requires_grad=True(以下述snippet为例)
# coding=utf-8
import torch
a = torch.randn(2,2)
a = (a*3/(a-1))
print("a.requires_grad is {}".format(a.requires_grad)) # requires_grad defaults to False when not given, so a does not track gradients
#a.requires_grad_(True) # left disabled on purpose: shows that backward fails unless the differentiated tensor has requires_grad=True
b = a.sum()
print("b=a.sum()={},b.grad_fn is {}".format(b,b.grad_fn))
# Backpropagation
#b.backward() # out.backward() is equivalent to out.backward(torch.tensor(1.)); with a.requires_grad=False it raises RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
默认 requires_grad=False 时无法反向传播,因此需要先调用 a.requires_grad_(True)。正确例子如下:
# coding=utf-8
import torch
a = torch.randn(2, 2)
a = (a * 3 / (a - 1))
# requires_grad defaults to False when not given, so a does not track gradients yet.
print("a.requires_grad is {}".format(a.requires_grad))
# The tensor we differentiate with respect to must have requires_grad=True,
# otherwise backward() cannot run.
a.requires_grad_(True)
print("a.requires_grad is {}".format(a.requires_grad))  # now True
print("\na={}".format(a))
# Partial derivatives of a scalar with respect to a tensor
c = a.mean()
print("c=a.mean()={}".format(c))
c.backward()  # backpropagate through the mean: d(mean)/da = 1/4 for each of the 4 elements
print("a.grad={}".format(a.grad))  # a.grad=tensor([[0.2500, 0.2500],[0.2500, 0.2500]])
b = a.sum()
b.backward()  # d(sum)/da = 1 per element, added to the gradient already stored in a.grad
print("a.grad={}".format(a.grad))  # a.grad=tensor([[1.2500, 1.2500],[1.2500, 1.2500]]) -- accumulated!
如果多次使用backward(),则a.grad会被累加运算!
3.3 范数求解
# Norm computation
x = torch.randn(3, requires_grad=True)
y = x * 2
# Keep doubling y until its norm reaches 10. norm() computes the 2-norm by
# default; torch.norm(y, 2) is the explicit form.
while y.data.norm() < 10:
    y = y * 2
print("\ny={},y.data={},y.data.norm()={},torch.norm(y,2)={}".format(y, y.data, y.data.norm(), torch.norm(y, 2)))
3.4 反向传播backward(向量对向量求导)
理论公式推导可参考矩阵求导
# The string below is the author's note: differentiating a vector with respect to a
# vector yields a Jacobian, so backward() needs an extra vector v (see the linked autograd tutorial).
'''
# 向量(vector)对向量(vector)求偏导,backward(),数学上1*3的vector对1*3的vector求导会得到一个3*3的矩阵(vector-Jacobian product),但这里需要加一个向量v
# 可参考https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html
'''
v = torch.tensor([1.2,0.4,0.003],dtype=torch.float)
#y.backward() # this is the form for a scalar output; here it raises RuntimeError: grad can be implicitly created only for scalar outputs
y.backward(v) # passing a vector v computes the vector-Jacobian product, i.e. x.grad = J^T · v
print("x.grad={}".format(x.grad))
在下面这次运行中,上述程序循环结束后得到 y = x*8,因此 x.grad = 8*v,输出如下:
x=tensor([ 1.3471, -0.0893, -0.7166], requires_grad=True)
y=tensor([10.7771, -0.7143, -5.7330], grad_fn=<MulBackward0>)
x.grad=tensor([9.6000, 3.2000, 0.0240])
3.5 停止自动求导
# Temporarily disable autograd tracking
print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # True
with torch.no_grad():
    # Results computed inside the block do not track gradients.
    print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # False
print("(x**2).requires_grad={}".format((x ** 2).requires_grad))  # True again outside the block
4.神经网络
公式推导可以参考神经网络
4.1 基本要求
A typical training procedure for a neural network is as follows:
- Define the neural network that has some learnable parameters (or weights) 定义神经网络(拥有一些可学习的参数)
- Iterate over a dataset of inputs 在输入数据集上进行迭代
- Process input through the network 通过网络处理输入
- Compute the loss (how far is the output from being correct) 计算损失
- Propagate gradients back into the network’s parameters 反向传播梯度给网络参数
- Update the weights of the network, typically using a simple update rule:
weight = weight -learning_rate * gradient
更新网络权重
4.2 实现步骤
- 定义神经网络:
# coding=utf-8
'''
卷积、全连接在torch.nn,池化在torch.nn.functional
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages followed by three linear layers.

    Shape trace for a (N, 1, 32, 32) input: conv1 (2x2) -> 31x31, pool -> 15x15,
    conv2 (2x2) -> 14x14, pool -> 7x7, so fc1 receives 3*7*7 features per sample.
    The network produces 3 outputs per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # in_channels=1, out_channels=3, kernel_size=2x2; the remaining arguments keep
        # their defaults (stride=1, padding=0, dilation=1, groups=1, bias=True).
        self.conv1 = nn.Conv2d(1, 3, 2)
        self.conv2 = nn.Conv2d(3, 3, 2)
        # Fully connected layers: 3 channels of 7x7 feature maps come out of the conv stages.
        self.fc1 = nn.Linear(3 * 7 * 7, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        # Layer sizes used by the official tutorial, kept for reference:
        # self.conv1 = nn.Conv2d(1,6,5)       # in_channels=1, out_channels=6, kernel_size=5x5
        # self.conv2 = nn.Conv2d(6,16,5)
        # self.fc1 = nn.Linear(16*5*5,120)    # 16 channels of 5x5 feature maps
        # self.fc2 = nn.Linear(120,84)
        # self.fc3 = nn.Linear(84,3)

    def forward(self, x):
        """Run the network on a mini-batch x of shape (N, 1, H, W) and return the fc3 output."""
        #import pdb;pdb.set_trace()
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))  # conv -> relu -> 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # a single number means a square window, i.e. (2, 2)
        x = x.view(-1, self.num_flat_features(x))  # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        #import pdb;pdb.set_trace()
        x = self.fc3(x)
        return x  # the original text had this return after num_flat_features, leaving forward() without a result

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of all dims except the batch dim)."""
        size = x.size()[1:]  # every dimension except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
- 查看网络:
net = Net() # instantiate the network
print("net:{}".format(net)) # show the network structure
param = list(net.parameters())
print("length:{}\nparam[0].size()={}\nparam[1].size()={}\nparam[2].size()={}\nparam[3].size()={}\nparam[4].size()={}\nparam[5].size()={}\n".format(len(param),param[0].size(),param[1].size(),param[2].size(),param[3].size(),param[4].size(),param[5].size())) # length is 10: each of the 5 layers (2 conv + 3 linear) has a weight and a bias
- 数据输入网络:
# The string below is the author's note: torch.nn only accepts mini-batches, so a single
# sample (C, H, W) must be given a leading batch dimension (N, C, H, W).
'''
# input--The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample
# 就是需要实现对齐,举个例子:a single sample 可能就是3*32*32(nChannels*Height*Width),a mini-batch of samples就是1*3*32*32(nSamples*nChannels*Height*Width))
# 如果输入是a single sample,则需要转换为a mini-batch of samples(虚构一个nSamples的维度)
'''
input = torch.randn(1,1,32,32) # one sample, one channel, 32x32 (NOTE: the name shadows the builtin input(); later snippets rely on it)
print("input=torch.randn(1,1,32,32)={}".format(input))
out = net(input) # calling the module runs forward()
print("out=net(input)={},size={}".format(out,out.size()))
- 计算损失:
net.zero_grad() # zero the gradient buffers of all model parameters
#out.backward(torch.randn(1,3),retain_graph=True)
target = torch.tensor([0.8,0.1,0.1],dtype=torch.float)
print("target变换前:{},size={}".format(target,target.size()))
target = target.view(1,-1) # reshape target to the same dimensions as out
print("target变换后={},size={}".format(target,target.size()))
# Loss function (mean-squared error) via the nn.MSELoss class
mse_loss = nn.MSELoss() # 1/3*((y0-t0)^2+(y1-t1)^2+(y2-t2)^2), where y is the fc3 output and t the target
#loss = mse_loss(target,out) # sample output: loss.grad_fn:<MeanBackward0 object at 0x7fa3aab3ee10>; the argument order must be (out, target)
loss = mse_loss(out,target) # sample output: loss.grad_fn:<MseLossBackward object at 0x7f89413e0780>; the argument order must be (out, target)
print("loss={}\nloss.grad_fn:{}\nloss.grad_fn.next_functions[0][0]={}\nloss.grad_fn.next_functions[0][0].next_functions[0][0]={}".format(loss,loss.grad_fn,loss.grad_fn.next_functions[0][0],loss.grad_fn.next_functions[0][0].next_functions[0][0]))
- 反向传播求梯度:
# Backpropagation
print("\nbefore zero_grad---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
net.zero_grad()
print("\nbefore backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
# The string below is the author's note: a graph can be backpropagated only once unless
# the first backward() call was given retain_graph=True.
'''
# pytorch构建的一个graph中,只能进行一次backward,如果上述过程已经使用过一次,则会报错:RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.
'''
loss.backward() # if backward was already called earlier, that earlier call needs retain_graph=True
print("\nafter backprop---net.conv1.bias.grad={}".format(net.conv1.bias.grad))
- 两种迭代方式(更新weight和bias):
# Method 1: a simple hand-written gradient-descent loop, repeated until the loss is small.
learning_rate = 0.01
iter_count = 0
while loss > 0.00001:
    net.zero_grad()
    output = net(input)
    loss = mse_loss(output, target)
    loss.backward()
    for f in net.parameters():
        # weight = weight - learning_rate * gradient
        f.data.sub_(f.grad.data * learning_rate)
    iter_count = iter_count + 1
    if iter_count % 30 == 0:
        print("第{}次迭代,loss:{}".format(iter_count, loss))

# Method 2 (alternative, disabled): the same loop driven by torch.optim.
# The optimizer is built once, before the loop, instead of on every iteration.
# import torch.optim as optim
# opt = optim.SGD(net.parameters(), lr=0.01)
# iter_count = 0
# while loss > 0.000000001:
#     opt.zero_grad()  # clear the gradient buffers every iteration, otherwise gradients accumulate
#     output = net(input)
#     loss = mse_loss(output, target)
#     loss.backward()
#     opt.step()
#     iter_count = iter_count + 1
#     if iter_count % 30 == 0:
#         print("第{}次迭代,loss:{}".format(iter_count, loss))
5.分类网络(CIFAR10)
5.1 基本步骤
- Load and normalizing the CIFAR10 training and test datasets using
torchvision 下载并载入cifar10的数据
- Define a Convolutional Neural Network 定义分类网络
- Define a loss function 定义损失函数
- Train the network on the training data 训练数据
- Test the network on the test data 测试
5.2 实现方法
- 下载cifar10数据:
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
# Compose chains the transforms: ToTensor converts a PIL image or numpy.ndarray to a tensor;
# Normalize(mean, std) applies input[channel] = (input[channel] - mean[channel]) / std[channel].
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Read from `root`; download=True would download first. train=True selects the training
# split, otherwise the test split. Samples are returned after applying `transform`.
traindatasets = torchvision.datasets.CIFAR10(root="./data", train=True, download=False, transform=transform)
# num_workers is the number of subprocesses used for data loading.
trainloader = torch.utils.data.DataLoader(traindatasets, batch_size=4, shuffle=True, num_workers=2)
testdatasets = torchvision.datasets.CIFAR10(root="./data", train=False, download=False, transform=transform)
# Named `testloader` (not `testLoader`) to match the name used by every later snippet.
testloader = torch.utils.data.DataLoader(testdatasets, batch_size=4, shuffle=True, num_workers=2)
classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")
import matplotlib.pyplot as plt
import numpy as np
def imshow(img, save_path="1.jpg"):
    """Undo the 0.5/0.5 normalization of an image tensor and save it as an image file.

    Args:
        img: image tensor whose axes are transposed with (1, 2, 0) before plotting,
            i.e. channel-first layout; expected to be normalized as (x - 0.5) / 0.5.
        save_path: file the figure is written to; defaults to "1.jpg" as before.
    """
    # Loading applied (x - mean) / std, so invert it with x * std + mean.
    img = img / 2 + 0.5
    np_img = img.numpy()  # convert to a NumPy array for matplotlib
    plt.imshow(np.transpose(np_img, (1, 2, 0)))  # channel-first -> channel-last
    plt.savefig(save_path)
#dataiter = iter(trainloader)
#images,labels = dataiter.next()
#
##imshow(torchvision.utils.make_grid(images)) # make a grid of images图像网格,images是一个tensor,所以imshow函数里面需要转换为numpy格式的
#print(' '.join("%5s"% classes[labels[i]] for i in range(4)))
- 定义分类网络:
traindatasets = torchvision.datasets.CIFAR10(root="./data",train=True,download=False,transform=transform)
# Read from root; download=True would download first; train=True selects the training split, otherwise the test split; samples are returned after applying transform
trainloader = torch.utils.data.DataLoader(traindatasets,batch_size=1,shuffle=True,num_workers=2) # batch_size=1: one image per training step; num_workers is the number of subprocesses used for data loading
testdatasets = torchvision.datasets.CIFAR10(root="./data",train=False,download=False,transform=transform)
testloader = torch.utils.data.DataLoader(testdatasets,batch_size=4,shuffle=True,num_workers=2)
classes = ("plane","car","bird","cat","deer","dog","frog","horse","ship","truck")
import torch.nn as nn
import torch.nn.functional as F
class ClassifyNet(nn.Module):
    """CIFAR10 classifier: two 3x3 convolutions, one 2x2 max-pool and three linear layers.

    Shape trace for a (N, 3, 32, 32) input: conv1 -> 30x30, conv2 -> 28x28,
    pool -> 14x14, so fc1 receives 10*14*14 = 1960 features per sample.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)  # input channels=3, output channels=6, kernel=3x3
        self.conv2 = nn.Conv2d(6, 10, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(1960, 120)  # 1960 = 10*14*14 = channels*height*width per sample
        self.fc2 = nn.Linear(120, 60)  # torch.nn.Linear(in_features, out_features, bias=True)
        self.fc3 = nn.Linear(60, 10)

    def forward(self, x):
        """Return the 10 class scores for a mini-batch x of shape (N, 3, 32, 32)."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        #import pdb; pdb.set_trace()
        # Flatten per sample. Keeping the batch dimension explicit means a wrongly sized
        # input fails in fc1 instead of being silently re-batched by view(-1, 1960).
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x
①这里重新载入数据集,采用batch_size=1,即训练过程保持单张图训练,速度慢。但学习阶段,需要慢慢搞懂每一步!
②fc1的定义需要计算图像计算到这一层的[batchsize,channels,height,width],然后设置当前层的in_features,即输入神经元个数。
③前向传播forward计算到fc1时,需要平铺卷积高维torch tensors。
-
定义损失函数:
net = ClassifyNet() # instantiate the network
print(net)
# Define the loss
loss_cross = nn.CrossEntropyLoss()
import torch.optim as optim
opt = optim.SGD(net.parameters(),lr=0.001) # plain SGD over all network parameters
running_loss = 0 # loss accumulator read and reset by the training loop
- 训练分类网络(利用cifar10):
# Training step
# enumerate(trainloader, 0) yields (index, batch) pairs starting at index 0;
# one pass over the loader visits every training sample once.
for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    opt.zero_grad()
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs, labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i % 2000 == 1999:
        # Report the mean loss over the last 2000 iterations, then restart the accumulator.
        print("第{}次迭代,loss:{}".format(i + 1, running_loss / 2000))
        running_loss = 0
print("Finish Training")
训练图片50000张,迭代50000次:
……
第46000次迭代,loss:0.000776898443698883
第48000次迭代,loss:0.001786381721496582
第50000次迭代,loss:0.0005364646911621094
- 测试分类网络:
# Test step
testdataiter = iter(testloader)
# Use the builtin next(): DataLoader iterators in current PyTorch have no .next() method.
images, labels = next(testdataiter)
imshow(torchvision.utils.make_grid(images))
print("GT:", ' '.join("%5s" % classes[labels[i]] for i in range(4)))
# The test loader uses batch_size=4, so outputs holds four 10-dimensional score vectors.
outputs = net(images)
# torch.max along dim 1 returns, for each row, the maximum value and its index;
# the index is the predicted class.
_, predicts = torch.max(outputs, 1)
print(predicts)
print("Predicts:", ' '.join("%5s" % classes[predicts[i]] for i in range(4)))
这里仅仅测试了4张图,预测都是正确的。
GT: car bird frog dog
Predicts: car bird frog dog
在测试集上测试:(正确率0.4979)
# Accuracy over the whole test set
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        outputs = net(images)
        _, predicts = torch.max(outputs, 1)
        total += labels.size(0)
        # (predicts == labels) is 1 where the prediction is right; the sum counts the hits.
        correct = correct + (predicts == labels).sum().item()
print("The accuracy of classifyNet on {} test images:{}".format(total, correct / total))
- 在GPU上训练:
# Training step on the GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:{}".format(device))
net.to(device)  # (1) move the network to the device once, before the loop
for i, data in enumerate(trainloader, 0):  # one pass over all training data, index starting at 0
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)  # (2) move each batch to the device
    opt.zero_grad()
    outputs = net(inputs)
    #import pdb;pdb.set_trace()
    loss = loss_cross(outputs, labels)
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if i % 2000 == 1999:
        # Report the mean loss over the last 2000 iterations, then restart the accumulator.
        print("第{}次迭代,loss:{}".format(i + 1, running_loss / 2000))
        running_loss = 0
print("Finish Training")
①将网络net放到GPU上;
②将需要训练的数据放到GPU上。
在GPU上训练所需时间:----real 4m8.954s----user 5m22.688s----sys 0m49.932s----
在CPU上训练所需时间:----real 2m15.048s----user 9m12.332s----sys 15m32.658s----
CPU更快!!!奇怪不奇怪!!!官网解释:Why dont I notice MASSIVE speedup compared to CPU? Because your network is realllly small.
6.分类网络(自定义数据)
6.1 数据构成
数据来源:ImageNet
数据类别:dog 和 cat
数据放置:train文件夹下放2个文件夹(cat 和 dog),每个文件夹分别放各自的图片。val文件夹做同样的操作。但是train和val中放置的图片一般不能有重复的图片。
6.2 数据读取
参考pytorch官网(github)给出的一个例子,数据会被很规范的读入,类似CIFAR10一样,train和val下面的文件夹名字自然会被分为0和1两类:
# Data loading code
# (fragment of the official example: `args`, `os` and `datasets` are defined outside this excerpt)
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
# Per-channel mean/std applied after ToTensor
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
# ImageFolder is given the train directory and the transform pipeline to apply to each image
train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
CIFAR10 | myDatasets | |
function | traindatasets=torchvision.datasets.CIFAR10() | mytraindatasets = torchvision.datasets.ImageFolder() |
pytorch structure | Dataset CIFAR10 | Dataset ImageFolder Number of datapoints: 1876 Root Location: ./data/mydatasets/datasets/train Transforms (if any): Compose( RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ) Target Transforms (if any): None |
DataLoader | torch.utils.data.DataLoader(mytraindatasets, batch_size=1, shuffle=True, num_workers=0) 官网说法:Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset. | |
获取其中一个数据traindatasets.__getitem__(0) 矩阵是图像,6是类别标签 | (tensor([[[-0.5373, -0.6627, -0.6078, ..., 0.2392, 0.1922, 0.1608], |
7. pytorch网络可视化(docker下的tensorboard)
- 安装自然很简单:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow tensorboard tensorboardX
- TensorBoard运行在docker容器下:
因为TensorBoard默认运行端口在6006,如果在docker下直接运行,则使用浏览器访问TensorBoard时,无法访问docker容器下的TensorBoard服务器,只能访问主机的TensorBoard。因此需要把docker容器的6006端口映射到主机,进而访问主机的TensorBoard服务器时,间接访问docker容器的6006端口服务资源。(-p 6006:6006)
sudo nvidia-docker run --rm -it -v /media/lab/873821cf-d234-44cf-bd63-4372eac823a1/pytorch/:/home/pytorch -p 6006:6006 pytorch:v0 bash
- TensorBoard网络可视化构建(代码):
# coding=utf-8
import torch
import torchvision
import torchvision.transforms as transforms
from visualization import visualize
import torch.nn as nn
import torch.nn.functional as F
class ClassifyNet(nn.Module):
    """Two-class classifier for 224x224 RGB images: one 3x3 convolution, one 2x2 max-pool, two linear layers.

    Shape trace for a (N, 3, 224, 224) input: conv1 -> 222x222, pool -> 111x111,
    so fc1 receives 4*111*111 = 49284 features per sample.
    """

    def __init__(self):
        super(ClassifyNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 4, 3)  # input channels=3, output channels=4, kernel=3x3
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(49284, 60)  # 49284 = 4*111*111 = channels*height*width per sample
        self.fc2 = nn.Linear(60, 2)

    def forward(self, x):
        """Return the 2 class scores for a mini-batch x of shape (N, 3, 224, 224)."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        #import pdb;pdb.set_trace()
        x = x.view(-1, 49284)  # flatten the feature maps for the linear layers
        x = self.fc1(x)
        x = self.fc2(x)
        return x
net = ClassifyNet()
print(net)
# Visualization method: tensorboardX
from tensorboardX import SummaryWriter
# The writer is used as a context manager; add_graph is given the network and an
# example input tuple with a single 3x224x224 image.
with SummaryWriter(comment="Net") as w:
    w.add_graph(net, (torch.rand(1, 3, 224, 224),))
程序运行之后,当前程序所在目录下会生成一个runs目录
- 运行TensorBoard服务器资源:
tensorboard --logdir=runs/
- 局域网下的浏览器访问TensorBoard(主机地址+端口):
host_addr:6006
8. pytorch使用AlexNet训练MNIST
8.1 代码
#coding:utf-8
'''#直接使用AlexNet是不行的,minst数据集图像大小是28*28,如果使用AlexNet的卷积核设置,最终会导致:Given input size: (192x2x2). Calculated output size: (192x0x0). Output size is too small
#原始AlexNet位置:https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
Start Training!
[1, 60000] loss:0.4914
[2, 60000] loss:0.0042
[3, 60000] loss:0.0063
[4, 60000] loss:0.0038
[5, 60000] loss:0.0172
Finished Traning
Accuracy of the network on the 10000 test images:98%'''
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F
import time,pdb,random,os
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    # Older PyTorch releases provide the same helper as torch.utils.model_zoo.load_url;
    # torch.utils itself has no load_state_dict_from_url.
    from torch.utils.model_zoo import load_url as load_state_dict_from_url
# Public names of this module
__all__ = ['AlexNet', 'alexnet']
# Download location of the checkpoint fetched by alexnet(pretrained=True)
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}
class AlexNet(nn.Module):
    """AlexNet-style network adapted to single-channel 28x28 images.

    All convolutions use 3x3 kernels so that the feature maps survive the three
    2x2 max-pool stages; an adaptive average pool then fixes the spatial size to
    6x6 before the classifier.

    Args:
        num_classes: number of output scores per sample (default 10).
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Always yields 256 x 6 x 6 features, whatever the spatial size coming in.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a mini-batch x of shape (N, 1, H, W)."""
        x = self.features(x)
        x = self.avgpool(x)
        #x = torch.flatten(x, 1)
        x = x.view(-1, 256 * 6 * 6)  # flatten the 256x6x6 feature maps per sample
        x = self.classifier(x)
        return x
def alexnet(pretrained=False, progress=True, **kwargs):
    r"""AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, downloads the checkpoint at ``model_urls['alexnet']``
            and loads it into the model
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: forwarded to the ``AlexNet`` constructor (e.g. ``num_classes``)

    Returns:
        The constructed ``AlexNet`` instance.

    NOTE(review): the checkpoint URL is torchvision's ImageNet AlexNet, while the
    ``AlexNet`` in this file uses a 1-channel input and 3x3 kernels, so
    ``load_state_dict`` is expected to fail with a size mismatch when
    ``pretrained=True`` -- confirm before relying on it.
    """
    model = AlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
# Cut a strip off one side of the image and re-attach it on the opposite side.
def crop_pad_image(src1, shift=3):
    """Circularly shift an image by ``shift`` pixels in one random direction.

    One of up / down / left / right is drawn with ``random.randint(0, 3)``; the
    strip pushed out on one side is re-attached on the opposite side, so the
    output has the same shape as the input.

    Args:
        src1: 3-D tensor indexed as (channel, height, width).
        shift: number of pixels to move; defaults to 3, the value that was
            previously hard-coded.

    Returns:
        A new tensor with the shifted image.

    Raises:
        ValueError: if ``src1`` is not 3-dimensional.
    """
    if src1.dim() != 3:
        raise ValueError("crop_pad_image expects a (channel, height, width) tensor")
    # (signed offset, tensor dim) in the order up, down, left, right.
    moves = ((-shift, 1), (shift, 1), (-shift, 2), (shift, 2))
    offset, dim = moves[random.randint(0, 3)]
    # torch.roll performs the cut-and-reattach in one call.
    return torch.roll(src1, offset, dim)
from torchvision.utils import save_image
def save_img(img, save_path, name_index):
    """Save a normalized 28x28 image tensor as ``<save_path>/<name_index>.jpg``.

    Args:
        img: tensor normalized to [-1, 1] whose element count is a multiple of 28*28.
        save_path: output directory; created if it does not exist.
        name_index: value used (via ``str``) as the file name without extension.
    """
    # exist_ok avoids the check-then-create race of exists() followed by makedirs().
    os.makedirs(save_path, exist_ok=True)
    img = 0.5 * (img + 1)  # map [-1, 1] back to [0, 1]
    img = img.clamp(0, 1)
    img = img.view(-1, 1, 28, 28)  # shape expected by save_image: (N, C, H, W)
    save_image(img, os.path.join(save_path, str(name_index) + ".jpg"))
if __name__=="__main__":
    # Preprocessing: ToTensor scales pixels to [0, 1]; Normalize((0.5,), (0.5,)) maps them to [-1, 1].
    # NOTE: this rebinds the module name `transforms` to the composed pipeline. The name is
    # kept because the resize_image experiment is documented as being called with `transforms`.
    transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
    #trainset = torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transforms)
    #trainloader = torch.utils.data.DataLoader(trainset, batch_size=10,shuffle=True,num_workers=0)
    testset = torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transforms)
    testloader = torch.utils.data.DataLoader(testset,batch_size=10,shuffle=False,num_workers=0)

    net = AlexNet()
    # Loss function: cross entropy
    criterion = nn.CrossEntropyLoss()
    # Optimizer: SGD with momentum
    optimizer = optim.SGD(net.parameters(),lr=1e-3, momentum=0.9)
    # Device: GPU when available, otherwise CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)

    # --- Training (disabled): this is the code that produced MNIST.pkl ---
    #print("Start Training!")
    #num_epochs = 5 # number of training epochs
    #for epoch in range(num_epochs):
    #    running_loss = 0
    #    batch_size = 10
    #    for i, data in enumerate(trainloader):
    #        inputs, labels = data
    #        inputs, labels = inputs.to(device), labels.to(device)
    #
    #        outputs = net(inputs)
    #        #pdb.set_trace()
    #        loss = criterion(outputs, labels)
    #        optimizer.zero_grad()
    #        loss.backward()
    #        optimizer.step()
    #
    #    print('[%d, %5d] loss:%.4f'%(epoch+1, (i+1)*10, loss.item()))
    #
    #print("Finished Traning")
    #
    ## Save the trained model
    #torch.save(net, 'MNIST.pkl')

    # Load the trained model. torch.load unpickles the file, so only load files you trust.
    # map_location lets a model saved on the GPU be loaded on a CPU-only machine.
    # NOTE(review): recent PyTorch releases may additionally need weights_only=False here
    # to unpickle a whole model -- confirm for the installed version.
    net = torch.load('MNIST.pkl', map_location=device)
    net.to(device)
    # Evaluation mode: without it the Dropout layers stay active and distort the results.
    net.eval()

    num_classes = 10  # MNIST digits 0-9

    # Start recognition
    with torch.no_grad():
        # No gradients are tracked inside this block.
        correct = 0
        total = 0
        tplist = [0.] * num_classes     # true positives: class i predicted as class i
        fnlist = [0.] * num_classes     # false negatives: class i predicted as something else
        tp_fplist = [0.] * num_classes  # TP + FP: number of samples predicted as class i
        tp_fnlist = [0.] * num_classes  # TP + FN: number of samples whose label is class i
        totallist = [0.] * num_classes  # number of samples seen per class
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            # Optional experiment: shift every MNIST digit by 3 pixels in a random direction
            # (up/down/left/right) to probe the translation invariance of the CNN.
            #testbatchsize=images.size()[0]
            #for i in range(testbatchsize):
            #    # save the original image for inspection
            #    #save_img(images[i,:,:,:],"original_images",labels[i].item())
            #    temp = torch.empty(1,28,28)
            #    temp.copy_(images[i])
            #    images[i].copy_(crop_pad_image(images[i]))  # for the shrink experiment use resize_image(images[i],transforms) here
            #    # save the shifted image for inspection; verified: the digit really moved
            #    #save_img(images[i,:,:,:],"move_images",labels[i].item())
            #    #pdb.set_trace()
            out = net(images)
            _, predicted = torch.max(out.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            res = predicted == labels
            for label_id in range(len(labels)):
                label_single = labels[label_id].item()         # true class of this sample
                predicted_single = predicted[label_id].item()  # predicted class of this sample
                hit = res[label_id].item()
                tplist[label_single] += hit
                fnlist[label_single] += (not hit)
                # Count directly by class index. The earlier inner loop ran over
                # range(len(labels)), i.e. the batch size, which only matched the
                # number of classes because batch_size happened to be 10.
                tp_fplist[predicted_single] += 1
                tp_fnlist[label_single] += 1
                totallist[label_single] += 1
            #pdb.set_trace()
    print('Accuracy of the network on the 10000 test images:{}%'.format(100 * float(correct) / total)) # overall accuracy
    mRecall = 0
    mPrecision = 0
    for i in range(num_classes):
        # Guard against classes that never occur / are never predicted (0/0).
        recall = tplist[i]/tp_fnlist[i] if tp_fnlist[i] else 0.0
        precision = tplist[i]/tp_fplist[i] if tp_fplist[i] else 0.0
        print("数字{}的召回率(查全率)recall:{:.3f},精确率(查准率)precision:{:.3f}".format(i,recall,precision))
        mRecall += recall
        mPrecision += precision
    print("mRecall:{:.3f}".format(mRecall/num_classes))
    print("mPrecision:{:.3f}".format(mPrecision/num_classes))
- 所有测试集数据随机上、下、左、右移动3个像素
- 数据预处理Normalize
(单通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,),std=(0.5,))])
(三通道)transforms= transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))])
表示数据集图片预处理过程中,先由“torchvision.transforms.ToTensor”将0-255的像素值转换到0-1之间,然后“transforms.Normalize(mean=(0.5,),std=(0.5,))”按照均值为0.5、标准差为0.5(注意std是标准差,不是方差)的方式将0-1的数据归一化至-1到1。计算方法:(x-0.5)/0.5,例如(0-0.5)/0.5=-1,(1-0.5)/0.5=1。
- tensor数据保存为图像
参考网站:https://pytorch.org/docs/master/_modules/torchvision/utils.html#save_image
网站中提供了将tensor数据直接保存为可视图像的方法,最重要的一点是使用ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()将数据从0-1转换为0-255,并调整了维度顺序。注意:该语句假设输入像素值已在0-1之间;经过Normalize后处于-1到1的数据需要先用0.5*(x+1)还原到0-1(如8.1中的save_img所做),否则负值会被clamp截断为0。通常tensor的各维度信息为[batchsize,channels,height,width],而opencv或者pillow图像各维度为[height,width,channels],所以使用opencv或pillow保存图像需要将tensor转换过来。
- 缩小图像
tensor.resize_()不能实现,这个函数只会截取原图像数据矩阵中一部分,因此目前我能找到的方法就是先将tensor转换为opencv可用的数据维度,包括调整-1至1为0-255,以及调整维度顺序,然后使用opencv中的可插值resize方法。以minst图像缩小一半为例:
def resize_image(src,transforms=None):
    """Shrink a normalized single-channel image to half size and centre it on a blank canvas.

    Requires OpenCV (``cv2``) to be imported at module level.

    Args:
        src: tensor of shape (1, height, width); assumed to be normalized to [-1, 1]
            with mean 0.5 / std 0.5, as in the MNIST pipeline of section 8.1.
        transforms: callable that turns the resized uint8 image back into a
            normalized tensor (the ToTensor + Normalize pipeline). Must be given.

    Returns:
        CPU tensor of shape (1, height, width): background -1, shrunken image in the centre.

    NOTE: the accuracy figures quoted after this snippet were measured before the
    [-1, 1] -> [0, 1] correction below; rerun the experiment to refresh them.
    """
    channel, height, width = src.shape
    new_h, new_w = height // 2, width // 2
    # Map [-1, 1] back to [0, 1] first. The mul(255)...clamp_(0, 255) formula expects
    # [0, 1]; applied directly to [-1, 1] data it clamps every negative value to 0.
    temp = (src.detach().cpu() + 1) / 2
    # [0, 1] float, channel-first -> [0, 255] uint8, channel-last, as OpenCV expects.
    temp = temp.mul(255).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    # cv2.resize takes the target size as (width, height).
    tempCV = cv2.resize(tempCV, (new_w, new_h), interpolation = cv2.INTER_AREA)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    # Blank canvas at the normalized background value (-1); paste the image in the centre.
    outTemp = -torch.ones(1, height, width)
    top, left = (height - new_h) // 2, (width - new_w) // 2
    outTemp[0, top:top + new_h, left:left + new_w] = resizeTemp[0, :, :]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp
在应用时,将8.1中的代码179行修改为images[i].copy_(resize_image(images[i],transforms))即可实现minst测试集图像缩小功能。压缩后测试结果如下:
Accuracy of the network on the 10000 test images:57.2%
数字0的召回率(查全率)recall:0.004,精确率(查准率)precision:1.000
数字1的召回率(查全率)recall:0.959,精确率(查准率)precision:0.639
数字2的召回率(查全率)recall:0.479,精确率(查准率)precision:0.653
数字3的召回率(查全率)recall:0.685,精确率(查准率)precision:0.504
数字4的召回率(查全率)recall:0.648,精确率(查准率)precision:0.747
数字5的召回率(查全率)recall:0.952,精确率(查准率)precision:0.357
数字6的召回率(查全率)recall:0.507,精确率(查准率)precision:0.452
数字7的召回率(查全率)recall:0.545,精确率(查准率)precision:0.930
数字8的召回率(查全率)recall:0.331,精确率(查准率)precision:0.782
数字9的召回率(查全率)recall:0.583,精确率(查准率)precision:0.696
mRecall:0.569
mPrecision:0.676
随机缩小为原图的0.5-0.7倍:
def resize_image(src,transforms=None):
    """Shrink a normalized single-channel image by a random factor and centre it on a blank canvas.

    The factor is drawn as ``0.1 * random.randint(5, 7)``, i.e. roughly 0.5, 0.6 or 0.7.
    Requires OpenCV (``cv2``) to be imported at module level.

    Args:
        src: tensor of shape (1, height, width); assumed to be normalized to [-1, 1]
            with mean 0.5 / std 0.5, as in the MNIST pipeline of section 8.1.
        transforms: callable that turns the resized uint8 image back into a
            normalized tensor (the ToTensor + Normalize pipeline). Must be given.

    Returns:
        CPU tensor of shape (1, height, width): background -1, shrunken image in the centre.
    """
    zoom = 0.1*random.randint(5,7)
    channel, height, width = src.shape
    new_h, new_w = int(height*zoom), int(width*zoom)
    # Map [-1, 1] back to [0, 1] first. The mul(255)...clamp_(0, 255) formula expects
    # [0, 1]; applied directly to [-1, 1] data it clamps every negative value to 0.
    temp = (src.detach().cpu() + 1) / 2
    # [0, 1] float, channel-first -> [0, 255] uint8, channel-last, as OpenCV expects.
    temp = temp.mul(255).add_(0.5).clamp_(0,255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
    tempCV = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
    # cv2.resize takes the target size as (width, height), not (height, width).
    tempCV = cv2.resize(tempCV, (new_w, new_h), interpolation = cv2.INTER_AREA)
    #cv2.imwrite("temp_np.jpg",tempCV)
    resizeTemp = transforms(tempCV)
    # Blank canvas at the normalized background value (-1); paste the image in the centre.
    outTemp = -torch.ones(1,height,width)
    top = int(height*(1-zoom)*0.5)
    left = int(width*(1-zoom)*0.5)
    outTemp[0, top:top + new_h, left:left + new_w] = resizeTemp[0,:,:]
    #save_img(outTemp,[1,28,28],"resize_images","resize")
    #pdb.set_trace()
    return outTemp
更多推荐
所有评论(0)