如何采用GPU训练
方法1:对网络模型、数据(输入图像和标注)、损失函数分别调用 .cuda() 即可
# Approach 1: train on the GPU by calling ``.cuda()`` on the three things that
# have to live on the device -- the network, the loss function, and every
# batch of data (images and labels). Falls back to the CPU when CUDA is absent.
import time

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# NOTE: the network could instead be kept in a separate module and imported;
# it is defined inline below so that this script is self-contained.

# Query CUDA availability once instead of on every batch.
use_cuda = torch.cuda.is_available()

# Prepare the datasets (downloaded into ./data if not already present).
train_data = torchvision.datasets.CIFAR10(
    "data",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    "data",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Report how many samples the training and test sets contain.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap the datasets in DataLoaders that yield batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)


class Peipei(nn.Module):
    """Small CNN: three conv + max-pool stages, then two linear layers.

    The first linear layer expects 64 * 4 * 4 features, i.e. a 64-channel
    4x4 feature map after the three 2x2 poolings (3x32x32 inputs). The
    network outputs 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw scores."""
        return self.model(x)


# Create the network and move it to the GPU when one is available.
peipei = Peipei()
if use_cuda:
    peipei = peipei.cuda()

# Create the loss function and move it to the GPU as well.
loss_fn = nn.CrossEntropyLoss()
if use_cuda:
    loss_fn = loss_fn.cuda()

# Optimizer: plain SGD with a learning rate of 1e-2 (= 0.01).
learning_rate = 1e-2
optimizer = torch.optim.SGD(peipei.parameters(), lr=learning_rate)

# Bookkeeping for the training run.
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer; events go to ./logs_train.
writer = SummaryWriter("logs_train")
start_time = time.time()

for i in range(epoch):  # i runs from 0 to epoch - 1
    print("--------------------第{}轮训练开始--------------------".format(i + 1))

    # Training phase. train() only affects layers such as Dropout/BatchNorm.
    peipei.train()
    for data in train_dataloader:
        imgs, targets = data
        if use_cuda:
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = peipei(imgs)
        loss = loss_fn(outputs, targets)

        # Standard optimisation step: clear old grads, backprop, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            # Every 100 steps: print elapsed seconds and log the current loss.
            end_time = time.time()
            print(end_time - start_time)
            print("训练次数:{},loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation phase. eval() only affects layers such as Dropout/BatchNorm.
    peipei.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for data in test_dataloader:
            imgs, targets = data
            if use_cuda:
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = peipei(imgs)
            loss = loss_fn(outputs, targets)
            # Sum of the per-batch losses over the whole test set.
            total_test_loss = total_test_loss + loss.item()
            # Count correct predictions; .item() converts the (possibly GPU)
            # tensor to a Python int so the reported accuracy is a plain float.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy.item()

    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step = total_test_step + 1

    # Save a checkpoint after every epoch. Only the state_dict is written:
    # saving the whole model to the same path first would just be overwritten.
    torch.save(peipei.state_dict(), "peipei_{}.pth".format(i))

writer.close()
CPU
GPU
方法2:先用 torch.device 定义训练设备,再对网络模型、数据(输入图像和标注)、损失函数分别调用 .to(device)
# Approach 2: pick the training device once with ``torch.device`` and move the
# network, the loss function, and every batch of data (images and labels) to
# it with ``.to(device)``. The same code then runs on either GPU or CPU.
import time

import torch  # required: torch.device / torch.optim / torch.no_grad / torch.save
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# NOTE: the network could instead be kept in a separate module and imported;
# it is defined inline below so that this script is self-contained.

# Device used for training: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare the datasets (downloaded into ./data if not already present).
train_data = torchvision.datasets.CIFAR10(
    "data",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.CIFAR10(
    "data",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Report how many samples the training and test sets contain.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap the datasets in DataLoaders that yield batches of 64 samples.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)


class Peipei(nn.Module):
    """Small CNN: three conv + max-pool stages, then two linear layers.

    The first linear layer expects 64 * 4 * 4 features, i.e. a 64-channel
    4x4 feature map after the three 2x2 poolings (3x32x32 inputs). The
    network outputs 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw scores."""
        return self.model(x)


# Create the network and move it to the chosen device.
peipei = Peipei()
peipei = peipei.to(device)

# Create the loss function and move it to the chosen device as well.
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)

# Optimizer: plain SGD with a learning rate of 1e-2 (= 0.01).
learning_rate = 1e-2
optimizer = torch.optim.SGD(peipei.parameters(), lr=learning_rate)

# Bookkeeping for the training run.
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# TensorBoard writer; events go to ./logs_train.
writer = SummaryWriter("logs_train")
start_time = time.time()

for i in range(epoch):  # i runs from 0 to epoch - 1
    print("--------------------第{}轮训练开始--------------------".format(i + 1))

    # Training phase. train() only affects layers such as Dropout/BatchNorm.
    peipei.train()
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        outputs = peipei(imgs)
        loss = loss_fn(outputs, targets)

        # Standard optimisation step: clear old grads, backprop, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        if total_train_step % 100 == 0:
            # Every 100 steps: print elapsed seconds and log the current loss.
            end_time = time.time()
            print(end_time - start_time)
            print("训练次数:{},loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation phase. eval() only affects layers such as Dropout/BatchNorm.
    peipei.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():  # no gradients are needed while evaluating
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            outputs = peipei(imgs)
            loss = loss_fn(outputs, targets)
            # Sum of the per-batch losses over the whole test set.
            total_test_loss = total_test_loss + loss.item()
            # Count correct predictions; .item() converts the (possibly GPU)
            # tensor to a Python int so the reported accuracy is a plain float.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy.item()

    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step = total_test_step + 1

    # Save a checkpoint after every epoch. Only the state_dict is written:
    # saving the whole model to the same path first would just be overwritten.
    torch.save(peipei.state_dict(), "peipei_{}.pth".format(i))

writer.close()
GPU
CPU