理解 torch.nn
# Build the training and validation loaders with batch size `bs`.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
# NOTE(review): get_model is not defined in this excerpt; presumably it
# returns a (model, optimizer) pair -- confirm against its definition.
model, opt = get_model()
# Run the training loop for `epochs` epochs.
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
切换到 CNN
我们将使用 PyTorch 预定义的 Conv2d 类构建卷积层:定义 3 个卷积层,每个卷积层后接 ReLU 激活,最后执行平均池化(PyTorch 里的 view 类似于 numpy 的 reshape)。
import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np
# Location of the pickled MNIST archive; the directory is created if missing.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"

# Read the archive into x_train, y_train and x_valid, y_valid; the third
# element of the pickled tuple is discarded.
# NOTE: pickle.load can execute arbitrary code -- only open archives that
# come from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(f, encoding="latin-1")

# Convert every loaded array to a torch tensor.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)
class Mnist_CNN(nn.Module):
    """Three-layer convolutional classifier for flattened 28x28 images.

    Every convolution uses a 3x3 kernel with stride 2 and padding 1 and is
    followed by a ReLU. For a 28x28 input the spatial size shrinks
    28 -> 14 -> 7 -> 4, so the closing 4x4 average pool leaves one value per
    output channel, giving a ``(batch, 10)`` result.
    """

    def __init__(self):
        super().__init__()
        conv_args = dict(kernel_size=3, stride=2, padding=1)
        self.conv1 = nn.Conv2d(1, 16, **conv_args)
        self.conv2 = nn.Conv2d(16, 16, **conv_args)
        self.conv3 = nn.Conv2d(16, 10, **conv_args)

    def forward(self, x):
        """Reshape ``x`` to ``(-1, 1, 28, 28)`` and run it through the network."""
        out = x.view(-1, 1, 28, 28)
        for conv in (self.conv1, self.conv2, self.conv3):
            out = F.relu(conv(out))
        out = F.avg_pool2d(out, 4)
        # Drop the trailing 1x1 spatial dimensions.
        return out.view(-1, out.size(1))
# Loss function: cross-entropy from torch.nn.functional.
loss_func = F.cross_entropy
def accuracy(out, yb):
    """Return the fraction of rows of ``out`` whose arg-max index equals ``yb``.

    The arg-max is taken along dimension 1 and the result is a 0-dim float
    tensor.
    """
    predicted = out.argmax(dim=1)
    hits = predicted == yb
    return hits.float().mean()
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and, if ``opt`` is given, take one step.

    Returns a ``(loss, batch_size)`` tuple where ``loss`` is a Python float
    and ``batch_size`` is ``len(xb)``.
    """
    batch_loss = loss_func(model(xb), yb)
    batch_size = len(xb)

    # Evaluation only: no optimizer, so nothing to update.
    if opt is None:
        return batch_loss.item(), batch_size

    batch_loss.backward()
    opt.step()
    # Clear gradients so they do not accumulate into the next batch.
    opt.zero_grad()
    return batch_loss.item(), batch_size
# Train the model
lr = 0.1 # learning rate
epochs = 5 # number of training epochs
bs = 64 # batch size
# Datasets pairing inputs with labels; the loaders themselves are built by
# get_data, so the direct DataLoader calls below stay commented out.
train_ds = TensorDataset(x_train, y_train)
# train_dl = DataLoader(train_ds, batch_size=bs)
# ##
valid_ds = TensorDataset(x_valid, y_valid)
# valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
def get_data(train_ds, valid_ds, bs):
    """Return ``(train_loader, valid_loader)`` built from the two datasets.

    The training loader shuffles and uses batch size ``bs``; the validation
    loader keeps dataset order and uses batch size ``bs * 2``.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Evaluate ``loss_func`` on one batch, optionally updating the model.

    When ``opt`` is not ``None`` the loss is back-propagated, the optimizer
    takes one step and its gradients are cleared. The return value is
    ``(loss as a float, len(xb))``.
    """
    prediction = model(xb)
    batch_loss = loss_func(prediction, yb)
    training = opt is not None
    if training:
        batch_loss.backward()
        opt.step()
        opt.zero_grad()
    result = (batch_loss.item(), len(xb))
    return result
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Each epoch runs ``loss_batch`` with the optimizer over every batch of
    ``train_dl`` in training mode, then switches the model to evaluation
    mode and runs ``loss_batch`` without an optimizer over ``valid_dl``
    under ``torch.no_grad()``.

    The printed validation loss is the mean of the per-batch losses weighted
    by batch size. If ``valid_dl`` yields no batches the loss is reported as
    ``nan`` instead of raising.

    Args:
        epochs: Number of passes over ``train_dl``.
        model: Module to optimise; must provide ``train()`` and ``eval()``.
        loss_func: Callable passed through to ``loss_batch``.
        opt: Optimizer passed through to ``loss_batch`` for training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches.
    """
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]

        if results:
            losses, nums = zip(*results)
            # Weight each batch loss by its size so a short final batch does
            # not skew the average.
            val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        else:
            # Unpacking zip(*[]) into two names raises ValueError; an empty
            # validation set has no defined loss.
            val_loss = float("nan")
        print(epoch, val_loss)
# Build the loaders, instantiate the CNN and train it with SGD plus momentum.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
nn.Sequential
torch.nn 还有另一个方便的类,我们可以使用它来简化代码:Sequential。Sequential 对象以顺序方式运行其中包含的每个模块。这是编写神经网络的一种更简单的方法。
为了发挥它的优势,我们需要通过函数轻松定义定制的层。例如,PyTorch 没有 view 层,我们需要在网络中创建一个, Lambda 将会创建这个层,并应用于 Sequential 定义的网络中。
nn.Sequential 使用如下:
class Lambda(nn.Module):
    """Module wrapper around an arbitrary callable.

    Lets a plain function (for example a reshape) be used as a layer inside
    ``nn.Sequential``.
    """

    def __init__(self, func):
        super().__init__()
        # Callable applied to the input in forward().
        self.func = func

    def forward(self, x):
        """Return ``self.func(x)``."""
        result = self.func(x)
        return result
def preprocess(x):
    """Return ``x`` viewed as a batch of single-channel 28x28 images."""
    image_shape = (-1, 1, 28, 28)
    return x.view(*image_shape)
# Same network as Mnist_CNN, expressed as a Sequential pipeline.
model = nn.Sequential(
    # Reshape the input to (-1, 1, 28, 28) before the first convolution.
    Lambda(preprocess),
    nn.Conv2d(1, 16, 3, 2, 1),
    nn.ReLU(),
    nn.Conv2d(16, 16, 3, 2, 1),
    nn.ReLU(),
    nn.Conv2d(16, 10, 3, 2, 1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    # Flatten everything after the batch dimension.
    Lambda(lambda x: x.view(x.size(0), -1)),
)
# Train with SGD plus momentum, reusing the loaders created earlier.
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
包装 DataLoader
我们的 CNN 很方便,但是只能适用于 MNIST,因为:
- 假设输入是 28*28 的长向量;
- 假设 CNN 最后的 grid 大小为 4*4(因为这是我们使用的平均池化核大小)。
让我们去掉这两个假设,这样我们的模型就可以处理任何二维单通道图像。首先,我们可以把数据预处理移动到生成器(WrappedDataLoader)中,从而移除初始的 Lambda 层。接下来,我们可以将 nn.AvgPool2d 替换为 nn.AdaptiveAvgPool2d,它允许我们指定所需的输出张量大小,而不是依赖已有输入张量的大小。因此,我们的模型可以处理任何大小的输入。
完整代码如下
import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np
# Location of the pickled MNIST archive; the directory is created if missing.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"

# Read the archive into x_train, y_train and x_valid, y_valid; the third
# element of the pickled tuple is discarded.
# NOTE: pickle.load can execute arbitrary code -- only open archives that
# come from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(f, encoding="latin-1")

# Convert every loaded array to a torch tensor.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)
class Lambda(nn.Module):
    """Adapter that turns a plain callable into an ``nn.Module`` layer."""

    def __init__(self, func):
        super().__init__()
        # Callable invoked on every forward pass.
        self.func = func

    def forward(self, x):
        """Apply the wrapped callable to ``x`` and return its result."""
        output = self.func(x)
        return output
def preprocess(x, y):
    """Return ``(x viewed as (-1, 1, 28, 28), y)``; ``y`` is passed through unchanged."""
    images = x.view(-1, 1, 28, 28)
    return images, y
class WrappedDataLoader:
    """Iterable that applies ``func`` to every batch produced by ``dl``.

    Each batch is unpacked into positional arguments for ``func`` and the
    result of the call is yielded in place of the original batch.
    """

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        """Return the length of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)
def get_data(train_ds, valid_ds, bs):
    """Build the training and validation loaders.

    Returns a 2-tuple: a shuffling loader over ``train_ds`` with batch size
    ``bs`` and a non-shuffling loader over ``valid_ds`` with batch size
    ``bs * 2``.
    """
    valid_bs = bs * 2
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=valid_bs)
    return (train_loader, valid_loader)
# Hyperparameters
lr = 0.1 # learning rate
epochs = 5 # number of training epochs
bs = 64 # batch size

# Pair inputs with labels, build the loaders, then wrap them so that every
# batch is passed through preprocess before it reaches the model.
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

loss_func = F.cross_entropy

# Three stride-2 convolutions, each followed by ReLU. AdaptiveAvgPool2d(1)
# pools to a 1x1 output regardless of the incoming spatial size, and the
# final Lambda flattens everything after the batch dimension.
model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Run one batch through ``model`` and return ``(loss, batch_size)``.

    With an optimizer the call also back-propagates the loss, applies one
    optimizer step and clears the gradients. ``loss`` is returned as a
    Python float and ``batch_size`` is ``len(xb)``.
    """
    batch_loss = loss_func(model(xb), yb)
    batch_size = len(xb)

    # No optimizer means evaluation only.
    if opt is None:
        return batch_loss.item(), batch_size

    batch_loss.backward()
    opt.step()
    opt.zero_grad()
    return batch_loss.item(), batch_size
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Each epoch runs ``loss_batch`` with the optimizer over every batch of
    ``train_dl`` in training mode, then switches the model to evaluation
    mode and runs ``loss_batch`` without an optimizer over ``valid_dl``
    under ``torch.no_grad()``.

    The printed validation loss is the mean of the per-batch losses weighted
    by batch size. If ``valid_dl`` yields no batches the loss is reported as
    ``nan`` instead of raising.

    Args:
        epochs: Number of passes over ``train_dl``.
        model: Module to optimise; must provide ``train()`` and ``eval()``.
        loss_func: Callable passed through to ``loss_batch``.
        opt: Optimizer passed through to ``loss_batch`` for training batches.
        train_dl: Iterable of ``(xb, yb)`` training batches.
        valid_dl: Iterable of ``(xb, yb)`` validation batches.
    """
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            results = [loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]

        if results:
            losses, nums = zip(*results)
            # Weight each batch loss by its size so a short final batch does
            # not skew the average.
            val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        else:
            # Unpacking zip(*[]) into two names raises ValueError; an empty
            # validation set has no defined loss.
            val_loss = float("nan")
        print(epoch, val_loss)
# SGD with momentum over the Sequential model's parameters, then train.
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
0 0.38525849063396456
1 0.2657669427156448
2 0.24119357098937036
3 0.1926776697874069
4 0.1697926277399063
节后语
我们已经有一个通用的数据流程和训练流程,你可以用 PyTorch 训练任何数据。当然,还有很多事情需要做,例如:数据增强、超参数调节、训练监控、迁移学习等等。
- torch.nn
  - Module: 创建一个行为类似于函数的可调用对象,但也可以包含状态(如神经网络层权重)。它知道自己包含哪些 Parameter,并可以将它们的梯度归零、在循环中更新权重等等。
  - Parameter: tensor 的包装器,告诉 Module 它含有需要在反向传播中更新的权重;只有设置了 requires_grad 属性的 tensor 才会被更新。
  - functional: 一个模块(通常按照惯例导入为 F 名字空间),包含了激活函数、损失函数,以及卷积层和线性层等层的无状态版本。
- torch.optim: 包含了优化器如 SGD 等,用于在反向传播中更新 Parameter 的权重。
- Dataset: 具有 __len__ 和 __getitem__ 的对象的抽象接口。
- DataLoader: 获取任何 Dataset 并创建一个迭代器,该迭代器返回一批数据。
【参考】
What is torch.nn really? — PyTorch Tutorials 1.13.1+cu117 documentation
文章介绍了如何在PyTorch中使用torch.nn构建卷积神经网络(CNN),包括定义卷积层、ReLU激活函数和平均池化。通过预处理数据、创建数据加载器和使用预定义的nn.Sequential简化模型结构。文章还展示了训练过程和损失计算,并讨论了nn.Module、Parameter和优化器的作用。最后,提出了通用的数据处理方法,使模型能适应不同大小的输入图像。
1523

被折叠的 条评论
为什么被折叠?



