Day36 图像数据特征与显存

核心知识点梳理

1. 图像数据的核心特征(与结构化数据对比)

| 维度 | 结构化数据(表格) | 图像数据(MNIST/CIFAR-10) |
| --- | --- | --- |
| 形状 | (样本数, 特征数) → 一维向量 | (样本数, 通道数, 高, 宽) → 保留空间信息 |
| 通道数 | — | 灰度图 = 1,彩色图 (RGB) = 3 |
| 数据类型 | 多为 float/int(直接输入模型) | 原始 uint8 (0-255) → 转换为 float32 (0-1) |
| 预处理重点 | 归一化 / 编码 | 张量转换 + 归一化 + 维度调整 |

2. MLP 处理图像的核心适配逻辑

| 图像类型 | 输入形状 | 展平后维度 | MLP 输入层维度 | 核心适配点 |
| --- | --- | --- | --- | --- |
| MNIST 灰度图 | (1, 28, 28) | 784 | 784 | 展平二维空间信息为一维向量 |
| CIFAR-10 彩色图 | (3, 32, 32) | 3072 | 3072 | 展平三通道空间信息为一维向量 |

3. batch_size 核心知识点

| 关联组件 | 是否涉及 batch_size | 核心逻辑 |
| --- | --- | --- |
| 模型定义 | ❌ 不涉及 | 模型自动处理 batch 维度,仅需定义单样本形状 |
| torchsummary | ❌ 不涉及 | input_size 仅指定单样本形状(通道 × 高 × 宽) |
| DataLoader | ✅ 核心参数 | 控制每次加载的样本数,影响显存占用和训练效率 |
| 训练循环 | ✅ 数据输入形式 | 数据以 (batch_size, 通道, 高, 宽) 形式输入模型 |

4. 显存占用核心组成(FP32 精度)

| 占用部分 | 计算方式 | 影响因素 |
| --- | --- | --- |
| 模型参数 | 参数总数 × 4 Byte(float32) | 模型结构(层数 / 神经元数) |
| 梯度 | 与参数占用相同(每个参数对应一个梯度) | 模型参数总数 |
| 优化器状态 | SGD = 0 额外占用,Adam = 参数占用 × 2(动量 + 平方梯度) | 优化器类型 |
| 批量数据 | batch_size × 单样本显存占用 | batch_size + 图像尺寸 + 通道数 |
| 中间变量 | batch_size × 中间层维度 × 4 Byte | batch_size + 模型中间层神经元数 |

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import numpy as np
import os

# ==================== Global configuration ====================
# Fix the RNG seed so repeated runs draw the same random numbers.
torch.manual_seed(42)
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"使用设备:{device}")
# When running on CUDA, report the name and total memory (in MB) of GPU 0.
if device.type == "cuda":
    print(f"GPU名称:{torch.cuda.get_device_name(0)}")
    print(f"GPU显存总量:{torch.cuda.get_device_properties(0).total_memory / 1024 / 1024:.2f} MB")

# ==================== 1. Image data loading and visualization ====================
print("\n=== 1. 图像数据加载与可视化 ===")

# 1.1 MNIST grayscale images
# Preprocessing pipeline applied to every sample when it is read.
mnist_transform = transforms.Compose(
    [
        # uint8 image (0-255) -> float32 tensor in [0, 1], shape (1, 28, 28)
        transforms.ToTensor(),
        # Standardize: (x - mean) / std
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Build the train and test splits with the same root and transform.
# Only the training split requests a download, as before.
mnist_train, mnist_test = (
    datasets.MNIST(
        root='./data',
        train=is_train,
        download=is_train,
        transform=mnist_transform,
    )
    for is_train in (True, False)
)

# Visualize a single MNIST sample (normalization is undone before plotting)
def show_mnist_image(dataset, idx):
    """Plot the MNIST sample at ``idx`` and print its shape and float32 size.

    :param dataset: indexable dataset returning ``(image_tensor, label)`` pairs
    :param idx: index of the sample to display
    """
    tensor, target = dataset[idx]
    # Undo the standardization: x = x_norm * std + mean
    tensor = tensor * 0.3081 + 0.1307
    # Channel 0 is the only channel plotted, as a 2-D array.
    pixels = tensor.numpy()[0]

    plt.figure(figsize=(3,3))
    plt.imshow(pixels, cmap='gray')
    plt.title(f"Label: {target}")
    plt.axis('off')
    plt.show()

    # 4 bytes per element, i.e. the size if stored as float32.
    size_kb = tensor.numel() * 4 / 1024
    print(f"MNIST图像形状:{tensor.shape} (通道×高×宽)")
    print(f"单张MNIST图像显存占用(float32):{size_kb:.2f} KB")

# Display one randomly chosen MNIST training image
show_mnist_image(
    mnist_train,
    torch.randint(low=0, high=len(mnist_train), size=(1,)).item(),
)

# 1.2 CIFAR-10 color images
cifar_transform = transforms.Compose(
    [
        # uint8 image (0-255) -> float32 tensor in [0, 1], shape (3, 32, 32)
        transforms.ToTensor(),
        # Per-channel (x - 0.5) / 0.5 maps [0, 1] onto [-1, 1]
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Load the CIFAR-10 training split (downloaded on first use)
cifar_train = datasets.CIFAR10(
    './data', train=True, transform=cifar_transform, download=True
)
# Human-readable class names, indexed by the integer label
cifar_classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Visualize a single CIFAR-10 sample (un-normalize, then reorder the axes)
def show_cifar_image(dataset, idx):
    """Plot the CIFAR-10 sample at ``idx`` and print its shape and float32 size.

    :param dataset: indexable dataset returning ``(image_tensor, label)`` pairs
    :param idx: index of the sample to display
    """
    tensor, target = dataset[idx]
    # Undo the normalization: x / 2 + 0.5 maps [-1, 1] back to [0, 1]
    tensor = tensor / 2 + 0.5
    # matplotlib expects (height, width, channel); the tensor is (channel, height, width)
    hwc_pixels = tensor.numpy().transpose(1, 2, 0)

    plt.figure(figsize=(3,3))
    plt.imshow(hwc_pixels)
    plt.title(f"Label: {cifar_classes[target]}")
    plt.axis('off')
    plt.show()

    # 4 bytes per element, i.e. the size if stored as float32.
    size_kb = tensor.numel() * 4 / 1024
    print(f"CIFAR-10图像形状:{tensor.shape} (通道×高×宽)")
    print(f"单张CIFAR-10图像显存占用(float32):{size_kb:.2f} KB")

# Display one randomly chosen CIFAR-10 training image
show_cifar_image(
    cifar_train,
    torch.randint(low=0, high=len(cifar_train), size=(1,)).item(),
)

# ==================== 2. MLP model definitions (adapted to image data) ====================
print("\n=== 2. MLP模型定义与参数计算 ===")

# 2.1 MLP adapted to MNIST
class MNIST_MLP(nn.Module):
    """Two-layer MLP: flatten -> Linear -> ReLU -> Linear.

    The defaults reproduce the original fixed network (784 -> 128 -> 10).
    The sizes are exposed as parameters so the class has the same
    constructor shape as ``CIFAR_MLP``.

    :param input_dim: number of features after flattening one sample
        (default 1 * 28 * 28 = 784)
    :param hidden_dim: width of the hidden layer (default 128)
    :param num_classes: number of output logits (default 10, one per digit)
    """

    def __init__(self, input_dim=1 * 28 * 28, hidden_dim=128, num_classes=10):
        super().__init__()
        # Flatten every dimension except the batch one: (1, 28, 28) -> 784
        self.flatten = nn.Flatten()
        # Fully connected layer 1: input_dim -> hidden_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        # Fully connected layer 2: hidden_dim -> num_classes
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch_size, num_classes)."""
        # With the defaults, x arrives as (batch_size, 1, 28, 28)
        x = self.flatten(x)  # (batch_size, input_dim)
        x = self.fc1(x)      # (batch_size, hidden_dim)
        x = self.relu(x)     # (batch_size, hidden_dim)
        x = self.fc2(x)      # (batch_size, num_classes)
        return x

# Build the MNIST MLP, move it to the selected device and print a layer summary
mnist_model = MNIST_MLP()
mnist_model = mnist_model.to(device)
print("\nMNIST MLP模型结构:")
# input_size describes a single sample; the batch dimension is not included
summary(mnist_model, input_size=(1, 28, 28))

# Count the model's parameters by hand
def calculate_mlp_params(model):
    """Return ``(total, trainable)`` parameter element counts for ``model``.

    ``trainable`` counts only parameters whose ``requires_grad`` is true.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    return total, trainable

mnist_total_params, mnist_trainable_params = calculate_mlp_params(mnist_model)
# Parameter storage at FP32 (4 bytes per value), expressed in KB
mnist_param_memory = (4 * mnist_total_params) / 1024

print(f"\nMNIST MLP总参数:{mnist_total_params:,}")
print(f"MNIST MLP可训练参数:{mnist_trainable_params:,}")
print(f"MNIST MLP参数显存占用(FP32):{mnist_param_memory:.2f} KB")
# Gradients take as much space as the parameters, hence the factor of two
print(f"MNIST MLP参数+梯度显存占用:{2 * mnist_param_memory:.2f} KB")

# 2.2 MLP adapted to CIFAR-10
class CIFAR_MLP(nn.Module):
    """Two-layer MLP: flatten -> Linear -> ReLU -> Linear.

    :param input_dim: number of features after flattening one sample
        (default 3 * 32 * 32 = 3072)
    :param hidden_dim: width of the hidden layer (default 128)
    :param num_classes: number of output logits (default 10)
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=128, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch_size, num_classes)."""
        # With the defaults: (batch_size, 3, 32, 32) -> (batch_size, 3072) -> (batch_size, 128)
        hidden = self.relu(self.fc1(self.flatten(x)))
        # (batch_size, 128) -> (batch_size, 10)
        return self.fc2(hidden)

# Build the CIFAR-10 MLP, move it to the selected device and print a layer summary
cifar_model = CIFAR_MLP()
cifar_model = cifar_model.to(device)
print("\nCIFAR-10 MLP模型结构:")
summary(cifar_model, input_size=(3, 32, 32))

# Parameter count and FP32 parameter storage (4 bytes per value, in KB)
cifar_total_params, cifar_trainable_params = calculate_mlp_params(cifar_model)
cifar_param_memory = (4 * cifar_total_params) / 1024
print(f"\nCIFAR-10 MLP总参数:{cifar_total_params:,}")
print(f"CIFAR-10 MLP参数显存占用(FP32):{cifar_param_memory:.2f} KB")

# ==================== 3. batch_size and memory estimation ====================
print("\n=== 3. batch_size与显存占用计算 ===")

def _infer_hidden_dim(model, default=128):
    """Sum the output widths of every ``nn.Linear`` in ``model`` except the last.

    Falls back to ``default`` when the model has fewer than two linear layers,
    so there is no hidden layer to measure.
    """
    widths = [m.out_features for m in model.modules() if isinstance(m, nn.Linear)]
    hidden = sum(widths[:-1])
    return hidden if hidden else default


def calculate_batch_memory(dataset, batch_size, model, optimizer_type="SGD", hidden_dim=None):
    """
    Estimate the FP32 training memory for one batch, broken down by component.

    This is a simplified back-of-the-envelope estimate (4 bytes per value),
    not a measurement of what the framework actually allocates.

    :param dataset: indexable dataset; ``dataset[0]`` must return ``(tensor, label)``
    :param batch_size: number of samples per batch
    :param model: model whose parameters (and linear layers) are inspected
    :param optimizer_type: "Adam" adds two extra values per parameter
        (momentum + squared gradient); any other value is treated as SGD
        with no extra state
    :param hidden_dim: total hidden-activation width per sample; when None it
        is inferred from the model's ``nn.Linear`` layers (previously fixed at 128)
    :return: dict with keys "batch_data", "param_gradient", "optimizer",
        "intermediate" and "total", all in MB
    """
    bytes_per_value = 4  # FP32

    # 1. Input batch: one sample's element count times the batch size
    sample, _ = dataset[0]
    single_sample_memory = sample.numel() * bytes_per_value  # bytes
    batch_data_memory = batch_size * single_sample_memory  # bytes

    # 2. Parameters plus one gradient value per parameter
    total_params = sum(p.numel() for p in model.parameters())
    param_gradient_memory = total_params * bytes_per_value * 2  # bytes

    # 3. Optimizer state
    if optimizer_type == "Adam":
        optimizer_memory = total_params * bytes_per_value * 2  # bytes
    else:  # SGD
        optimizer_memory = 0

    # 4. Intermediate activations (simplified: batch_size x hidden width).
    #    Derive the width from the model so the estimate follows its real
    #    hidden size instead of assuming 128.
    if hidden_dim is None:
        hidden_dim = _infer_hidden_dim(model)
    intermediate_memory = batch_size * hidden_dim * bytes_per_value  # bytes

    # Grand total, converted to MB
    total_memory = (batch_data_memory + param_gradient_memory + optimizer_memory + intermediate_memory) / 1024 / 1024

    return {
        "batch_data": batch_data_memory / 1024 / 1024,
        "param_gradient": param_gradient_memory / 1024 / 1024,
        "optimizer": optimizer_memory / 1024 / 1024,
        "intermediate": intermediate_memory / 1024 / 1024,
        "total": total_memory
    }

# Print the estimated memory table for several batch sizes, once per optimizer
batch_sizes = [64, 256, 1024, 4096]
for optimizer_name in ("SGD", "Adam"):
    print(f"\nMNIST + {optimizer_name} 显存占用(MB):")
    print(f"{'batch_size':<10} {'数据占用':<10} {'参数+梯度':<10} {'优化器':<10} {'中间变量':<10} {'总占用':<10}")
    for bs in batch_sizes:
        memory = calculate_batch_memory(mnist_train, bs, mnist_model, optimizer_name)
        # One left-aligned, 10-character-wide cell per column
        cells = [f"{bs:<10}"]
        cells.extend(
            f"{memory[key]:<10.2f}"
            for key in ("batch_data", "param_gradient", "optimizer", "intermediate", "total")
        )
        print(" ".join(cells))

# ==================== 4. batch_size in practice (DataLoader + training) ====================
print("\n=== 4. batch_size实战测试 ===")

# 4.1 One shuffled DataLoader per batch size, keyed as "bs_<size>"
mnist_loaders = {
    f"bs_{size}": DataLoader(mnist_train, batch_size=size, shuffle=True)
    for size in (64, 256, 1024)
}

# 4.2 Minimal training function (shows that batch_size does not affect the model structure)
def simple_train(model, dataloader, epochs=1, lr=0.001):
    """Train ``model`` with SGD and cross-entropy loss, printing progress.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on any module-level device setting.

    :param model: ``nn.Module`` producing class logits; updated in place
    :param dataloader: iterable of ``(data, target)`` batches
    :param epochs: number of passes over ``dataloader``
    :param lr: SGD learning rate
    :raises ValueError: if ``dataloader`` yields no batches in an epoch
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    # The optimizer above already rejects a parameter-less model,
    # so next() always has something to return here.
    model_device = next(model.parameters()).device

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for batch_idx, (data, target) in enumerate(dataloader):
            # Move the batch to wherever the model is
            data, target = data.to(model_device), target.to(model_device)

            # Forward pass
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)

            # Backward pass + parameter update
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            # Report every 100th batch
            if batch_idx % 100 == 0:
                print(f"Epoch {epoch+1}, Batch {batch_idx}, Loss: {loss.item():.4f}, Batch Size: {data.shape[0]}")

        # Fail with a clear message instead of a bare ZeroDivisionError
        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; cannot compute an average loss")
        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1} Average Loss: {avg_loss:.4f}")

# Train with batch_size=64 for a single epoch, as a demonstration only
print("\n测试batch_size=64训练:")
simple_train(mnist_model, mnist_loaders["bs_64"], epochs=1)

# ==================== 5. Suggestions for handling out-of-memory (OOM) errors ====================
print("\n=== 5. 显存不足(OOM)处理建议 ===")
oom_solutions = [
    "1. 减小batch_size:从当前值减半,直到不报错(如64→32→16)",
    "2. 使用混合精度训练:torch.cuda.amp自动将部分数据转为float16,显存占用减半",
    "3. 减少模型参数:减小隐藏层神经元数(如128→64),或使用更轻量化的模型",
    "4. 清理显存:训练循环中定期调用torch.cuda.empty_cache()",
    "5. 使用梯度累积:小batch_size训练,多次迭代后再更新参数(模拟大batch效果)",
    "6. 关闭不必要的梯度计算:验证/测试时使用torch.no_grad()",
    "7. 选择更轻量的优化器:用SGD替代Adam,减少优化器状态显存占用"
]
# One suggestion per line
print(*oom_solutions, sep="\n")

# Mixed-precision training example (one way to relieve OOM)
from torch.cuda.amp import autocast, GradScaler
def train_with_amp(model, dataloader, epochs=1):
    """Train ``model`` with Adam, using CUDA automatic mixed precision when possible.

    Mixed precision is enabled only when the model's parameters are on a CUDA
    device. Otherwise autocast and the gradient scaler are created disabled,
    and this becomes an ordinary full-precision training loop.

    :param model: ``nn.Module`` producing class logits; updated in place
    :param dataloader: iterable of ``(data, target)`` batches
    :param epochs: number of passes over ``dataloader``
    :raises ValueError: if ``dataloader`` yields no batches in an epoch
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # Follow the model's own device rather than a module-level global.
    model_device = next(model.parameters()).device
    use_amp = model_device.type == "cuda"
    # Gradient scaler guards against float16 gradient underflow; a no-op when disabled
    scaler = GradScaler(enabled=use_amp)

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(model_device), target.to(model_device)

            optimizer.zero_grad()
            # Forward pass under autocast (mixed precision only when use_amp)
            with autocast(enabled=use_amp):
                output = model(data)
                loss = criterion(output, target)

            # Backward pass on the scaled loss, then step and update the scale
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            num_batches += 1

        # Fail with a clear message instead of a bare ZeroDivisionError
        if num_batches == 0:
            raise ValueError("dataloader yielded no batches; cannot compute an average loss")
        avg_loss = total_loss / num_batches
        print(f"混合精度训练 Epoch {epoch+1} Average Loss: {avg_loss:.4f}")

# 演示混合精度训练(可选)
# print("\n混合精度训练测试:")
# train_with_amp(mnist_model, mnist_loaders["bs_1024"], epochs=1)

@浙大疏锦行

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值