返回

使用 NumPy 实现 CNN:从头开始构建神经网络

人工智能

引言

深度学习算法,特别是卷积神经网络 (CNN),在图像处理、计算机视觉和自然语言处理等领域取得了重大成功。虽然使用预训练模型和机器学习工具箱非常方便,但从头开始构建和训练 CNN 可以提供对底层机制的宝贵理解。

本教程将使用流行的 NumPy 库,逐步指导您完成 CNN 的构建过程。NumPy 提供了高效的数组处理功能,非常适合用于数值计算密集型任务,如深度学习。

CNN 架构

CNN 是由一系列卷积层、池化层和全连接层组成的深度神经网络。卷积层使用卷积滤波器来提取输入数据的特征,而池化层通过减少特征图的空间维度来降低计算量。全连接层用于将提取的特征分类或预测目标变量。

使用 NumPy 构建 CNN

导入库和数据

import numpy as np
import matplotlib.pyplot as plt

# Load the MNIST dataset (Keras is used only as a convenient downloader;
# all model code below is pure NumPy).
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

预处理数据

# Scale the 0-255 pixel intensities into the [0, 1] range.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the integer class labels by indexing rows of a
# 10x10 identity matrix.
one_hot = np.eye(10)
y_train = one_hot[y_train]
y_test = one_hot[y_test]

定义模型架构

# --- Model parameters -------------------------------------------------
# Convolution kernels, scaled-Gaussian (Xavier-style) initialisation.
# Shape convention: (kernel_h, kernel_w, in_channels, out_channels).
conv1 = np.random.randn(3, 3, 1, 32) / np.sqrt(3 * 3 * 1)
conv2 = np.random.randn(3, 3, 32, 64) / np.sqrt(3 * 3 * 32)


def _max_pool2d(x, size=2):
    """2x2 (by default) max pooling over the spatial dims of an
    (N, H, W, C) tensor; trailing rows/cols that don't fill a window
    are dropped, matching common 'valid' pooling behaviour."""
    n, h, w, c = x.shape
    oh, ow = h // size, w // size
    x = x[:, : oh * size, : ow * size, :]
    x = x.reshape(n, oh, size, ow, size, c)
    return x.max(axis=(2, 4))


# BUG FIX: np.max_pool does not exist in NumPy. Bind a real pooling
# implementation to the `pool1` name the rest of the script calls.
pool1 = _max_pool2d

# Fully connected layers: flattened conv features -> 64 hidden -> 10 classes.
fc1 = np.random.randn(64 * 4 * 4, 64) / np.sqrt(64 * 4 * 4)
fc2 = np.random.randn(64, 10) / np.sqrt(64)

前向传播

def _conv2d_valid(x, w):
    """'valid' 2-D convolution (cross-correlation, as in most DL
    frameworks) of x: (N, H, W, C_in) with w: (kh, kw, C_in, C_out).

    Returns (N, H - kh + 1, W - kw + 1, C_out).
    """
    n, h, wid, _ = x.shape
    kh, kw, _, cout = w.shape
    oh, ow = h - kh + 1, wid - kw + 1
    out = np.zeros((n, oh, ow, cout))
    for i in range(kh):
        for j in range(kw):
            # Each kernel offset (i, j) contributes a shifted slice of x,
            # contracted over the input-channel axis.
            out += np.tensordot(x[:, i:i + oh, j:j + ow, :], w[i, j],
                                axes=([3], [0]))
    return out


def forward(x):
    """Forward pass: conv -> ReLU -> pool, twice, then two dense layers.

    x: batch of grayscale images, (N, 28, 28) or (N, 28, 28, 1).
    Returns raw (un-normalised) class scores of shape (N, 10).

    BUG FIX: the original called np.convolve (1-D only) and np.relu
    (does not exist); this version uses a real 2-D convolution and
    np.maximum for ReLU.
    NOTE(review): with 'valid' convs and 2x2 pooling the flattened size
    is 5*5*64 = 1600, which does not match fc1's expected 64*4*4 = 1024
    — the tutorial's layer sizes need reconciling. TODO confirm.
    """
    if x.ndim == 3:
        x = x[..., np.newaxis]  # add a channel axis for (N, 28, 28) input

    # Conv block 1
    x = _conv2d_valid(x, conv1)
    x = np.maximum(x, 0.0)
    x = pool1(x)

    # Conv block 2
    x = _conv2d_valid(x, conv2)
    x = np.maximum(x, 0.0)
    x = pool1(x)

    # Flatten to (N, features)
    x = x.reshape(x.shape[0], -1)

    # Dense layer 1 with ReLU
    x = np.maximum(np.matmul(x, fc1), 0.0)

    # Output layer: raw scores, no softmax
    return np.matmul(x, fc2)

反向传播

def backward(x, y, out):
    """Backward pass: gradients of the MSE loss w.r.t. all weights.

    x: input batch, y: one-hot targets, out: scores from forward(x).
    Returns (dconv1, dconv2, dfc1, dfc2).

    NOTE(review): this implementation is schematic and will not run
    as written — np.convolve is 1-D only, and pool1 neither exists as
    np.max_pool nor accepts a mode='grad' argument. The gradient math
    is also incorrect in several places; see the inline notes.
    """
    # Initialise gradient buffers.
    # NOTE(review): all four names are reassigned below, so these
    # zero arrays are dead code.
    dconv1 = np.zeros_like(conv1)
    dconv2 = np.zeros_like(conv2)
    dfc1 = np.zeros_like(fc1)
    dfc2 = np.zeros_like(fc2)

    # Fully connected layer 2.
    # NOTE(review): (out - y) is the gradient w.r.t. the layer's
    # OUTPUT, not w.r.t. fc2 itself; the true weight gradient would be
    # activations.T @ (out - y), which needs activations saved during
    # forward(). TODO confirm intended derivation.
    dfc2 = out - y
    dx = np.matmul(dfc2, fc2.T)

    # Fully connected layer 1.
    # NOTE(review): the ReLU mask should be computed on the layer's
    # pre-activation values, not on the weight matrix fc1, and the
    # matmul shapes do not line up for a weight gradient.
    dfc1 = np.matmul(dx, fc1) * np.where(fc1 > 0, 1, 0)
    dx = np.matmul(dfc1, fc1.T)

    # Pooling layer 2.
    # NOTE(review): max-pool backprop must route gradients to the argmax
    # positions recorded during the forward pass; no 'grad' mode exists.
    dx = pool1(dx, mode='grad')

    # Convolution layer 2.
    # NOTE(review): np.convolve cannot handle these 4-D tensors, and the
    # ReLU mask again tests weights instead of activations.
    dconv2 = np.convolve(dx, np.rot90(conv2, 2), mode='valid') * np.where(conv2 > 0, 1, 0)
    dx = np.convolve(dx, np.rot90(dconv2, 2), mode='full')

    # Pooling layer 1 (same caveat as pooling layer 2).
    dx = pool1(dx, mode='grad')

    # Convolution layer 1 (same caveats as convolution layer 2).
    dconv1 = np.convolve(dx, np.rot90(conv1, 2), mode='valid') * np.where(conv1 > 0, 1, 0)
    dx = np.convolve(dx, np.rot90(dconv1, 2), mode='full')

    return dconv1, dconv2, dfc1, dfc2

训练模型

# --- Training hyper-parameters ----------------------------------------
lr = 0.01          # SGD learning rate
epochs = 10        # number of update steps (one random mini-batch each)
batch_size = 64

for epoch in range(epochs):
    # Sample one random mini-batch from the training set
    # (without replacement within the batch).
    idx = np.random.choice(len(x_train), batch_size, replace=False)
    x_batch = x_train[idx]
    y_batch = y_train[idx]

    # Forward pass
    out = forward(x_batch)

    # Mean-squared error against the one-hot targets.
    # NOTE(review): softmax + cross-entropy is the conventional loss
    # for classification; MSE on raw scores trains poorly.
    loss = np.mean((out - y_batch) ** 2)

    # Backward pass
    dconv1, dconv2, dfc1, dfc2 = backward(x_batch, y_batch, out)

    # Vanilla SGD parameter update
    conv1 -= lr * dconv1
    conv2 -= lr * dconv2
    fc1 -= lr * dfc1
    fc2 -= lr * dfc2

    # BUG FIX: the original guard `epoch % 100 == 0` fired only at
    # epoch 0 because the loop runs just `epochs` (10) iterations;
    # report the loss every epoch instead.
    print(f'Epoch {epoch}: Loss {loss}')

评估模型

# --- Evaluation -------------------------------------------------------
# Run the held-out test set through the network.
out_test = forward(x_test)

# Accuracy = fraction of samples whose highest-scoring class matches
# the one-hot ground-truth label.
predicted = np.argmax(out_test, axis=1)
actual = np.argmax(y_test, axis=1)
accuracy = np.mean(predicted == actual)
print(f'Test accuracy: {accuracy}')

示例代码

import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

# Load the MNIST dataset (Keras serves only as the dataset downloader).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalise pixel values from 0-255 down to [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convert integer labels to one-hot vectors by selecting rows of the
# 10x10 identity matrix.
identity = np.eye(10)
y_train = identity[y_train]
y_test = identity[y_test]

# --- Model parameters -------------------------------------------------
# Convolution kernels with scaled-Gaussian initialisation;
# shape: (kernel_h, kernel_w, in_channels, out_channels).
conv1 = np.random.randn(3, 3, 1, 32) / np.sqrt(3 * 3 * 1)
conv2 = np.random.randn(3, 3, 32, 64) / np.sqrt(3 * 3 * 32)


def _max_pool2d(x, size=2):
    """Max-pool the spatial dims of an (N, H, W, C) tensor with a
    `size` x `size` window, dropping any trailing partial window."""
    n, h, w, c = x.shape
    oh, ow = h // size, w // size
    x = x[:, : oh * size, : ow * size, :]
    x = x.reshape(n, oh, size, ow, size, c)
    return x.max(axis=(2, 4))


# BUG FIX: NumPy has no np.max_pool; keep the `pool1` name used by the
# rest of the script but bind it to a working implementation.
pool1 = _max_pool2d

# Fully connected layers: flattened features -> 64 hidden -> 10 classes.
fc1 = np.random.randn(64 * 4 * 4, 64) / np.sqrt(64 * 4 * 4)
fc2 = np.random.randn(64, 10) / np.sqrt(64)

# 前向传播
def forward(x):
    # 卷积层 1
    x = np.convolve(x, conv1, mode='valid')
    x = np.relu(x)

    # 池化层 1
    x = pool1(x)

    # 卷积层 2
    x = np.convolve(x, conv2, mode='valid')
    x = np.relu(x)

    # 池化层 2
    x = pool1(x)

    # 展平
    x = x.reshape(x.shape[0], -1)