返回
使用 NumPy 实现 CNN:从头开始构建神经网络
人工智能
2023-10-08 20:35:00
引言
深度学习算法,特别是卷积神经网络 (CNN),在图像处理、计算机视觉和自然语言处理等领域取得了重大成功。虽然使用预训练模型和机器学习工具箱非常方便,但从头开始构建和训练 CNN 可以提供对底层机制的宝贵理解。
本教程将使用流行的 NumPy 库,逐步指导您完成 CNN 的构建过程。NumPy 提供了高效的数组处理功能,非常适合用于数字计算密集型任务,如深度学习。
CNN 架构
CNN 是由一系列卷积层、池化层和全连接层组成的深度神经网络。卷积层使用卷积滤波器来提取输入数据的特征,而池化层通过减少特征图的空间维度来降低计算量。全连接层用于将提取的特征分类或预测目标变量。
使用 NumPy 构建 CNN
导入库和数据
import numpy as np
import matplotlib.pyplot as plt
# Load the MNIST dataset (handwritten digits; labels 0-9, matching the
# 10-class one-hot encoding below).
# NOTE(review): requires Keras/TensorFlow and downloads the data on first use.
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
预处理数据
# Scale raw pixel intensities from [0, 255] down to the unit interval.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0
# Turn each integer class label into a 10-dimensional one-hot vector by
# indexing rows of the identity matrix.
y_train = np.identity(10)[y_train]
y_test = np.identity(10)[y_test]
定义模型架构
# --- Model parameters ---------------------------------------------------
# Convolution kernels stored as (kh, kw, in_channels, out_channels),
# scaled by 1/sqrt(fan_in) so activation variance stays stable at init.
conv1 = np.random.randn(3, 3, 1, 32) / np.sqrt(3 * 3 * 1)
conv2 = np.random.randn(3, 3, 32, 64) / np.sqrt(3 * 3 * 32)


def _max_pool_2x2(x):
    """2x2 max pooling with stride 2 over an (N, H, W, C) batch.

    Odd trailing rows/columns are dropped ('valid' pooling).
    """
    n, h, w, c = x.shape
    x = x[:, : h // 2 * 2, : w // 2 * 2, :]
    return x.reshape(n, h // 2, 2, w // 2, 2, c).max(axis=(2, 4))


# BUG FIX: np.max_pool does not exist in NumPy; bind a real implementation.
pool1 = _max_pool_2x2

# Fully connected weights: flattened conv features -> 64 hidden -> 10 classes.
# NOTE(review): 64*4*4 assumes 4x4 feature maps after the conv/pool stack;
# verify against the spatial size actually produced by the forward pass.
fc1 = np.random.randn(64 * 4 * 4, 64) / np.sqrt(64 * 4 * 4)
fc2 = np.random.randn(64, 10) / np.sqrt(64)
前向传播
def _relu(x):
    """Elementwise rectified linear unit (np.relu does not exist)."""
    return np.maximum(x, 0.0)


def _conv2d_valid(x, w):
    """'valid' 2-D cross-correlation of a batch with a filter bank.

    Args:
        x: input batch, shape (N, H, W, C_in).
        w: kernels, shape (kh, kw, C_in, C_out).

    Returns:
        Output of shape (N, H-kh+1, W-kw+1, C_out). Implemented as a sum
        of shifted slices so the inner work stays vectorized in NumPy.
    """
    n, h, wd, cin = x.shape
    kh, kw, _, cout = w.shape
    oh, ow = h - kh + 1, wd - kw + 1
    out = np.zeros((n, oh, ow, cout))
    for i in range(kh):
        for j in range(kw):
            # (N, oh, ow, C_in) @ (C_in, C_out) -> (N, oh, ow, C_out)
            out += x[:, i:i + oh, j:j + ow, :] @ w[i, j]
    return out


def _pool_2x2(x):
    """2x2/stride-2 max pooling over (N, H, W, C); drops odd edges."""
    n, h, wd, c = x.shape
    x = x[:, : h // 2 * 2, : wd // 2 * 2, :]
    return x.reshape(n, h // 2, 2, wd // 2, 2, c).max(axis=(2, 4))


def forward(x):
    """Forward pass: (conv -> relu -> pool) x 2, flatten, two dense layers.

    Args:
        x: grayscale batch of shape (N, H, W) or (N, H, W, 1).

    Returns:
        Raw class scores of shape (N, 10).

    BUG FIX: the original called np.convolve (1-D only) and np.relu /
    np.max_pool (neither exists in NumPy); replaced with the real
    implementations above.
    """
    if x.ndim == 3:
        x = x[..., np.newaxis]  # add the single grayscale channel axis
    # Convolution block 1
    x = _pool_2x2(_relu(_conv2d_valid(x, conv1)))
    # Convolution block 2
    x = _pool_2x2(_relu(_conv2d_valid(x, conv2)))
    # Flatten to (N, features)
    x = x.reshape(x.shape[0], -1)
    # NOTE(review): for 28x28 inputs this yields 64*5*5 features while fc1
    # expects 64*4*4 rows -- confirm the intended input size.
    x = _relu(x @ fc1)
    return x @ fc2
反向传播
def backward(x, y, out):
    """Compute parameter gradients for one mini-batch.

    Args:
        x: input batch (unused below -- see review notes).
        y: one-hot target batch.
        out: network output from forward(x).

    Returns:
        (dconv1, dconv2, dfc1, dfc2) gradient arrays.

    NOTE(review): as written this is illustrative pseudo-code rather than
    runnable NumPy -- see the inline notes on each step.
    """
    # Initialize gradient buffers to the parameter shapes.
    # NOTE(review): dfc1/dfc2 are immediately reassigned below, so these
    # zero arrays are dead stores.
    dconv1 = np.zeros_like(conv1)
    dconv2 = np.zeros_like(conv2)
    dfc1 = np.zeros_like(fc1)
    dfc2 = np.zeros_like(fc2)
    # Fully connected layer 2.
    # NOTE(review): `out - y` is the gradient w.r.t. the layer's output,
    # not w.r.t. fc2 itself; a weight gradient would need the hidden
    # activations (e.g. hidden.T @ (out - y)).
    dfc2 = out - y
    dx = np.matmul(dfc2, fc2.T)
    # Fully connected layer 1.
    # NOTE(review): the ReLU mask should come from the layer's
    # activations, not from the weight matrix fc1.
    dfc1 = np.matmul(dx, fc1) * np.where(fc1 > 0, 1, 0)
    dx = np.matmul(dfc1, fc1.T)
    # Pooling layer 2.
    # NOTE(review): pool1 has no 'grad' mode; max-pool backprop needs the
    # argmax positions cached from the forward pass.
    dx = pool1(dx, mode='grad')
    # Convolutional layer 2.
    # NOTE(review): np.convolve is 1-D only and cannot apply these 4-D
    # kernels; the rot90/valid/full pattern sketches conv backprop but
    # does not run as written.
    dconv2 = np.convolve(dx, np.rot90(conv2, 2), mode='valid') * np.where(conv2 > 0, 1, 0)
    dx = np.convolve(dx, np.rot90(dconv2, 2), mode='full')
    # Pooling layer 1 (same caveat as pooling layer 2).
    dx = pool1(dx, mode='grad')
    # Convolutional layer 1 (same caveats as convolutional layer 2).
    dconv1 = np.convolve(dx, np.rot90(conv1, 2), mode='valid') * np.where(conv1 > 0, 1, 0)
    dx = np.convolve(dx, np.rot90(dconv1, 2), mode='full')
    return dconv1, dconv2, dfc1, dfc2
训练模型
# 学习率
lr = 0.01
# 迭代次数
epochs = 10
# 批量大小
batch_size = 64
for epoch in range(epochs):
# 训练集随机采样
idx = np.random.choice(len(x_train), batch_size, replace=False)
x_batch = x_train[idx]
y_batch = y_train[idx]
# 前向传播
out = forward(x_batch)
# 计算损失函数
loss = np.mean((out - y_batch) ** 2)
# 反向传播
dconv1, dconv2, dfc1, dfc2 = backward(x_batch, y_batch, out)
# 更新权重
conv1 -= lr * dconv1
conv2 -= lr * dconv2
fc1 -= lr * dfc1
fc2 -= lr * dfc2
# 打印损失函数
if epoch % 100 == 0:
print(f'Epoch {epoch}: Loss {loss}')
评估模型
# Score the held-out test set and measure top-1 accuracy.
out_test = forward(x_test)
predicted = out_test.argmax(axis=1)
expected = y_test.argmax(axis=1)
accuracy = (predicted == expected).mean()
print(f'Test accuracy: {accuracy}')
示例代码
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
# Load the MNIST dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize pixel values from [0, 255] to [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# One-hot encode the integer class labels (10 classes).
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]
# --- Model parameters ---------------------------------------------------
# Convolution kernels stored as (kh, kw, in_channels, out_channels),
# scaled by 1/sqrt(fan_in) to keep activation variance stable at init.
conv1 = np.random.randn(3, 3, 1, 32) / np.sqrt(3 * 3 * 1)
conv2 = np.random.randn(3, 3, 32, 64) / np.sqrt(3 * 3 * 32)


def _max_pool2(x):
    """2x2 max pooling with stride 2 over (N, H, W, C); drops odd edges."""
    n, h, w, c = x.shape
    x = x[:, : h // 2 * 2, : w // 2 * 2, :]
    return x.reshape(n, h // 2, 2, w // 2, 2, c).max(axis=(2, 4))


# BUG FIX: np.max_pool does not exist in NumPy; bind a real implementation.
pool1 = _max_pool2

# Fully connected weights: flattened conv features -> 64 hidden -> 10 classes.
# NOTE(review): 64*4*4 assumes 4x4 feature maps after the conv/pool stack;
# verify against the spatial size the forward pass actually produces.
fc1 = np.random.randn(64 * 4 * 4, 64) / np.sqrt(64 * 4 * 4)
fc2 = np.random.randn(64, 10) / np.sqrt(64)
# 前向传播
def forward(x):
# 卷积层 1
x = np.convolve(x, conv1, mode='valid')
x = np.relu(x)
# 池化层 1
x = pool1(x)
# 卷积层 2
x = np.convolve(x, conv2, mode='valid')
x = np.relu(x)
# 池化层 2
x = pool1(x)
# 展平
x = x.reshape(x.shape[0], -1)