DQN Deep Reinforcement Learning in Practice: Playing Flappy Bird on GPU and CPU
Artificial Intelligence
2023-09-04 10:06:59
In deep reinforcement learning, the DQN (Deep Q-Network) algorithm holds a prominent place. In this post we put DQN to work on a concrete task: teaching an agent to play the classic game Flappy Bird, using either a GPU or a CPU.
The article has two parts: the first is the GPU tutorial and the second is the CPU tutorial. We walk through everything from scratch and use DQN to teach the little bird to fly through the pipes on its own.
GPU Tutorial (PyTorch)
Prerequisites
- Install Anaconda or Miniconda
- Install Python 3.6 or later
- Install PyTorch 1.0 or later (a CUDA build if you want to train on the GPU; see the quick check after this list)
- Install the gym library, plus a third-party Flappy Bird environment for gym (for example flappy-bird-gym or gym-ple, since gym itself does not ship 'FlappyBird-v0')
- Install the tqdm library
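Before diving into the code, it helps to verify that your PyTorch install can actually see a GPU; the minimal check below simply queries CUDA availability:

import torch

# True only when a CUDA build of PyTorch and a CUDA-capable GPU are present;
# if this prints False, the code in this tutorial will simply run on the CPU.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # name of the first visible GPU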
Code Walkthrough
Import the required libraries
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
Create the environment
# 'FlappyBird-v0' is not part of gym itself; it comes from a third-party package such as flappy-bird-gym or gym-ple.
env = gym.make('FlappyBird-v0')
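Because the observation layout depends on which Flappy Bird package you installed, a quick inspection with standard gym calls (shown here only as a sanity check) confirms whether it matches the 4-input, 2-action network defined next:

# The network below assumes a 4-dimensional state vector and 2 discrete actions.
print(env.observation_space)
print(env.action_space)
print(env.reset())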
Define the neural network
class QNetwork(nn.Module):
    def __init__(self):
        super(QNetwork, self).__init__()
        # 4-dimensional state in, one Q-value per action out (2 actions).
        self.fc1 = nn.Linear(4, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
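To make the mapping concrete, here is a tiny forward-pass example with a made-up 4-dimensional state; the network outputs one Q-value per action and the greedy policy takes the argmax:

net = QNetwork()
dummy_state = torch.tensor([0.5, -0.2, 0.1, 0.0])  # hypothetical state, for illustration only
q_values = net(dummy_state)                        # shape (2,): one Q-value per action
print(q_values, q_values.argmax().item())          # the greedy action is 0 or 1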
Define the training function
def train(model, env, num_episodes=1000, gamma=0.99, lr=0.0001, batch_size=32):
    # batch_size is unused in this simplified one-transition-at-a-time loop
    # (see the replay-buffer sketch after this function).
    # Run on the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for episode in tqdm(range(num_episodes)):
        state = env.reset()
        done = False
        while not done:
            state_t = torch.tensor(state, dtype=torch.float32, device=device)
            # Greedy action from the current Q estimates.
            action = model(state_t).argmax().item()
            next_state, reward, done, _ = env.step(action)
            next_state_t = torch.tensor(next_state, dtype=torch.float32, device=device)
            # TD target r + gamma * max_a' Q(s', a'); no bootstrapping on terminal
            # states, and no gradient flows through the target.
            with torch.no_grad():
                target = reward + gamma * torch.max(model(next_state_t)) * (1.0 - float(done))
            loss = F.mse_loss(model(state_t)[action], target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            state = next_state
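The loop above always acts greedily and updates on one transition at a time, which is why its batch_size argument goes unused. A full DQN would add epsilon-greedy exploration, an experience-replay buffer sampled in mini-batches, and a separate target network; the sketch below shows the first two pieces (names and defaults are illustrative, not part of the code above):

import random
from collections import deque

def select_action(model, state, epsilon, device):
    # With probability epsilon pick a random action, otherwise act greedily.
    if random.random() < epsilon:
        return random.randrange(2)          # 2 actions: flap or do nothing
    with torch.no_grad():
        q = model(torch.tensor(state, dtype=torch.float32, device=device))
        return q.argmax().item()

class ReplayBuffer:
    # Fixed-size store of (state, action, reward, next_state, done) transitions;
    # a training step would sample batch_size of them instead of using only the latest one.
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)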
Test the model
model = QNetwork()
train(model, env)

# Evaluate the trained policy over 100 episodes and report the average score.
device = next(model.parameters()).device
score = 0
for i in range(100):
    state = env.reset()
    done = False
    while not done:
        env.render()
        with torch.no_grad():
            action = model(torch.tensor(state, dtype=torch.float32, device=device)).argmax().item()
        state, reward, done, _ = env.step(action)
        score += reward
print("Average score:", score / 100)
CPU Tutorial (TensorFlow)
Prerequisites
- Install Anaconda or Miniconda
- Install Python 3.6 or later
- Install TensorFlow 2.0 or later (see the CPU device check after this list)
- Install the gym library, plus a third-party Flappy Bird environment for gym, as in the GPU section
- Install the tqdm library
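Since this version is meant to run on the CPU, you can optionally check which devices TensorFlow sees and hide any GPU before building the model (set_visible_devices must be called before TensorFlow initializes a GPU):

import tensorflow as tf

print(tf.config.list_physical_devices())  # lists the CPU and any visible GPUs
tf.config.set_visible_devices([], 'GPU')  # optional: force everything onto the CPU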
Code Walkthrough
Import the required libraries
import gym
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tqdm import tqdm
Create the environment
# As in the GPU section, 'FlappyBird-v0' requires a third-party Flappy Bird environment package on top of gym.
env = gym.make('FlappyBird-v0')
Define the neural network
# 4-dimensional state in, one Q-value per action out (2 actions).
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=4))
model.add(Dense(32, activation='relu'))
model.add(Dense(2))
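A quick summary plus a dummy forward pass (the state vector is made up) confirms the 4-in, 2-out shape; the three Dense layers contribute 320 + 2080 + 66 = 2466 trainable parameters:

model.summary()
dummy = np.array([[0.5, -0.2, 0.1, 0.0]])       # hypothetical 4-dim state
print(model.predict(dummy, verbose=0).shape)    # (1, 2): one Q-value per action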
Define the training function
def train(model, env, num_episodes=1000, gamma=0.99, lr=0.0001, batch_size=32):
    # batch_size is unused in this simplified one-transition-at-a-time loop.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    # Compile once so model.fit can take gradient steps with an MSE loss on the Q-values.
    model.compile(optimizer=optimizer, loss='mse')
    for episode in tqdm(range(num_episodes)):
        state = env.reset()
        done = False
        while not done:
            # Greedy action from the current Q estimates.
            action = int(np.argmax(model.predict(np.array([state]), verbose=0)))
            next_state, reward, done, _ = env.step(action)
            # TD target r + gamma * max_a' Q(s', a'); no bootstrapping on terminal states.
            target = reward + gamma * np.max(model.predict(np.array([next_state]), verbose=0)) * (1.0 - float(done))
            target_f = model.predict(np.array([state]), verbose=0)
            target_f[0][action] = target
            # One gradient step that moves Q(state, action) toward the TD target.
            model.fit(np.array([state]), target_f, epochs=1, verbose=0)
            state = next_state
Test the model
# The network was already built above, so train it and then evaluate the learned
# policy over 100 episodes, reporting the average score.
train(model, env)
score = 0
for i in range(100):
    state = env.reset()
    done = False
    while not done:
        env.render()
        action = int(np.argmax(model.predict(np.array([state]), verbose=0)))
        state, reward, done, _ = env.step(action)
        score += reward
print("Average score:", score / 100)
Summary
With the two walkthroughs above, you now have a working recipe for training a Flappy Bird agent on either a GPU or a CPU. The appeal of DQN is that no hand-written rules are needed: the bird learns to avoid the pipes on its own, and that is exactly what makes reinforcement learning so compelling.