Lab files for Windows 10
BIN
CM3020 Artificial Intelligence/Week 4/4.13 DQN Flowchart A.pdf
Normal file
Binary file not shown.
@@ -0,0 +1,270 @@
## This entire file has been adapted from code
## by Jacob Chapman and Mathias Lechner, available here as of
## 11/11/2021
## https://github.com/keras-team/keras-io/blob/master/examples/rl/deep_q_network_breakout.py
## Changes made:
## * use the breakwall version of breakout instead of atari
## * log in a tensorboard-compatible format and print logs
## * save the model weights each time the moving episodic reward reaches a new max

from baselines.common.atari_wrappers import make_atari, wrap_deepmind
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime

# logging code
# for tensorboard
# https://www.tensorflow.org/tensorboard/get_started
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
# train_summary_writer = tf.summary.create_file_writer(train_log_dir)
# end of logging code
# for tensorboard

# Configuration parameters for the whole setup
seed = 42
gamma = 0.99  # Discount factor for past rewards
epsilon = 1.0  # Epsilon greedy parameter
epsilon_min = 0.1  # Minimum epsilon greedy parameter
epsilon_max = 1.0  # Maximum epsilon greedy parameter
epsilon_interval = (
    epsilon_max - epsilon_min
)  # Rate at which to reduce chance of random action being taken
batch_size = 32  # Size of batch taken from replay buffer
max_steps_per_episode = 10000

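# Added worked example (a sketch, not part of the original lab code): with the
# schedule used further down, epsilon is reduced by
#   epsilon_interval / epsilon_greedy_frames = (1.0 - 0.1) / 1,000,000 = 9e-7
# per frame once the purely random phase is over, so it takes roughly one
# million frames for epsilon to fall from 1.0 to its floor of 0.1.
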
#env_name = "BreakoutNoFrameskip-v4" # for windows?
env_name = "gym_gs:BreakwallNoFrameskip-v1"
#env_name2 = "BreakoutNoFrameskip-v4" # for windows?
env_name2 = "gym_gs_BreakwallNoFrameskip-v1"

# Use the Baseline Atari environment because of Deepmind helper functions
env = make_atari(env_name)

# Wrap the frames, grey-scale, stack four frames and scale to smaller ratio
env = wrap_deepmind(env, frame_stack=True, scale=True)
env.seed(seed)

"""
|
||||
## Implement the Deep Q-Network
|
||||
|
||||
This network learns an approximation of the Q-table, which is a mapping between
|
||||
the states ainnd actions that an agent will take. For every state we'll have four
|
||||
actions, that can be taken. The environment provides the state, and the action
|
||||
is chosen by selecting the larger of the four Q-values predicted in the output layer.
|
||||
"""
|
||||
|
||||
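# Illustrative example (added comment, not in the original lab code): if the
# network predicts Q-values [0.05, 1.20, 0.30, 0.70] for the current state,
# tf.argmax picks index 1, so action 1 is taken.
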
num_actions = 4


def log(running_reward, last_reward, episode, mem_perc, epsilon, frame, tensorboard_log=False):
    """
    log the running episodic reward, most recent reward,
    episode count, epsilon value and frame count, plus mem_perc which
    is the percentage of the action memory that is full
    """
    if tensorboard_log:
        with train_summary_writer.as_default():
            tf.summary.scalar('running reward', running_reward, step=episode)

    template = 'Epoch,{}, Mem,{}%, Eps,{}, Frame,{}, Last reward:,{}, Running reward:,{}, '
    print(template.format(episode+1,
                          np.round(mem_perc, 3),
                          np.round(epsilon, 3),
                          frame,
                          last_reward,
                          running_reward))

def save_weights(env_name2, model, episode, run_id):
    """
    save the weights of the sent model,
    with the env_name, episode and run_id used
    to generate the filename
    """
    print("Saving weights")
    path = "./saves/"
    model.save_weights(path + env_name2 + "_" + str(run_id) + "_" + str(episode))
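# Added note (not in the original lab code): the call sites below pass
# current_time as run_id, so checkpoints are written under ./saves/ with
# names like gym_gs_BreakwallNoFrameskip-v1_<YYYYmmdd-HHMMSS>_<episode>.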


def create_q_model():
    # Network defined by the Deepmind paper
    inputs = layers.Input(shape=(84, 84, 4,))
    # Convolutions on the frames on the screen
    layer1 = layers.Conv2D(32, 8, strides=4, activation="relu")(inputs)
    layer2 = layers.Conv2D(64, 4, strides=2, activation="relu")(layer1)
    layer3 = layers.Conv2D(64, 3, strides=1, activation="relu")(layer2)
    layer4 = layers.Flatten()(layer3)
    layer5 = layers.Dense(512, activation="relu")(layer4)
    action = layers.Dense(num_actions, activation="linear")(layer5)
    return keras.Model(inputs=inputs, outputs=action)

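# Added note (not in the original lab code): the (84, 84, 4) input shape
# matches what wrap_deepmind produces above - four stacked 84x84 grey-scale
# frames scaled to the [0, 1] range.
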
# The first model makes the predictions for Q-values which are used to
# take an action.
model = create_q_model()
model.summary()
# Build a target model for the prediction of future rewards.
# The weights of the target model only get updated every 10000 steps, so that
# the target Q-value is stable when the loss between the Q-values is calculated.
model_target = create_q_model()

"""
|
||||
## Train
|
||||
"""
|
||||
# In the Deepmind paper they use RMSProp however then Adam optimizer
|
||||
# improves training time
|
||||
optimizer = keras.optimizers.Adam(learning_rate=0.00025, clipnorm=1.0)
|
||||
|
||||
# Experience replay buffers
action_history = []
state_history = []
state_next_history = []
rewards_history = []
done_history = []
episode_reward_history = []
running_reward = 0
episode_count = 0
frame_count = 0
# Number of frames to take random action and observe output
epsilon_random_frames = 50000
# Number of frames for exploration
epsilon_greedy_frames = 1000000.0
# Maximum replay length
# Note: The Deepmind paper suggests 1000000 however this causes memory issues
max_memory_length = 100000
# Train the model after 4 actions
update_after_actions = 4
# How often to update the target network
update_target_network = 10000
# Using huber loss for stability
loss_function = keras.losses.Huber()
epoch = 0
# use this to decide when it is time to save the weights
max_reward = 0

while True:  # Run until solved
    state = np.array(env.reset())
    episode_reward = 0
    print("Starting episode", episode_count, "played frames", frame_count)
    for timestep in range(1, max_steps_per_episode):  # 10000
        # env.render()  # Adding this line would show the attempts
        # of the agent in a pop up window.
        if frame_count % 250 == 0:
            log(np.mean(episode_reward_history),
                episode_reward,
                episode_count,
                len(rewards_history) / max_memory_length * 100,
                epsilon,
                frame_count)

        frame_count += 1
        # Use epsilon-greedy for exploration
        if frame_count < epsilon_random_frames or epsilon > np.random.rand(1)[0]:
            # Take random action
            action = np.random.choice(num_actions)
        else:
            # Predict action Q-values
            # From environment state
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)
            action_probs = model(state_tensor, training=False)
            # Take best action
            action = tf.argmax(action_probs[0]).numpy()

        # Decay probability of taking random action
        epsilon -= epsilon_interval / epsilon_greedy_frames
        epsilon = max(epsilon, epsilon_min)

        # Apply the sampled action in our environment
        state_next, reward, done, _ = env.step(action)
        state_next = np.array(state_next)

        # print("state shape:", state.shape)
        episode_reward += reward

        # Save actions and states in replay buffer
        action_history.append(action)
        state_history.append(state)
        state_next_history.append(state_next)
        done_history.append(done)
        rewards_history.append(reward)
        state = state_next

        # Update every fourth frame and once batch size is over 32
        if frame_count % update_after_actions == 0 and len(done_history) > batch_size:
            # print("Training neural network")
            # Get indices of samples for replay buffers
            indices = np.random.choice(range(len(done_history)), size=batch_size)

            # Using list comprehension to sample from replay buffer
            state_sample = np.array([state_history[i] for i in indices])
            state_next_sample = np.array([state_next_history[i] for i in indices])
            rewards_sample = [rewards_history[i] for i in indices]
            action_sample = [action_history[i] for i in indices]
            done_sample = tf.convert_to_tensor(
                [float(done_history[i]) for i in indices]
            )

            # Build the updated Q-values for the sampled future states
            # Use the target model for stability
            future_rewards = model_target.predict(state_next_sample)
            # Q value = reward + discount factor * expected future reward
            updated_q_values = rewards_sample + gamma * tf.reduce_max(
                future_rewards, axis=1
            )

            # If final frame set the last value to -1
            updated_q_values = updated_q_values * (1 - done_sample) - done_sample

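            # Added worked example (not in the original lab code): the target is
            # the Bellman estimate r + gamma * max_a' Q_target(s', a'). For a
            # sample with reward 1.0 and a best future Q-value of 2.0 this gives
            # 1.0 + 0.99 * 2.0 = 2.98; for a terminal sample the line above
            # replaces the target with -1 via the (1 - done) ... - done trick.
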
            # Create a mask so we only calculate loss on the updated Q-values
            masks = tf.one_hot(action_sample, num_actions)

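            # Illustrative example (added comment): if a sampled action was 2,
            # its one-hot mask is [0, 0, 1, 0], so the reduce_sum below keeps
            # only the Q-value of the action that was actually taken.
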
            with tf.GradientTape() as tape:
                # Train the model on the states and updated Q-values
                q_values = model(state_sample)

                # Apply the masks to the Q-values to get the Q-value for action taken
                q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                # Calculate loss between new Q-value and old Q-value
                loss = loss_function(updated_q_values, q_action)

            # Backpropagation
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))


        if frame_count % update_target_network == 0:
            print("Updating q~")
            # update the target network with new weights
            model_target.set_weights(model.get_weights())


        # Limit the state and reward history
        if len(rewards_history) > max_memory_length:
            del rewards_history[:1]
            del state_history[:1]
            del state_next_history[:1]
            del action_history[:1]
            del done_history[:1]

        if done:
            break

    # Update running reward to check condition for solving
    episode_reward_history.append(episode_reward)
    if len(episode_reward_history) > 100:
        del episode_reward_history[:1]
    running_reward = np.mean(episode_reward_history)

    episode_count += 1

    # save the weights if we've reached a new high
    if running_reward > max_reward:
        save_weights(env_name2, model, episode_count, current_time)
        max_reward = running_reward

    if running_reward > 75:  # Condition to consider the task solved
        save_weights(env_name2, model, episode_count, current_time)
        print("Solved at episode {}!".format(episode_count))
        break
@@ -0,0 +1,209 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run a pre-trained model\n",
    "\n",
    "This notebook loads a pre-trained model and uses it to play games. \n",
    "Note that it does not render the image of the game, it just prints out the episodic score. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pygame 2.1.2 (SDL 2.0.18, Python 3.10.8)\n",
      "Hello from the pygame community. https://www.pygame.org/contribute.html\n"
     ]
    }
   ],
   "source": [
    "# sanity check: can we create breakwall?\n",
    "import gym\n",
    "e = gym.make('gym_gs:BreakwallNoFrameskip-v1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting git+https://github.com/openai/baselines.git\n",
      "  Cloning https://github.com/openai/baselines.git to c:\\users\\gofor\\appdata\\local\\temp\\pip-req-build-s405pyio\n",
      "  Resolved https://github.com/openai/baselines.git to commit ea25b9e8b234e6ee1bca43083f8f3cf974143998\n",
      "  Preparing metadata (setup.py): started\n",
      "  Preparing metadata (setup.py): finished with status 'done'\n",
      "Requirement already satisfied: gym<0.16.0,>=0.15.4 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (0.15.7)\n",
      "Requirement already satisfied: scipy in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.9.3)\n",
      "Requirement already satisfied: tqdm in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (4.64.1)\n",
      "Requirement already satisfied: joblib in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.2.0)\n",
      "Requirement already satisfied: cloudpickle in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.2.2)\n",
      "Requirement already satisfied: click in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (8.1.3)\n",
      "Requirement already satisfied: opencv-python in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (4.6.0.66)\n",
      "Requirement already satisfied: numpy>=1.10.4 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.23.5)\n",
      "Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.5.0)\n",
      "Requirement already satisfied: six in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.16.0)\n",
      "Requirement already satisfied: colorama in c:\\users\\gofor\\myvenv\\lib\\site-packages (from click->baselines==0.1.6) (0.4.6)\n",
      "Requirement already satisfied: future in c:\\users\\gofor\\myvenv\\lib\\site-packages (from pyglet<=1.5.0,>=1.4.0->gym<0.16.0,>=0.15.4->baselines==0.1.6) (0.18.2)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  Running command git clone --filter=blob:none --quiet https://github.com/openai/baselines.git 'C:\\Users\\gofor\\AppData\\Local\\Temp\\pip-req-build-s405pyio'\n",
      "\n",
      "[notice] A new release of pip available: 22.2.2 -> 22.3.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "# install baselines and other stuff\n",
    "!pip install git+https://github.com/openai/baselines.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded gym\n",
      "Model weights look loadable ./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424.data-00000-of-00001\n",
      "Model loaded weights - starting sim\n",
      "Game over at frame 278 rew 2.0 rewards/frame: 0.007194244604316547\n",
      "Game over at frame 453 rew 3.0 rewards/frame: 0.006622516556291391\n",
      "Game over at frame 631 rew 4.0 rewards/frame: 0.006339144215530904\n",
      "Game over at frame 906 rew 6.0 rewards/frame: 0.006622516556291391\n",
      "Game over at frame 976 rew 6.0 rewards/frame: 0.006147540983606557\n",
      "Sim ended : rew is 6.0\n"
     ]
    }
   ],
   "source": [
    "## full check - can we use the full opencv/ openai version\n",
    "## of the gym?\n",
    "\n",
    "# Script to test a pre-trained model\n",
    "# Written by Matthew Yee-King\n",
    "# MIT license\n",
    "# https://mit-license.org/\n",
    "\n",
    "import sys\n",
    "import os\n",
    "from baselines.common.atari_wrappers import make_atari, wrap_deepmind\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import layers\n",
    "import datetime\n",
    "import random\n",
    "import time\n",
    "\n",
    "env_name = \"gym_gs:BreakwallNoFrameskip-v1\"\n",
    "# for notebook users - make sure you have uploaded your pre-trained\n",
    "# models... then adjust this to reflect the file path\n",
    "model_file = \"./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424\"\n",
    "\n",
    "def create_q_model(num_actions):\n",
    "    # Network defined by the Deepmind paper\n",
    "    inputs = layers.Input(shape=(84, 84, 4,))\n",
    "    # Convolutions on the frames on the screen\n",
    "    layer1 = layers.Conv2D(32, 8, strides=4, activation=\"relu\")(inputs)\n",
    "    layer2 = layers.Conv2D(64, 4, strides=2, activation=\"relu\")(layer1)\n",
    "    layer3 = layers.Conv2D(64, 3, strides=1, activation=\"relu\")(layer2)\n",
    "    layer4 = layers.Flatten()(layer3)\n",
    "    layer5 = layers.Dense(512, activation=\"relu\")(layer4)\n",
    "    action = layers.Dense(num_actions, activation=\"linear\")(layer5)\n",
    "    return keras.Model(inputs=inputs, outputs=action)\n",
    "\n",
    "def create_env(env_name, seed=42):\n",
    "    try:\n",
    "        # Use the Baseline Atari environment because of Deepmind helper functions\n",
    "        env = make_atari(env_name)\n",
    "        # Wrap the frames, grey-scale, stack four frames and scale to smaller ratio\n",
    "        env = wrap_deepmind(env, frame_stack=True, scale=True)\n",
    "        print(\"Loaded gym\")\n",
    "        env.seed(seed)\n",
    "        return env\n",
    "    except:\n",
    "        print(\"Failed to make gym env\", env_name)\n",
    "        return None\n",
    "\n",
    "def run_sim(env, model, frame_count):\n",
    "    state = np.array(env.reset())\n",
    "    total_reward = 0\n",
    "    for i in range(frame_count):\n",
    "        # in the notebook version we cannot really\n",
    "        # render in realtime, so you just have\n",
    "        # to check the score :(\n",
    "        env.render('human')\n",
    "        state_tensor = keras.backend.constant(state)\n",
    "        state_tensor = keras.backend.expand_dims(state_tensor, 0)\n",
    "        action_values = model(state_tensor, training=False)\n",
    "        # Take best action\n",
    "        action = keras.backend.argmax(action_values[0]).numpy()\n",
    "        state, reward, done, _ = env.step(action)\n",
    "        state = np.array(state)\n",
    "        total_reward += reward\n",
    "        if done:\n",
    "            print(\"Game over at frame\", i, \"rew\", total_reward, \"rewards/frame: \", total_reward/i)\n",
    "            env.reset()\n",
    "            #break\n",
    "        #time.sleep(0.1)\n",
    "    print(\"Sim ended : rew is \", total_reward)\n",
    "\n",
    "def main(env_name, model_file, frame_count=1000, seed=42):\n",
    "    env = create_env(env_name=env_name)\n",
    "    assert env is not None, \"Failed to make env \" + env_name\n",
    "    model = create_q_model(num_actions=env.action_space.n)\n",
    "    model_testfile = model_file + \".data-00000-of-00001\"\n",
    "    assert os.path.exists(model_testfile), \"Failed to load model: \" + model_testfile\n",
    "    print(\"Model weights look loadable\", model_testfile)\n",
    "    model.load_weights(model_file)\n",
    "    print(\"Model loaded weights - starting sim\")\n",
    "    run_sim(env, model, frame_count)\n",
    "\n",
    "main(env_name, model_file, frame_count=1000)\n",
    "\n",
    "# LEV"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@@ -0,0 +1,77 @@
# Script to test a pre-trained model
# Written by Matthew Yee-King
# MIT license
# https://mit-license.org/

import sys
import os
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime
import random
import time

env_name = "gym_gs:BreakwallNoFrameskip-v1"
model_file = "./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424"
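# Added note (not in the original script): the weight file name uses an
# underscore ("gym_gs_...") rather than the env id's colon, matching the file
# referenced in the companion notebook and the names written by the training
# script. Adjust model_file to point at whichever pre-trained weights you
# want to test.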

def create_q_model(num_actions):
    # Network defined by the Deepmind paper
    inputs = layers.Input(shape=(84, 84, 4,))
    # Convolutions on the frames on the screen
    layer1 = layers.Conv2D(32, 8, strides=4, activation="relu")(inputs)
    layer2 = layers.Conv2D(64, 4, strides=2, activation="relu")(layer1)
    layer3 = layers.Conv2D(64, 3, strides=1, activation="relu")(layer2)
    layer4 = layers.Flatten()(layer3)
    layer5 = layers.Dense(512, activation="relu")(layer4)
    action = layers.Dense(num_actions, activation="linear")(layer5)
    return keras.Model(inputs=inputs, outputs=action)

def create_env(env_name, seed=42):
    try:
        # Use the Baseline Atari environment because of Deepmind helper functions
        env = make_atari(env_name)
        # Wrap the frames, grey-scale, stack four frames and scale to smaller ratio
        env = wrap_deepmind(env, frame_stack=True, scale=True)
        print("Loaded gym")
        env.seed(seed)
        return env
    except:
        print("Failed to make gym env", env_name)
        return None

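# Added note (an assumption about the baselines helpers, not in the original
# script): make_atari expects a "NoFrameskip"-style env id and applies its own
# frame-skipping and no-op reset wrappers, which is why the custom Breakwall
# environment id follows the same naming convention.
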
def run_sim(env, model, frame_count):
    state = np.array(env.reset())
    total_reward = 0
    for i in range(frame_count):
        env.render('human')
        state_tensor = keras.backend.constant(state)
        state_tensor = keras.backend.expand_dims(state_tensor, 0)
        action_values = model(state_tensor, training=False)
        # Take best action
        action = keras.backend.argmax(action_values[0]).numpy()
        state, reward, done, _ = env.step(action)
        state = np.array(state)
        total_reward += reward
        if done:
            print("Game over at frame", i, "rew", total_reward)
            env.reset()
            #break
        #time.sleep(0.1)
    print("Sim ended : rew is ", total_reward)

def main(env_name, model_file, frame_count=1000, seed=42):
    env = create_env(env_name=env_name)
    assert env is not None, "Failed to make env " + env_name
    model = create_q_model(num_actions=env.action_space.n)
    model_testfile = model_file + ".data-00000-of-00001"
    assert os.path.exists(model_testfile), "Failed to load model: " + model_testfile
    print("Model weights look loadable", model_testfile)
    model.load_weights(model_file)
    print("Model loaded weights - starting sim")
    run_sim(env, model, frame_count)

main(env_name, model_file, frame_count=1000)