Lab files for Windows 10

This commit is contained in:
levdoescode
2022-11-28 15:27:10 -05:00
parent e3624cbe8a
commit e8554d8d03
4 changed files with 556 additions and 0 deletions

View File

@@ -0,0 +1,270 @@
## This entire file has been adapted from code
## by Jacob Chapman and Mathias Lechner, available here as of
## 11/11/2021
## https://github.com/keras-team/keras-io/blob/master/examples/rl/deep_q_network_breakout.py
## Changes made:
## * use the breakwall version of breakout instead of the atari one
## * log in a tensorboard-compatible format and print logs
## * save the model weights each time the moving episodic reward reaches a new maximum
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime
# Logging code for TensorBoard
# https://www.tensorflow.org/tensorboard/get_started
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# Uncomment the two lines below (and pass tensorboard_log=True to log())
# to write TensorBoard summaries:
# train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
# train_summary_writer = tf.summary.create_file_writer(train_log_dir)
# end of TensorBoard logging code
# Configuration parameters for the whole setup
seed = 42
gamma = 0.99 # Discount factor for past rewards
epsilon = 1.0 # Epsilon greedy parameter
epsilon_min = 0.1 # Minimum epsilon greedy parameter
epsilon_max = 1.0 # Maximum epsilon greedy parameter
epsilon_interval = (
    epsilon_max - epsilon_min
)  # Rate at which to reduce chance of random action being taken
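# A quick sketch of the schedule this gives (applied each frame in the training
# loop below): epsilon decays linearly from epsilon_max to epsilon_min over
# epsilon_greedy_frames frames, i.e. roughly
#   epsilon(t) = max(epsilon_min, epsilon_max - epsilon_interval * t / epsilon_greedy_frames)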
batch_size = 32 # Size of batch taken from replay buffer
max_steps_per_episode = 10000
#env_name = "BreakoutNoFrameskip-v4" # for windows?
env_name = "gym_gs:BreakwallNoFrameskip-v1"
#env_name2 = "BreakoutNoFrameskip-v4" # for windows?
env_name2 = "gym_gs_BreakwallNoFrameskip-v1"  # filesystem-safe name (no ':'), used in saved weight filenames
# Use the Baseline Atari environment because of Deepmind helper functions
env = make_atari(env_name)
# Wrap the frames, convert to grey scale, stack four frames and scale to a smaller ratio
env = wrap_deepmind(env, frame_stack=True, scale=True)
env.seed(seed)
"""
## Implement the Deep Q-Network
This network learns an approximation of the Q-table, which is a mapping between
the states ainnd actions that an agent will take. For every state we'll have four
actions, that can be taken. The environment provides the state, and the action
is chosen by selecting the larger of the four Q-values predicted in the output layer.
"""
num_actions = 4

def log(running_reward, last_reward, episode, mem_perc, epsilon, frame, tensorboard_log=False):
    """
    log the running episodic reward, most recent reward,
    episode count, epsilon value and frame count plus mem_perc which
    is the percentage of the action memory that is full
    """
    if tensorboard_log:
        with train_summary_writer.as_default():
            tf.summary.scalar('running reward', running_reward, step=episode)
    template = 'Epoch,{}, Mem,{}%, Eps,{}, Frame,{}, Last reward:,{}, Running reward:,{}, '
    print(template.format(episode + 1,
                          np.round(mem_perc, 3),
                          np.round(epsilon, 3),
                          frame,
                          last_reward,
                          running_reward))

def save_weights(env_name2, model, episode, run_id):
    """
    save the weights of the given model,
    with the env_name, episode and run_id used
    to generate the filename
    """
    print("Saving weights")
    path = "./saves/"
    model.save_weights(path + env_name2 + "_" + str(run_id) + "_" + str(episode))

def create_q_model():
    # Network defined by the Deepmind paper
    inputs = layers.Input(shape=(84, 84, 4,))
    # Convolutions on the frames on the screen
    layer1 = layers.Conv2D(32, 8, strides=4, activation="relu")(inputs)
    layer2 = layers.Conv2D(64, 4, strides=2, activation="relu")(layer1)
    layer3 = layers.Conv2D(64, 3, strides=1, activation="relu")(layer2)
    layer4 = layers.Flatten()(layer3)
    layer5 = layers.Dense(512, activation="relu")(layer4)
    action = layers.Dense(num_actions, activation="linear")(layer5)
    return keras.Model(inputs=inputs, outputs=action)

# The first model makes the predictions for Q-values, which are used to
# choose an action.
model = create_q_model()
print(model.summary())
# Build a target model for the prediction of future rewards.
# The weights of the target model only get updated every 10000 steps, so the
# target Q-value is stable while the loss between the Q-values is calculated.
model_target = create_q_model()
"""
## Train
"""
# In the Deepmind paper they use RMSProp; however, the Adam optimizer
# improves training time
optimizer = keras.optimizers.Adam(learning_rate=0.00025, clipnorm=1.0)
# Experience replay buffers
action_history = []
state_history = []
state_next_history = []
rewards_history = []
done_history = []
episode_reward_history = []
running_reward = 0
episode_count = 0
frame_count = 0
# Number of frames to take random action and observe output
epsilon_random_frames = 50000
# Number of frames for exploration
epsilon_greedy_frames = 1000000.0
# Maximum replay length
# Note: The Deepmind paper suggests 1000000 however this causes memory issues
max_memory_length = 100000
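# Rough arithmetic behind that note (assuming each stacked state is stored as a
# separate 84 x 84 x 4 float32 array, which is what np.array(state) below produces):
# one state is ~0.11 MB, and every transition keeps a state and a next state, so a
# full buffer of 100000 transitions is already on the order of 20 GB of RAM.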
# Train the model after 4 actions
update_after_actions = 4
# How often to update the target network
update_target_network = 10000
# Using huber loss for stability
loss_function = keras.losses.Huber()
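# For reference, the Huber loss used here (Keras default delta = 1.0) is
#   L(e) = 0.5 * e^2                     if |e| <= delta
#   L(e) = delta * (|e| - 0.5 * delta)   otherwise
# i.e. quadratic for small TD errors and linear for large ones, which keeps
# the gradients bounded when an occasional large error comes through.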
epoch = 0
# use this to decide when it is time to save the weights
max_reward = 0
while True:  # Run until solved
    state = np.array(env.reset())
    episode_reward = 0
    print("Starting episode", episode_count, "played frames", frame_count)

    for timestep in range(1, max_steps_per_episode):  # 10000
        # env.render()  # Adding this line would show the attempts
        # of the agent in a pop up window.
        if frame_count % 250 == 0:
            log(np.mean(episode_reward_history),
                episode_reward,
                episode_count,
                len(rewards_history) / max_memory_length * 100,
                epsilon,
                frame_count)
        frame_count += 1

        # Use epsilon-greedy for exploration
        if frame_count < epsilon_random_frames or epsilon > np.random.rand(1)[0]:
            # Take random action
            action = np.random.choice(num_actions)
        else:
            # Predict action Q-values
            # From environment state
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)
            action_probs = model(state_tensor, training=False)
            # Take best action
            action = tf.argmax(action_probs[0]).numpy()

        # Decay probability of taking random action
        epsilon -= epsilon_interval / epsilon_greedy_frames
        epsilon = max(epsilon, epsilon_min)

        # Apply the sampled action in our environment
        state_next, reward, done, _ = env.step(action)
        state_next = np.array(state_next)
        # print("state shape:", state.shape)
        episode_reward += reward

        # Save actions and states in replay buffer
        action_history.append(action)
        state_history.append(state)
        state_next_history.append(state_next)
        done_history.append(done)
        rewards_history.append(reward)
        state = state_next

        # Update every fourth frame, once the history holds more than batch_size (32) transitions
        if frame_count % update_after_actions == 0 and len(done_history) > batch_size:
            # print("Training neural network")
            # Get indices of samples for replay buffers
            indices = np.random.choice(range(len(done_history)), size=batch_size)

            # Using list comprehension to sample from replay buffer
            state_sample = np.array([state_history[i] for i in indices])
            state_next_sample = np.array([state_next_history[i] for i in indices])
            rewards_sample = [rewards_history[i] for i in indices]
            action_sample = [action_history[i] for i in indices]
            done_sample = tf.convert_to_tensor(
                [float(done_history[i]) for i in indices]
            )

            # Build the updated Q-values for the sampled future states
            # Use the target model for stability
            future_rewards = model_target.predict(state_next_sample)
            # Q value = reward + discount factor * expected future reward
            updated_q_values = rewards_sample + gamma * tf.reduce_max(
                future_rewards, axis=1
            )
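            # In symbols this is the one-step temporal-difference target:
            #   Q_target(s, a) = r + gamma * max_a' Q_target_net(s', a')
            # where the max is taken over the target network's outputs for the next state.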
            # If the sampled transition ended the episode, set the target to -1
            updated_q_values = updated_q_values * (1 - done_sample) - done_sample

            # Create a mask so we only calculate loss on the updated Q-values
            masks = tf.one_hot(action_sample, num_actions)

            with tf.GradientTape() as tape:
                # Train the model on the states and updated Q-values
                q_values = model(state_sample)
                # Apply the masks to the Q-values to get the Q-value for action taken
                q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                # Calculate loss between new Q-value and old Q-value
                loss = loss_function(updated_q_values, q_action)

            # Backpropagation
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if frame_count % update_target_network == 0:
            print("Updating q~")
            # update the target network with new weights
            model_target.set_weights(model.get_weights())

        # Limit the state and reward history
        if len(rewards_history) > max_memory_length:
            del rewards_history[:1]
            del state_history[:1]
            del state_next_history[:1]
            del action_history[:1]
            del done_history[:1]

        if done:
            break

    # Update running reward to check condition for solving
    episode_reward_history.append(episode_reward)
    if len(episode_reward_history) > 100:
        del episode_reward_history[:1]
    running_reward = np.mean(episode_reward_history)
    episode_count += 1

    # save the weights if we've reached a new high
    if running_reward > max_reward:
        save_weights(env_name2, model, episode_count, current_time)
        max_reward = running_reward

    if running_reward > 75:  # Condition to consider the task solved
        save_weights(env_name2, model, episode_count, current_time)
        print("Solved at episode {}!".format(episode_count))
        break

View File

@@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Run a pre-trained model\n",
"\n",
"This notebook loads a pre-trained model and uses it to play games. \n",
"Note that it does not render the image of the game, it just prints out the episodic score. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pygame 2.1.2 (SDL 2.0.18, Python 3.10.8)\n",
"Hello from the pygame community. https://www.pygame.org/contribute.html\n"
]
}
],
"source": [
"# sanity check: can we create breakwall?\n",
"import gym\n",
"e = gym.make('gym_gs:BreakwallNoFrameskip-v1')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting git+https://github.com/openai/baselines.git\n",
" Cloning https://github.com/openai/baselines.git to c:\\users\\gofor\\appdata\\local\\temp\\pip-req-build-s405pyio\n",
" Resolved https://github.com/openai/baselines.git to commit ea25b9e8b234e6ee1bca43083f8f3cf974143998\n",
" Preparing metadata (setup.py): started\n",
" Preparing metadata (setup.py): finished with status 'done'\n",
"Requirement already satisfied: gym<0.16.0,>=0.15.4 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (0.15.7)\n",
"Requirement already satisfied: scipy in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.9.3)\n",
"Requirement already satisfied: tqdm in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (4.64.1)\n",
"Requirement already satisfied: joblib in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.2.0)\n",
"Requirement already satisfied: cloudpickle in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (1.2.2)\n",
"Requirement already satisfied: click in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (8.1.3)\n",
"Requirement already satisfied: opencv-python in c:\\users\\gofor\\myvenv\\lib\\site-packages (from baselines==0.1.6) (4.6.0.66)\n",
"Requirement already satisfied: numpy>=1.10.4 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.23.5)\n",
"Requirement already satisfied: pyglet<=1.5.0,>=1.4.0 in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.5.0)\n",
"Requirement already satisfied: six in c:\\users\\gofor\\myvenv\\lib\\site-packages (from gym<0.16.0,>=0.15.4->baselines==0.1.6) (1.16.0)\n",
"Requirement already satisfied: colorama in c:\\users\\gofor\\myvenv\\lib\\site-packages (from click->baselines==0.1.6) (0.4.6)\n",
"Requirement already satisfied: future in c:\\users\\gofor\\myvenv\\lib\\site-packages (from pyglet<=1.5.0,>=1.4.0->gym<0.16.0,>=0.15.4->baselines==0.1.6) (0.18.2)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" Running command git clone --filter=blob:none --quiet https://github.com/openai/baselines.git 'C:\\Users\\gofor\\AppData\\Local\\Temp\\pip-req-build-s405pyio'\n",
"\n",
"[notice] A new release of pip available: 22.2.2 -> 22.3.1\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"# install baselines and other stuff\n",
"!pip install git+https://github.com/openai/baselines.git"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded gym\n",
"Model weights look loadable ./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424.data-00000-of-00001\n",
"Model loaded weights - starting sim\n",
"Game over at frame 278 rew 2.0 rewards/frame: 0.007194244604316547\n",
"Game over at frame 453 rew 3.0 rewards/frame: 0.006622516556291391\n",
"Game over at frame 631 rew 4.0 rewards/frame: 0.006339144215530904\n",
"Game over at frame 906 rew 6.0 rewards/frame: 0.006622516556291391\n",
"Game over at frame 976 rew 6.0 rewards/frame: 0.006147540983606557\n",
"Sim ended : rew is 6.0\n"
]
}
],
"source": [
"## full check - can we use the full opencv/ openai version \n",
"## of the gym?\n",
"\n",
"# Script to test a pre-trained model\n",
"# Written by Matthew Yee-King\n",
"# MIT license \n",
"# https://mit-license.org/\n",
"\n",
"import sys\n",
"import os\n",
"from baselines.common.atari_wrappers import make_atari, wrap_deepmind\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"import datetime\n",
"import random\n",
"import time \n",
"\n",
"env_name = \"gym_gs:BreakwallNoFrameskip-v1\" \n",
"# for notebook users - make sure you have uploaded your pre-trained\n",
"# models... then adjust this to reflect the file path\n",
"model_file = \"./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424\"\n",
"\n",
"def create_q_model(num_actions):\n",
" # Network defined by the Deepmind paper\n",
" inputs = layers.Input(shape=(84, 84, 4,))\n",
" # Convolutions on the frames on the screen\n",
" layer1 = layers.Conv2D(32, 8, strides=4, activation=\"relu\")(inputs) \n",
" layer2 = layers.Conv2D(64, 4, strides=2, activation=\"relu\")(layer1)\n",
" layer3 = layers.Conv2D(64, 3, strides=1, activation=\"relu\")(layer2)\n",
" layer4 = layers.Flatten()(layer3)\n",
" layer5 = layers.Dense(512, activation=\"relu\")(layer4) \n",
" action = layers.Dense(num_actions, activation=\"linear\")(layer5) \n",
" return keras.Model(inputs=inputs, outputs=action)\n",
"\n",
"def create_env(env_name, seed=42):\n",
" try:\n",
" # Use the Baseline Atari environment because of Deepmind helper functions\n",
" env = make_atari(env_name)\n",
" # Warp the frames, grey scale, stake four frame and scale to smaller ratio\n",
" env = wrap_deepmind(env, frame_stack=True, scale=True)\n",
" print(\"Loaded gym\")\n",
" env.seed(seed)\n",
" return env\n",
" except:\n",
" print(\"Failed to make gym env\", env_name)\n",
" return None\n",
"\n",
"def run_sim(env, model, frame_count):\n",
" state = np.array(env.reset())\n",
" total_reward = 0\n",
" for i in range(frame_count):\n",
" # in the notebook version we cannot really \n",
" # render in realtime, so you just have\n",
" # to check the score :( \n",
" env.render('human')\n",
" state_tensor = keras.backend.constant(state)\n",
" state_tensor = keras.backend.expand_dims(state_tensor, 0)\n",
" action_values = model(state_tensor, training=False)\n",
" # Take best action\n",
" action = keras.backend.argmax(action_values[0]).numpy()\n",
" state, reward, done, _ = env.step(action)\n",
" state = np.array(state)\n",
" total_reward += reward\n",
" if done:\n",
" print(\"Game over at frame\", i, \"rew\", total_reward, \"rewards/frame: \", total_reward/i)\n",
" env.reset()\n",
" #break\n",
" #time.sleep(0.1)\n",
" print(\"Sim ended : rew is \", total_reward)\n",
"\n",
"def main(env_name, model_file,frame_count=1000, seed=42):\n",
" env = create_env(env_name=env_name)\n",
" assert env is not None, \"Failed to make env \" + env_name\n",
" model = create_q_model(num_actions=env.action_space.n)\n",
" model_testfile = model_file + \".data-00000-of-00001\"\n",
" assert os.path.exists(model_testfile), \"Failed to load model: \" + model_testfile\n",
" print(\"Model weights look loadable\", model_testfile)\n",
" model.load_weights(model_file)\n",
" print(\"Model loaded weights - starting sim\")\n",
" run_sim(env, model, frame_count)\n",
" \n",
"main(env_name, model_file, frame_count=1000)\n",
"\n",
"# LEV"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,77 @@
# Script to test a pre-trained model
# Written by Matthew Yee-King
# MIT license
# https://mit-license.org/
import sys
import os
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import datetime
import random
import time
env_name = "gym_gs:BreakwallNoFrameskip-v1"
# note: the saved weight files use '_' rather than ':' in the env name (see env_name2 in the training script)
model_file = "./pre-trained/mac_hard_breakwall/gym_gs_BreakwallNoFrameskip-v1_20211018-114642_5424"

def create_q_model(num_actions):
    # Network defined by the Deepmind paper
    inputs = layers.Input(shape=(84, 84, 4,))
    # Convolutions on the frames on the screen
    layer1 = layers.Conv2D(32, 8, strides=4, activation="relu")(inputs)
    layer2 = layers.Conv2D(64, 4, strides=2, activation="relu")(layer1)
    layer3 = layers.Conv2D(64, 3, strides=1, activation="relu")(layer2)
    layer4 = layers.Flatten()(layer3)
    layer5 = layers.Dense(512, activation="relu")(layer4)
    action = layers.Dense(num_actions, activation="linear")(layer5)
    return keras.Model(inputs=inputs, outputs=action)

def create_env(env_name, seed=42):
    try:
        # Use the Baseline Atari environment because of Deepmind helper functions
        env = make_atari(env_name)
        # Wrap the frames, convert to grey scale, stack four frames and scale to a smaller ratio
        env = wrap_deepmind(env, frame_stack=True, scale=True)
        print("Loaded gym")
        env.seed(seed)
        return env
    except:
        print("Failed to make gym env", env_name)
        return None

def run_sim(env, model, frame_count):
    state = np.array(env.reset())
    total_reward = 0
    for i in range(frame_count):
        env.render('human')
        state_tensor = keras.backend.constant(state)
        state_tensor = keras.backend.expand_dims(state_tensor, 0)
        action_values = model(state_tensor, training=False)
        # Take best action
        action = keras.backend.argmax(action_values[0]).numpy()
        state, reward, done, _ = env.step(action)
        state = np.array(state)
        total_reward += reward
        if done:
            print("Game over at frame", i, "rew", total_reward)
            env.reset()
            #break
        #time.sleep(0.1)
    print("Sim ended : rew is ", total_reward)

def main(env_name, model_file, frame_count=1000, seed=42):
    env = create_env(env_name=env_name)
    assert env is not None, "Failed to make env " + env_name
    model = create_q_model(num_actions=env.action_space.n)
    model_testfile = model_file + ".data-00000-of-00001"
    assert os.path.exists(model_testfile), "Failed to load model: " + model_testfile
    print("Model weights look loadable", model_testfile)
    model.load_weights(model_file)
    print("Model loaded weights - starting sim")
    run_sim(env, model, frame_count)


main(env_name, model_file, frame_count=1000)