Week 3 notes and notebooks

This commit is contained in:
levdoescode
2022-11-16 01:32:28 -05:00
parent b5bd3f476f
commit c26d4ad7f8
12 changed files with 1442 additions and 0 deletions

Submodule CM3020 Artificial Intelligence/Week 3/3.8 Lab Files/gs-gym added at 23a1bed2cd

View File

@ -0,0 +1,32 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,320 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "0bbb53e7-84e7-4d65-baab-05421b7e7bc3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.10.0\n"
]
}
],
"source": [
"import keras\n",
"print(keras.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d23266a-38fb-430f-9502-85526a86a5d1",
"metadata": {},
"outputs": [],
"source": [
"from keras import layers\n",
"inputs = layers.Input(shape=(1,))\n",
"layer1 = layers.Dense(512, activation=\"relu\")(inputs)\n",
"outputs = layers.Dense(3, activation=\"linear\")(layer1)\n",
"nn_model = keras.Model(inputs=inputs, outputs=outputs)\n",
"nn_model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "397f97d8-18de-4855-ad94-89e461f85b6e",
"metadata": {},
"outputs": [],
"source": [
"output = nn_model(0.5) # throws an error - the model expects a tensor/array input, not a float"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bded47b5-69fe-40cf-9cb4-78a360ee4244",
"metadata": {},
"outputs": [],
"source": [
"input = keras.backend.constant([[0.5]])\n",
"output = nn_model(input)\n",
"print(output)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d10e5212-c663-49c2-bb0f-34956c23070a",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"inputs = np.arange(0, 1.1, 0.1)\n",
"input = keras.backend.constant([[0.5]])\n",
"input2 = keras.backend.constant([inputs])\n",
"output = nn_model(input)\n",
"print(input)\n",
"print(input2)\n",
"print(output)\n",
"print(inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0111c2c1-6c00-4859-8715-e74fcc3883bd",
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"e = gym.make('MountainCarContinuous-v0')\n",
"e.reset()\n",
"e.render()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dcfd9d1-43b6-48f0-9435-9ef75158152c",
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"e = gym.make('MountainCarContinuous-v0')\n",
"s1 = e.reset()\n",
"print(s1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d4a9391-4af2-47c8-b310-06c5b3016f59",
"metadata": {},
"outputs": [],
"source": [
"e.action_space.sample()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77781c43-bb1e-41ce-b878-2245edf9a094",
"metadata": {},
"outputs": [],
"source": [
"from keras import layers\n",
"inputs = layers.Input(shape=(2, ))\n",
"layer1 = layers.Dense(512, activation=\"relu\")(inputs)\n",
"outputs = layers.Dense(1, activation=\"linear\")(layer1)\n",
"nn_model = keras.Model(inputs=inputs, outputs=outputs)\n",
"s = e.reset()\n",
"input = keras.backend.constant([s])\n",
"a = nn_model(input)\n",
"s,r,d,i = e.step(a)\n",
"print(s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e94051cc-7498-4c85-b296-a79669d4bc64",
"metadata": {},
"outputs": [],
"source": [
"# Control the agent with the network\n",
"from keras import layers\n",
"inputs = layers.Input(shape=(2, ))\n",
"layer1 = layers.Dense(512, activation=\"relu\")(inputs)\n",
"outputs = layers.Dense(1, activation=\"linear\")(layer1)\n",
"nn_model = keras.Model(inputs=inputs, outputs=outputs)\n",
"for item in range(10):\n",
" s = e.reset()\n",
" input = keras.backend.constant([s])\n",
" a = nn_model(input)\n",
" s,r,d,i = e.step(a)\n",
" e.render()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0cba086b-4a65-4ee0-92d3-4ee752bf2161",
"metadata": {},
"outputs": [],
"source": [
"s = e.reset()\n",
"input = keras.backend.constant([s]) # s is an array\n",
"a = nn_model(input)\n",
"s,r,d,i = e.step(a)\n",
"print(s)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "fb686ff6-aa76-4446-b81f-1b26db98022d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pygame 2.1.2 (SDL 2.0.18, Python 3.10.8)\n",
"Hello from the pygame community. https://www.pygame.org/contribute.html\n",
"Discrete(4)\n"
]
}
],
"source": [
"# Breakwall environment again\n",
"import keras\n",
"import gym\n",
"# We get a discrete (4) action space\n",
"env_name = \"gym_gs:BreakwallNoFrameskip-v1\"\n",
"e = gym.make(env_name)\n",
"s = e.reset()\n",
"print(e.action_space)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "585e3e61-4745-45f1-beec-d25bf6a78c11",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2\n",
"Observation/ state shape: (600, 780, 3)\n",
"0\n",
"Observation/ state shape: (600, 780, 3)\n",
"3\n",
"Observation/ state shape: (600, 780, 3)\n",
"0\n",
"Observation/ state shape: (600, 780, 3)\n"
]
}
],
"source": [
"for i in range(4):\n",
" print(e.action_space.sample())\n",
" print(\"Observation/ state shape:\", s.shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2a45d350-cf9b-459d-a1f7-1470809e14cf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tf.Tensor([[0. 0. 0. 0.]], shape=(1, 4), dtype=float32)\n"
]
}
],
"source": [
"# We'll update the input layer\n",
"from keras import layers\n",
"inputs = layers.Input(shape=(600,780,3,))\n",
"layer1 = layers.Flatten()(inputs) # We flatten the 3D input layer\n",
"layer2 = layers.Dense(512, activation=\"relu\")(layer1)\n",
"outputs = layers.Dense(4, activation=\"linear\")(layer2)\n",
"nn_model = keras.Model(inputs=inputs, outputs=outputs)\n",
"state = e.reset()\n",
"state_tensor = keras.backend.constant(state)\n",
"state_tensor = keras.backend.expand_dims(state_tensor, 0)\n",
"action_values = nn_model(state_tensor)\n",
"print(action_values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c27e9935-2a2c-4dd4-82e0-43b86d3e0de5",
"metadata": {},
"outputs": [],
"source": [
"# Greedy action selection: take the index of the largest predicted action value.\n",
"action = keras.backend.argmax(action_values[0]).numpy()\n",
"# action = tf.argmax(action_probs[0]).numpy()  # NOTE: would need `import tensorflow as tf` and the renamed variable `action_probs`\n",
"print(keras.backend.argmax([0.1,0.5,0.01,0.2]).numpy())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "053e95dd-48af-4c18-949b-12d5a13e1a3e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tf.Tensor([[-135.33052 -34.96535 -2.6175923 104.20433 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-133.47585 -40.308247 -9.9967165 100.298096 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-133.52586 -34.83712 -9.089392 106.48525 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-142.65623 -37.653713 -12.290092 100.456726]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-139.46967 -29.941523 -15.024807 102.14433 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-137.86325 -28.1824 -18.672073 102.72069 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-134.41286 -29.641228 -12.233908 105.396324]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-139.85345 -27.45109 -11.1273575 93.42384 ]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-134.20053 -29.01193 -11.014997 94.585106]], shape=(1, 4), dtype=float32) 3\n",
"tf.Tensor([[-136.16206 -31.467659 -6.581274 96.69547 ]], shape=(1, 4), dtype=float32) 3\n"
]
}
],
"source": [
"state = e.reset()\n",
"state,r,d,i = e.step(1)\n",
"for i in range(10):\n",
" state_tensor = keras.backend.constant(state)\n",
" state_tensor = keras.backend.expand_dims(state_tensor, 0)\n",
" action_values = nn_model(state_tensor)\n",
" action = keras.backend.argmax(action_values[0]).numpy()\n",
" print(action_values, action)\n",
" state,r,d,i = e.step(1)\n",
" e.render()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ec755efa-2052-422b-b49d-fe46a343d568",
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"for n in gym.envs.registry.all():\n",
" print(n)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "01167b07-819c-40c8-a5c8-5463c43c8925",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import gym\n",
"import matplotlib.pyplot as plt\n",
"#e = gym.make('gym_gs:BreakwallNoFrameskip-v1')\n",
"env_name = \"MountainCarContinuous-v0\"\n",
"e = gym.make(env_name)\n",
"e.reset()\n",
"e.render()\n",
"# img = e.render('rgb_array')\n",
"# plt.imshow(e.render(mode='rgb_array'))\n",
"# img = e.render('rgb_array')\n",
"# plt.imshow(e.render(mode='rgb_array'))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9328b324-7f9a-4178-94c4-d1f8cec07542",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.51512, 0. ])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"e.reset()\n",
"e.step(e.action_space.sample())\n",
"e.render()\n",
"e.reset()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "547ccc71-8e9e-4d56-8cea-bfeddd002845",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "77a096e6-3fa9-48be-8bf0-06367ac9f9aa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,75 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9d9f64ee-0186-4475-ba63-b5a569662227",
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"for n in gym.envs.registry.all():\n",
" print(n)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ace720b1-7121-4a81-8369-f5cfc40cbe36",
"metadata": {},
"outputs": [
{
"ename": "Exception",
"evalue": "Please add mujoco library to your PATH:\nset PATH=C:\\Users\\gofor\\.mujoco\\mujoco210\\bin;%PATH%",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn [1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmujoco_py\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[0;32m 3\u001b[0m mj_path \u001b[38;5;241m=\u001b[39m mujoco_py\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39mdiscover_mujoco()\n",
"File \u001b[1;32m~\\venv310\\lib\\site-packages\\mujoco_py\\__init__.py:2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#!/usr/bin/env python\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmujoco_py\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbuilder\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m cymj, ignore_mujoco_warnings, functions, MujocoException\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmujoco_py\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgenerated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m const\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmujoco_py\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmjrenderpool\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MjRenderPool\n",
"File \u001b[1;32m~\\venv310\\lib\\site-packages\\mujoco_py\\builder.py:504\u001b[0m\n\u001b[0;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39mlib\u001b[38;5;241m.\u001b[39m__fun\n\u001b[0;32m 503\u001b[0m mujoco_path \u001b[38;5;241m=\u001b[39m discover_mujoco()\n\u001b[1;32m--> 504\u001b[0m cymj \u001b[38;5;241m=\u001b[39m \u001b[43mload_cython_ext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmujoco_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;66;03m# Trick to expose all mj* functions from mujoco in mujoco_py.*\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mdict2\u001b[39;00m(\u001b[38;5;28mobject\u001b[39m):\n",
"File \u001b[1;32m~\\venv310\\lib\\site-packages\\mujoco_py\\builder.py:83\u001b[0m, in \u001b[0;36mload_cython_ext\u001b[1;34m(mujoco_path)\u001b[0m\n\u001b[0;32m 81\u001b[0m var \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPATH\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 82\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m var \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron \u001b[38;5;129;01mor\u001b[39;00m lib_path \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m os\u001b[38;5;241m.\u001b[39menviron[var]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m;\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m---> 83\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease add mujoco library to your PATH:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 84\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mset \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m;\u001b[39m\u001b[38;5;132;01m%%\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;132;01m%%\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (var, lib_path, var))\n\u001b[0;32m 85\u001b[0m Builder \u001b[38;5;241m=\u001b[39m WindowsExtensionBuilder\n\u001b[0;32m 86\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[1;31mException\u001b[0m: Please add mujoco library to your PATH:\nset PATH=C:\\Users\\gofor\\.mujoco\\mujoco210\\bin;%PATH%"
]
}
],
"source": [
"import mujoco_py\n",
"import os\n",
"mj_path = mujoco_py.utils.discover_mujoco()\n",
"xml_path = os.path.join(mj_path, 'model', 'humanoid.xml')\n",
"model = mujoco_py.load_model_from_path(xml_path)\n",
"sim = mujoco_py.MjSim(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa696af6-b6d2-4916-be15-93052eb43b3c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Run a pre-trained model\n",
"\n",
"This notebook loads a pre-trained model and uses it to play games. \n",
"Note that it does not render the image of the game, it just prints out the episodic score. "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'gym'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn [3], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# sanity check: can we create breakwall?\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgym\u001b[39;00m\n\u001b[0;32m 3\u001b[0m e \u001b[38;5;241m=\u001b[39m gym\u001b[38;5;241m.\u001b[39mmake(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgym_gs:BreakwallNoFrameskip-v1\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'gym'"
]
}
],
"source": [
"# sanity check: can we create breakwall?\n",
"import gym\n",
"e = gym.make('gym_gs:BreakwallNoFrameskip-v1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# install baselines and other stuff\n",
"!pip install git+https://github.com/openai/baselines.git"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## full check - can we use the full opencv/ openai version \n",
"## of the gym?\n",
"\n",
"# Script to test a pre-trained model\n",
"# Written by Matthew Yee-King\n",
"# MIT license \n",
"# https://mit-license.org/\n",
"\n",
"import sys\n",
"import os\n",
"from baselines.common.atari_wrappers import make_atari, wrap_deepmind\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"import datetime\n",
"import random\n",
"import time \n",
"\n",
"env_name = \"gym_gs:BreakwallNoFrameskip-v1\" \n",
"# for notebook users - make sure you have uploaded your pre-trained\n",
"# models... then adjust this to reflect the file path\n",
"model_file = \"./pre-trained/mac_hard_breakwall/gym_gs:BreakwallNoFrameskip-v1_20211018-114642_5424\"\n",
"\n",
"def create_q_model(num_actions):\n",
" # Network defined by the Deepmind paper\n",
" inputs = layers.Input(shape=(84, 84, 4,))\n",
" # Convolutions on the frames on the screen\n",
" layer1 = layers.Conv2D(32, 8, strides=4, activation=\"relu\")(inputs) \n",
" layer2 = layers.Conv2D(64, 4, strides=2, activation=\"relu\")(layer1)\n",
" layer3 = layers.Conv2D(64, 3, strides=1, activation=\"relu\")(layer2)\n",
" layer4 = layers.Flatten()(layer3)\n",
" layer5 = layers.Dense(512, activation=\"relu\")(layer4) \n",
" action = layers.Dense(num_actions, activation=\"linear\")(layer5) \n",
" return keras.Model(inputs=inputs, outputs=action)\n",
"\n",
"def create_env(env_name, seed=42):\n",
" try:\n",
" # Use the Baseline Atari environment because of Deepmind helper functions\n",
" env = make_atari(env_name)\n",
"# Warp the frames, grey scale, stack four frames and scale to a smaller ratio\n",
" env = wrap_deepmind(env, frame_stack=True, scale=True)\n",
" print(\"Loaded gym\")\n",
" env.seed(seed)\n",
" return env\n",
" except:\n",
" print(\"Failed to make gym env\", env_name)\n",
" return None\n",
"\n",
"def run_sim(env, model, frame_count):\n",
" state = np.array(env.reset())\n",
" total_reward = 0\n",
" for i in range(frame_count):\n",
" # in the notebook version we cannot really \n",
" # render in realtime, so you just have\n",
" # to check the score :( \n",
" #env.render('human')\n",
" state_tensor = keras.backend.constant(state)\n",
" state_tensor = keras.backend.expand_dims(state_tensor, 0)\n",
" action_values = model(state_tensor, training=False)\n",
" # Take best action\n",
" action = keras.backend.argmax(action_values[0]).numpy()\n",
" state, reward, done, _ = env.step(action)\n",
" state = np.array(state)\n",
" total_reward += reward\n",
" if done:\n",
" print(\"Game over at frame\", i, \"rew\", total_reward)\n",
" env.reset()\n",
" #break\n",
" #time.sleep(0.1)\n",
" print(\"Sim ended : rew is \", total_reward)\n",
"\n",
"def main(env_name, model_file,frame_count=1000, seed=42):\n",
" env = create_env(env_name=env_name)\n",
" assert env is not None, \"Failed to make env \" + env_name\n",
" model = create_q_model(num_actions=env.action_space.n)\n",
" model_testfile = model_file + \".data-00000-of-00001\"\n",
" assert os.path.exists(model_testfile), \"Failed to load model: \" + model_testfile\n",
" print(\"Model weights look loadable\", model_testfile)\n",
" model.load_weights(model_file)\n",
" print(\"Model loaded weights - starting sim\")\n",
" run_sim(env, model, frame_count)\n",
" \n",
"main(env_name, model_file, frame_count=1000)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,49 @@
# What is a gym?
It's a way of providing standardized environments (APIs) for reinforcement learning algorithms to operate in.
Allows the developer to focus on the agent instead of the simulation.
Includes a versioned, standard set of environments enabling easy comparison.
---
There are various gym-like systems such as:
* Arcade Learning Environment (2013)
* Vizdoom (2016)
* OpenAI gym (2016)
# Open AI gym
Aims to combine benchmark collections in a convenient and accessible software package.
# Keras
A Neural network is a set of processing units which are interconnected, they are fed with values and also output values.
Keras is a high level neural network API sitting atop tensorflow.
The DQN we'll look at uses Keras.
# Layers
Neural networks are made of layers of which there are different types with different nodes in them which process signals in different ways.
Keras provides many different types of layers
https://keras.io/api/layers
Convolution layers are great for image processing.
Recurrent layers for sequential data.
Attention layers for sequential data (better).
DQN uses some common layers plus some convolutional layers.
The nodes are fed data, the nodes create signals with value which are sent to other nodes. The results will be back propagated to the network again for processing.
# Convolution layers
Convolution is a filtering technique often used for images but also other signals. This is done by adding together scaled values of the original pixel and its surrounding pixels.
Examples of convolution filters are edge detection and blur filters.
We can visualize what convolutional layers are doing to image data as it passes through the neural network.
https://github.com/gabrielpierobon/cnnshapes/