Add notebooks

This commit is contained in:
Fuyang Liu
2017-06-22 23:05:34 +02:00
parent 1adcbb999f
commit cdd6704b86
19 changed files with 5775 additions and 0 deletions

View File

@ -0,0 +1,397 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 201 Torch and Numpy\n",
"\n",
"For more, visit my tutorial page: https://morvanzhou.github.io/tutorials/\n",
"\n",
"My YouTube channel: https://www.youtube.com/user/MorvanZhou\n",
"\n",
"Dependencies:\n",
"* torch: 0.1.11\n",
"* numpy\n",
"\n",
"Details about math operations in torch can be found at: http://pytorch.org/docs/torch.html#math-operations\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"numpy array: [[0 1 2]\n",
" [3 4 5]] \n",
"torch tensor: \n",
" 0 1 2\n",
" 3 4 5\n",
"[torch.LongTensor of size 2x3]\n",
" \n",
"tensor to array: [[0 1 2]\n",
" [3 4 5]]\n"
]
}
],
"source": [
"# convert numpy to tensor or vice versa\n",
"np_data = np.arange(6).reshape((2, 3))\n",
"torch_data = torch.from_numpy(np_data)\n",
"tensor2array = torch_data.numpy()\n",
"print(\n",
" '\\nnumpy array:', np_data, # [[0 1 2], [3 4 5]]\n",
" '\\ntorch tensor:', torch_data, # 0 1 2 \\n 3 4 5 [torch.LongTensor of size 2x3]\n",
" '\\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"abs \n",
"numpy: [1 2 1 2] \n",
"torch: \n",
" 1\n",
" 2\n",
" 1\n",
" 2\n",
"[torch.FloatTensor of size 4]\n",
"\n"
]
}
],
"source": [
"# abs\n",
"data = [-1, -2, 1, 2]\n",
"tensor = torch.FloatTensor(data) # 32-bit floating point\n",
"print(\n",
" '\\nabs',\n",
" '\\nnumpy: ', np.abs(data), # [1 2 1 2]\n",
" '\\ntorch: ', torch.abs(tensor) # [1 2 1 2]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 1\n",
" 2\n",
" 1\n",
" 2\n",
"[torch.FloatTensor of size 4]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor.abs()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"sin \n",
"numpy: [-0.84147098 -0.90929743 0.84147098 0.90929743] \n",
"torch: \n",
"-0.8415\n",
"-0.9093\n",
" 0.8415\n",
" 0.9093\n",
"[torch.FloatTensor of size 4]\n",
"\n"
]
}
],
"source": [
"# sin\n",
"print(\n",
" '\\nsin',\n",
" '\\nnumpy: ', np.sin(data), # [-0.84147098 -0.90929743 0.84147098 0.90929743]\n",
" '\\ntorch: ', torch.sin(tensor) # [-0.8415 -0.9093 0.8415 0.9093]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 0.2689\n",
" 0.1192\n",
" 0.7311\n",
" 0.8808\n",
"[torch.FloatTensor of size 4]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor.sigmoid()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 0.3679\n",
" 0.1353\n",
" 2.7183\n",
" 7.3891\n",
"[torch.FloatTensor of size 4]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor.exp()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"mean \n",
"numpy: 0.0 \n",
"torch: 0.0\n"
]
}
],
"source": [
"# mean\n",
"print(\n",
" '\\nmean',\n",
" '\\nnumpy: ', np.mean(data), # 0.0\n",
" '\\ntorch: ', torch.mean(tensor) # 0.0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"matrix multiplication (matmul) \n",
"numpy: [[ 7 10]\n",
" [15 22]] \n",
"torch: \n",
" 7 10\n",
" 15 22\n",
"[torch.FloatTensor of size 2x2]\n",
"\n"
]
}
],
"source": [
"# matrix multiplication\n",
"data = [[1,2], [3,4]]\n",
"tensor = torch.FloatTensor(data) # 32-bit floating point\n",
"# correct method\n",
"print(\n",
" '\\nmatrix multiplication (matmul)',\n",
" '\\nnumpy: ', np.matmul(data, data), # [[7, 10], [15, 22]]\n",
" '\\ntorch: ', torch.mm(tensor, tensor) # [[7, 10], [15, 22]]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"matrix multiplication (dot) \n",
"numpy: [[ 7 10]\n",
" [15 22]] \n",
"torch: 30.0\n"
]
}
],
"source": [
"# incorrect method\n",
"data = np.array(data)\n",
"print(\n",
" '\\nmatrix multiplication (dot)',\n",
" '\\nnumpy: ', data.dot(data), # [[7, 10], [15, 22]]\n",
" '\\ntorch: ', tensor.dot(tensor) # this will convert tensor to [1,2,3,4], you'll get 30.0\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that:\n",
"\n",
"torch.dot(tensor1, tensor2) → float\n",
"\n",
"Computes the dot product (inner product) of two tensors. Both tensors are treated as 1-D vectors."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 7 10\n",
" 15 22\n",
"[torch.FloatTensor of size 2x2]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor.mm(tensor)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 1 4\n",
" 9 16\n",
"[torch.FloatTensor of size 2x2]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor * tensor"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor.dot(tensor)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,293 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 202 Variable\n",
"\n",
"For more, visit my tutorial page: https://morvanzhou.github.io/tutorials/\n",
"\n",
"My YouTube channel: https://www.youtube.com/user/MorvanZhou\n",
"\n",
"Dependencies:\n",
"* torch: 0.1.11\n",
"\n",
"A Variable in torch is used to build a computational graph,\n",
"but this graph is dynamic, unlike the static graphs in TensorFlow or Theano.\n",
"So torch does not need placeholders; you can pass Variables directly into the computational graph.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"from torch.autograd import Variable"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" 1 2\n",
" 3 4\n",
"[torch.FloatTensor of size 2x2]\n",
"\n",
"Variable containing:\n",
" 1 2\n",
" 3 4\n",
"[torch.FloatTensor of size 2x2]\n",
"\n"
]
}
],
"source": [
"tensor = torch.FloatTensor([[1,2],[3,4]]) # build a tensor\n",
"variable = Variable(tensor, requires_grad=True) # build a variable, usually for compute gradients\n",
"\n",
"print(tensor) # [torch.FloatTensor of size 2x2]\n",
"print(variable) # [torch.FloatTensor of size 2x2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So far, the tensor and the variable look the same.\n",
"\n",
"However, the variable is part of the computational graph and therefore part of the automatic gradient computation (autograd).\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.5\n",
"Variable containing:\n",
" 7.5000\n",
"[torch.FloatTensor of size 1]\n",
"\n"
]
}
],
"source": [
"t_out = torch.mean(tensor*tensor) # x^2\n",
"v_out = torch.mean(variable*variable) # x^2\n",
"print(t_out)\n",
"print(v_out)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"v_out.backward() # backpropagation from v_out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$$ v_{out} = \\frac{1}{4} \\sum_i variable_i^2 $$\n",
"\n",
"The gradient of $v_{out}$ w.r.t. the variable is:\n",
"\n",
"$$ \\frac{d(v_{out})}{d(variable)} = \\frac{1}{4} \\cdot 2 \\cdot variable = \\frac{variable}{2} $$\n",
"\n",
"Let's check the result PyTorch calculated for us below:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Variable containing:\n",
" 0.5000 1.0000\n",
" 1.5000 2.0000\n",
"[torch.FloatTensor of size 2x2]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"variable.grad"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Variable containing:\n",
" 1 2\n",
" 3 4\n",
"[torch.FloatTensor of size 2x2]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"variable # this is data in variable format"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
" 1 2\n",
" 3 4\n",
"[torch.FloatTensor of size 2x2]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"variable.data # this is data in tensor format"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1., 2.],\n",
" [ 3., 4.]], dtype=float32)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"variable.data.numpy() # numpy format"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that we called `.backward()` on `v_out`, but it is `variable` whose `grad` has been assigned new values.\n",
"\n",
"This is because the line\n",
"```\n",
"v_out = torch.mean(variable*variable)\n",
"```\n",
"creates a new variable `v_out` and connects it to `variable` in the computation graph."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.autograd.variable.Variable"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(v_out)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.FloatTensor"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(v_out.data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,133 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 303 Build NN Quickly\n",
"\n",
"For more, visit my tutorial page: https://morvanzhou.github.io/tutorials/\n",
"\n",
"My YouTube channel: https://www.youtube.com/user/MorvanZhou\n",
"\n",
"Dependencies:\n",
"* torch: 0.1.11"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn.functional as F"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# replace following class code with an easy sequential network\n",
"class Net(torch.nn.Module):\n",
" def __init__(self, n_feature, n_hidden, n_output):\n",
" super(Net, self).__init__()\n",
" self.hidden = torch.nn.Linear(n_feature, n_hidden) # hidden layer\n",
" self.predict = torch.nn.Linear(n_hidden, n_output) # output layer\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(self.hidden(x)) # activation function for hidden layer\n",
" x = self.predict(x) # linear output\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"net1 = Net(1, 10, 1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# easy and fast way to build your network\n",
"net2 = torch.nn.Sequential(\n",
" torch.nn.Linear(1, 10),\n",
" torch.nn.ReLU(),\n",
" torch.nn.Linear(10, 1)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Net (\n",
" (hidden): Linear (1 -> 10)\n",
" (predict): Linear (10 -> 1)\n",
")\n",
"Sequential (\n",
" (0): Linear (1 -> 10)\n",
" (1): ReLU ()\n",
" (2): Linear (10 -> 1)\n",
")\n"
]
}
],
"source": [
"print(net1) # net1 architecture\n",
"print(net2) # net2 architecture"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,177 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 305 Batch Train\n",
"\n",
"For more, visit my tutorial page: https://morvanzhou.github.io/tutorials/\n",
"\n",
"My YouTube channel: https://www.youtube.com/user/MorvanZhou\n",
"\n",
"Dependencies:\n",
"* torch: 0.1.11"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<torch._C.Generator at 0x7faffc159918>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"import torch.utils.data as Data\n",
"\n",
"torch.manual_seed(1) # reproducible"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"BATCH_SIZE = 5\n",
"# BATCH_SIZE = 8"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"x = torch.linspace(1, 10, 10) # this is x data (torch tensor)\n",
"y = torch.linspace(10, 1, 10) # this is y data (torch tensor)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)\n",
"loader = Data.DataLoader(\n",
" dataset=torch_dataset, # torch TensorDataset format\n",
" batch_size=BATCH_SIZE, # mini batch size\n",
" shuffle=True, # random shuffle for training\n",
" num_workers=2, # subprocesses for loading data\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch: 0 | Step: 0 | batch x: [ 6. 7. 2. 3. 1.] | batch y: [ 5. 4. 9. 8. 10.]\n",
"Epoch: 0 | Step: 1 | batch x: [ 9. 10. 4. 8. 5.] | batch y: [ 2. 1. 7. 3. 6.]\n",
"Epoch: 1 | Step: 0 | batch x: [ 3. 4. 2. 9. 10.] | batch y: [ 8. 7. 9. 2. 1.]\n",
"Epoch: 1 | Step: 1 | batch x: [ 1. 7. 8. 5. 6.] | batch y: [ 10. 4. 3. 6. 5.]\n",
"Epoch: 2 | Step: 0 | batch x: [ 3. 9. 2. 6. 7.] | batch y: [ 8. 2. 9. 5. 4.]\n",
"Epoch: 2 | Step: 1 | batch x: [ 10. 4. 8. 1. 5.] | batch y: [ 1. 7. 3. 10. 6.]\n"
]
}
],
"source": [
"for epoch in range(3): # train entire dataset 3 times\n",
" for step, (batch_x, batch_y) in enumerate(loader): # for each training step\n",
" # train your data...\n",
" print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',\n",
" batch_x.numpy(), '| batch y: ', batch_y.numpy())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Suppose a different batch size that does not evenly divide the number of data entries:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch: 0 | Step: 0 | batch x: [ 3. 10. 9. 4. 7. 8. 2. 1.] | batch y: [ 8. 1. 2. 7. 4. 3. 9. 10.]\n",
"Epoch: 0 | Step: 1 | batch x: [ 5. 6.] | batch y: [ 6. 5.]\n",
"Epoch: 1 | Step: 0 | batch x: [ 4. 8. 3. 2. 1. 10. 5. 6.] | batch y: [ 7. 3. 8. 9. 10. 1. 6. 5.]\n",
"Epoch: 1 | Step: 1 | batch x: [ 7. 9.] | batch y: [ 4. 2.]\n",
"Epoch: 2 | Step: 0 | batch x: [ 6. 2. 4. 10. 9. 3. 8. 5.] | batch y: [ 5. 9. 7. 1. 2. 8. 3. 6.]\n",
"Epoch: 2 | Step: 1 | batch x: [ 7. 1.] | batch y: [ 4. 10.]\n"
]
}
],
"source": [
"BATCH_SIZE = 8\n",
"loader = Data.DataLoader(\n",
" dataset=torch_dataset, # torch TensorDataset format\n",
" batch_size=BATCH_SIZE, # mini batch size\n",
" shuffle=True, # random shuffle for training\n",
" num_workers=2, # subprocesses for loading data\n",
")\n",
"for epoch in range(3): # train entire dataset 3 times\n",
" for step, (batch_x, batch_y) in enumerate(loader): # for each training step\n",
" # train your data...\n",
" print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',\n",
" batch_x.numpy(), '| batch y: ', batch_y.numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,433 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 405 DQN Reinforcement Learning\n",
"\n",
"For more, visit my tutorial page: https://morvanzhou.github.io/tutorials/\n",
"\n",
"My YouTube channel: https://www.youtube.com/user/MorvanZhou\n",
"\n",
"More about reinforcement learning: https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/\n",
"\n",
"Dependencies:\n",
"* torch: 0.1.11\n",
"* gym: 0.8.1\n",
"* numpy"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"from torch.autograd import Variable\n",
"import torch.nn.functional as F\n",
"import numpy as np\n",
"import gym"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-06-20 22:23:40,418] Making new env: CartPole-v0\n"
]
}
],
"source": [
"# Hyper Parameters\n",
"BATCH_SIZE = 32\n",
"LR = 0.01 # learning rate\n",
"EPSILON = 0.9 # greedy policy\n",
"GAMMA = 0.9 # reward discount\n",
"TARGET_REPLACE_ITER = 100 # target update frequency\n",
"MEMORY_CAPACITY = 2000\n",
"env = gym.make('CartPole-v0')\n",
"env = env.unwrapped\n",
"N_ACTIONS = env.action_space.n\n",
"N_STATES = env.observation_space.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
" def __init__(self, ):\n",
" super(Net, self).__init__()\n",
" self.fc1 = nn.Linear(N_STATES, 10)\n",
" self.fc1.weight.data.normal_(0, 0.1) # initialization\n",
" self.out = nn.Linear(10, N_ACTIONS)\n",
" self.out.weight.data.normal_(0, 0.1) # initialization\n",
"\n",
" def forward(self, x):\n",
" x = self.fc1(x)\n",
" x = F.relu(x)\n",
" actions_value = self.out(x)\n",
" return actions_value"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class DQN(object):\n",
" def __init__(self):\n",
" self.eval_net, self.target_net = Net(), Net()\n",
"\n",
" self.learn_step_counter = 0 # for target updating\n",
" self.memory_counter = 0 # for storing memory\n",
" self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2)) # initialize memory\n",
" self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)\n",
" self.loss_func = nn.MSELoss()\n",
"\n",
" def choose_action(self, x):\n",
" x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0))\n",
" # input only one sample\n",
" if np.random.uniform() < EPSILON: # greedy\n",
" actions_value = self.eval_net.forward(x)\n",
" action = torch.max(actions_value, 1)[1].data.numpy()[0, 0] # return the argmax\n",
" else: # random\n",
" action = np.random.randint(0, N_ACTIONS)\n",
" return action\n",
"\n",
" def store_transition(self, s, a, r, s_):\n",
" transition = np.hstack((s, [a, r], s_))\n",
" # replace the old memory with new memory\n",
" index = self.memory_counter % MEMORY_CAPACITY\n",
" self.memory[index, :] = transition\n",
" self.memory_counter += 1\n",
"\n",
" def learn(self):\n",
" # target parameter update\n",
" if self.learn_step_counter % TARGET_REPLACE_ITER == 0:\n",
" self.target_net.load_state_dict(self.eval_net.state_dict())\n",
" self.learn_step_counter += 1\n",
"\n",
" # sample batch transitions\n",
" sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)\n",
" b_memory = self.memory[sample_index, :]\n",
" b_s = Variable(torch.FloatTensor(b_memory[:, :N_STATES]))\n",
" b_a = Variable(torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int)))\n",
" b_r = Variable(torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2]))\n",
" b_s_ = Variable(torch.FloatTensor(b_memory[:, -N_STATES:]))\n",
"\n",
" # q_eval w.r.t the action in experience\n",
" q_eval = self.eval_net(b_s).gather(1, b_a) # shape (batch, 1)\n",
" q_next = self.target_net(b_s_).detach() # detach from graph, don't backpropagate\n",
" q_target = b_r + GAMMA * q_next.max(1)[0] # shape (batch, 1)\n",
" loss = self.loss_func(q_eval, q_target)\n",
"\n",
" self.optimizer.zero_grad()\n",
" loss.backward()\n",
" self.optimizer.step()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dqn = DQN()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Collecting experience...\n",
"Ep: 201 | Ep_r: 1.59\n",
"Ep: 202 | Ep_r: 4.18\n",
"Ep: 203 | Ep_r: 2.73\n",
"Ep: 204 | Ep_r: 1.97\n",
"Ep: 205 | Ep_r: 1.18\n",
"Ep: 206 | Ep_r: 0.86\n",
"Ep: 207 | Ep_r: 2.88\n",
"Ep: 208 | Ep_r: 1.63\n",
"Ep: 209 | Ep_r: 3.91\n",
"Ep: 210 | Ep_r: 3.6\n",
"Ep: 211 | Ep_r: 0.98\n",
"Ep: 212 | Ep_r: 3.85\n",
"Ep: 213 | Ep_r: 1.81\n",
"Ep: 214 | Ep_r: 2.32\n",
"Ep: 215 | Ep_r: 3.75\n",
"Ep: 216 | Ep_r: 3.53\n",
"Ep: 217 | Ep_r: 4.75\n",
"Ep: 218 | Ep_r: 2.4\n",
"Ep: 219 | Ep_r: 0.64\n",
"Ep: 220 | Ep_r: 1.15\n",
"Ep: 221 | Ep_r: 2.3\n",
"Ep: 222 | Ep_r: 7.37\n",
"Ep: 223 | Ep_r: 1.25\n",
"Ep: 224 | Ep_r: 5.02\n",
"Ep: 225 | Ep_r: 10.29\n",
"Ep: 226 | Ep_r: 17.54\n",
"Ep: 227 | Ep_r: 36.2\n",
"Ep: 228 | Ep_r: 6.61\n",
"Ep: 229 | Ep_r: 10.04\n",
"Ep: 230 | Ep_r: 55.19\n",
"Ep: 231 | Ep_r: 10.03\n",
"Ep: 232 | Ep_r: 13.25\n",
"Ep: 233 | Ep_r: 8.75\n",
"Ep: 234 | Ep_r: 3.83\n",
"Ep: 235 | Ep_r: -0.92\n",
"Ep: 236 | Ep_r: 5.12\n",
"Ep: 237 | Ep_r: 3.56\n",
"Ep: 238 | Ep_r: 5.69\n",
"Ep: 239 | Ep_r: 8.43\n",
"Ep: 240 | Ep_r: 29.27\n",
"Ep: 241 | Ep_r: 17.95\n",
"Ep: 242 | Ep_r: 44.77\n",
"Ep: 243 | Ep_r: 98.0\n",
"Ep: 244 | Ep_r: 38.78\n",
"Ep: 245 | Ep_r: 45.02\n",
"Ep: 246 | Ep_r: 27.73\n",
"Ep: 247 | Ep_r: 36.96\n",
"Ep: 248 | Ep_r: 48.98\n",
"Ep: 249 | Ep_r: 111.36\n",
"Ep: 250 | Ep_r: 95.61\n",
"Ep: 251 | Ep_r: 149.77\n",
"Ep: 252 | Ep_r: 29.96\n",
"Ep: 253 | Ep_r: 2.79\n",
"Ep: 254 | Ep_r: 20.1\n",
"Ep: 255 | Ep_r: 24.25\n",
"Ep: 256 | Ep_r: 3074.75\n",
"Ep: 257 | Ep_r: 1258.49\n",
"Ep: 258 | Ep_r: 127.39\n",
"Ep: 259 | Ep_r: 283.46\n",
"Ep: 260 | Ep_r: 166.96\n",
"Ep: 261 | Ep_r: 101.71\n",
"Ep: 262 | Ep_r: 63.45\n",
"Ep: 263 | Ep_r: 288.94\n",
"Ep: 264 | Ep_r: 130.49\n",
"Ep: 265 | Ep_r: 207.05\n",
"Ep: 266 | Ep_r: 183.71\n",
"Ep: 267 | Ep_r: 142.75\n",
"Ep: 268 | Ep_r: 126.53\n",
"Ep: 269 | Ep_r: 310.79\n",
"Ep: 270 | Ep_r: 863.2\n",
"Ep: 271 | Ep_r: 365.12\n",
"Ep: 272 | Ep_r: 659.52\n",
"Ep: 273 | Ep_r: 103.98\n",
"Ep: 274 | Ep_r: 554.83\n",
"Ep: 275 | Ep_r: 246.01\n",
"Ep: 276 | Ep_r: 332.23\n",
"Ep: 277 | Ep_r: 323.35\n",
"Ep: 278 | Ep_r: 278.71\n",
"Ep: 279 | Ep_r: 613.6\n",
"Ep: 280 | Ep_r: 152.21\n",
"Ep: 281 | Ep_r: 402.02\n",
"Ep: 282 | Ep_r: 351.4\n",
"Ep: 283 | Ep_r: 115.87\n",
"Ep: 284 | Ep_r: 163.26\n",
"Ep: 285 | Ep_r: 631.0\n",
"Ep: 286 | Ep_r: 263.47\n",
"Ep: 287 | Ep_r: 511.21\n",
"Ep: 288 | Ep_r: 337.18\n",
"Ep: 289 | Ep_r: 819.76\n",
"Ep: 290 | Ep_r: 190.83\n",
"Ep: 291 | Ep_r: 442.98\n",
"Ep: 292 | Ep_r: 537.24\n",
"Ep: 293 | Ep_r: 1101.12\n",
"Ep: 294 | Ep_r: 178.42\n",
"Ep: 295 | Ep_r: 225.61\n",
"Ep: 296 | Ep_r: 252.62\n",
"Ep: 297 | Ep_r: 617.5\n",
"Ep: 298 | Ep_r: 617.8\n",
"Ep: 299 | Ep_r: 244.01\n",
"Ep: 300 | Ep_r: 687.91\n",
"Ep: 301 | Ep_r: 618.51\n",
"Ep: 302 | Ep_r: 1405.07\n",
"Ep: 303 | Ep_r: 456.95\n",
"Ep: 304 | Ep_r: 340.33\n",
"Ep: 305 | Ep_r: 502.91\n",
"Ep: 306 | Ep_r: 441.21\n",
"Ep: 307 | Ep_r: 255.81\n",
"Ep: 308 | Ep_r: 403.03\n",
"Ep: 309 | Ep_r: 229.1\n",
"Ep: 310 | Ep_r: 308.49\n",
"Ep: 311 | Ep_r: 165.37\n",
"Ep: 312 | Ep_r: 153.76\n",
"Ep: 313 | Ep_r: 442.05\n",
"Ep: 314 | Ep_r: 229.23\n",
"Ep: 315 | Ep_r: 128.52\n",
"Ep: 316 | Ep_r: 358.18\n",
"Ep: 317 | Ep_r: 319.03\n",
"Ep: 318 | Ep_r: 381.76\n",
"Ep: 319 | Ep_r: 199.19\n",
"Ep: 320 | Ep_r: 418.63\n",
"Ep: 321 | Ep_r: 223.95\n",
"Ep: 322 | Ep_r: 222.37\n",
"Ep: 323 | Ep_r: 405.4\n",
"Ep: 324 | Ep_r: 311.32\n",
"Ep: 325 | Ep_r: 184.85\n",
"Ep: 326 | Ep_r: 1026.71\n",
"Ep: 327 | Ep_r: 252.41\n",
"Ep: 328 | Ep_r: 224.93\n",
"Ep: 329 | Ep_r: 620.02\n",
"Ep: 330 | Ep_r: 174.54\n",
"Ep: 331 | Ep_r: 782.45\n",
"Ep: 332 | Ep_r: 263.79\n",
"Ep: 333 | Ep_r: 178.63\n",
"Ep: 334 | Ep_r: 242.84\n",
"Ep: 335 | Ep_r: 635.43\n",
"Ep: 336 | Ep_r: 668.89\n",
"Ep: 337 | Ep_r: 265.42\n",
"Ep: 338 | Ep_r: 207.81\n",
"Ep: 339 | Ep_r: 293.09\n",
"Ep: 340 | Ep_r: 530.23\n",
"Ep: 341 | Ep_r: 479.26\n",
"Ep: 342 | Ep_r: 559.77\n",
"Ep: 343 | Ep_r: 241.39\n",
"Ep: 344 | Ep_r: 158.83\n",
"Ep: 345 | Ep_r: 1510.69\n",
"Ep: 346 | Ep_r: 425.17\n",
"Ep: 347 | Ep_r: 266.94\n",
"Ep: 348 | Ep_r: 166.08\n",
"Ep: 349 | Ep_r: 630.52\n",
"Ep: 350 | Ep_r: 250.95\n",
"Ep: 351 | Ep_r: 625.88\n",
"Ep: 352 | Ep_r: 417.7\n",
"Ep: 353 | Ep_r: 867.81\n",
"Ep: 354 | Ep_r: 150.62\n",
"Ep: 355 | Ep_r: 230.89\n",
"Ep: 356 | Ep_r: 1017.52\n",
"Ep: 357 | Ep_r: 190.28\n",
"Ep: 358 | Ep_r: 396.91\n",
"Ep: 359 | Ep_r: 305.53\n",
"Ep: 360 | Ep_r: 131.61\n",
"Ep: 361 | Ep_r: 387.54\n",
"Ep: 362 | Ep_r: 298.82\n",
"Ep: 363 | Ep_r: 207.56\n",
"Ep: 364 | Ep_r: 248.56\n",
"Ep: 365 | Ep_r: 589.12\n",
"Ep: 366 | Ep_r: 179.52\n",
"Ep: 367 | Ep_r: 130.19\n",
"Ep: 368 | Ep_r: 1220.84\n",
"Ep: 369 | Ep_r: 126.35\n",
"Ep: 370 | Ep_r: 133.31\n",
"Ep: 371 | Ep_r: 485.81\n",
"Ep: 372 | Ep_r: 823.4\n",
"Ep: 373 | Ep_r: 253.26\n",
"Ep: 374 | Ep_r: 466.06\n",
"Ep: 375 | Ep_r: 203.27\n",
"Ep: 376 | Ep_r: 386.5\n",
"Ep: 377 | Ep_r: 491.02\n",
"Ep: 378 | Ep_r: 239.45\n",
"Ep: 379 | Ep_r: 276.93\n",
"Ep: 380 | Ep_r: 331.98\n",
"Ep: 381 | Ep_r: 764.79\n",
"Ep: 382 | Ep_r: 198.29\n",
"Ep: 383 | Ep_r: 717.18\n",
"Ep: 384 | Ep_r: 562.15\n",
"Ep: 385 | Ep_r: 29.44\n",
"Ep: 386 | Ep_r: 344.95\n",
"Ep: 387 | Ep_r: 671.87\n",
"Ep: 388 | Ep_r: 299.81\n",
"Ep: 389 | Ep_r: 899.76\n",
"Ep: 390 | Ep_r: 319.04\n",
"Ep: 391 | Ep_r: 252.11\n",
"Ep: 392 | Ep_r: 865.62\n",
"Ep: 393 | Ep_r: 255.64\n",
"Ep: 394 | Ep_r: 81.74\n",
"Ep: 395 | Ep_r: 213.13\n",
"Ep: 396 | Ep_r: 422.33\n",
"Ep: 397 | Ep_r: 167.47\n",
"Ep: 398 | Ep_r: 507.34\n",
"Ep: 399 | Ep_r: 614.0\n"
]
}
],
"source": [
"\n",
"print('\\nCollecting experience...')\n",
"for i_episode in range(400):\n",
" s = env.reset()\n",
" ep_r = 0\n",
" while True:\n",
" env.render()\n",
" a = dqn.choose_action(s)\n",
"\n",
" # take action\n",
" s_, r, done, info = env.step(a)\n",
"\n",
" # modify the reward\n",
" x, x_dot, theta, theta_dot = s_\n",
" r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8\n",
" r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5\n",
" r = r1 + r2\n",
"\n",
" dqn.store_transition(s, a, r, s_)\n",
"\n",
" ep_r += r\n",
" if dqn.memory_counter > MEMORY_CAPACITY:\n",
" dqn.learn()\n",
" if done:\n",
" print('Ep: ', i_episode,\n",
" '| Ep_r: ', round(ep_r, 2))\n",
"\n",
" if done:\n",
" break\n",
" s = s_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long