From b212b3e026a5696acf70652ec93c89ae21632a3f Mon Sep 17 00:00:00 2001 From: Morvan Zhou Date: Mon, 8 May 2017 12:48:29 +1000 Subject: [PATCH] update --- tutorial-contents/401_CNN.py | 40 +++++++-------- tutorial-contents/402_RNN_classifier.py | 50 +++++++++---------- tutorial-contents/403_RNN_regressor.py | 20 ++++---- tutorial-contents/404_autoencoder.py | 6 +-- .../405_DQN_Reinforcement_learning.py | 10 ++-- .../501_why_torch_dynamic_graph.py | 32 ++++++------ tutorial-contents/504_batch_normalization.py | 6 +-- 7 files changed, 80 insertions(+), 84 deletions(-) diff --git a/tutorial-contents/401_CNN.py b/tutorial-contents/401_CNN.py index 9121eba..1844267 100644 --- a/tutorial-contents/401_CNN.py +++ b/tutorial-contents/401_CNN.py @@ -17,10 +17,10 @@ import matplotlib.pyplot as plt torch.manual_seed(1) # reproducible # Hyper Parameters -EPOCH = 1 # train the training data n times, to save time, we just train 1 epoch +EPOCH = 1 # train the training data n times, to save time, we just train 1 epoch BATCH_SIZE = 50 -LR = 0.001 # learning rate -DOWNLOAD_MNIST = False +LR = 0.001 # learning rate +DOWNLOAD_MNIST = True # set to False if you have downloaded # Mnist digits dataset @@ -33,8 +33,8 @@ train_data = torchvision.datasets.MNIST( ) # plot one example -print(train_data.train_data.size()) # (60000, 28, 28) -print(train_data.train_labels.size()) # (60000) +print(train_data.train_data.size()) # (60000, 28, 28) +print(train_data.train_labels.size()) # (60000) plt.imshow(train_data.train_data[0].numpy(), cmap='gray') plt.title('%i' % train_data.train_labels[0]) plt.show() @@ -51,28 +51,28 @@ test_y = test_data.test_labels[:2000] class CNN(nn.Module): def __init__(self): super(CNN, self).__init__() - self.conv1 = nn.Sequential( # input shape (1, 28, 28) + self.conv1 = nn.Sequential( # input shape (1, 28, 28) nn.Conv2d( - in_channels=1, # input height - out_channels=16, # n_filters - kernel_size=5, # filter size - stride=1, # filter movement/step - padding=2, # if 
want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1 - ), # output shape (16, 28, 28) - nn.ReLU(), # activation - nn.MaxPool2d(kernel_size=2), # choose max value in 2x2 area, output shape (16, 14, 14) + in_channels=1, # input height + out_channels=16, # n_filters + kernel_size=5, # filter size + stride=1, # filter movement/step + padding=2, # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1 + ), # output shape (16, 28, 28) + nn.ReLU(), # activation + nn.MaxPool2d(kernel_size=2), # choose max value in 2x2 area, output shape (16, 14, 14) ) - self.conv2 = nn.Sequential( # input shape (1, 28, 28) - nn.Conv2d(16, 32, 5, 1, 2), # output shape (32, 14, 14) - nn.ReLU(), # activation - nn.MaxPool2d(2), # output shape (32, 7, 7) + self.conv2 = nn.Sequential( # input shape (1, 28, 28) + nn.Conv2d(16, 32, 5, 1, 2), # output shape (32, 14, 14) + nn.ReLU(), # activation + nn.MaxPool2d(2), # output shape (32, 7, 7) ) self.out = nn.Linear(32 * 7 * 7, 10) # fully connected layer, output 10 classes def forward(self, x): x = self.conv1(x) x = self.conv2(x) - x = x.view(x.size(0), -1) # flatten the output of conv2 to (batch_size, 32 * 7 * 7) + x = x.view(x.size(0), -1) # flatten the output of conv2 to (batch_size, 32 * 7 * 7) output = self.out(x) return output @@ -81,7 +81,7 @@ cnn = CNN() print(cnn) # net architecture optimizer = torch.optim.Adam(cnn.parameters(), lr=LR) # optimize all cnn parameters -loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted +loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted # training and testing for epoch in range(EPOCH): diff --git a/tutorial-contents/402_RNN_classifier.py b/tutorial-contents/402_RNN_classifier.py index b12d8e2..679a255 100644 --- a/tutorial-contents/402_RNN_classifier.py +++ b/tutorial-contents/402_RNN_classifier.py @@ -18,26 +18,26 @@ import matplotlib.pyplot as plt torch.manual_seed(1) # reproducible # Hyper 
Parameters -EPOCH = 1 # train the training data n times, to save time, we just train 1 epoch +EPOCH = 1 # train the training data n times, to save time, we just train 1 epoch BATCH_SIZE = 64 -TIME_STEP = 28 # rnn time step / image height -INPUT_SIZE = 28 # rnn input size / image width -LR = 0.01 # learning rate -DOWNLOAD_MNIST = False # set to True if haven't download the data +TIME_STEP = 28 # rnn time step / image height +INPUT_SIZE = 28 # rnn input size / image width +LR = 0.01 # learning rate +DOWNLOAD_MNIST = True # set to False if you have already downloaded the data # Mnist digital dataset train_data = dsets.MNIST( root='./mnist/', - train=True, # this is training data - transform=transforms.ToTensor(), # Converts a PIL.Image or numpy.ndarray to - # torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0] - download=DOWNLOAD_MNIST, # download it if you don't have it + train=True, # this is training data + transform=transforms.ToTensor(), # Converts a PIL.Image or numpy.ndarray to + # torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0] + download=DOWNLOAD_MNIST, # download it if you don't have it ) # plot one example -print(train_data.train_data.size()) # (60000, 28, 28) -print(train_data.train_labels.size()) # (60000) +print(train_data.train_data.size()) # (60000, 28, 28) +print(train_data.train_labels.size()) # (60000) plt.imshow(train_data.train_data[0].numpy(), cmap='gray') plt.title('%i' % train_data.train_labels[0]) plt.show() @@ -55,11 +55,11 @@ class RNN(nn.Module): def __init__(self): super(RNN, self).__init__() - self.rnn = nn.LSTM( # if use nn.RNN(), it hardly learns + self.rnn = nn.LSTM( # if use nn.RNN(), it hardly learns input_size=28, - hidden_size=64, # rnn hidden unit - num_layers=1, # number of rnn layer - batch_first=True, # input & output will has batch size as 1s dimension. e.g. 
(batch, time_step, input_size) + hidden_size=64, # rnn hidden unit + num_layers=1, # number of rnn layer + batch_first=True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) ) self.out = nn.Linear(64, 10) @@ -80,22 +80,22 @@ rnn = RNN() print(rnn) optimizer = torch.optim.Adam(rnn.parameters(), lr=LR) # optimize all cnn parameters -loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted +loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted # training and testing for epoch in range(EPOCH): - for step, (x, y) in enumerate(train_loader): # gives batch data - b_x = Variable(x.view(-1, 28, 28)) # reshape x to (batch, time_step, input_size) - b_y = Variable(y) # batch y + for step, (x, y) in enumerate(train_loader): # gives batch data + b_x = Variable(x.view(-1, 28, 28)) # reshape x to (batch, time_step, input_size) + b_y = Variable(y) # batch y - output = rnn(b_x) # rnn output - loss = loss_func(output, b_y) # cross entropy loss - optimizer.zero_grad() # clear gradients for this training step - loss.backward() # backpropagation, compute gradients - optimizer.step() # apply gradients + output = rnn(b_x) # rnn output + loss = loss_func(output, b_y) # cross entropy loss + optimizer.zero_grad() # clear gradients for this training step + loss.backward() # backpropagation, compute gradients + optimizer.step() # apply gradients if step % 50 == 0: - test_output = rnn(test_x) # (samples, time_step, input_size) + test_output = rnn(test_x) # (samples, time_step, input_size) pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze() accuracy = sum(pred_y == test_y) / test_y.size print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy) diff --git a/tutorial-contents/403_RNN_regressor.py b/tutorial-contents/403_RNN_regressor.py index 2ebdca4..384101e 100644 --- a/tutorial-contents/403_RNN_regressor.py +++ b/tutorial-contents/403_RNN_regressor.py @@ -37,9 +37,9 @@ class 
RNN(nn.Module): self.rnn = nn.RNN( input_size=1, - hidden_size=32, # rnn hidden unit - num_layers=1, # number of rnn layer - batch_first=True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) + hidden_size=32, # rnn hidden unit + num_layers=1, # number of rnn layer + batch_first=True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) ) self.out = nn.Linear(32, 1) @@ -61,10 +61,10 @@ print(rnn) optimizer = torch.optim.Adam(rnn.parameters(), lr=LR) # optimize all cnn parameters loss_func = nn.MSELoss() -h_state = None # for initial hidden state +h_state = None # for initial hidden state plt.figure(1, figsize=(12, 5)) -plt.ion() # continuously plot +plt.ion() # continuously plot plt.show() for step in range(60): @@ -79,12 +79,12 @@ for step in range(60): prediction, h_state = rnn(x, h_state) # rnn output # !! next step is important !! - h_state = Variable(h_state.data) # repack the hidden state, break the connection from last iteration + h_state = Variable(h_state.data) # repack the hidden state, break the connection from last iteration - loss = loss_func(prediction, y) # cross entropy loss - optimizer.zero_grad() # clear gradients for this training step - loss.backward() # backpropagation, compute gradients - optimizer.step() # apply gradients + loss = loss_func(prediction, y) # cross entropy loss + optimizer.zero_grad() # clear gradients for this training step + loss.backward() # backpropagation, compute gradients + optimizer.step() # apply gradients # plotting plt.plot(steps, y_np.flatten(), 'r-') diff --git a/tutorial-contents/404_autoencoder.py b/tutorial-contents/404_autoencoder.py index c0f59d2..f9dc956 100644 --- a/tutorial-contents/404_autoencoder.py +++ b/tutorial-contents/404_autoencoder.py @@ -39,9 +39,9 @@ train_data = torchvision.datasets.MNIST( # plot one example print(train_data.train_data.size()) # (60000, 28, 28) print(train_data.train_labels.size()) # (60000) -# 
plt.imshow(train_data.train_data[2].numpy(), cmap='gray') -# plt.title('%i' % train_data.train_labels[2]) -# plt.show() +plt.imshow(train_data.train_data[2].numpy(), cmap='gray') +plt.title('%i' % train_data.train_labels[2]) +plt.show() # Data Loader for easy mini-batch return in training, the image batch shape will be (50, 1, 28, 28) train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True) diff --git a/tutorial-contents/405_DQN_Reinforcement_learning.py b/tutorial-contents/405_DQN_Reinforcement_learning.py index d2a375b..7b67892 100644 --- a/tutorial-contents/405_DQN_Reinforcement_learning.py +++ b/tutorial-contents/405_DQN_Reinforcement_learning.py @@ -46,8 +46,8 @@ class DQN(object): def __init__(self): self.eval_net, self.target_net = Net(), Net() - self.learn_step_counter = 0 # for target updateing - self.memory_counter = 0 # for storing memory + self.learn_step_counter = 0 # for target updating + self.memory_counter = 0 # for storing memory self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2)) # initialize memory self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR) self.loss_func = nn.MSELoss() @@ -100,7 +100,6 @@ for i_episode in range(400): ep_r = 0 while True: env.render() - a = dqn.choose_action(s) # take action @@ -112,7 +111,6 @@ for i_episode in range(400): r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 r = r1 + r2 - # store experience dqn.store_transition(s, a, r, s_) ep_r += r @@ -120,10 +118,8 @@ for i_episode in range(400): dqn.learn() if done: print('Ep: ', i_episode, - '| Ep_r: ', round(ep_r, 2), - ) + '| Ep_r: ', round(ep_r, 2)) if done: break - s = s_ \ No newline at end of file diff --git a/tutorial-contents/501_why_torch_dynamic_graph.py b/tutorial-contents/501_why_torch_dynamic_graph.py index 067091e..e1c4133 100644 --- a/tutorial-contents/501_why_torch_dynamic_graph.py +++ b/tutorial-contents/501_why_torch_dynamic_graph.py @@ -17,10 +17,10 @@ 
torch.manual_seed(1) # reproducible # Hyper Parameters BATCH_SIZE = 64 -TIME_STEP = 5 # rnn time step / image height -INPUT_SIZE = 1 # rnn input size / image width -LR = 0.02 # learning rate -DOWNLOAD_MNIST = False # set to True if haven't download the data +TIME_STEP = 5 # rnn time step / image height +INPUT_SIZE = 1 # rnn input size / image width +LR = 0.02 # learning rate +DOWNLOAD_MNIST = True # set to False if you have already downloaded the data class RNN(nn.Module): @@ -29,9 +29,9 @@ class RNN(nn.Module): self.rnn = nn.RNN( input_size=1, - hidden_size=32, # rnn hidden unit - num_layers=1, # number of rnn layer - batch_first=True, # input & output will has batch size as 1s dimension. e.g. (batch, time_step, input_size) + hidden_size=32, # rnn hidden unit + num_layers=1, # number of rnn layer + batch_first=True, # input & output will have batch size as the 1st dimension. e.g. (batch, time_step, input_size) ) self.out = nn.Linear(32, 1) @@ -41,8 +41,8 @@ class RNN(nn.Module): # r_out (batch, time_step, output_size) r_out, h_state = self.rnn(x, h_state) - outs = [] # this is where you can find torch is dynamic - for time_step in range(r_out.size(1)): # calculate output for each time step + outs = [] # this is where you can find torch is dynamic + for time_step in range(r_out.size(1)): # calculate output for each time step outs.append(self.out(r_out[:, time_step, :])) return torch.stack(outs, dim=1), h_state @@ -51,7 +51,7 @@ rnn = RNN() print(rnn) optimizer = torch.optim.Adam(rnn.parameters(), lr=LR) # optimize all cnn parameters -loss_func = nn.MSELoss() # the target label is not one-hotted +loss_func = nn.MSELoss() # the target label is not one-hotted h_state = None # for initial hidden state @@ -79,7 +79,7 @@ for i in range(60): ####################### Above is different ########################### - print(len(steps)) # print how many time step feed to RNN + print(len(steps)) # print how many time step feed to RNN x_np = np.sin(steps) # float32 for converting torch FloatTensor 
y_np = np.cos(steps) @@ -89,12 +89,12 @@ for i in range(60): prediction, h_state = rnn(x, h_state) # rnn output # !! next step is important !! - h_state = Variable(h_state.data) # repack the hidden state, break the connection from last iteration + h_state = Variable(h_state.data) # repack the hidden state, break the connection from last iteration - loss = loss_func(prediction, y) # cross entropy loss - optimizer.zero_grad() # clear gradients for this training step - loss.backward() # backpropagation, compute gradients - optimizer.step() # apply gradients + loss = loss_func(prediction, y) # mean squared error loss + optimizer.zero_grad() # clear gradients for this training step + loss.backward() # backpropagation, compute gradients + optimizer.step() # apply gradients # plotting plt.plot(steps, y_np.flatten(), 'r-') diff --git a/tutorial-contents/504_batch_normalization.py b/tutorial-contents/504_batch_normalization.py index 8009818..1a19100 100644 --- a/tutorial-contents/504_batch_normalization.py +++ b/tutorial-contents/504_batch_normalization.py @@ -58,7 +58,7 @@ class Net(nn.Module): self.bns = [] self.bn_input = nn.BatchNorm1d(1, momentum=0.5) # for input data - for i in range(N_HIDDEN): # build hidden layers and BN layers + for i in range(N_HIDDEN): # build hidden layers and BN layers input_size = 1 if i == 0 else 10 fc = nn.Linear(input_size, 10) setattr(self, 'fc%i' % i, fc) # IMPORTANT set layer to the Module @@ -83,7 +83,7 @@ class Net(nn.Module): for i in range(N_HIDDEN): x = self.fcs[i](x) pre_activation.append(x) - if self.do_bn: x = self.bns[i](x) # batch normalization + if self.do_bn: x = self.bns[i](x) # batch normalization x = ACTIVATION(x) layer_input.append(x) out = self.predict(x) @@ -147,7 +147,7 @@ for epoch in range(EPOCH): loss = loss_func(pred, b_y) opt.zero_grad() loss.backward() - opt.step() # it will also learn the parameters in Batch Normalization + opt.step() # it will also learn the parameters in Batch Normalization plt.ioff()