update to torch 0.4

2018-05-30 01:39:53 +08:00
parent 7e7c9bb383
commit 921b69a582
15 changed files with 82 additions and 104 deletions
--- a/tutorial-contents/301_regression.py
+++ b/tutorial-contents/301_regression.py
@ -3,11 +3,10 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 """
 import torch
-from torch.autograd import Variable
 import torch.nn.functional as F
 import matplotlib.pyplot as plt

@ -17,8 +16,9 @@ x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape
 y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)

 # torch can only train on Variable, so convert them to Variable
-#x, y = Variable(x), Variable(y)
-#The above code is depricated. Now,autograd directly supports tensors
+# The code below is deprecated in PyTorch 0.4: autograd now supports tensors directly
+# x, y = Variable(x), Variable(y)
+
 # plt.scatter(x.data.numpy(), y.data.numpy())
 # plt.show()

@ -56,7 +56,7 @@ for t in range(200):
        plt.cla()
        plt.scatter(x.data.numpy(), y.data.numpy())
        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
-        plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color':  'red'})
+        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color':  'red'})
        plt.pause(0.1)

 plt.ioff()
--- a/tutorial-contents/302_classification.py
+++ b/tutorial-contents/302_classification.py
@ -3,11 +3,10 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 """
 import torch
-from torch.autograd import Variable
 import torch.nn.functional as F
 import matplotlib.pyplot as plt

@ -22,8 +21,8 @@ y1 = torch.ones(100)                # class1 y data (tensor), shape=(100, 1)
 x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # shape (200, 2) FloatTensor = 32-bit floating
 y = torch.cat((y0, y1), ).type(torch.LongTensor)    # shape (200,) LongTensor = 64-bit integer

-# torch can only train on Variable, so convert them to Variable
-x, y = Variable(x), Variable(y)
+# The code below is deprecated in PyTorch 0.4: autograd now supports tensors directly
+# x, y = Variable(x), Variable(y)

 # plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
 # plt.show()
--- a/tutorial-contents/304_save_reload.py
+++ b/tutorial-contents/304_save_reload.py
@ -3,11 +3,10 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 """
 import torch
-from torch.autograd import Variable
 import matplotlib.pyplot as plt

 # torch.manual_seed(1)    # reproducible
@ -15,7 +14,9 @@ import matplotlib.pyplot as plt
 # fake data
 x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
 y = x.pow(2) + 0.2*torch.rand(x.size())  # noisy y data (tensor), shape=(100, 1)
-x, y = Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+# The code below is deprecated in PyTorch 0.4: autograd now supports tensors directly
+# x, y = Variable(x, requires_grad=False), Variable(y, requires_grad=False)


 def save():
--- a/tutorial-contents/306_optimizer.py
+++ b/tutorial-contents/306_optimizer.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 """
 import torch
 import torch.utils.data as Data
 import torch.nn.functional as F
-from torch.autograd import Variable
 import matplotlib.pyplot as plt

 # torch.manual_seed(1)    # reproducible
@ -27,7 +26,7 @@ plt.scatter(x.numpy(), y.numpy())
 plt.show()

 # put dateset into torch dataset
-torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
+torch_dataset = Data.TensorDataset(x, y)
 loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)


@ -64,17 +63,14 @@ if __name__ == '__main__':
    # training
    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
-        for step, (batch_x, batch_y) in enumerate(loader):          # for each training step
-            b_x = Variable(batch_x)
-            b_y = Variable(batch_y)
-
+        for step, (b_x, b_y) in enumerate(loader):          # for each training step
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)              # get output for every net
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()                # clear gradients for next train
                loss.backward()                # backpropagation, compute gradients
                opt.step()                     # apply gradients
-                l_his.append(loss.data[0])     # loss recoder
+                l_his.append(loss.data.numpy())     # loss recoder

    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for i, l_his in enumerate(losses_his):
--- a/tutorial-contents/401_CNN.py
+++ b/tutorial-contents/401_CNN.py
@ -3,7 +3,7 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 torchvision
 matplotlib
 """
@ -14,7 +14,6 @@ import os
 # third-party library
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import torch.utils.data as Data
 import torchvision
 import matplotlib.pyplot as plt
@ -51,9 +50,9 @@ plt.show()
 # Data Loader for easy mini-batch return in training, the image batch shape will be (50, 1, 28, 28)
 train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

-# convert test data into Variable, pick 2000 samples to speed up testing
+# pick 2000 samples to speed up testing
 test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
-test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1), volatile=True).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
+test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
 test_y = test_data.test_labels[:2000]


@ -106,9 +105,7 @@ def plot_with_labels(lowDWeights, labels):
 plt.ion()
 # training and testing
 for epoch in range(EPOCH):
-    for step, (x, y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader
-        b_x = Variable(x)   # batch x
-        b_y = Variable(y)   # batch y
+    for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader

        output = cnn(b_x)[0]               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
@ -119,8 +116,8 @@ for epoch in range(EPOCH):
        if step % 50 == 0:
            test_output, last_layer = cnn(test_x)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
-            accuracy = sum(pred_y == test_y) / float(test_y.size(0))
-            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy)
+            accuracy = (pred_y == test_y).float().sum().item() / float(test_y.size(0))   # reduce as float: built-in sum() over a uint8 mask can wrap at 256
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
            if HAS_SK:
                # Visualization of trained flatten layer (T-SNE)
                tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
--- a/tutorial-contents/402_RNN_classifier.py
+++ b/tutorial-contents/402_RNN_classifier.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 torchvision
 """
 import torch
 from torch import nn
-from torch.autograd import Variable
 import torchvision.datasets as dsets
 import torchvision.transforms as transforms
 import matplotlib.pyplot as plt
@ -47,7 +46,7 @@ train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_

-# convert test data into Variable, pick 2000 samples to speed up testing
+# pick 2000 samples to speed up testing
 test_data = dsets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())
-test_x = Variable(test_data.test_data, volatile=True).type(torch.FloatTensor)[:2000]/255.   # shape (2000, 28, 28) value in range(0,1)
+test_x = test_data.test_data.type(torch.FloatTensor)[:2000]/255.   # shape (2000, 28, 28) value in range(0,1)
 test_y = test_data.test_labels.numpy().squeeze()[:2000]    # covert to numpy array


@ -84,9 +83,8 @@ loss_func = nn.CrossEntropyLoss()                       # the target label is no

 # training and testing
 for epoch in range(EPOCH):
-    for step, (x, y) in enumerate(train_loader):        # gives batch data
-        b_x = Variable(x.view(-1, 28, 28))              # reshape x to (batch, time_step, input_size)
-        b_y = Variable(y)                               # batch y
+    for step, (b_x, b_y) in enumerate(train_loader):        # gives batch data
+        b_x = b_x.view(-1, 28, 28)              # reshape x to (batch, time_step, input_size)

        output = rnn(b_x)                               # rnn output
        loss = loss_func(output, b_y)                   # cross entropy loss
@ -97,8 +95,8 @@ for epoch in range(EPOCH):
        if step % 50 == 0:
            test_output = rnn(test_x)                   # (samples, time_step, input_size)
            pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
-            accuracy = sum(pred_y == test_y) / float(test_y.size)
-            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy)
+            accuracy = float(sum(pred_y == test_y)) / float(test_y.size)
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

 # print 10 predictions from test data
 test_output = rnn(test_x[:10].view(-1, 28, 28))
--- a/tutorial-contents/403_RNN_regressor.py
+++ b/tutorial-contents/403_RNN_regressor.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 numpy
 """
 import torch
 from torch import nn
-from torch.autograd import Variable
 import numpy as np
 import matplotlib.pyplot as plt

@ -69,19 +68,19 @@ h_state = None      # for initial hidden state
 plt.figure(1, figsize=(12, 5))
 plt.ion()           # continuously plot

-for step in range(60):
+for step in range(100):
    start, end = step * np.pi, (step+1)*np.pi   # time range
    # use sin predicts cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32)
    x_np = np.sin(steps)    # float32 for converting torch FloatTensor
    y_np = np.cos(steps)

-    x = Variable(torch.from_numpy(x_np[np.newaxis, :, np.newaxis]))    # shape (batch, time_step, input_size)
-    y = Variable(torch.from_numpy(y_np[np.newaxis, :, np.newaxis]))
+    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)
+    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn output
    # !! next step is important !!
-    h_state = Variable(h_state.data)        # repack the hidden state, break the connection from last iteration
+    h_state = h_state.data        # repack the hidden state, break the connection from last iteration

    loss = loss_func(prediction, y)         # cross entropy loss
    optimizer.zero_grad()                   # clear gradients for this training step
--- a/tutorial-contents/404_autoencoder.py
+++ b/tutorial-contents/404_autoencoder.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 numpy
 """
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import torch.utils.data as Data
 import torchvision
 import matplotlib.pyplot as plt
@ -87,15 +86,14 @@ f, a = plt.subplots(2, N_TEST_IMG, figsize=(5, 2))
 plt.ion()   # continuously plot

 # original data (first row) for viewing
-view_data = Variable(train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor)/255.)
+view_data = train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor)/255.
 for i in range(N_TEST_IMG):
    a[0][i].imshow(np.reshape(view_data.data.numpy()[i], (28, 28)), cmap='gray'); a[0][i].set_xticks(()); a[0][i].set_yticks(())

 for epoch in range(EPOCH):
-    for step, (x, y) in enumerate(train_loader):
-        b_x = Variable(x.view(-1, 28*28))   # batch x, shape (batch, 28*28)
-        b_y = Variable(x.view(-1, 28*28))   # batch y, shape (batch, 28*28)
-        b_label = Variable(y)               # batch label
+    for step, (x, b_label) in enumerate(train_loader):
+        b_x = x.view(-1, 28*28)   # batch x, shape (batch, 28*28)
+        b_y = x.view(-1, 28*28)   # batch y, shape (batch, 28*28)

        encoded, decoded = autoencoder(b_x)

@ -105,7 +103,7 @@ for epoch in range(EPOCH):
        optimizer.step()                    # apply gradients

        if step % 100 == 0:
-            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0])
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy())

            # plotting decoded image (second row)
            _, decoded_data = autoencoder(view_data)
@ -119,7 +117,7 @@ plt.ioff()
 plt.show()

 # visualize in 3D plot
-view_data = Variable(train_data.train_data[:200].view(-1, 28*28).type(torch.FloatTensor)/255.)
+view_data = train_data.train_data[:200].view(-1, 28*28).type(torch.FloatTensor)/255.
 encoded_data, _ = autoencoder(view_data)
 fig = plt.figure(2); ax = Axes3D(fig)
 X, Y, Z = encoded_data.data[:, 0].numpy(), encoded_data.data[:, 1].numpy(), encoded_data.data[:, 2].numpy()
--- a/tutorial-contents/405_DQN_Reinforcement_learning.py
+++ b/tutorial-contents/405_DQN_Reinforcement_learning.py
@ -4,13 +4,12 @@ My Youtube Channel: https://www.youtube.com/user/MorvanZhou
 More about Reinforcement learning: https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/

 Dependencies:
-torch: 0.3
+torch: 0.4
 gym: 0.8.1
 numpy
 """
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import torch.nn.functional as F
 import numpy as np
 import gym
@ -55,7 +54,7 @@ class DQN(object):
        self.loss_func = nn.MSELoss()

    def choose_action(self, x):
-        x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0))
+        x = torch.unsqueeze(torch.FloatTensor(x), 0)
        # input only one sample
        if np.random.uniform() < EPSILON:   # greedy
            actions_value = self.eval_net.forward(x)
@ -82,10 +81,10 @@ class DQN(object):
        # sample batch transitions
        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
        b_memory = self.memory[sample_index, :]
-        b_s = Variable(torch.FloatTensor(b_memory[:, :N_STATES]))
-        b_a = Variable(torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int)))
-        b_r = Variable(torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2]))
-        b_s_ = Variable(torch.FloatTensor(b_memory[:, -N_STATES:]))
+        b_s = torch.FloatTensor(b_memory[:, :N_STATES])
+        b_a = torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int))
+        b_r = torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2])
+        b_s_ = torch.FloatTensor(b_memory[:, -N_STATES:])

        # q_eval w.r.t the action in experience
        q_eval = self.eval_net(b_s).gather(1, b_a)  # shape (batch, 1)
--- a/tutorial-contents/406_GAN.py
+++ b/tutorial-contents/406_GAN.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 numpy
 matplotlib
 """
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import numpy as np
 import matplotlib.pyplot as plt

@ -35,7 +34,7 @@ def artist_works():     # painting from the famous artist (real target)
    a = np.random.uniform(1, 2, size=BATCH_SIZE)[:, np.newaxis]
    paintings = a * np.power(PAINT_POINTS, 2) + (a-1)
    paintings = torch.from_numpy(paintings).float()
-    return Variable(paintings)
+    return paintings

 G = nn.Sequential(                      # Generator
    nn.Linear(N_IDEAS, 128),            # random ideas (could from normal distribution)
@ -57,7 +56,7 @@ plt.ion()   # something about continuous plotting

 for step in range(10000):
    artist_paintings = artist_works()           # real painting from artist
-    G_ideas = Variable(torch.randn(BATCH_SIZE, N_IDEAS))    # random ideas
+    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS)  # random ideas
    G_paintings = G(G_ideas)                    # fake painting from G (random ideas)

    prob_artist0 = D(artist_paintings)          # D try to increase this prob
@ -67,7 +66,7 @@ for step in range(10000):
    G_loss = torch.mean(torch.log(1. - prob_artist1))

    opt_D.zero_grad()
-    D_loss.backward(retain_variables=True)      # retain_variables for reusing computational graph
+    D_loss.backward(retain_graph=True)      # reusing computational graph
    opt_D.step()

    opt_G.zero_grad()
@ -79,9 +78,9 @@ for step in range(10000):
        plt.plot(PAINT_POINTS[0], G_paintings.data.numpy()[0], c='#4AD631', lw=3, label='Generated painting',)
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
-        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.data.numpy().mean(), fontdict={'size': 15})
-        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.data.numpy(), fontdict={'size': 15})
-        plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=12);plt.draw();plt.pause(0.01)
+        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.data.numpy().mean(), fontdict={'size': 13})
+        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.data.numpy(), fontdict={'size': 13})
+        plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=10);plt.draw();plt.pause(0.01)

 plt.ioff()
 plt.show()
--- a/tutorial-contents/406_conditional_GAN.py
+++ b/tutorial-contents/406_conditional_GAN.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 numpy
 matplotlib
 """
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import numpy as np
 import matplotlib.pyplot as plt

@ -37,7 +36,7 @@ def artist_works_with_labels():     # painting from the famous artist (real targ
    labels = (a-1) > 0.5            # upper paintings (1), lower paintings (0), two classes
    paintings = torch.from_numpy(paintings).float()
    labels = torch.from_numpy(labels.astype(np.float32))
-    return Variable(paintings), Variable(labels)
+    return paintings, labels


 G = nn.Sequential(                      # Generator
@ -60,7 +59,7 @@ plt.ion()   # something about continuous plotting

 for step in range(10000):
    artist_paintings, labels = artist_works_with_labels()           # real painting, label from artist
-    G_ideas = Variable(torch.randn(BATCH_SIZE, N_IDEAS))            # random ideas
+    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS)                      # random ideas
    G_inputs = torch.cat((G_ideas, labels), 1)                      # ideas with labels
    G_paintings = G(G_inputs)                                       # fake painting w.r.t label from G

@ -75,7 +74,7 @@ for step in range(10000):
    G_loss = torch.mean(D_score1)               # minimise D score w.r.t G

    opt_D.zero_grad()
-    D_loss.backward(retain_variables=True)      # retain_variables for reusing computational graph
+    D_loss.backward(retain_graph=True)      # reusing computational graph
    opt_D.step()

    opt_G.zero_grad()
@ -88,20 +87,20 @@ for step in range(10000):
        bound = [0, 0.5] if labels.data[0, 0] == 0 else [0.5, 1]
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + bound[1], c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + bound[0], c='#FF9359', lw=3, label='lower bound')
-        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.data.numpy().mean(), fontdict={'size': 15})
-        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.data.numpy(), fontdict={'size': 15})
-        plt.text(-.5, 1.7, 'Class = %i' % int(labels.data[0, 0]), fontdict={'size': 15})
-        plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=12);plt.draw();plt.pause(0.1)
+        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.data.numpy().mean(), fontdict={'size': 13})
+        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.data.numpy(), fontdict={'size': 13})
+        plt.text(-.5, 1.7, 'Class = %i' % int(labels.data[0, 0]), fontdict={'size': 13})
+        plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=10);plt.draw();plt.pause(0.1)

 plt.ioff()
 plt.show()

 # plot a generated painting for upper class
-z = Variable(torch.randn(1, N_IDEAS))
-label = Variable(torch.FloatTensor([[1.]]))     # for upper class
+z = torch.randn(1, N_IDEAS)
+label = torch.FloatTensor([[1.]])     # for upper class
 G_inputs = torch.cat((z, label), 1)
 G_paintings = G(G_inputs)
 plt.plot(PAINT_POINTS[0], G_paintings.data.numpy()[0], c='#4AD631', lw=3, label='G painting for upper class',)
 plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + bound[1], c='#74BCFF', lw=3, label='upper bound (class 1)')
 plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + bound[0], c='#FF9359', lw=3, label='lower bound (class 1)')
-plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=12);plt.show()
+plt.ylim((0, 3));plt.legend(loc='upper right', fontsize=10);plt.show()
--- a/tutorial-contents/501_why_torch_dynamic_graph.py
+++ b/tutorial-contents/501_why_torch_dynamic_graph.py
@ -3,13 +3,12 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 numpy
 """
 import torch
 from torch import nn
-from torch.autograd import Variable
 import numpy as np
 import matplotlib.pyplot as plt

@ -80,12 +79,12 @@ for i in range(60):
    x_np = np.sin(steps)    # float32 for converting torch FloatTensor
    y_np = np.cos(steps)

-    x = Variable(torch.from_numpy(x_np[np.newaxis, :, np.newaxis]))    # shape (batch, time_step, input_size)
-    y = Variable(torch.from_numpy(y_np[np.newaxis, :, np.newaxis]))
+    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)
+    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn output
    # !! next step is important !!
-    h_state = Variable(h_state.data)        # repack the hidden state, break the connection from last iteration
+    h_state = h_state.data        # repack the hidden state, break the connection from last iteration

    loss = loss_func(prediction, y)         # cross entropy loss
    optimizer.zero_grad()                   # clear gradients for this training step
--- a/tutorial-contents/502_GPU.py
+++ b/tutorial-contents/502_GPU.py
@ -3,12 +3,11 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 torchvision
 """
 import torch
 import torch.nn as nn
-from torch.autograd import Variable
 import torch.utils.data as Data
 import torchvision

@ -25,7 +24,7 @@ train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffl
 test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

 # !!!!!!!! Change in here !!!!!!!!! #
-test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1)).type(torch.FloatTensor)[:2000].cuda()/255.   # Tensor on GPU
+test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000].cuda()/255.   # Tensor on GPU
 test_y = test_data.test_labels[:2000].cuda()


@ -56,8 +55,8 @@ for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):

        # !!!!!!!! Change in here !!!!!!!!! #
-        b_x = Variable(x).cuda()    # Tensor on GPU
-        b_y = Variable(y).cuda()    # Tensor on GPU
+        b_x = x.cuda()    # Tensor on GPU
+        b_y = y.cuda()    # Tensor on GPU

        output = cnn(b_x)
        loss = loss_func(output, b_y)
@ -72,7 +71,7 @@ for epoch in range(EPOCH):
            pred_y = torch.max(test_output, 1)[1].cuda().data.squeeze()  # move the computation in GPU

            accuracy = torch.sum(pred_y == test_y) / test_y.size(0)
-            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0], '| test accuracy: %.2f' % accuracy)
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)


 test_output = cnn(test_x[:10])
--- a/tutorial-contents/503_dropout.py
+++ b/tutorial-contents/503_dropout.py
@ -3,11 +3,10 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 """
 import torch
-from torch.autograd import Variable
 import matplotlib.pyplot as plt

 # torch.manual_seed(1)    # reproducible
@ -18,12 +17,10 @@ N_HIDDEN = 300
 # training data
 x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
 y = x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
-x, y = Variable(x), Variable(y)

 # test data
 test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
 test_y = test_x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
-test_x, test_y = Variable(test_x, volatile=True), Variable(test_y, volatile=True)

 # show data
 plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
@ -85,8 +82,8 @@ for t in range(500):
        plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)')
-        plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data[0], fontdict={'size': 20, 'color':  'red'})
-        plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data[0], fontdict={'size': 20, 'color': 'blue'})
+        plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data.numpy(), fontdict={'size': 20, 'color':  'red'})
+        plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data.numpy(), fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left'); plt.ylim((-2.5, 2.5));plt.pause(0.1)

        # change back to train mode
--- a/tutorial-contents/504_batch_normalization.py
+++ b/tutorial-contents/504_batch_normalization.py
@ -3,12 +3,11 @@ View more, visit my tutorial page: https://morvanzhou.github.io/tutorials/
 My Youtube Channel: https://www.youtube.com/user/MorvanZhou

 Dependencies:
-torch: 0.1.11
+torch: 0.4
 matplotlib
 numpy
 """
 import torch
-from torch.autograd import Variable
 from torch import nn
 from torch.nn import init
 import torch.utils.data as Data
@ -39,10 +38,10 @@ noise = np.random.normal(0, 2, test_x.shape)
 test_y = np.square(test_x) - 5 + noise

 train_x, train_y = torch.from_numpy(x).float(), torch.from_numpy(y).float()
-test_x = Variable(torch.from_numpy(test_x).float(), volatile=True)  # not for computing gradients
-test_y = Variable(torch.from_numpy(test_y).float(), volatile=True)
+test_x = torch.from_numpy(test_x).float()
+test_y = torch.from_numpy(test_y).float()

-train_dataset = Data.TensorDataset(data_tensor=train_x, target_tensor=train_y)
+train_dataset = Data.TensorDataset(train_x, train_y)
 train_loader = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)

 # show data
@ -72,8 +71,8 @@ class Net(nn.Module):
        self._set_init(self.predict)            # parameters initialization

    def _set_init(self, layer):
-        init.normal(layer.weight, mean=0., std=.1)
-        init.constant(layer.bias, B_INIT)
+        init.normal_(layer.weight, mean=0., std=.1)
+        init.constant_(layer.bias, B_INIT)

    def forward(self, x):
        pre_activation = [x]
@ -127,7 +126,6 @@ for epoch in range(EPOCH):
    plot_histogram(*layer_inputs, *pre_acts)     # plot histogram

    for step, (b_x, b_y) in enumerate(train_loader):
-        b_x, b_y = Variable(b_x), Variable(b_y)
        for net, opt in zip(nets, opts):     # train for each network
            pred, _, _ = net(b_x)
            loss = loss_func(pred, b_y)