Version

import torch

print(torch.__version__) # 1.1.0

Facebook CTO Mike Schroepfer announces the release of PyTorch 1.0 at Facebook developer conference F8 on May 2, 2018 at the McEnery Convention Center in San Jose, California (Image Credit: Facebook)

(9)

(10)

(11)

(12)

Tensor: shape

import torch

# Tensor shapes: the `.shape` attribute and the `.size()` method report the
# same torch.Size; individual dimensions are read with shape[i] or size(i).

# 0-D tensor (scalar): the shape is empty.
a = torch.tensor(3.14)
print(a)                  # tensor(3.1400)
print(a.shape, a.size())  # torch.Size([]) torch.Size([])

# 1-D tensor with a single element.
b = torch.tensor([1.414])
print(b)                  # tensor([1.4140])
print(b.shape, b.size())  # torch.Size([1]) torch.Size([1])

# 1-D tensor with three elements.
c = torch.tensor([1., 2., 3.])
print(c)                  # tensor([1., 2., 3.])
print(c.shape, c.size())  # torch.Size([3]) torch.Size([3])

# 2-D tensor: 3 rows, 2 columns.
d = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(d)                  # tensor([[1, 2], [3, 4], [5, 6]])
print(d.shape, d.size())  # torch.Size([3, 2]) torch.Size([3, 2])

# 3-D tensor.
e = torch.tensor([[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]])
print(e)                  # tensor([[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]])
print(e.shape, e.size())  # torch.Size([2, 2, 3]) torch.Size([2, 2, 3])

print(e.shape[0], e.shape[1], e.shape[2])  # 2 2 3
print(e.size(0), e.size(1), e.size(2))     # 2 2 3

Tensor: dtype

import numpy as np
import torch

# torch.from_numpy() keeps the NumPy dtype; .float() / .int() convert afterwards.
a = np.array([[1, 2], [3, 4]])
print(a.dtype)   # int64 (NumPy's default integer type; platform dependent)
b = np.array([[1., 2.], [3., 4.]])
print(b.dtype)   # float64

aa = torch.from_numpy(a)
print(aa.dtype)  # torch.int64
bb = torch.from_numpy(b)
print(bb.dtype)  # torch.float64

# Two ways to obtain a float32 tensor from the integer array.
aa = torch.from_numpy(a).float()
print(aa.dtype)  # torch.float32
aa = torch.FloatTensor(a)
print(aa.dtype)  # torch.float32

# NOTE: `a` is rebound from the NumPy array to a tensor here.
a = aa.int()
print(a.dtype)   # torch.int32

Tensor: data

import torch

# Indexing a tensor yields 0-D tensors; .item() extracts the Python number.
a = torch.tensor([[1, 2], [3, 4]])
print(type(a))           # <class 'torch.Tensor'>
print(a)                 # tensor([[1, 2], [3, 4]])
print(a[0][0], a[0][1])  # tensor(1) tensor(2)
print(a[1][0], a[1][1])  # tensor(3) tensor(4)
print(a[0][0].item())    # 1
print(a[0][1].item())    # 2
print(a[1][0].item())    # 3
print(a[1][1].item())    # 4

# `.data` is also a torch.Tensor holding the same values.
b = a.data
print(type(b))           # <class 'torch.Tensor'>
print(b)                 # tensor([[1, 2], [3, 4]])
print(b[0][0], b[0][1])  # tensor(1) tensor(2)
print(b[1][0], b[1][1])  # tensor(3) tensor(4)
print(b[0][0].item())    # 1
print(b[0][1].item())    # 2
print(b[1][0].item())    # 3
print(b[1][1].item())    # 4

Interoperability with NumPy

import numpy as np
import torch

# Round trip: Python list -> NumPy array -> torch tensor -> NumPy array.
a = [[1, 2], [3, 4]]
print(type(a))  # <class 'list'>

b = np.array(a)
print(type(b))  # <class 'numpy.ndarray'>

# Three ways to build a tensor from a NumPy array.
c = torch.tensor(b)
print(type(c))  # <class 'torch.Tensor'>
c = torch.from_numpy(b)
print(type(c))  # <class 'torch.Tensor'>
c = torch.as_tensor(b)
print(type(c))  # <class 'torch.Tensor'>

# And back to NumPy.
d = c.numpy()
print(type(d))  # <class 'numpy.ndarray'>

(17)

Copy vs Reference

import numpy as np
import torch

# Which constructors copy the NumPy buffer and which ones share it.
a = np.array([[1.]])
b = torch.tensor(a)       # copy
c = torch.FloatTensor(a)  # copy (a is float64, so the float32 result needs new storage)
d = torch.from_numpy(a)   # reference
e = torch.as_tensor(a)    # reference

print(b.item())  # 1.0
print(c.item())  # 1.0
print(d.item())  # 1.0
print(e.item())  # 1.0

# Writing to the array is visible only through the references.
a[0][0] = 2
print(b.item())  # 1.0
print(c.item())  # 1.0
print(d.item())  # 2.0
print(e.item())  # 2.0

# Writing to a tensor reaches the array only through the references.
a[0][0] = 0
b[0][0] = 111
print(a[0][0])  # 0.0
c[0][0] = 222
print(a[0][0])  # 0.0
d[0][0] = 333
print(a[0][0])  # 333.0
e[0][0] = 444
print(a[0][0])  # 444.0

# Rebinding the name `a` creates a new array; the tensors keep the old buffer.
a = np.array([[12345.]])  # a new object
print(b.item())  # 111.0
print(c.item())  # 222.0
print(d.item())  # 444.0 (d and e alias the same buffer, so e's last write shows through d)
print(e.item())  # 444.0

Tensors on GPU

import numpy as np
import torch

a = np.array([1, 2, 3])

# Tensors are created on the CPU unless moved elsewhere.
c = torch.from_numpy(a).float()
print(c)  # tensor([1., 2., 3.])
c = torch.from_numpy(a).float().to('cpu')
print(c)  # tensor([1., 2., 3.])

# Moving to 'cuda' raises on a machine without a GPU, so guard the explicit transfer.
if torch.cuda.is_available():
    g = torch.from_numpy(a).float().to('cuda')
    print(g)  # tensor([1., 2., 3.], device='cuda:0')

# Portable pattern: choose the device once and use it everywhere.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
g = torch.from_numpy(a).float().to(DEVICE)
print(g)  # tensor([1., 2., 3.], device='cuda:0') when a GPU is available

if torch.cuda.is_available():
    print(torch.cuda.device_count())    # 1: the number of GPUs available
    print(torch.cuda.current_device())  # 0: the index of a currently selected device

(20)

Random Tensor

import torch

# No seed has been set yet: these values differ from run to run.
a = torch.randn(1)     # 1-element tensor (shape [1])
b = torch.randn(1)     # 1-element tensor (shape [1])
print(a, b)
c = torch.randn(2, 3)  # 2 by 3 tensor
print(c)

# above this you will get different results for each run
torch.manual_seed(123)
# below this will always produce the same results

a = torch.randn(1)     # 1-element tensor (shape [1])
b = torch.randn(1)     # 1-element tensor (shape [1])
print(a, b)
c = torch.randn(2, 3)  # 2 by 3 tensor
print(c)

2 × 3

import torch

# Matrix product of a (3 x 2) and a (2 x 3) tensor, written four equivalent ways.
a = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(a.shape)  # torch.Size([3, 2])
b = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(b.shape)  # torch.Size([2, 3])

c = torch.mm(a, b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])
c = a.mm(b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])
c = torch.matmul(a, b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])
c = a @ b
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])

(23)

can be changed later.

import torch

DEVICE = 'cuda' if torch.cuda.is_available() else ‘cpu' a = torch.rand(1, requires_grad=True).to(DEVICE)

b = torch.rand(1, dtype=torch.float).to(DEVICE)

import torch

True

import torch

print(x) # tensor(2., requires_grad=True) y = 3*x*x + 4*x + 5

25

import torch

# f = mean((x + 3)^2 + 1) over four elements, so df/dx_i = 2 * (x_i + 3) / 4.
x = torch.ones(2, 2, requires_grad=True)
y = x + 3
z = y*y + 1
f = z.mean()

# backward() must run before x.grad is populated; without it x.grad is None.
f.backward()
print(x.grad)  # tensor([[2., 2.], [2., 2.]])

2

4 i=1

i x y z f

= 2

import torch x = torch.tensor(3.0) w = torch.tensor(4.0, requires_grad=True) b = torch.tensor(5.0, requires_grad=True) print(x.item()) # 3.0 print(w.item()) # 4.0 print(b.item()) # 5.0 y = w * x + b print(y.item()) # 17.0

17 = 4 × 3 + 5

import torch

# Results computed from a tensor that requires grad are tracked as well.
x = torch.randn(3, requires_grad=True)
print(x.requires_grad)  # True
y = x + 1
print(y.requires_grad)  # True

# not to calculate the gradient for the variable z
# (operations inside torch.no_grad() are not recorded by autograd)
with torch.no_grad():
    z = x + 1
print(z.requires_grad)  # False

(31)

(32)

(33)

In-Place Operations

import torch

# `a += 10` mutates the existing tensor; `a = a + 10` builds a new one.
a = torch.tensor([1, 2, 3])
print(id(a))  # THE RESULT: (1)
a += 10
print(id(a))  # THE RESULT: (2)
a = a + 10
print(id(a))  # THE RESULT: (3)
# (1) and (2) are same, but (3) different from them.
# (2): in-place operator (same object)
# (3): a new object was created

# Same distinction with methods: add() returns a new tensor, while
# add_() (trailing underscore) modifies `a` in place and returns it.
a = torch.tensor([1, 2, 3])
b = a + 10
print(b)       # tensor([11, 12, 13])
print(a is b)  # False
b = a.add(10)
print(b)       # tensor([11, 12, 13])
print(a is b)  # False
b = a.add_(10)
print(b)       # tensor([11, 12, 13])
print(a is b)  # True

Pure NumPy Implementation

import numpy as np

# Fit y = w*x + b by gradient descent; the data follow w=2, b=1 exactly.
x = np.array([1, 2, 3, 4, 5], dtype='float32')
y = np.array([3, 5, 7, 9, 11], dtype='float32')

# initial random guess
w = np.random.randn(1)  # weight
b = np.random.randn(1)  # bias

for epoch in range(10000):  # iteration
    # prediction
    y_predicted = w * x + b
    error = y - y_predicted

    # gradient computation (manually): derivatives of 0.5 * mean(error**2)
    w_grad = (-x * error).mean()
    b_grad = (-error).mean()

    # update (learning rate 0.01)
    w -= 0.01 * w_grad
    b -= 0.01 * b_grad

print(w, b)  # [2.00000001] [0.99999998]

initial random guess

LinearRegressionCommon.py

import numpy as np
import torch

# how many points?
N = 1000

# the ground-truth values
W = 2.0  # weight
B = 1.0  # bias


def Data():
    """Build a noisy linear dataset y = W*x + B + noise and split it 80/20.

    The NumPy global random state is re-seeded with 13 on every call, so
    repeated calls return identical arrays.

    Returns:
        (x_train, y_train, x_valid, y_valid): arrays of shape
        (round(0.8*N), 1) for the training pair and (N - round(0.8*N), 1)
        for the validation pair.
    """
    np.random.seed(13)                  # random seed
    x = np.random.rand(N, 1)            # input: (N, 1) column, uniform in [0, 1)
    noise = 0.1 * np.random.rand(N, 1)  # noise: (N, 1) column, uniform in [0, 0.1)
    y = (W * x + B) + noise             # output that has some noise

    indices = np.arange(N)              # point indices
    np.random.shuffle(indices)          # shuffled indices

    split = round(0.8 * N)
    train_indices = indices[:split]     # the first 80% of the shuffled indices: train set
    valid_indices = indices[split:]     # the remaining 20%: validation set

    x_train, y_train = x[train_indices], y[train_indices]
    x_valid, y_valid = x[valid_indices], y[valid_indices]
    return x_train, y_train, x_valid, y_valid

Example #1

from LinearRegressionCommon import *

w = np.random.randn(1) # weight

b = np.random.randn(1) # bias

x_train, y_train, _, _ = Data()

for epoch in range(10000): # iteration # prediction

y_predicted = w * x_train + b error = y_train - y_predicted

# update

Example #2

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE) b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE) y_train = torch.from_numpy(y_train).float().to(DEVICE)

for epoch in range(10000):

y_predicted = w * x_train + b error = y_train - y_predicted cost = (error*error).mean()

Example #3

from LinearRegressionCommon import *

# Example #3: hand-written MSE cost, parameters updated by torch.optim.SGD.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()
x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

optimizer = torch.optim.SGD([w, b], lr=0.01)

for epoch in range(10000):
    y_predicted = w * x_train + b
    error = y_train - y_predicted
    cost = (error*error).mean()

    cost.backward()
    optimizer.step()
    # backward() accumulates into .grad, so clear it before the next
    # iteration; otherwise each step uses the sum of all past gradients.
    optimizer.zero_grad()

Example #4

from LinearRegressionCommon import *

# Example #4: cost computed by torch.nn.MSELoss, parameters updated by SGD.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()
x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w, b], lr=0.01)

for epoch in range(10000):
    y_predicted = w * x_train + b

    # Loss modules take (input, target); MSE is symmetric, so the value is
    # the same as with the arguments swapped.
    cost = CostFunc(y_predicted, y_train)
    cost.backward()
    optimizer.step()
    # backward() accumulates into .grad, so clear it before the next
    # iteration; otherwise each step uses the sum of all past gradients.
    optimizer.zero_grad()

Example #5

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

class Model(torch.nn.Module): def __init__(self):

super().__init__()

self.w = torch.nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)) self.b = torch.nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)) def forward(self, x):

return self.w * x + self.b

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE) y_train = torch.from_numpy(y_train).float().to(DEVICE) model = Model().to(DEVICE) CostFunc = torch.nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

Example #5

for epoch in range(10000): model.train()

y_predicted = model(x_train)

cost = CostFunc(y_train, y_predicted) cost.backward() optimizer.step() optimizer.zero_grad() print(model.state_dict()) print(model.w, model.b) print(model.w.item(), model.b.item()) # 2.003204107284546 1.048282265663147

Example #6

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' class Model(torch.nn.Module): def __init__(self): super().__init__() self.layer = torch.nn.Linear(1, 1) def forward(self, x): return self.layer(x)

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE) y_train = torch.from_numpy(y_train).float().to(DEVICE) model = Model().to(DEVICE) CostFunc = torch.nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

Example #6

for epoch in range(10000): model.train()

y_predicted = model(x_train)

cost = CostFunc(y_train, y_predicted) cost.backward() optimizer.step() optimizer.zero_grad() print(model.state_dict()) print(model.layer.weight, model.layer.bias) print(model.layer.weight.item(), model.layer.bias.item()) # 2.003204107284546 1.048282265663147

Practical Example

https://medium.com/dsnet/linear-regression-with-pytorch-3dde91d60b50

Layer

import numpy as np import torch

class Model(torch.nn.Module): def __init__(self):

super().__init__()

self.layer = torch.nn.Linear(3, 2) # 3: inputs, 2: outputs def forward(self, x):

return self.layer(x)

# input data: (temperature, rainfall, humidity)

x_train = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70]])

# output data: (apples, oranges)

y_train = np.array([[56, 70], [81, 101], [119, 133], [22, 37], [103, 119]]) x_train = torch.from_numpy(x_train).float() y_train = torch.from_numpy(y_train).float() model = Model() CostFunc = torch.nn.MSELoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

Practical Example

for epoch in range(10000): model.train()

y_predicted = model(x_train)

cost = CostFunc(y_train, y_predicted) cost.backward() optimizer.step() optimizer.zero_grad() # test print(model(x_train)) # prediction x_test = np.array([[80, 70, 50]]) x_test = torch.from_numpy(x_test).float(); print(model(x_test))

XOR Problem

https://mc.ai/intro-to-deep-learning-with-pytorch-part-1/

(54)

(55)

It concatenates a series of modules: each module's output is passed as the input to the next, in the order given.

(56)

import numpy as np import torch X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]) Y = torch.FloatTensor([[0], [1], [1], [0]]) INPUT_DIM  = 2 HIDDEN_DIM = 10 OUTPUT_DIM = 1 model = torch.nn.Sequential(       torch.nn.Linear(INPUT_DIM, HIDDEN_DIM),       torch.nn.ReLU(),       torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM),

torch.nn.Sigmoid()) # MUST for non-linearity

CostFunc  = torch.nn.BCELoss() # Binary Cross Entropy Loss optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(10000):     Y_predicted = model(X)     cost = CostFunc(Y_predicted, Y)     cost.backward()     optimizer.step()     model.zero_grad() Y_predicted = model(X) print(np.squeeze(Y_predicted.detach().numpy())) # [0.01351878 0.98831743 0.9887106  0.01278798] print(np.squeeze((Y_predicted+0.5).int().detach().numpy())) # [0 1 1 0]

import numpy as np import torch X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]) Y = torch.FloatTensor([[0], [1], [1], [0]]) INPUT_DIM  = 2 HIDDEN_DIM = 10 OUTPUT_DIM = 1

linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM) actfnc1 = torch.nn.ReLU()

linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

actfnc2 = torch.nn.Sigmoid() # MUST for non-linearity

model   = torch.nn.Sequential(linear1, actfnc1, linear2, actfnc2)

CostFunc  = torch.nn.BCELoss() # Binary Cross Entropy Loss optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(10000):     Y_predicted = model(X)     cost = CostFunc(Y_predicted, Y)     cost.backward()     optimizer.step()     model.zero_grad() Y_predicted = model(X) print(np.squeeze(Y_predicted.detach().numpy())) # [0.02886132 0.9477684  0.9471025  0.07047193] print(np.squeeze((Y_predicted+0.5).int().detach().numpy())) # [0 1 1 0]

order.

(59)

import numpy as np
import torch

# XOR truth table: four 2-bit inputs and their expected outputs.
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

INPUT_DIM = 2
HIDDEN_DIM = 10
OUTPUT_DIM = 1


class Model(torch.nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> Sigmoid."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfnc1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
        self.actfnc2 = torch.nn.Sigmoid()

    def forward(self, x):
        """Return values in (0, 1), as expected by BCELoss."""
        x = self.actfnc1(self.linear1(x))
        x = self.actfnc2(self.linear2(x))
        return x


model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)
CostFunc = torch.nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

. . .

class Model(torch.nn.Module):
    """Two-layer perceptron using the functional activations torch.relu / torch.sigmoid."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        """Apply linear1 -> relu -> linear2 -> sigmoid to x."""
        x = torch.relu(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

class Model(torch.nn.Module):
    """Two-layer perceptron whose activations are stored as module attributes."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfnc1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
        self.actfnc2 = torch.nn.Sigmoid()

    def forward(self, x):
        """Apply linear1 -> actfnc1 -> linear2 -> actfnc2 to x."""
        x = self.actfnc1(self.linear1(x))
        x = self.actfnc2(self.linear2(x))
        return x

Method #2

class Model(torch.nn.Module):
    """Two-layer perceptron where each layer is a Sequential of (Linear, activation)."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Linear(INPUT_DIM, HIDDEN_DIM),
            torch.nn.ReLU(),
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM),
            torch.nn.Sigmoid(),
        )

    def forward(self, x):
        """Apply layer1 then layer2 to x."""
        x = self.layer1(x)
        x = self.layer2(x)
        return x

class Model(torch.nn.Module):
    """Two-layer perceptron where each layer is an ordinary method of the class."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def layer1(self, x):
        """Hidden layer: linear1 followed by ReLU."""
        return torch.relu(self.linear1(x))

    def layer2(self, x):
        """Output layer: linear2 followed by sigmoid."""
        return torch.sigmoid(self.linear2(x))

    def forward(self, x):
        """Apply layer1 then layer2 to x."""
        x = self.layer1(x)
        x = self.layer2(x)
        return x

All the transformations in the Compose are applied to the input data one by one.

import torchvision

normalize = torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

transformations = torchvision.transforms.Compose( [torchvision.transforms.RandomHorizontalFlip(), torchvision.transforms.RandomVerticalFlip(),

torchvision.transforms.ToTensor(), normalize] )

: Each 0.5 for each channel (Red, Green, Blue)

(66)

import numpy as np

import torch, torchvision

pixels = np.random.randint(low=0, high=256, size=(5, 5, 3)) # 5x5 RGB image

print(type(pixels)) # <class ‘numpy.ndarray'> print(np.min(pixels), ' ~ ', np.max(pixels)) # 0 ~ 255

pixels = pixels.astype('float32') / 255 # normalization: [0, 255] to [0.0, 1.0] print(type(pixels)) # <class ‘numpy.ndarray'>

print(np.min(pixels), ' ~ ', np.max(pixels)) # 0.0 ~ 1.0

image = torch.from_numpy(pixels) # to a tensor

print(type(image)) # <class ‘torch.Tensor'> print(torch.min(image).item(), ' ~ ', torch.max(image).item()) # 0.0 ~ 1.0

# transforms = torchvision.transforms.ToTensor()

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

image = transforms(pixels) # apply transformations print(type(image)) # <class ‘torch.Tensor'> print(torch.min(image).item(), ' ~ ', torch.max(image).item()) # 0.0 ~ 1.0

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),

torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) image = transforms(pixels) # apply transformations

print(type(image)) # <class ‘torch.Tensor'> print(torch.min(image).item(), ' ~ ', torch.max(image).item()) # -1.0 ~ 1.0

image = torchvision.transforms.functional.normalize(image, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

MNIST

import numpy as np

import torch, torchvision

from matplotlib import pyplot as plt

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()]) bs = 64 # batch size

test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms)

print(train_dataset.data.shape) # torch.Size([60000, 28, 28]) print(len(train_dataset), len(test_dataset)) # 60000 10000

MNIST

for batch_index, (images, labels) in enumerate(train_dataloader): print(labels.shape) # torch.Size([64])

print(images.shape) # torch.Size([64, 1, 28, 28]) print(images[0].shape) # torch.Size([1, 28, 28])

images = np.transpose(images, (0,2,3,1)) # channel first order -> channel last order print(images.shape) # torch.Size([64, 28, 28, 1])

print(images[0].shape) # torch.Size([28, 28, 1])

plt.imshow(images[0].reshape((28, 28)), cmap=‘gray') plt.show()

break

# Manual iteration: pull batches from the loader through an explicit iterator.
itr = iter(train_dataloader)
for batch_index in range(len(train_dataloader)):  # one pass = number of batches
    # Python 3 iterators are advanced with the built-in next(); they have no .next() method.
    images, labels = next(itr)
    print(labels.shape)     # torch.Size([64])
    print(images.shape)     # torch.Size([64, 1, 28, 28])
    print(images[0].shape)  # torch.Size([1, 28, 28])

    images = np.transpose(images, (0, 2, 3, 1))  # channel first order -> channel last order
    print(images.shape)     # torch.Size([64, 28, 28, 1])
    print(images[0].shape)  # torch.Size([28, 28, 1])

    plt.imshow(images[0].reshape((28, 28)), cmap='gray')
    plt.show()
    break  # only the first batch is displayed

(70)

CIFAR-10

import numpy as np

import torch, torchvision

from matplotlib import pyplot as plt

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()]) bs = 64 # batch size

test_dataset = torchvision.datasets.CIFAR10(root='./data/CIFAR10', train=False, transform=transforms)

print(train_dataset.data.shape) # (50000, 32, 32, 3)

print(len(train_dataset), len(test_dataset)) # 50000 10000

print(train_dataloader.dataset.data.shape) # (50000, 32, 32, 3)

(71)

CIFAR-10

for batch_index, (images, labels) in enumerate(train_dataloader): print(labels.shape) # torch.Size([64])

print(images.shape) # torch.Size([64, 3, 32, 32]) print(images[0].shape) # torch.Size([3, 32, 32])

images = np.transpose(images, (0,2,3,1)) # channel first order -> channel last order print(images.shape) # torch.Size([64, 32, 32, 3])

print(images[0].shape) # torch.Size([32, 32, 3]) plt.imshow(images[0].reshape((32, 32, 3)))

plt.show() break

# Manual iteration: pull batches from the loader through an explicit iterator.
itr = iter(train_dataloader)
for batch_index in range(len(train_dataloader)):  # one pass = number of batches
    # Python 3 iterators are advanced with the built-in next(); they have no .next() method.
    images, labels = next(itr)
    print(labels.shape)     # torch.Size([64])
    print(images.shape)     # torch.Size([64, 3, 32, 32])
    print(images[0].shape)  # torch.Size([3, 32, 32])

    images = np.transpose(images, (0, 2, 3, 1))  # channel first order -> channel last order
    print(images.shape)     # torch.Size([64, 32, 32, 3])
    print(images[0].shape)  # torch.Size([32, 32, 3])

    plt.imshow(images[0].reshape((32, 32, 3)))
    plt.show()
    break  # only the first batch is displayed

CIFAR-100

import numpy as np

import torch, torchvision

from matplotlib import pyplot as plt

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()]) bs = 64 # batch size

test_dataset = torchvision.datasets.CIFAR100(root=‘./data/CIFAR100', train=False, transform=transforms)

print(train_dataset.data.shape) # (50000, 32, 32, 3)

print(len(train_dataset), len(test_dataset)) # 50000 10000

print(train_dataloader.dataset.data.shape) # (50000, 32, 32, 3)

CIFAR-100

for batch_index, (images, labels) in enumerate(train_dataloader): print(labels.shape) # torch.Size([64])

print(images.shape) # torch.Size([64, 3, 32, 32]) print(images[0].shape) # torch.Size([3, 32, 32])

images = np.transpose(images, (0,2,3,1)) # channel first order -> channel last order print(images.shape) # torch.Size([64, 32, 32, 3])

print(images[0].shape) # torch.Size([32, 32, 3]) plt.imshow(images[0].reshape((32, 32, 3)))

plt.show() break

# Manual iteration: pull batches from the loader through an explicit iterator.
itr = iter(train_dataloader)
for batch_index in range(len(train_dataloader)):  # one pass = number of batches
    # Python 3 iterators are advanced with the built-in next(); they have no .next() method.
    images, labels = next(itr)
    print(labels.shape)     # torch.Size([64])
    print(images.shape)     # torch.Size([64, 3, 32, 32])
    print(images[0].shape)  # torch.Size([3, 32, 32])

    images = np.transpose(images, (0, 2, 3, 1))  # channel first order -> channel last order
    print(images.shape)     # torch.Size([64, 32, 32, 3])
    print(images[0].shape)  # torch.Size([32, 32, 3])

    plt.imshow(images[0].reshape((32, 32, 3)))
    plt.show()
    break  # only the first batch is displayed

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1.3683

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615

(78)

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1.3683

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615 1.8282 2

indices

0.1751 2 1.1866 2 0.9239 1 0.3441 1 0.1223 2 0.5557 1 0.9953 1 2.2492 1 0.1923 0

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1.3683

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615 1.9349 8

indices

2.2492 8 1.8282 0

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1 1

b

0 1 1 1 0 1 1 1 0 1

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1 1

b

0 1 1 1 0 1 1 1 0 1 1

0 1 0 1 0

Accuracy Evaluation

import torch

data = torch.randn(3, 10) print(data.numpy())

values, indices = torch.max(data.data, 0) print(values.numpy(), indices.numpy())

values, indices = torch.max(data.data, 1) print(values.numpy(), indices.numpy()) a = torch.tensor([1, 0, 1, 0, 1, 0]) b = torch.tensor([1, 1, 1, 1, 1, 1]) c = ( a == b ) print(c.numpy()) print(c.sum().item())

accuracy = 100 * c.sum().item() / len(c) print(accuracy, '%') 1 1

b

0 1 1 1 0 1 1 1 0 1 1

0 1 0 1 0

MNIST Classification

import torch, torchvision

DEVICE = 'cuda' if torch.cuda.is_available() else ‘cpu' INPUT_DIM = 784 # = 28 x 28

HIDDEN_DIM = 100

OUTPUT_DIM = 10 # the number of classes TOTAL_EPOCHS = 10

LEARNING_RATE = 0.01 BATCH_SIZE = 2000

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms)

(84)

MNIST Classification

class Model(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super(Model, self).__init__()
        self.layer1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfn1 = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        """Map flattened inputs (batch, INPUT_DIM) to raw scores (batch, OUTPUT_DIM).

        No activation is applied to the output layer.
        """
        y1 = self.actfn1(self.layer1(x))
        y2 = self.layer2(y1)
        return y2

model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)

CostFunc = torch.nn.CrossEntropyLoss()

(85)

MNIST Classification

for epoch in range(TOTAL_EPOCHS):

images = images.reshape(-1, 784).to(DEVICE) # flattening labels = labels.to(DEVICE)

output = model(images)

cost = CostFunc(output, labels) cost.backward()

optimizer.step()

print('Cost: {:.4f}’.format(cost.item()))

# for the test, you don't need to do the gradient computation. with torch.no_grad():

correct = 0

images = images.reshape(-1, 784).to(DEVICE) # flattening labels = labels.to(DEVICE)

output = model(images)

_, predicted = torch.max(output.data, 1)

correct += (predicted == labels).sum().item()

print('Accuracy: {} %'.format(100 * correct / 10000)) # Accuracy: 97.36 %

CIFAR-10 Classification

import torch, torchvision

DEVICE = 'cuda' if torch.cuda.is_available() else ‘cpu' INPUT_DIM = 3072 # = 32 x 32 x 3

HIDDEN_DIM = 100

OUTPUT_DIM = 10 # the number of classes TOTAL_EPOCHS = 10

LEARNING_RATE = 0.01 BATCH_SIZE = 2000

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

test_dataset = torchvision.datasets.CIFAR10(root=‘./data/CIFAR10', train=False, transform=transforms)

(87)

CIFAR-10 Classification

class Model(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super(Model, self).__init__()
        self.layer1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfn1 = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        """Map flattened inputs (batch, INPUT_DIM) to raw scores (batch, OUTPUT_DIM).

        No activation is applied to the output layer.
        """
        y1 = self.actfn1(self.layer1(x))
        y2 = self.layer2(y1)
        return y2

model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)

CostFunc = torch.nn.CrossEntropyLoss()

(88)

CIFAR-10 Classification

for epoch in range(TOTAL_EPOCHS):

images = images.reshape(-1, 3072).to(DEVICE) # flattening labels = labels.to(DEVICE)

output = model(images)

cost = CostFunc(output, labels) cost.backward()

optimizer.step()

print('Cost: {:.4f}’.format(cost.item()))

# for the test, you don't need to do the gradient computation. with torch.no_grad():

correct = 0

images = images.reshape(-1, 3072).to(DEVICE) # flattening labels = labels.to(DEVICE)

output = model(images)

_, predicted = torch.max(output.data, 1)

correct += (predicted == labels).sum().item()

print('Accuracy: {} %'.format(100 * correct / 10000)) # Accuracy: 21.15 %

CNN for MNIST

class Model(torch.nn.Module):
    """CNN for MNIST: two conv/pool/dropout stages followed by three linear layers.

    Reads the module-level name `keep_ratio` when constructed; the dropout
    probability of each conv stage is 1 - keep_ratio.
    Shape comments use PyTorch's channel-first layout (batch, channels, height, width).
    """

    def __init__(self):
        super(Model, self).__init__()

        # (bs, 1, 28, 28) -> conv -> (bs, 6, 28, 28) -> pool -> (bs, 6, 14, 14)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (bs, 6, 14, 14) -> conv -> (bs, 9, 14, 14) -> pool -> (bs, 9, 7, 7)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=6, out_channels=9, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # CNN for MNIST (continued)

        # FC: 7x7x9 -> 256
        self.layer3 = torch.nn.Sequential(
            torch.nn.Linear(7*7*9, 256),
            torch.nn.ReLU())

        # FC: 256 -> 128
        self.layer4 = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU())

        # FC: 128 -> 10
        self.layer5 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Map images (bs, 1, 28, 28) to class scores (bs, 10)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)  # flattening: (bs, 9, 7, 7) -> (bs, 441)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

(94)

CNN for CIFAR-10

class Model(torch.nn.Module):
    """CNN for CIFAR-10: two conv/pool/dropout stages followed by three linear layers.

    Reads the module-level name `keep_ratio` when constructed; the dropout
    probability of each conv stage is 1 - keep_ratio.
    Shape comments use PyTorch's channel-first layout (batch, channels, height, width).
    """

    def __init__(self):
        super(Model, self).__init__()

        # (bs, 3, 32, 32) -> conv -> (bs, 6, 32, 32) -> pool -> (bs, 6, 16, 16)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (bs, 6, 16, 16) -> conv -> (bs, 9, 16, 16) -> pool -> (bs, 9, 8, 8)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=6, out_channels=9, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # CNN for CIFAR-10 (continued)

        # FC: 8x8x9 -> 256
        self.layer3 = torch.nn.Sequential(
            torch.nn.Linear(8*8*9, 256),
            torch.nn.ReLU())

        # FC: 256 -> 128
        self.layer4 = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU())

        # FC: 128 -> 10
        self.layer5 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Map images (bs, 3, 32, 32) to class scores (bs, 10)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)  # flattening: (bs, 9, 8, 8) -> (bs, 576)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

