전체 글

(1)

PyTorch

Wanho Choi

(2)
(3)
(4)

PyTorch

Open source and free machine learning library based on the Torch library

Developed by Facebook's AI Research (FAIR) group

(5)

PyTorch vs TensorFlow

PyTorch

Define-by-Run

TensorFlow

Define-and-Run

: peculiar framework

(6)

PyTorch vs TensorFlow

(7)
(8)

Version

import torch

print(torch.__version__) # prints the installed PyTorch version, e.g. 1.1.0

Facebook CTO Mike Schroepfer announces the release of PyTorch 1.0 at Facebook developer conference F8 on May 2, 2018 at the McEnery Convention Center in San Jose, California (Image Credit: Facebook)

(9)

Tensor

Simply

Multi-dimensional array

Generalized matrix

Strictly

Matrix: just a collection of numbers inside brackets

Tensors have some transformation properties when changing coordinate system.

In PyTorch

Unit of data

(10)

Tensor

0D tensor

1D tensor

4D tensor

2D tensor

3D tensor

scalar

vector

matrix

cube

a vector of cube

(11)

Tensor

0D tensor

1D tensor

4D tensor

2D tensor

3D tensor

variable

array

gray scale image

RGB image

(12)

Tensor: shape

import torch

# 0-D tensor (scalar): its shape is empty.
a = torch.tensor(3.14)
print(a)                  # tensor(3.1400)
print(a.shape, a.size())  # torch.Size([]) torch.Size([])

# 1-D tensor holding a single element.
b = torch.tensor([1.414])
print(b)                  # tensor([1.4140])
print(b.shape, b.size())  # torch.Size([1]) torch.Size([1])

# 1-D tensor (vector).
c = torch.tensor([1., 2., 3.])
print(c)                  # tensor([1., 2., 3.])
print(c.shape, c.size())  # torch.Size([3]) torch.Size([3])

# 2-D tensor (matrix).
d = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(d)                  # tensor([[1, 2], [3, 4], [5, 6]])
print(d.shape, d.size())  # torch.Size([3, 2]) torch.Size([3, 2])

# 3-D tensor.
e = torch.tensor([[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]])
print(e)                  # tensor([[[1, 2, 3], [3, 4, 5]], [[5, 6, 7], [7, 8, 9]]])
print(e.shape, e.size())  # torch.Size([2, 2, 3]) torch.Size([2, 2, 3])

# .shape[i] and .size(i) both give the length along dimension i.
print(e.shape[0], e.shape[1], e.shape[2])  # 2 2 3
print(e.size(0), e.size(1), e.size(2))     # 2 2 3

(13)

Tensor: dtype

import numpy as np
import torch

# NumPy infers the dtype from the literals.
a = np.array([[1, 2], [3, 4]])
print(a.dtype)   # int64 (on most 64-bit platforms)

b = np.array([[1., 2.], [3., 4.]])
print(b.dtype)   # float64

# from_numpy() keeps the NumPy dtype.
aa = torch.from_numpy(a)
print(aa.dtype)  # torch.int64

bb = torch.from_numpy(b)
print(bb.dtype)  # torch.float64

# Two ways to get a float32 tensor from the integer array.
aa = torch.from_numpy(a).float()
print(aa.dtype)  # torch.float32

aa = torch.FloatTensor(a)
print(aa.dtype)  # torch.float32

# .int() converts to a 32-bit integer tensor (note: `a` is rebound to a tensor here).
a = aa.int()
print(a.dtype)   # torch.int32

(14)

Tensor: data

import torch

a = torch.tensor([[1, 2], [3, 4]])

print(type(a))           # <class 'torch.Tensor'>
print(a)                 # tensor([[1, 2], [3, 4]])

# Indexing a single element yields a 0-D tensor ...
print(a[0][0], a[0][1])  # tensor(1) tensor(2)
print(a[1][0], a[1][1])  # tensor(3) tensor(4)

# ... and .item() extracts the plain Python number from it.
print(a[0][0].item())    # 1
print(a[0][1].item())    # 2
print(a[1][0].item())    # 3
print(a[1][1].item())    # 4

# .data is again a torch.Tensor holding the same values.
b = a.data

print(type(b))           # <class 'torch.Tensor'>
print(b)                 # tensor([[1, 2], [3, 4]])
print(b[0][0], b[0][1])  # tensor(1) tensor(2)
print(b[1][0], b[1][1])  # tensor(3) tensor(4)
print(b[0][0].item())    # 1
print(b[0][1].item())    # 2
print(b[1][0].item())    # 3
print(b[1][1].item())    # 4

(15)

Interoperability with NumPy

import numpy as np
import torch

a = [[1, 2], [3, 4]]
print(type(a))  # <class 'list'>

# list -> ndarray
b = np.array(a)
print(type(b))  # <class 'numpy.ndarray'>

# ndarray -> tensor, three different ways
c = torch.tensor(b)
print(type(c))  # <class 'torch.Tensor'>

c = torch.from_numpy(b)
print(type(c))  # <class 'torch.Tensor'>

c = torch.as_tensor(b)
print(type(c))  # <class 'torch.Tensor'>

# tensor -> ndarray
d = c.numpy()
print(type(d))  # <class 'numpy.ndarray'>

NumPy

Array

PyTorch

Tensor

b = torch.from_numpy(a)

a = b.numpy()

b = torch.tensor(a)

(16)

Copy vs Reference

PyTorch tensors created with torch.from_numpy() or torch.as_tensor() share the memory buffer of the NumPy ndarray.

Thus, changing one will affect the other.

torch.tensor() allocates a new memory by copying the ndarray.

torch.Tensor() is an alias for the default tensor type, e.g. torch.FloatTensor().

If you want to avoid a copy, use torch.from_numpy() or torch.as_tensor().

(17)

Copy vs Reference

import numpy as np
import torch

a = np.array([[1.]])

b = torch.tensor(a)       # copy
c = torch.FloatTensor(a)  # copy
d = torch.from_numpy(a)   # reference
e = torch.as_tensor(a)    # reference

print(b.item())  # 1.0
print(c.item())  # 1.0
print(d.item())  # 1.0
print(e.item())  # 1.0

# Writing through the ndarray is visible only through the references.
a[0][0] = 2
print(b.item())  # 1.0
print(c.item())  # 1.0
print(d.item())  # 2.0
print(e.item())  # 2.0

# Writing through a tensor reaches the ndarray only for the references.
a[0][0] = 0
b[0][0] = 111
print(a[0][0])   # 0.0
c[0][0] = 222
print(a[0][0])   # 0.0
d[0][0] = 333
print(a[0][0])   # 333.0
e[0][0] = 444
print(a[0][0])   # 444.0

# Rebinding the name `a` creates a new object; the tensors keep the old buffer.
a = np.array([[12345.]])
print(b.item())  # 111.0
print(c.item())  # 222.0
print(d.item())  # 444.0 (d and e share the same buffer, so e's write shows up here)
print(e.item())  # 444.0

2/2

1/2

(18)

Tensors on GPU

import numpy as np
import torch

a = np.array([1, 2, 3])

c = torch.from_numpy(a).float()
print(c)  # tensor([1., 2., 3.])

c = torch.from_numpy(a).float().to('cpu')
print(c)  # tensor([1., 2., 3.])

# Requesting 'cuda' on a machine without a GPU raises, so guard the explicit move.
if torch.cuda.is_available():
    g = torch.from_numpy(a).float().to('cuda')
    print(g)  # tensor([1., 2., 3.], device='cuda:0')

# Portable pattern: choose the device once and use it everywhere.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
g = torch.from_numpy(a).float().to(DEVICE)
print(g)  # tensor([1., 2., 3.], device='cuda:0') when a GPU is available

if torch.cuda.is_available():
    print(torch.cuda.device_count())    # 1: the number of GPUs available
    print(torch.cuda.current_device())  # 0: the index of the currently selected device

(19)
(20)

Random Tensor

import torch

# Without a fixed seed, every run produces different values.
a = torch.randn(1)     # 1-D tensor with one element
b = torch.randn(1)     # 1-D tensor with one element
print(a, b)
c = torch.randn(2, 3)  # 2 by 3 tensor
print(c)

# Everything above gives different results on each run.
torch.manual_seed(123)
# Everything below always produces the same results.

a = torch.randn(1)     # 1-D tensor with one element
b = torch.randn(1)     # 1-D tensor with one element
print(a, b)
c = torch.randn(2, 3)  # 2 by 3 tensor
print(c)

Uniform Distribution

frequency

torch.rand()

68%

95%

Normal Distribution

frequency

torch.randn()

(21)

Matrix Multiplication

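$$
\underbrace{\begin{bmatrix} 1 & 2 \\ 3 & 4 \\ 5 & 6 \end{bmatrix}}_{3 \times 2}
\underbrace{\begin{bmatrix} 1 & 2 & 3 \\ 4 & 5 & 6 \end{bmatrix}}_{2 \times 3}
=
\underbrace{\begin{bmatrix} 9 & 12 & 15 \\ 19 & 26 & 33 \\ 29 & 40 & 51 \end{bmatrix}}_{3 \times 3}
$$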

import torch

a = torch.tensor([[1, 2], [3, 4], [5, 6]])
print(a.shape)  # torch.Size([3, 2])

b = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(b.shape)  # torch.Size([2, 3])

# Four equivalent spellings of the same (3x2) @ (2x3) product.
c = torch.mm(a, b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])

c = a.mm(b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])

c = torch.matmul(a, b)
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])

c = a @ b
print(c)  # tensor([[ 9, 12, 15], [19, 26, 33], [29, 40, 51]])

(22)

Autograd: Automatic Differentiation

An engine for computing gradients (or Jacobians).

Autograd automatically calculates the gradients (or Jacobians) by applying chain rules,

tracing from the root to the leaves of the graph.

If you set a Tensor’s attribute .requires_grad as True, it starts to track all operations on it.

When you finish your computation you can call backward() and have all the gradients (or

Jacobians) computed automatically.

The result will be accumulated into .grad attribute.

(23)

.requires_grad

Every Tensor has

a flag: .requires_grad.

Default value: False

If

False, the tensor will be excluded from the gradient computation.

Tensors that must be

included in the gradient computation must explicitly set this flag to True.

input.requires_grad = False ➜ output.requires_grad = False (automatically)

input.requires_grad = True ➜ output.requires_grad = True (automatically)

This flag can also be set when the tensor is first created, and

can be changed later.

(24)

.requires_grad

import torch

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# The flag can be set when the tensor is created ...
a = torch.rand(1, requires_grad=True).to(DEVICE)
b = torch.rand(1, requires_grad=True).to(DEVICE)
print(a.requires_grad, b.requires_grad)  # True True

# ... it defaults to False ...
a = torch.rand(1, dtype=torch.float).to(DEVICE)
b = torch.rand(1, dtype=torch.float).to(DEVICE)
print(a.requires_grad, b.requires_grad)  # False False

# ... and it can be switched on later, in place.
a.requires_grad_()
b.requires_grad_()
print(a.requires_grad, b.requires_grad)  # True True

(25)

.requires_grad

import torch

x = torch.tensor([1.0])  # requires_grad=False by default
y = torch.tensor([2.0])  # requires_grad=False by default
z = torch.tensor([3.0], requires_grad=True)

# No input requires grad -> the output does not either.
a = x + y
print(a.requires_grad)  # False

# One input requires grad -> the output does too.
b = a + z
print(b.requires_grad)  # True

# The flag can still be turned on afterwards.
a.requires_grad_(True)
print(a.requires_grad)  # True

1

x

2

y

3

z

3

a

6

b

False

True

(26)

Autograd

import torch

x = torch.tensor(2.0, requires_grad=True)
print(x)       # tensor(2., requires_grad=True)

y = 3*x*x + 4*x + 5
print(y)       # tensor(25., grad_fn=<AddBackward0>)

# No gradient exists until backward() has been called.
print(x.grad)  # None

y.backward()   # compute gradients: dy/dx = 6x + 4
print(x.grad)  # tensor(16.)

$y = 3x^2 + 4x + 5$, so at $x = 2$: $y = 25$

$\dfrac{\partial y}{\partial x} = 6x + 4$, so at $x = 2$: $\dfrac{\partial y}{\partial x} = 16$

(27)

Autograd

import torch

x = torch.ones(2, 2, requires_grad=True)
y = x + 3
z = y*y + 1
f = z.mean()
print(f)       # tensor(17., grad_fn=<MeanBackward0>)

f.backward()   # gradient computation: df/dx_i = (x_i + 3) / 2
print(x.grad)  # tensor([[2., 2.], [2., 2.]])

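$x = \begin{bmatrix} 1 & 1 \\ 1 & 1 \end{bmatrix}$

$y = x + 3 = \begin{bmatrix} 4 & 4 \\ 4 & 4 \end{bmatrix}$

$z = y^2 + 1 = \begin{bmatrix} 17 & 17 \\ 17 & 17 \end{bmatrix}$

$f = \dfrac{1}{4} \sum_{i=1}^{4} z_i = 17$

$\dfrac{\partial f}{\partial x_i} = \dfrac{\partial f}{\partial z_i} \dfrac{\partial z_i}{\partial y_i} \dfrac{\partial y_i}{\partial x_i} = \dfrac{1}{4} \times 2 y_i \times 1 = \dfrac{1}{2} y_i = \dfrac{1}{2}(x_i + 3)$

When $x_i = 1$: $\dfrac{\partial f}{\partial x_i} = 2$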

(28)

Autograd

import torch

x = torch.tensor(3.0)
w = torch.tensor(4.0, requires_grad=True)
b = torch.tensor(5.0, requires_grad=True)
print(x.item())       # 3.0
print(w.item())       # 4.0
print(b.item())       # 5.0

y = w * x + b
print(y.item())       # 17.0

y.backward()          # gradient computation
print(w.grad.item())  # 3.0  (dy/dw = x)
print(b.grad.item())  # 1.0  (dy/db = 1)

$y = wx + b$

$\dfrac{\partial y}{\partial w} = x$

$\dfrac{\partial y}{\partial b} = 1$

$17 = 4 \times 3 + 5$

(29)

no_grad()

import torch

x = torch.randn(3, requires_grad=True)
print(x.requires_grad)  # True

y = x + 1
print(y.requires_grad)  # True

# Operations inside no_grad() are not tracked, so no gradient is computed for z.
with torch.no_grad():
    z = x + 1
print(z.requires_grad)  # False

(30)

Tensor vs Variable

Variables are wrappers for Tensors.

Variable = Tensor + α (α = gradient computation)

Variables are the part of the autograd package.

The Variable API has been deprecated:

Variables are no longer necessary to use autograd with Tensors.

Autograd automatically supports Tensors with .requires_grad set to True.

(31)

In-Place Operations

In PyTorch, many methods exist in two versions:

: with / without an underscore(_) suffix

ex) add(…), add_(…)

The underscore(_) indicates in-place operations in PyTorch.

Methods that end in an underscore(_) change the tensor in-place.

In general, in-place operations increase performance, but can lead to problems and worse

performance in PyTorch.

It is recommended not to use in-place operations in most cases for efficiency.

(32)

In-Place Operations

(33)

In-Place Operations

import torch

a = torch.tensor([1, 2, 3])
id_before = id(a)
print(id_before)   # THE RESULT: (1)

a += 10            # in-place operator: same object
id_inplace = id(a)
print(id_inplace)  # THE RESULT: (2)

a = a + 10         # a new object is created and bound to `a`
id_new = id(a)
print(id_new)      # THE RESULT: (3)

# (1) and (2) are the same, but (3) is different from them.

a = torch.tensor([1, 2, 3])

b = a + 10
print(b)       # tensor([11, 12, 13])
print(a is b)  # False

b = a.add(10)
print(b)       # tensor([11, 12, 13])
print(a is b)  # False

# The trailing underscore modifies `a` itself and returns it.
b = a.add_(10)
print(b)       # tensor([11, 12, 13])
print(a is b)  # True

(34)
(35)
(36)

Formulation

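Model:

$y_i = w x_i + b \quad (i = 1, 2, 3, \cdots, N)$

Error ($\hat{y}_i$: known, $y_i$: predicted):

$E_i = \hat{y}_i - y_i = \hat{y}_i - (w x_i + b) = \hat{y}_i - w x_i - b$

$\dfrac{\partial E_i}{\partial w} = -x_i \qquad \dfrac{\partial E_i}{\partial b} = -1$

Loss:

$L_i = \dfrac{1}{2} (\hat{y}_i - y_i)^2 = \dfrac{1}{2} E_i^2$

$\dfrac{\partial L_i}{\partial w} = \dfrac{\partial L_i}{\partial E_i} \dfrac{\partial E_i}{\partial w} = E_i \cdot (-x_i) = -x_i E_i$

$\dfrac{\partial L_i}{\partial b} = \dfrac{\partial L_i}{\partial E_i} \dfrac{\partial E_i}{\partial b} = E_i \cdot (-1) = -E_i$

Cost:

$C = \dfrac{1}{N} \sum_{i=1}^{N} L_i$

$\dfrac{\partial C}{\partial w} = \dfrac{1}{N} \sum_{i=1}^{N} \dfrac{\partial L_i}{\partial w} = \dfrac{1}{N} \sum_{i=1}^{N} (-x_i E_i)$

$\dfrac{\partial C}{\partial b} = \dfrac{1}{N} \sum_{i=1}^{N} \dfrac{\partial L_i}{\partial b} = \dfrac{1}{N} \sum_{i=1}^{N} (-E_i)$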

(37)

Pure NumPy Implementation

import numpy as np

# y = w*x+b: w=2, b=1
x = np.array([1, 2, 3, 4, 5], dtype='float32')
y = np.array([3, 5, 7, 9, 11], dtype='float32')

w = np.random.randn(1)  # weight
b = np.random.randn(1)  # bias

for epoch in range(10000):  # iteration
    # prediction
    y_predicted = w * x + b
    error = y - y_predicted

    # gradient computation (manually): mean over the samples
    w_grad = (-x * error).mean()
    b_grad = (-error).mean()

    # update (learning rate 0.01)
    w -= 0.01 * w_grad
    b -= 0.01 * b_grad

print(w, b)  # [2.00000001] [0.99999998]

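$\dfrac{\partial C}{\partial w} = \dfrac{1}{N} \sum_{i=1}^{N} (-x_i E_i)$

$\dfrac{\partial C}{\partial b} = \dfrac{1}{N} \sum_{i=1}^{N} (-E_i)$

$E_i = \hat{y}_i - y_i$ ($\hat{y}_i$: known, $y_i$: predicted)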

(38)

Pure NumPy Implementation

import numpy as np

# y = w*x+b: w=2, b=1
x = np.array([1, 2, 3, 4, 5], dtype='float32')
y = np.array([3, 5, 7, 9, 11], dtype='float32')

w = np.random.randn(1)  # weight
b = np.random.randn(1)  # bias

for epoch in range(10000):  # iteration
    # prediction
    y_predicted = w * x + b
    error = y - y_predicted

    # gradient computation (manually): mean over the samples
    w_grad = (-x * error).mean()
    b_grad = (-error).mean()

    # update: step against the gradient with learning rate 0.01
    w -= 0.01 * w_grad
    b -= 0.01 * b_grad

print(w, b)  # [2.00000001] [0.99999998]

$w_{n+1} = w_n - \alpha \dfrac{\partial E}{\partial w}$

$b_{n+1} = b_n - \alpha \dfrac{\partial E}{\partial b}$

(39)

How does it work?

Exactly the same process as solving Ax = b with the steepest descent method

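Model:

$y_i = a x_i + b \quad (i = 1, 2, 3, \cdots, N)$

$E = \sum_{i=1}^{N} (y_i - a x_i - b)^2$

$\underset{a,b}{\operatorname{argmin}} \; E$

$\dfrac{\partial E}{\partial a} = 2 \sum_{i=1}^{N} (y_i - a x_i - b) \cdot (-x_i) = 0$

$\dfrac{\partial E}{\partial b} = 2 \sum_{i=1}^{N} (y_i - a x_i - b) \cdot (-1) = 0$

$a \left( \sum_{i=1}^{N} x_i^2 \right) + b \left( \sum_{i=1}^{N} x_i \right) = \left( \sum_{i=1}^{N} x_i y_i \right)$

$a \left( \sum_{i=1}^{N} x_i \right) + bN = \left( \sum_{i=1}^{N} y_i \right)$

$\begin{bmatrix} \sum_{i=1}^{N} x_i^2 & \sum_{i=1}^{N} x_i \\ \sum_{i=1}^{N} x_i & N \end{bmatrix} \begin{bmatrix} a \\ b \end{bmatrix} = \begin{bmatrix} \sum_{i=1}^{N} x_i y_i \\ \sum_{i=1}^{N} y_i \end{bmatrix}$

$A\mathbf{x} = \mathbf{b}$

$f(\mathbf{x}) = \dfrac{1}{2} \mathbf{x}^T A \mathbf{x} - \mathbf{b}^T \mathbf{x} + c$

$\dfrac{\partial f}{\partial \mathbf{x}} = 0 \;\Rightarrow\; A\mathbf{x} = \mathbf{b}$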

a

b

(2,1)

energy contours steepest direction

initial random guess

(40)

LinearRegressionCommon.py

import numpy as np
import torch  # kept so `from LinearRegressionCommon import *` also provides torch

# how many points?
N = 1000

# the ground-truth values
W = 2.0  # weight
B = 1.0  # bias


def Data():
    """Build a noisy linear data set and split it 80/20.

    Samples N inputs uniformly from [0, 1), computes y = W * x + B plus
    uniform noise from [0, 0.1), shuffles the point indices and splits them
    into a training set (first 80%) and a validation set (the rest).
    The NumPy global seed is fixed, so every call returns the same data.

    Returns:
        (x_train, y_train, x_valid, y_valid), each an array of shape (n, 1).
    """
    np.random.seed(13)  # random seed (resets NumPy's global generator)

    x = np.random.rand(N, 1)            # input: (N, 1) array
    noise = 0.1 * np.random.rand(N, 1)  # noise: (N, 1) array; never negative
    y = (W * x + B) + noise             # output that has some noise

    indices = np.arange(N)              # point indices
    np.random.shuffle(indices)          # shuffled indices

    split = round(0.8 * N)
    train_indices = indices[:split]     # the first 80% of the shuffled indices: train set
    valid_indices = indices[split:]     # the remaining indices: validation set

    x_train, y_train = x[train_indices], y[train_indices]
    x_valid, y_valid = x[valid_indices], y[valid_indices]
    return x_train, y_train, x_valid, y_valid

(41)

Example #1

from LinearRegressionCommon import *

w = np.random.randn(1)  # weight
b = np.random.randn(1)  # bias

x_train, y_train, _, _ = Data()

for epoch in range(10000):  # iteration
    # prediction
    y_predicted = w * x_train + b
    error = y_train - y_predicted

    # gradient computation (manually)
    w_grad = (-x_train * error).mean()
    b_grad = (-error).mean()

    # update
    w -= 0.01 * w_grad
    b -= 0.01 * b_grad

(42)

Example #2

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

for epoch in range(10000):
    y_predicted = w * x_train + b
    error = y_train - y_predicted
    cost = (error*error).mean()

    cost.backward()  # gradient computation (automatically)

    # The parameter update itself must not be tracked by autograd.
    with torch.no_grad():
        w -= 0.01 * w.grad
        b -= 0.01 * b.grad

    # backward() accumulates into .grad, so clear it for the next iteration.
    w.grad.zero_()
    b.grad.zero_()

print(w.item(), b.item())  # 2.003204107284546 1.048282265663147

(43)

Example #3

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

# The optimizer replaces the manual update and the manual gradient reset.
optimizer = torch.optim.SGD([w, b], lr=0.01)

for epoch in range(10000):
    y_predicted = w * x_train + b
    error = y_train - y_predicted
    cost = (error*error).mean()

    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

(44)

Example #4

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

w = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)
b = torch.randn(1, requires_grad=True, dtype=torch.float, device=DEVICE)

x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

# MSELoss replaces the hand-written (error*error).mean().
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w, b], lr=0.01)

for epoch in range(10000):
    y_predicted = w * x_train + b

    # Loss modules take (input, target): prediction first, ground truth second.
    cost = CostFunc(y_predicted, y_train)
    cost.backward()

    optimizer.step()
    optimizer.zero_grad()

(45)

Example #5

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


class Model(torch.nn.Module):
    """Linear model y = w * x + b with hand-declared parameters."""

    def __init__(self):
        super().__init__()
        # nn.Parameter registers the tensor with the module and enables
        # gradients; Model().to(DEVICE) below moves both parameters.
        self.w = torch.nn.Parameter(torch.randn(1, dtype=torch.float))
        self.b = torch.nn.Parameter(torch.randn(1, dtype=torch.float))

    def forward(self, x):
        return self.w * x + self.b


x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

model = Model().to(DEVICE)
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

1/2

(46)

Example #5

model.train()  # training mode; set once, it does not change between epochs

for epoch in range(10000):
    y_predicted = model(x_train)

    # Loss modules take (input, target): prediction first, ground truth second.
    cost = CostFunc(y_predicted, y_train)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

print(model.state_dict())
print(model.w, model.b)
print(model.w.item(), model.b.item())  # 2.003204107284546 1.048282265663147

2/2

(47)

Example #6

from LinearRegressionCommon import *

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


class Model(torch.nn.Module):
    """Linear model built from a single torch.nn.Linear(1, 1) layer."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(1, 1)  # 1 input feature, 1 output

    def forward(self, x):
        return self.layer(x)


x_train, y_train, _, _ = Data()

x_train = torch.from_numpy(x_train).float().to(DEVICE)
y_train = torch.from_numpy(y_train).float().to(DEVICE)

model = Model().to(DEVICE)
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

1/2

(48)

Example #6

model.train()  # training mode; set once, it does not change between epochs

for epoch in range(10000):
    y_predicted = model(x_train)

    # Loss modules take (input, target): prediction first, ground truth second.
    cost = CostFunc(y_predicted, y_train)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

print(model.state_dict())
print(model.layer.weight, model.layer.bias)
print(model.layer.weight.item(), model.layer.bias.item())  # 2.003204107284546 1.048282265663147

2/2

(49)

Practical Example

https://medium.com/dsnet/linear-regression-with-pytorch-3dde91d60b50

Input

Layer

Hidden

Layer

Output

Layer

(50)

import numpy as np
import torch


class Model(torch.nn.Module):
    """Single linear layer mapping 3 input features to 2 outputs."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(3, 2)  # 3: inputs, 2: outputs

    def forward(self, x):
        return self.layer(x)


# input data: (temperature, rainfall, humidity)
x_train = np.array([[73, 67, 43],
                    [91, 88, 64],
                    [87, 134, 58],
                    [102, 43, 37],
                    [69, 96, 70]])

# output data: (apples, oranges)
y_train = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]])

# The integer arrays must become float tensors before they reach the layer.
x_train = torch.from_numpy(x_train).float()
y_train = torch.from_numpy(y_train).float()

model = Model()
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)

1/2

Practical Example

(51)

model.train()  # training mode; set once, it does not change between epochs

for epoch in range(10000):
    y_predicted = model(x_train)

    # Loss modules take (input, target): prediction first, ground truth second.
    cost = CostFunc(y_predicted, y_train)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

# Inference needs no gradient tracking.
model.eval()
with torch.no_grad():
    # test
    print(model(x_train))

    # prediction
    x_test = np.array([[80, 70, 50]])
    x_test = torch.from_numpy(x_test).float()
    print(model(x_test))

2/2

Practical Example

(52)
(53)

XOR Problem

https://mc.ai/intro-to-deep-learning-with-pytorch-part-1/

(54)

XOR Problem

Minsky and Papert proved

(55)

torch.nn.Sequential

A container that contains other modules

It chains a series of modules together, in order.

(56)

import numpy as np
import torch

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

INPUT_DIM  = 2
HIDDEN_DIM = 10
OUTPUT_DIM = 1

model = torch.nn.Sequential(
    torch.nn.Linear(INPUT_DIM, HIDDEN_DIM),
    torch.nn.ReLU(),     # MUST for non-linearity: XOR is not linearly separable
    torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM),
    torch.nn.Sigmoid())  # squashes the output into (0, 1), as BCELoss expects

CostFunc  = torch.nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(10000):
    Y_predicted = model(X)
    cost = CostFunc(Y_predicted, Y)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

Y_predicted = model(X)
print(np.squeeze(Y_predicted.detach().numpy()))  # [0.01351878 0.98831743 0.9887106  0.01278798]

# Adding 0.5 and truncating rounds each probability to 0 or 1.
print(np.squeeze((Y_predicted+0.5).int().detach().numpy()))  # [0 1 1 0]

(57)

import numpy as np
import torch

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

INPUT_DIM  = 2
HIDDEN_DIM = 10
OUTPUT_DIM = 1

linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
actfnc1 = torch.nn.ReLU()     # MUST for non-linearity: XOR is not linearly separable
linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
actfnc2 = torch.nn.Sigmoid()  # squashes the output into (0, 1), as BCELoss expects
model   = torch.nn.Sequential(linear1, actfnc1, linear2, actfnc2)

CostFunc  = torch.nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(10000):
    Y_predicted = model(X)
    cost = CostFunc(Y_predicted, Y)
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

Y_predicted = model(X)
print(np.squeeze(Y_predicted.detach().numpy()))  # [0.02886132 0.9477684  0.9471025  0.07047193]

# Adding 0.5 and truncating rounds each probability to 0 or 1.
print(np.squeeze((Y_predicted+0.5).int().detach().numpy()))  # [0 1 1 0]

(58)

class Model(torch.nn.Module)

It contains two main methods.

The first method (__init__) defines the layer components of the network.

In the second method(forward) we wire the network and put every component in the desired

order.

(59)

import numpy as np
import torch

X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

INPUT_DIM  = 2
HIDDEN_DIM = 10
OUTPUT_DIM = 1


class Model(torch.nn.Module):
    """Two-layer network: Linear -> ReLU -> Linear -> Sigmoid."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        # __init__ only declares the components ...
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfnc1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
        self.actfnc2 = torch.nn.Sigmoid()

    def forward(self, x):
        # ... and forward wires them together in the desired order.
        x = self.actfnc1(self.linear1(x))
        x = self.actfnc2(self.linear2(x))
        return x


model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)

CostFunc  = torch.nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

. . .

(60)

class Model(torch.nn.Module):
    """Two-layer network that applies the activations as plain functions."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        # Only the layers with weights are stored on the module.
        self.linear1 = torch.nn.Linear(INPUT_DIM,  HIDDEN_DIM)
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        x = torch.relu(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

class Model(torch.nn.Module):
    """Two-layer network that stores the activations as sub-modules."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfnc1 = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
        self.actfnc2 = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.actfnc1(self.linear1(x))
        x = self.actfnc2(self.linear2(x))
        return x

Method #1

Method #2

(61)

class Model(torch.nn.Module):
    """Two-layer network where each layer is a Sequential(Linear, activation)."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Linear(INPUT_DIM, HIDDEN_DIM),
            torch.nn.ReLU(),
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM),
            torch.nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x

class Model(torch.nn.Module):
    """Two-layer network where each layer is a method combining Linear + activation."""

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super().__init__()
        self.linear1 = torch.nn.Linear(INPUT_DIM,  HIDDEN_DIM)
        self.linear2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def layer1(self, x):
        return torch.relu(self.linear1(x))

    def layer2(self, x):
        return torch.sigmoid(self.linear2(x))

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x

Method #3

Method #4

(62)

MNIST

(63)

torchvision

The torchvision package consists of popular datasets, model architectures, and common

image transformations for computer vision.

torchvision.datasets: MNIST, Fashion-MNIST, CIFAR, etc.

torchvision.io: video

torchvision.models: classification, object detection, etc.

(64)

torchvision.transforms.Compose

It creates a series of transformations.

It composes several transformations together.

All the transformations in the Compose are applied to the input data one by one.

import torchvision

# One mean and one std per channel (Red, Green, Blue).
normalize = torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# The transformations are applied to the input one by one, in list order.
transformations = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.ToTensor(),
    normalize,
])

(65)

Image Normalization

Normalization reduces the skewness, and helps to learn faster and better.

If the given dataset is already in range [0.0, 1.0], you can skip the normalization.

image = ( image - mean ) / std

(mean, std) = (0.5, 0.5) ➜ image = ( image − 0.5 ) / 0.5

: (0.0, 1.0) range ➜ (−1.0, +1.0) range

torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

: Each 0.5 for each channel (Red, Green, Blue)

(66)

import numpy as np
import torch
import torchvision

pixels = np.random.randint(low=0, high=256, size=(5, 5, 3))  # 5x5 RGB image
print(type(pixels))                           # <class 'numpy.ndarray'>
print(np.min(pixels), ' ~ ', np.max(pixels))  # 0 ~ 255

pixels = pixels.astype('float32') / 255       # normalization: [0, 255] to [0.0, 1.0]
print(type(pixels))                           # <class 'numpy.ndarray'>
print(np.min(pixels), ' ~ ', np.max(pixels))  # 0.0 ~ 1.0

image = torch.from_numpy(pixels)              # to a tensor
print(type(image))                            # <class 'torch.Tensor'>
print(torch.min(image).item(), ' ~ ', torch.max(image).item())  # 0.0 ~ 1.0

# transforms = torchvision.transforms.ToTensor()
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
image = transforms(pixels)                    # apply transformations
print(type(image))                            # <class 'torch.Tensor'>
print(torch.min(image).item(), ' ~ ', torch.max(image).item())  # 0.0 ~ 1.0

transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
image = transforms(pixels)                    # apply transformations
print(type(image))                            # <class 'torch.Tensor'>
print(torch.min(image).item(), ' ~ ', torch.max(image).item())  # -1.0 ~ 1.0

# Functional form of the same normalization; start again from the
# un-normalized tensor so the image is not normalized twice.
image = torchvision.transforms.ToTensor()(pixels)
image = torchvision.transforms.functional.normalize(image, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

(67)

MNIST

import numpy as np
import torch
import torchvision
from matplotlib import pyplot as plt

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
bs = 64  # batch size

train_dataset = torchvision.datasets.MNIST(root='./data/', train=True, transform=transforms, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms)

print(train_dataset.data.shape)               # torch.Size([60000, 28, 28])
print(len(train_dataset), len(test_dataset))  # 60000 10000

# The DataLoader keyword is lower-case `batch_size`.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=True)

print(train_dataloader.dataset.data.shape)    # torch.Size([60000, 28, 28])

# Number of batches: the last, partial batch is kept, so it is ceil(), not round().
print(len(train_dataloader), len(test_dataloader))  # 938=ceil(60000/bs), 157=ceil(10000/bs)
print(len(train_dataloader.dataset), len(test_dataloader.dataset))  # 60000 10000

(68)

MNIST

for batch_index, (images, labels) in enumerate(train_dataloader):
    print(labels.shape)     # torch.Size([64])
    print(images.shape)     # torch.Size([64, 1, 28, 28])
    print(images[0].shape)  # torch.Size([1, 28, 28])

    images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
    print(images.shape)     # torch.Size([64, 28, 28, 1])
    print(images[0].shape)  # torch.Size([28, 28, 1])

    # Drop the single channel axis for a gray-scale plot.
    plt.imshow(images[0].reshape((28, 28)), cmap='gray')
    plt.show()

    break  # only the first batch is shown

# Alternative: fetch a single batch from the loader's iterator.
itr = iter(train_dataloader)
images, labels = next(itr)  # built-in next(); the Python 2 style itr.next() is not available

print(labels.shape)     # torch.Size([64])
print(images.shape)     # torch.Size([64, 1, 28, 28])
print(images[0].shape)  # torch.Size([1, 28, 28])

images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
print(images.shape)     # torch.Size([64, 28, 28, 1])
print(images[0].shape)  # torch.Size([28, 28, 1])

# Drop the single channel axis for a gray-scale plot.
plt.imshow(images[0].reshape((28, 28)), cmap='gray')
plt.show()

2/2

(69)
(70)

CIFAR-10

import numpy as np
import torch
import torchvision
from matplotlib import pyplot as plt

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
bs = 64  # batch size

train_dataset = torchvision.datasets.CIFAR10(root='./data/CIFAR10', train=True, transform=transforms, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data/CIFAR10', train=False, transform=transforms)

print(train_dataset.data.shape)               # (50000, 32, 32, 3)
print(len(train_dataset), len(test_dataset))  # 50000 10000

# The DataLoader keyword is lower-case `batch_size`.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=True)

print(train_dataloader.dataset.data.shape)    # (50000, 32, 32, 3)

# Number of batches: the last, partial batch is kept, so it is ceil(), not round().
print(len(train_dataloader), len(test_dataloader))  # 782=ceil(50000/bs), 157=ceil(10000/bs)
print(len(train_dataloader.dataset), len(test_dataloader.dataset))  # 50000 10000

(71)

CIFAR-10

for batch_index, (images, labels) in enumerate(train_dataloader):
    print(labels.shape)     # torch.Size([64])
    print(images.shape)     # torch.Size([64, 3, 32, 32])
    print(images[0].shape)  # torch.Size([3, 32, 32])

    images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
    print(images.shape)     # torch.Size([64, 32, 32, 3])
    print(images[0].shape)  # torch.Size([32, 32, 3])

    plt.imshow(images[0].reshape((32, 32, 3)))
    plt.show()

    break  # only the first batch is shown

# Alternative: fetch a single batch from the loader's iterator.
itr = iter(train_dataloader)
images, labels = next(itr)  # built-in next(); the Python 2 style itr.next() is not available

print(labels.shape)     # torch.Size([64])
print(images.shape)     # torch.Size([64, 3, 32, 32])
print(images[0].shape)  # torch.Size([3, 32, 32])

images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
print(images.shape)     # torch.Size([64, 32, 32, 3])
print(images[0].shape)  # torch.Size([32, 32, 3])

plt.imshow(images[0].reshape((32, 32, 3)))
plt.show()

2/2

(72)
(73)

CIFAR-100

import numpy as np

import torch, torchvision

from matplotlib import pyplot as plt

# Load CIFAR-100 and wrap both splits in mini-batch loaders.
# ToTensor is the only transform applied to each image.
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
bs = 64  # batch size

# download=True fetches the training split into ./data/CIFAR100 when it is missing.
train_dataset = torchvision.datasets.CIFAR100(root='./data/CIFAR100', train=True, transform=transforms, download=True)
# Plain ASCII quotes are required around the path (the typographic opening quote is a syntax error).
test_dataset = torchvision.datasets.CIFAR100(root='./data/CIFAR100', train=False, transform=transforms)

print(train_dataset.data.shape)  # (50000, 32, 32, 3)
print(len(train_dataset), len(test_dataset))  # 50000 10000

# The DataLoader keyword is lowercase `batch_size`; `BATCH_SIZE=` is rejected as an unexpected argument.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=True)

print(train_dataloader.dataset.data.shape)  # (50000, 32, 32, 3)
# len(loader) counts batches; the final partial batch is included, hence ceil (round(50000/64) would be 781).
print(len(train_dataloader), len(test_dataloader))  # 782=ceil(50000/bs), 157=ceil(10000/bs)
print(len(train_dataloader.dataset), len(test_dataloader.dataset))  # 50000 10000

(74)

CIFAR-100

# Variant 1: iterate over the loader and inspect only the first mini-batch.
for batch_index, (images, labels) in enumerate(train_dataloader):
    print(labels.shape)  # torch.Size([64])
    print(images.shape)  # torch.Size([64, 3, 32, 32])
    print(images[0].shape)  # torch.Size([3, 32, 32])
    # permute is the tensor-native way to reorder axes (no round trip through NumPy).
    images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
    print(images.shape)  # torch.Size([64, 32, 32, 3])
    print(images[0].shape)  # torch.Size([32, 32, 3])
    plt.imshow(images[0].reshape((32, 32, 3)))
    plt.show()
    break  # one batch is enough for the demonstration

# Variant 2: fetch a single mini-batch directly from an iterator.
# The built-in next() replaces itr.next(), and no surrounding loop is needed
# because only one batch is ever consumed.
itr = iter(train_dataloader)
images, labels = next(itr)
print(labels.shape)  # torch.Size([64])
print(images.shape)  # torch.Size([64, 3, 32, 32])
print(images[0].shape)  # torch.Size([3, 32, 32])
images = images.permute(0, 2, 3, 1)  # channel first order -> channel last order
print(images.shape)  # torch.Size([64, 32, 32, 3])
print(images[0].shape)  # torch.Size([32, 32, 3])
plt.imshow(images[0].reshape((32, 32, 3)))
plt.show()

2/2

(75)
(76)
(77)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1.3683  (stray value from the slide's tensor figure, kept from the original text)

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615

(78)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1.3683  (stray value from the slide's tensor figure, kept from the original text)

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615 1.8282 2

values

indices

0.1751 2 1.1866 2 0.9239 1 0.3441 1 0.1223 2 0.5557 1 0.9953 1 2.2492 1 0.1923 0

max.

(79)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1.3683  (stray value from the slide's tensor figure, kept from the original text)

3 × 10 tensor

-3.7282 -0.7150 -0.0891 -0.1889 -0.7913 -0.8726 -0.1097 1.9349 0.1923 -0.4194 -1.9478 0.7762 0.9239 0.3441 0.0412 0.5557 0.9953 2.2492 -0.4234 1.8282 0.1751 1.1866 0.7951 -0.8284 0.1223 -1.005 -1.3221 1.8195 -0.6615 1.9349 8

values

indices

2.2492 8 1.8282 0

max.

(80)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1 1  (stray values from the slide's a/b figure, kept from the original text)

a

b

0 1 1 1 0 1 1 1 0 1

(81)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1 1  (stray values from the slide's a/b figure, kept from the original text)

a

b

0 1 1 1 0 1 1 1 0 1 1

c

c.sum().item()

3

0 1 0 1 0

(82)

Accuracy Evaluation

import torch

# Demonstrates torch.max along each dimension, then a simple accuracy computation.
data = torch.randn(3, 10)
print(data.numpy())

# Maximum over dim 0: one (value, index) pair per column of the 3 x 10 tensor.
values, indices = torch.max(data, 0)
print(values.numpy(), indices.numpy())

# Maximum over dim 1: one (value, index) pair per row.
values, indices = torch.max(data, 1)
print(values.numpy(), indices.numpy())

a = torch.tensor([1, 0, 1, 0, 1, 0])
b = torch.tensor([1, 1, 1, 1, 1, 1])
c = (a == b)  # element-wise comparison
print(c.numpy())
print(c.sum().item())  # number of matching positions

# Percentage of positions where a and b agree.
accuracy = 100 * c.sum().item() / len(c)
print(accuracy, '%')
# 1 1  (stray values from the slide's a/b figure, kept from the original text)

a

b

0 1 1 1 0 1 1 1 0 1 1

c

c.sum().item()

3

accuracy

=

50.0 %

0 1 0 1 0

(83)

MNIST Classification

import torch, torchvision

# Hyperparameters and data pipeline for the MNIST classifier.
# Plain ASCII quotes are required around 'cpu' (the typographic quote is a syntax error).
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
INPUT_DIM = 784  # = 28 x 28
HIDDEN_DIM = 100
OUTPUT_DIM = 10  # the number of classes
TOTAL_EPOCHS = 10
LEARNING_RATE = 0.01
BATCH_SIZE = 2000

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# download=True fetches the training split into ./data/ when it is missing.
train_dataset = torchvision.datasets.MNIST(root='./data/', train=True, transform=transforms, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms)

# The DataLoader keyword is lowercase `batch_size`; `BATCH_SIZE=` is rejected as an unexpected argument.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

(84)

MNIST Classification

class Model(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        INPUT_DIM: number of features in each flattened input sample.
        HIDDEN_DIM: width of the hidden layer.
        OUTPUT_DIM: number of output scores (one per class).
    """

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super(Model, self).__init__()
        self.layer1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfn1 = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        """Return the raw output of the second linear layer (no softmax applied here)."""
        y1 = self.actfn1(self.layer1(x))
        y2 = self.layer2(y1)
        return y2

# Build the network and move its parameters to DEVICE.
model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)

# Loss function used during training.
CostFunc = torch.nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, with learning rate LEARNING_RATE.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

(85)

MNIST Classification

# Training: one optimizer step per mini-batch, for TOTAL_EPOCHS passes over the data.
for epoch in range(TOTAL_EPOCHS):
    for images, labels in train_dataloader:
        images = images.reshape(-1, INPUT_DIM).to(DEVICE)  # flattening
        labels = labels.to(DEVICE)

        output = model(images)
        cost = CostFunc(output, labels)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients so they do not accumulate into the next step

    # Cost of the last mini-batch of this epoch.
    # NOTE(review): the slide's indentation was lost; per-epoch reporting is assumed.
    print('Cost: {:.4f}'.format(cost.item()))

# for the test, you don't need to do the gradient computation.
with torch.no_grad():
    correct = 0
    for images, labels in test_dataloader:
        images = images.reshape(-1, INPUT_DIM).to(DEVICE)  # flattening
        labels = labels.to(DEVICE)

        output = model(images)
        _, predicted = torch.max(output, 1)  # index of the largest score per sample
        correct += (predicted == labels).sum().item()

    # Divide by the actual test-set size instead of a hard-coded 10000.
    print('Accuracy: {} %'.format(100 * correct / len(test_dataloader.dataset)))  # Accuracy: 97.36 %

(86)

CIFAR-10 Classification

import torch, torchvision

# Hyperparameters and data pipeline for the CIFAR-10 classifier.
# Plain ASCII quotes are required around 'cpu' (the typographic quote is a syntax error).
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
INPUT_DIM = 3072  # = 32 x 32 x 3
HIDDEN_DIM = 100
OUTPUT_DIM = 10  # the number of classes
TOTAL_EPOCHS = 10
LEARNING_RATE = 0.01
BATCH_SIZE = 2000

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# download=True fetches the training split into ./data/CIFAR10 when it is missing.
train_dataset = torchvision.datasets.CIFAR10(root='./data/CIFAR10', train=True, transform=transforms, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data/CIFAR10', train=False, transform=transforms)

# The DataLoader keyword is lowercase `batch_size`; `BATCH_SIZE=` is rejected as an unexpected argument.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

(87)

CIFAR-10 Classification

class Model(torch.nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        INPUT_DIM: number of features in each flattened input sample.
        HIDDEN_DIM: width of the hidden layer.
        OUTPUT_DIM: number of output scores (one per class).
    """

    def __init__(self, INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM):
        super(Model, self).__init__()
        self.layer1 = torch.nn.Linear(INPUT_DIM, HIDDEN_DIM)
        self.actfn1 = torch.nn.ReLU()
        self.layer2 = torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

    def forward(self, x):
        """Return the raw output of the second linear layer (no softmax applied here)."""
        y1 = self.actfn1(self.layer1(x))
        y2 = self.layer2(y1)
        return y2

# Build the network and move its parameters to DEVICE.
model = Model(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM).to(DEVICE)

# Loss function used during training.
CostFunc = torch.nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, with learning rate LEARNING_RATE.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

(88)

CIFAR-10 Classification

# Training: one optimizer step per mini-batch, for TOTAL_EPOCHS passes over the data.
for epoch in range(TOTAL_EPOCHS):
    for images, labels in train_dataloader:
        images = images.reshape(-1, INPUT_DIM).to(DEVICE)  # flattening
        labels = labels.to(DEVICE)

        output = model(images)
        cost = CostFunc(output, labels)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients so they do not accumulate into the next step

    # Cost of the last mini-batch of this epoch.
    # NOTE(review): the slide's indentation was lost; per-epoch reporting is assumed.
    print('Cost: {:.4f}'.format(cost.item()))

# for the test, you don't need to do the gradient computation.
with torch.no_grad():
    correct = 0
    for images, labels in test_dataloader:
        images = images.reshape(-1, INPUT_DIM).to(DEVICE)  # flattening
        labels = labels.to(DEVICE)

        output = model(images)
        _, predicted = torch.max(output, 1)  # index of the largest score per sample
        correct += (predicted == labels).sum().item()

    # Divide by the actual test-set size instead of a hard-coded 10000.
    print('Accuracy: {} %'.format(100 * correct / len(test_dataloader.dataset)))  # Accuracy: 21.15 %

(89)

Problems

Slow convergence

Too low accuracy

Flattened input data

In this process, the spatial information is lost.

(90)
(91)

CNN: Convolutional Neural Network

input image

feature maps

(n channels)

feature maps

(m channels)

activation maps

(m channels)

layer softmax

FC

dog

cat

bird

!

convolution

with n kernels

pooling

convolution

with m kernels

pooling

activation maps

(n channels)

(92)

CNN for MNIST

class Model(torch.nn.Module):
    """CNN for MNIST: two conv/ReLU/max-pool/dropout stages followed by three linear layers.

    Shape comments use the channel-first layout (bs, channels, height, width)
    that torch.nn.Conv2d operates on.
    """

    def __init__(self):
        super(Model, self).__init__()
        # NOTE(review): `keep_ratio` is read from module scope and is not defined
        # in this snippet — confirm it is set before Model() is constructed.

        # (bs, 1, 28, 28) -> conv -> (bs, 6, 28, 28) -> pool -> (bs, 6, 14, 14)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (bs, 6, 14, 14) -> conv -> (bs, 9, 14, 14) -> pool -> (bs, 9, 7, 7)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=6, out_channels=9, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (93) CNN for MNIST — continuation of the class on the next slide

        # FC: 7x7x9 -> 256
        self.layer3 = torch.nn.Sequential(
            torch.nn.Linear(7*7*9, 256),
            torch.nn.ReLU())

        # FC: 256 -> 128
        self.layer4 = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU())

        # FC: 128 -> 10
        self.layer5 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Run the conv stages, flatten per sample, then apply the linear layers."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.reshape(x.size(0), -1)  # flattening: 7x7x9 -> 441(=7x7x9)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

2/2

(94)

CNN for CIFAR-10

class Model(torch.nn.Module):
    """CNN for CIFAR-10: two conv/ReLU/max-pool/dropout stages followed by three linear layers.

    Shape comments use the channel-first layout (bs, channels, height, width)
    that torch.nn.Conv2d operates on.
    """

    def __init__(self):
        super(Model, self).__init__()
        # NOTE(review): `keep_ratio` is read from module scope and is not defined
        # in this snippet — confirm it is set before Model() is constructed.

        # (bs, 3, 32, 32) -> conv -> (bs, 6, 32, 32) -> pool -> (bs, 6, 16, 16)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (bs, 6, 16, 16) -> conv -> (bs, 9, 16, 16) -> pool -> (bs, 9, 8, 8)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=6, out_channels=9, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Dropout(1 - keep_ratio))

        # (95) CNN for CIFAR-10 — continuation of the class on the next slide

        # FC: 8x8x9 -> 256
        self.layer3 = torch.nn.Sequential(
            torch.nn.Linear(8*8*9, 256),
            torch.nn.ReLU())

        # FC: 256 -> 128
        self.layer4 = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU())

        # FC: 128 -> 10
        self.layer5 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Run the conv stages, flatten per sample, then apply the linear layers."""
        x = self.layer1(x)
        x = self.layer2(x)
        # The slide's comment said 7x7x9 -> 441 (copied from the MNIST model);
        # for 32x32 inputs the flattened size is 8*8*9 = 576, matching layer3.
        x = x.reshape(x.size(0), -1)  # flattening: 8x8x9 -> 576(=8x8x9)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x

2/2

(96)
(97)
(98)
(99)
(100)

수치

Updating...

관련 주제 :