# VAE (Variational AutoEncoder)

## 전체 글

(1)

(2)

(3)
(4)

(5)

(6)

(7)

(8)

(9)

### Codec = (En)coder + Decoder (or Compression + Decompression)

http://aess.com.tr/encoding-decoding-2/ https://www.joydeepdeb.com/tools/url-encoding-decoding.html

(10)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(11)

(12)

(13)

(14)

(15)

### Manifold 상에서는 데이터의 분류(classification) 또한 쉽고 명확해진다.

https://www.iro.umontreal.ca/~bengioy/talks/gss2012-YB3-algorithms-AE-depth.pdf

### m

https://www.slideshare.net/NaverEngineering/ss-96581209 https://en.wikipedia.org/wiki/Manifold_regularization

(16)

(17)

### 이 저차원의 manifold를 벗어나는 순간 데이터의 밀도는 급격히 낮아진다.

https://dsp.stackexchange.com/questions/34126/random-noise-removal-in-images

(18)

(19)

(20)

(21)

### Vanilla AE as PCA

# --- Vanilla AE as PCA: toy 3-D data generation ---
import torch

import numpy as np

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D

LEARNING_RATE = 0.001
TOTAL_EPOCHS = 1000


def Points(n, w1, w2, noise):
    """Generate ``n`` points lying near a 2-D manifold embedded in 3-D.

    The first two coordinates trace a noisy arc; the third is the linear
    combination ``z = x*w1 + y*w2`` plus Gaussian noise, so the cloud is
    intrinsically two-dimensional -- ideal for demonstrating AE-as-PCA.
    """
    points = np.empty((n, 3))
    angles = np.random.rand(n) * 3 * np.pi / 2 - 0.5
    points[:, 0] = np.cos(angles) + np.sin(angles) / 2 + noise * np.random.randn(n) / 2
    points[:, 1] = np.sin(angles) * 0.7 + noise * np.random.randn(n) / 2
    points[:, 2] = points[:, 0] * w1 + points[:, 1] * w2 + noise * np.random.randn(n)
    return points


points = Points(100, 0.1, 0.3, 0.1)
x = torch.from_numpy(points).float()

(22)

### Vanilla AE as PCA

# --- Vanilla AE as PCA: visualization helpers and the 3 -> 2 -> 3 model ---
def Display3D(points):
    """Scatter-plot the raw 3-D point cloud."""
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(points[:, 0], points[:, 1], points[:, 2])
    plt.show()


def Display2D(points):
    """Scatter-plot the 2-D latent codes as blue dots."""
    # FIX: the slide text had a curly opening quote (``“b."``) -- a SyntaxError.
    plt.plot(points[:, 0], points[:, 1], "b.")
    plt.show()


class Model(torch.nn.Module):
    """Linear autoencoder with a 2-D bottleneck (equivalent to PCA)."""

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(3, 2)  # from 3D to 2D
        self.decoder = torch.nn.Linear(2, 3)  # from 2D to 3D

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

    def z(self, x):  # latent vector
        """Return the 2-D latent code for input ``x``."""
        z = self.encoder(x)
        return z

(23)

### Vanilla AE as PCA

# --- Vanilla AE as PCA: training ---
model = Model()

CostFunc = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    output = model(x)            # reconstruct the 3-D points
    cost = CostFunc(output, x)   # reconstruction error against the input itself
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()        # clear gradients for the next epoch
    # FIX: the original used a curly closing quote ('{:.4f}’) -- a SyntaxError.
    print('Cost: {:.4f}'.format(cost.item()))

Display3D(points)
z = model.z(x).detach()          # 2-D latent codes of all points
Display2D(z.numpy())

(24)

(25)

### Vanilla AE for MNIST

# --- Vanilla AE for MNIST: data setup ---
import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
# NOTE(review): the extracted slide was cut off after "train_dataset"; the
# standard MNIST setup used by the later slides (which reference
# `train_dataloader`) is reconstructed below -- confirm against the original deck.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(26)

### Vanilla AE for MNIST

# --- Vanilla AE for MNIST: model and training ---
class Model(torch.nn.Module):
    """Single-hidden-layer autoencoder: 784 -> 100 -> 784."""

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(28*28, 100)
        self.decoder = torch.nn.Linear(100, 28*28)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = Model()
CostFunc = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    # FIX: the extracted slide lost the inner mini-batch loop; without it
    # `images` is undefined (compare the de-noiser version of this slide).
    for images, _ in train_dataloader:
        images = images.reshape(-1, 784)    # flattening for input
        output = model(images)              # feed forward
        cost = CostFunc(output, images)     # compare with input images (not with labels)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()               # FIX: gradients otherwise accumulate across batches
    # FIX: the original used a curly closing quote -- a SyntaxError.
    print('Cost: {:.4f}'.format(cost.item()))

(27)

### Vanilla AE for MNIST

# --- Vanilla AE for MNIST: originals (top row) vs reconstructions (bottom row) ---
plt.figure(figsize=(20, 4))
for i in range(10):
    img = images
    plt.subplot(2, 10, i+1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # FIX: curly quote ‘off' was a SyntaxError
    img = output.detach().numpy()
    plt.subplot(2, 10, i+11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(28)
(29)

### Vanilla AE as De-noiser

# --- Vanilla AE as de-noiser: data setup ---
import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
# NOTE(review): the extracted slide was cut off after "train_dataset"; the
# standard MNIST setup used by the later slides (which reference
# `train_dataloader`) is reconstructed below -- confirm against the original deck.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(30)

### Vanilla AE as De-noiser

# --- Vanilla AE as de-noiser: model and training ---
class Model(torch.nn.Module):
    """Single-hidden-layer autoencoder: 784 -> 100 -> 784."""

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(28*28, 100)
        self.decoder = torch.nn.Linear(100, 28*28)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = Model()
CostFunc = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    for images, _ in train_dataloader:
        clean = images.reshape(-1, 784)
        # `images` keeps the noisy version so the display slide shows noisy inputs.
        images = clean + torch.randn(clean.size()) * 0.5  # add noise
        output = model(images)
        # FIX: a de-noiser must learn noisy -> clean; the original compared the
        # output against the noisy tensor itself, training a plain autoencoder
        # on noisy data instead of a de-noiser.
        cost = CostFunc(output, clean)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()  # FIX: gradients otherwise accumulate across batches
    # FIX: the original used a curly closing quote -- a SyntaxError.
    print('Cost: {:.4f}'.format(cost.item()))

(31)

### Vanilla AE as De-noiser

# --- De-noiser: noisy inputs (top row) vs denoised outputs (bottom row) ---
plt.figure(figsize=(20, 4))
for i in range(10):
    img = images
    plt.subplot(2, 10, i+1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # FIX: curly quote ‘off' was a SyntaxError
    img = output.detach().numpy()
    plt.subplot(2, 10, i+11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(32)
(33)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(34)

### Deep AE as De-noiser

class Model(torch.nn.Module):
    """Deep autoencoder: 784 -> 256 -> 64 -> 16 -> 4 -> 16 -> 64 -> 256 -> 784.

    The decoder ends in Tanh, so reconstructions are bounded to [-1, 1].
    """

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28*28, 256), torch.nn.ReLU(),
            torch.nn.Linear(256, 64), torch.nn.ReLU(),
            torch.nn.Linear(64, 16), torch.nn.ReLU(),
            torch.nn.Linear(16, 4))
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(4, 16), torch.nn.ReLU(),
            torch.nn.Linear(16, 64), torch.nn.ReLU(),
            torch.nn.Linear(64, 256), torch.nn.ReLU(),
            torch.nn.Linear(256, 28*28), torch.nn.Tanh())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

(35)
(36)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(37)

### Convolutional AutoEncoder (CAE)

# --- Convolutional AutoEncoder (CAE): data setup ---
import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
# NOTE(review): the extracted slide was cut off after "train_dataset"; the
# standard MNIST setup used by the later slides (which reference
# `train_dataloader`) is reconstructed below -- confirm against the original deck.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(38)

### Convolutional AutoEncoder (CAE)

class Model(torch.nn.Module):
    """Convolutional autoencoder for 28x28 single-channel images.

    Encoder: (N, 1, 28, 28) -> conv -> (N, 128, 28, 28) -> pool -> (N, 128, 14, 14)
             -> conv -> (N, 8, 14, 14) -> pool -> (N, 8, 7, 7)
    Decoder: each ConvTranspose2d with (kernel, stride) = (2, 2) doubles the
             spatial size: (N, 8, 7, 7) -> (N, 128, 14, 14) -> (N, 1, 28, 28).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Sequential(
            # Conv2d args: in_channels, out_channels, kernel_size, stride, padding
            torch.nn.Conv2d(1, 128, 3, 1, 1),
            torch.nn.MaxPool2d(2, 2),
            torch.nn.Conv2d(128, 8, 3, 1, 1),
            torch.nn.MaxPool2d(2, 2))
        self.decoder = torch.nn.Sequential(
            # ConvTranspose2d args: in_channels, out_channels, kernel_size, stride, padding
            torch.nn.ConvTranspose2d(8, 128, 2, 2, 0),
            torch.nn.ConvTranspose2d(128, 1, 2, 2, 0))

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

(39)

### Convolutional AutoEncoder (CAE)

# --- Convolutional AutoEncoder (CAE): training ---
model = Model()

CostFunc = torch.nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    for images, _ in train_dataloader:  # no flattening process: conv layers take NCHW
        output = model(images)
        cost = CostFunc(output, images)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()  # FIX: gradients otherwise accumulate across batches
    print('Cost: {:.4f}'.format(cost.item()))

(40)

### Convolutional AutoEncoder (CAE)

# --- CAE: originals (top row) vs reconstructions (bottom row) ---
plt.figure(figsize=(20, 4))
for i in range(10):
    img = images
    plt.subplot(2, 10, i+1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # FIX: curly quote ‘off' was a SyntaxError
    img = output.detach().numpy()
    plt.subplot(2, 10, i+11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(41)
(42)

(43)

### Decoder

Noisy Input Denoised Output

Newly Generated Output Features

(44)

### Decoder

Noisy Input Denoised Output

Newly Generated Output Features

(45)

(46)

밑(base)과 지수(exponent)

(47)

2

2

(48)

1 2

(49)

(50)

$a^0 = 1, \quad a^1 = a$

(51)

$(a^m)^n = a^{nm}$

(52)

$a^m \cdot a^n = a^{m+n}$

(53)

$\dfrac{a^m}{a^n} = a^{m-n}$

(54)

(55)

(56)

(57)

(58)

(59)

(60)

(61)

(62)

(63)

(64)

(65)

(66)

2

2

2

2

2

2

2

2

2

2

2

2

(67)

(68)

(69)

.

(70)

.

(71)

.

(72)

.

(73)

(74)

(75)

### X

0 0.05 0.1 0.15 0.2

(76)

(77)

(78)

(79)

2

2

(80)

(81)

(82)

(83)

(84)

(85)

(86)

(87)

(88)

(89)

(90)

(91)

(92)

KL

KL

(93)

(94)
(95)

(96)

(97)

$D_{KL}(P \,\|\, Q) = \sum_x P(x) \log_2 \dfrac{P(x)}{Q(x)}$

(98)

$D_{KL}(P \,\|\, Q) = \sum_x P(x) \log_2 P(x) - \sum_x P(x) \log_2 Q(x)$

(99)

$D_{KL}(P \,\|\, Q) = -H(P) - \sum_x P(x) \log_2 Q(x) = H(P, Q) - H(P)$

(100)

$H(P, Q) = H(P) + D_{KL}(P \,\|\, Q) \;\ge\; H(P)$

(101)

$D_{KL}(P \,\|\, Q) \ge 0$, with equality if and only if $P = Q$

Updating...

관련 주제 :