• 검색 결과가 없습니다.

# VAE (Variational AutoEncoder)

N/A
N/A
Protected

Share "VAE (Variational AutoEncoder)"

Copied!
160
0
0

로드 중.... (전체 텍스트 보기)

전체 글

(1)

(2)

(3)
(4)

(5)

(6)

(7)

(8)

(9)

### Codec = (En)coder + Decoder (or Compression + Decompression)

http://aess.com.tr/encoding-decoding-2/ https://www.joydeepdeb.com/tools/url-encoding-decoding.html

(10)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(11)

(12)

(13)

(14)

(15)

### Manifold 상에서는 데이터의 분류(classification) 또한 쉽고 명확해진다.

https://www.iro.umontreal.ca/~bengioy/talks/gss2012-YB3-algorithms-AE-depth.pdf

### Manifold

https://www.slideshare.net/NaverEngineering/ss-96581209 https://en.wikipedia.org/wiki/Manifold_regularization

(16)

(17)

### 이 저차원의 manifold를 벗어나는 순간 데이터의 밀도는 급격히 낮아진다.

https://dsp.stackexchange.com/questions/34126/random-noise-removal-in-images

(18)

(19)

(20)

(21)

### Vanilla AE as PCA

import torch

import numpy as np

import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D LEARNING_RATE = 0.001

TOTAL_EPOCHS = 1000

def Points(n, w1, w2, noise): points = np.empty((n, 3))

angles = np.random.rand(n) * 3 * np.pi / 2 - 0.5

points[:,0] = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(n) / 2 points[:,1] = np.sin(angles) * 0.7 + noise * np.random.randn(n) / 2

points[:,2] = points[:,0] * w1 + points[:,1] * w2 + noise * np.random.randn(n) return points

points = Points(100, 0.1, 0.3, 0.1) x = torch.from_numpy(points).float()

(22)

### Vanilla AE as PCA

def Display3D(points):
    """Scatter-plot the 3-D input data."""
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(points[:, 0], points[:, 1], points[:, 2])
    plt.show()


def Display2D(points):
    """Plot 2-D points as blue dots (used for the latent space)."""
    plt.plot(points[:, 0], points[:, 1], "b.")  # fixed: curly quote broke the string literal
    plt.show()


class Model(torch.nn.Module):
    """Linear autoencoder: 3-D input -> 2-D latent -> 3-D reconstruction.

    With a purely linear encoder/decoder and MSE loss this learns the
    same subspace as PCA.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(3, 2)  # from 3D to 2D
        self.decoder = torch.nn.Linear(2, 3)  # from 2D to 3D

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

    def z(self, x):  # latent vector
        z = self.encoder(x)
        return z

(23)

### Vanilla AE as PCA

model = Model()
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    output = model(x)            # reconstruction of the input
    cost = CostFunc(output, x)   # autoencoder target is the input itself
    optimizer.zero_grad()        # clear old gradients before backward
    cost.backward()
    optimizer.step()

print('Cost: {:.4f}'.format(cost.item()))  # fixed: curly quote broke the string literal

Display3D(points)                # original 3-D data
z = model.z(x).detach()          # detach: no gradient needed for plotting
Display2D(z.numpy())             # learned 2-D latent representation

(24)

(25)

### Vanilla AE for MNIST

import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# NOTE(review): the scraped text was truncated after `train_dataset`;
# reconstructed the standard MNIST dataset/dataloader setup that the
# training loop below requires — confirm against the original slides.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(26)

### Vanilla AE for MNIST

class Model(torch.nn.Module):
    """Single-hidden-layer autoencoder for flattened 28x28 MNIST images."""

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(28 * 28, 100)
        self.decoder = torch.nn.Linear(100, 28 * 28)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = Model()
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    # NOTE(review): the scrape dropped the inner dataloader loop; restored it
    # to match the de-noiser version of this same example.
    for images, _ in train_dataloader:
        images = images.reshape(-1, 784)   # flattening for input
        output = model(images)             # feed forward
        cost = CostFunc(output, images)    # compare with input images (not with labels)
        optimizer.zero_grad()              # clear accumulated gradients (missing in original)
        cost.backward()
        optimizer.step()
    print('Cost: {:.4f}'.format(cost.item()))  # fixed: curly quote broke the string literal

(27)

### Vanilla AE for MNIST

plt.figure(figsize=(20, 4))
for i in range(10):
    # top row: original input images
    img = images
    plt.subplot(2, 10, i + 1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # fixed: curly quotes broke the string literal
    # bottom row: reconstructions from the autoencoder
    img = output.detach().numpy()
    plt.subplot(2, 10, i + 11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(28)
(29)

### Vanilla AE as De-noiser

import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# NOTE(review): the scraped text was truncated after `train_dataset`;
# reconstructed the standard MNIST dataset/dataloader setup that the
# training loop below requires — confirm against the original slides.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(30)

### Vanilla AE as De-noiser

class Model(torch.nn.Module):
    """Single-hidden-layer autoencoder used as an MNIST de-noiser."""

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Linear(28 * 28, 100)
        self.decoder = torch.nn.Linear(100, 28 * 28)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = Model()
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    for images, _ in train_dataloader:
        images = images.reshape(-1, 784)
        noisy = images + torch.randn(images.size()) * 0.5  # add noise
        output = model(noisy)
        # BUG FIX: a de-noiser must reconstruct the CLEAN images from the
        # noisy input. The original overwrote `images` with the noisy version
        # and compared the output against it, which trains a plain
        # autoencoder of noisy data instead of a de-noiser.
        cost = CostFunc(output, images)
        optimizer.zero_grad()  # clear accumulated gradients (missing in original)
        cost.backward()
        optimizer.step()
    print('Cost: {:.4f}'.format(cost.item()))  # fixed: curly quote broke the string literal

(31)

### Vanilla AE as De-noiser

plt.figure(figsize=(20, 4))
for i in range(10):
    # top row: input images
    img = images
    plt.subplot(2, 10, i + 1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # fixed: curly quotes broke the string literal
    # bottom row: de-noised reconstructions
    img = output.detach().numpy()
    plt.subplot(2, 10, i + 11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(32)
(33)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(34)

### Deep AE as De-noiser

class Model(torch.nn.Module):
    """Deep fully-connected autoencoder: 784 -> 256 -> 64 -> 16 -> 4 -> ... -> 784.

    The decoder ends in Tanh, so outputs lie in [-1, 1].
    NOTE(review): ToTensor() inputs are in [0, 1]; confirm the intended input
    normalization matches the Tanh output range.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(28 * 28, 256), torch.nn.ReLU(),
            torch.nn.Linear(256, 64), torch.nn.ReLU(),
            torch.nn.Linear(64, 16), torch.nn.ReLU(),
            torch.nn.Linear(16, 4))
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(4, 16), torch.nn.ReLU(),
            torch.nn.Linear(16, 64), torch.nn.ReLU(),
            torch.nn.Linear(64, 256), torch.nn.ReLU(),
            torch.nn.Linear(256, 28 * 28), torch.nn.Tanh())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

(35)
(36)

latent vector

### Output Layer

인지 네트워크 (recognition network) 생성 네트워크 (generative network)

(37)

### Convolutional AutoEncoder (CAE)

import torch, torchvision

from matplotlib import pyplot as plt

BATCH_SIZE = 100
LEARNING_RATE = 0.001
TOTAL_EPOCHS = 3

transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# NOTE(review): the scraped text was truncated after `train_dataset`;
# reconstructed the standard MNIST dataset/dataloader setup that the
# training loop below requires — confirm against the original slides.
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                           transform=transforms, download=True)
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True)

(38)

### Convolutional AutoEncoder (CAE)

class Model(torch.nn.Module):
    """Convolutional autoencoder for MNIST (N, 1, 28, 28) images."""

    def __init__(self):
        super(Model, self).__init__()
        # Encoder (NCHW):
        # (N, 1, 28, 28)  -> conv -> (N, 128, 28, 28) -> pool -> (N, 128, 14, 14)
        # (N, 128, 14, 14) -> conv -> (N, 8, 14, 14)  -> pool -> (N, 8, 7, 7)
        # (original comments said 8 and 4 channels, which did not match the code)
        self.encoder = torch.nn.Sequential(
            torch.nn.Conv2d(1, 128, 3, 1, 1),   # in_channels, out_channels, kernel_size, stride, padding
            torch.nn.MaxPool2d(2, 2),
            torch.nn.Conv2d(128, 8, 3, 1, 1),   # in_channels, out_channels, kernel_size, stride, padding
            torch.nn.MaxPool2d(2, 2))
        # Decoder: (kernel_size, stride) = (2, 2) doubles the spatial dims.
        # (N, 8, 7, 7) -> (N, 128, 14, 14) -> (N, 1, 28, 28)
        self.decoder = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(8, 128, 2, 2, 0),   # in_channels, out_channels, kernel_size, stride, padding
            torch.nn.ConvTranspose2d(128, 1, 2, 2, 0))   # in_channels, out_channels, kernel_size, stride, padding

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

(39)

### Convolutional AutoEncoder (CAE)

model = Model()
CostFunc = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(TOTAL_EPOCHS):
    for images, _ in train_dataloader:  # no flattening process: Conv2d takes (N, 1, 28, 28)
        output = model(images)
        cost = CostFunc(output, images)
        # BUG FIX: the original never called zero_grad(), so gradients
        # accumulated across every step of training.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
    print('Cost: {:.4f}'.format(cost.item()))

(40)

### Convolutional AutoEncoder (CAE)

plt.figure(figsize=(20, 4))
for i in range(10):
    # top row: input images
    img = images
    plt.subplot(2, 10, i + 1)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')  # fixed: curly quotes broke the string literal
    # bottom row: CAE reconstructions
    img = output.detach().numpy()
    plt.subplot(2, 10, i + 11)
    plt.imshow(img[i].reshape(28, 28))
    plt.gray()
    plt.axis('off')
plt.tight_layout()
plt.show()

(41)
(42)

(43)

### Decoder

Noisy Input Denoised Output

Newly Generated Output Features

(44)

### Decoder

Noisy Input Denoised Output

Newly Generated Output Features

(45)

(46)

밑 진수

(47)

2

2

(48)

1 2

(49)

(50)

$a^0 = 1,\qquad a^1 = a$

(51)

$(a^m)^n = a^{nm}$

(52)

$a^m \cdot a^n = a^{m+n}$

(53)

$\dfrac{a^m}{a^n} = a^{m-n}$

(54)

(55)

(56)

(57)

(58)

(59)

(60)

(61)

(62)

(63)

(64)

(65)

(66)

2

2

2

2

2

2

2

2

2

2

2

2

(67)

(68)

(69)

.

(70)

.

(71)

.

(72)

.

(73)

(74)

(75)

### X

0 0.05 0.1 0.15 0.2

(76)

(77)

(78)

(79)

2

2

(80)

(81)

(82)

(83)

(84)

(85)

(86)

(87)

(88)

(89)

(90)

(91)

(92)

KL divergence (Kullback–Leibler divergence), $D_{\mathrm{KL}}(P\|Q)$

(93)

(94)
(95)

(96)

(97)

Entropy: $H(P) = -\sum_x P(x)\log_2 P(x)$; cross-entropy: $H(P,Q) = -\sum_x P(x)\log_2 Q(x)$

(98)

$D_{\mathrm{KL}}(P\|Q) = H(P,Q) - H(P) = -\sum_x P(x)\log_2 Q(x) + \sum_x P(x)\log_2 P(x)$

(99)

$D_{\mathrm{KL}}(P\|Q) = \sum_x P(x)\bigl(\log_2 P(x) - \log_2 Q(x)\bigr)$

(100)

$D_{\mathrm{KL}}(P\|Q) = \sum_x P(x)\log_2 \frac{P(x)}{Q(x)}$

(101)

$D_{\mathrm{KL}}(P\|Q) = \sum_x P(x)\log_2 \frac{P(x)}{Q(x)} \;\ge\; 0$, with equality if and only if $P = Q$.

### B

참조

관련 문서

It considers the energy use of the different components that are involved in the distribution and viewing of video content: data centres and content delivery networks

Furthermore, it is necessary to provide support so that the innovation strategy of the GRIs is included in the goal and scenario to reach carbon neutrality by 2050 on

And that result interpreted that water hydrolysis of adhesive resin cause porphyrin value loss, so that adhesive resin was degradated.. Thus, by using the

- The probability density function of the Normal distribution is symmetric about its mean value, and this distribution cannot be used to model right-skewed or left- skewed

→ making it possible to store spatial data w/ their associated attribute data in a single DB advantages (compare to geo-relational model). take full advantage of

To focus our discussion on inverse kinematics, we wifi assume that the necessary transformations have been performed so that the goal point is a specification of the

- The best value of a parameter of a probability distribution should be that value which maximizes the likelihood or joint probability of occurrence of

- Imprecise statistical estimation (probability distribution type, parameters, …).. - Only depending on the sample size and location - Ex) lack of