
This time, we code up running an RNN on MNIST.

 

Experimental setups

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import numpy as np
torch.__version__

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(2891)
num_gpu = 1
if torch.cuda.device_count() > 1:
    num_gpu = torch.cuda.device_count()
print("Let's use", num_gpu, "GPUs!")
print('our device', device)

'''
Let's use 1 GPUs!
our device cuda
'''

 

RNN model design

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.gru = nn.GRU(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, num_classes)
        
        
    def forward(self, x, rnn):
        # x: (batch, seq_len, input_size); `rnn` selects which recurrent cell to use
        if rnn == 'lstm':
            # LSTM needs both an initial hidden state and an initial cell state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.lstm(x, (h0, c0))
        else:
            # GRU only needs an initial hidden state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.gru(x, h0)
        
        # classify from the hidden output of the last timestep
        out = self.fc(out[:, -1, :])
        return out

The forward pass is implemented so that the desired RNN family (LSTM or GRU) can be selected when the model is called.
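For example, once the class and `device` above are defined, either branch can be exercised on a dummy batch. This is a minimal sketch; `demo_model` and `demo_x` are throwaway names that are not used elsewhere in the post.

demo_model = RNN(28, 128, 2, 10).to(device)
demo_x = torch.randn(4, 28, 28).to(device)      # (batch, seq_len, input_size)
print(demo_model(demo_x, 'lstm').shape)         # torch.Size([4, 10]) via the LSTM branch
print(demo_model(demo_x, 'gru').shape)          # torch.Size([4, 10]) via the GRU branch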

 

Hyperparameter settings, model shape, and parameter count check

sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 10
learning_rate = 0.01

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
#model shape
for p in model.parameters():
    print(p.size())
'''
torch.Size([512, 28])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([512, 128])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([384, 28])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([384, 128])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([10, 128])
torch.Size([10])
'''
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

model_hp = count_parameters(model)
print("model's trainable parameters", model_hp)
# model's trainable parameters 374026

 

Dataset loading and train/test loader declaration

train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),batch_size=batch_size, shuffle=True)
print(len(train_loader)) # 600
test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, transform=transforms.ToTensor()),batch_size=1000)
print(len(test_loader)) # 10
'''
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
9913344/? [04:53<00:00, 33768.69it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
29696/? [00:00<00:00, 433940.88it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
1649664/? [00:51<00:00, 32232.92it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
5120/? [00:00<00:00, 108088.65it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw

Processing...
Done!
600
10
/usr/local/lib/python3.7/dist-packages/torchvision/datasets/mnist.py:502: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:143.)
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
'''
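Each loader yields image batches of shape (batch, 1, 28, 28); the training loop below reshapes them so that each of the 28 image rows becomes one timestep with 28 features. A quick sanity check (a small sketch, assuming the loaders and settings above):

images, labels = next(iter(train_loader))
print(images.shape)                                     # torch.Size([100, 1, 28, 28])
seq = images.reshape(-1, sequence_length, input_size)   # rows become timesteps
print(seq.shape, labels.shape)                          # torch.Size([100, 28, 28]) torch.Size([100])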

 

Loss and optimizer declaration

Previously we used F.nll_loss; this time we declare a CrossEntropyLoss and proceed with that.

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
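Note that nn.CrossEntropyLoss combines LogSoftmax and NLLLoss internally, so the model's raw logits are passed to it directly. A small equivalence check (a sketch, not part of the original post):

logits = torch.randn(5, 10)
targets = torch.randint(0, 10, (5,))
ce = nn.CrossEntropyLoss()(logits, targets)
nll = F.nll_loss(F.log_softmax(logits, dim=1), targets)
print(torch.allclose(ce, nll))   # True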

 

Training

model.train()
total_loss = 0
total_acc = 0
train_loss = []
train_accuracy = []
i = 0
for epoch in range(num_epochs):
    for data, target in train_loader:
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)
         
        optimizer.zero_grad()
        output = model(data, 'lstm')
        loss = criterion(output, target)
       
        
        loss.backward()    # compute gradients
       
        total_loss += loss.item()   # detach from the graph before accumulating
       
        train_loss.append(total_loss/(i+1))   # running-average loss
        optimizer.step()   # update parameters
       
        prediction = output.data.max(1)[1]   # index of the max log-probability
        accuracy = prediction.eq(target.data).sum().item()/batch_size*100
       
        total_acc += accuracy
       
        train_accuracy.append(total_acc/(i+1))   # running-average accuracy
       
        if i % 10 == 0:
            print('Epoch: {}\t Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch+1, i, loss, accuracy))
        i += 1
    print('Epoch: {} finished'.format(epoch+1))
'''
Epoch: 9 finished
Epoch: 10	 Train Step: 5400	Loss: 0.031	Accuracy: 99.000
Epoch: 10	 Train Step: 5410	Loss: 0.012	Accuracy: 100.000
Epoch: 10	 Train Step: 5420	Loss: 0.045	Accuracy: 99.000
Epoch: 10	 Train Step: 5430	Loss: 0.107	Accuracy: 98.000
Epoch: 10	 Train Step: 5440	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 5450	Loss: 0.031	Accuracy: 99.000
Epoch: 10	 Train Step: 5460	Loss: 0.038	Accuracy: 98.000
Epoch: 10	 Train Step: 5470	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 5480	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5490	Loss: 0.024	Accuracy: 100.000
Epoch: 10	 Train Step: 5500	Loss: 0.109	Accuracy: 97.000
Epoch: 10	 Train Step: 5510	Loss: 0.085	Accuracy: 96.000
Epoch: 10	 Train Step: 5520	Loss: 0.016	Accuracy: 100.000
Epoch: 10	 Train Step: 5530	Loss: 0.065	Accuracy: 99.000
Epoch: 10	 Train Step: 5540	Loss: 0.088	Accuracy: 98.000
Epoch: 10	 Train Step: 5550	Loss: 0.042	Accuracy: 99.000
Epoch: 10	 Train Step: 5560	Loss: 0.049	Accuracy: 98.000
Epoch: 10	 Train Step: 5570	Loss: 0.032	Accuracy: 99.000
Epoch: 10	 Train Step: 5580	Loss: 0.095	Accuracy: 97.000
Epoch: 10	 Train Step: 5590	Loss: 0.038	Accuracy: 98.000
Epoch: 10	 Train Step: 5600	Loss: 0.106	Accuracy: 98.000
Epoch: 10	 Train Step: 5610	Loss: 0.026	Accuracy: 99.000
Epoch: 10	 Train Step: 5620	Loss: 0.021	Accuracy: 100.000
Epoch: 10	 Train Step: 5630	Loss: 0.069	Accuracy: 98.000
Epoch: 10	 Train Step: 5640	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5650	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 5660	Loss: 0.154	Accuracy: 97.000
Epoch: 10	 Train Step: 5670	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 5680	Loss: 0.075	Accuracy: 97.000
Epoch: 10	 Train Step: 5690	Loss: 0.107	Accuracy: 97.000
Epoch: 10	 Train Step: 5700	Loss: 0.050	Accuracy: 98.000
Epoch: 10	 Train Step: 5710	Loss: 0.043	Accuracy: 98.000
Epoch: 10	 Train Step: 5720	Loss: 0.064	Accuracy: 99.000
Epoch: 10	 Train Step: 5730	Loss: 0.039	Accuracy: 98.000
Epoch: 10	 Train Step: 5740	Loss: 0.014	Accuracy: 100.000
Epoch: 10	 Train Step: 5750	Loss: 0.042	Accuracy: 99.000
Epoch: 10	 Train Step: 5760	Loss: 0.137	Accuracy: 96.000
Epoch: 10	 Train Step: 5770	Loss: 0.101	Accuracy: 97.000
Epoch: 10	 Train Step: 5780	Loss: 0.054	Accuracy: 97.000
Epoch: 10	 Train Step: 5790	Loss: 0.084	Accuracy: 97.000
Epoch: 10	 Train Step: 5800	Loss: 0.054	Accuracy: 98.000
Epoch: 10	 Train Step: 5810	Loss: 0.062	Accuracy: 97.000
Epoch: 10	 Train Step: 5820	Loss: 0.076	Accuracy: 98.000
Epoch: 10	 Train Step: 5830	Loss: 0.094	Accuracy: 97.000
Epoch: 10	 Train Step: 5840	Loss: 0.147	Accuracy: 96.000
Epoch: 10	 Train Step: 5850	Loss: 0.048	Accuracy: 99.000
Epoch: 10	 Train Step: 5860	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5870	Loss: 0.034	Accuracy: 98.000
Epoch: 10	 Train Step: 5880	Loss: 0.048	Accuracy: 99.000
Epoch: 10	 Train Step: 5890	Loss: 0.025	Accuracy: 99.000
Epoch: 10	 Train Step: 5900	Loss: 0.110	Accuracy: 97.000
Epoch: 10	 Train Step: 5910	Loss: 0.092	Accuracy: 98.000
Epoch: 10	 Train Step: 5920	Loss: 0.149	Accuracy: 96.000
Epoch: 10	 Train Step: 5930	Loss: 0.014	Accuracy: 100.000
Epoch: 10	 Train Step: 5940	Loss: 0.056	Accuracy: 98.000
Epoch: 10	 Train Step: 5950	Loss: 0.058	Accuracy: 97.000
Epoch: 10	 Train Step: 5960	Loss: 0.064	Accuracy: 98.000
Epoch: 10	 Train Step: 5970	Loss: 0.050	Accuracy: 98.000
Epoch: 10	 Train Step: 5980	Loss: 0.040	Accuracy: 99.000
Epoch: 10	 Train Step: 5990	Loss: 0.015	Accuracy: 100.000
Epoch: 10 finished
'''

The RNN here was run with the LSTM branch.

 

 

Plotting results

plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.show()
#plt.savefig('./train_loss_result.png')

plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.show()
#plt.savefig('./train_accuracy_result.png')

 

Figure: training loss over steps
Figure: training accuracy over steps

 

Evaluation results

with torch.no_grad():
    model.eval()
    correct = 0
   
    for data, target in test_loader:
        
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)        
        output = model(data, 'lstm')

        prediction = output.data.max(1)[1]
        correct += prediction.eq(target.data).sum()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# Test set: Accuracy: 97.63%

 

In fact, an RNN is not the best-suited model for MNIST data,

because the RNN concept is geared toward sequential data rather than images.

 

Nevertheless, the result here surpasses the MLP's performance.


This time, using the same MNIST dataset as the previous post, we train a CNN and measure its performance.

 

Unlike a fully-connected layer, a CNN does not have to flatten its input; because its kernels are shared across spatial positions it uses comparatively few parameters and computes quickly, and its receptive fields make it strong at extracting local features.
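As a rough illustration of that saving (a sketch with arbitrarily chosen layer sizes, not taken from the post, and assuming torch.nn is imported as nn as in the setup just below), compare a 5x5 convolution producing 32 feature maps over a 28x28 single-channel input with a fully-connected layer producing the same number of output activations:

conv = nn.Conv2d(1, 32, kernel_size=5, padding=2)   # 32*1*5*5 + 32 = 832 parameters
fc = nn.Linear(28 * 28, 28 * 28 * 32)               # one weight per (pixel, output unit) pair
count = lambda m: sum(p.numel() for p in m.parameters())
print(count(conv), count(fc))                       # 832 vs 19,694,080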

 

Library importing and device setup

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import numpy as np
torch.__version__

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(2891)
num_gpu = 1
if torch.cuda.device_count() > 1:
    num_gpu = torch.cuda.device_count()
print("Let's use", num_gpu, "GPUs!")

print('our device', device)
'''
Let's use 1 GPUs!
our device cuda
'''

Designing a 2-layer CNN (in the 'add here' sections, practice adding batch normalization and stacking more layers; one possible sketch is shown right after the class)

class CNN(nn.Module):
    def __init__(self, num_class, drop_prob):
        super(CNN, self).__init__()
        # input is 28x28
        # padding=2 for same padding
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2) # in_channels, out_channels, kernel_size, padding
        # feature map size is 14*14 by pooling
        # padding=2 for same padding
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)
        # feature map size is 7*7 by pooling
        '''
        add here.. make more deep...

        batchnormalization ++
        '''
        self.dropout = nn.Dropout(p=drop_prob)

        self.fc1 = nn.Linear(64*7*7, 1024)
        self.reduce_layer = nn.Linear(1024, num_class)
        self.log_softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2) # -> (B, 32, 14, 14)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) # -> (B, 64, 7, 7)
        '''
        add here.. make more deep...
        and use dropout
        '''
        x = x.view(-1, 64*7*7)   # flatten to (B, 64*7*7) so the tensor can pass through the Linear layers
        x = F.relu(self.fc1(x))
        
        output = self.reduce_layer(x)
        return self.log_softmax(output)
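Since the 'add here' comments invite deepening the network, here is one possible sketch for that exercise. DeeperCNN is a hypothetical variant, not the model actually trained below; it adds batch normalization, a third conv layer, and dropout in the forward pass.

class DeeperCNN(nn.Module):
    def __init__(self, num_class, drop_prob):
        super(DeeperCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)   # extra layer, keeps the 7x7 map
        self.bn3 = nn.BatchNorm2d(128)
        self.dropout = nn.Dropout(p=drop_prob)
        self.fc1 = nn.Linear(128 * 7 * 7, 1024)
        self.reduce_layer = nn.Linear(1024, num_class)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), 2)  # -> (B, 32, 14, 14)
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))), 2)  # -> (B, 64, 7, 7)
        x = F.relu(self.bn3(self.conv3(x)))                   # -> (B, 128, 7, 7)
        x = x.view(-1, 128 * 7 * 7)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.log_softmax(self.reduce_layer(x))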

 

Model loading and parameter/shape check

model = CNN(10, 0.3)
model.to(device)
'''
CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (dropout): Dropout(p=0.3, inplace=False)
  (fc1): Linear(in_features=3136, out_features=1024, bias=True)
  (reduce_layer): Linear(in_features=1024, out_features=10, bias=True)
  (log_softmax): LogSoftmax(dim=1)
)
'''
#model shape
for p in model.parameters():
    print(p.size())
'''
torch.Size([32, 1, 5, 5])
torch.Size([32])
torch.Size([64, 32, 5, 5])
torch.Size([64])
torch.Size([1024, 3136])
torch.Size([1024])
torch.Size([10, 1024])
torch.Size([10])
'''
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

model_hp = count_parameters(model)
print("model's trainable parameters", model_hp)
# model's trainable parameters 3274634

Data setup and train/test loader configuration

batch_size = 64
train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),batch_size=batch_size, shuffle=True)
print(len(train_loader)) # 938 batches (ceil(60000 / 64))
test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, transform=transforms.ToTensor()),batch_size=1000)
print(len(test_loader)) # 10, (10 * 1000 = 10000)
'''
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
9913344/? [04:51<00:00, 34050.71it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
29696/? [00:01<00:00, 26930.77it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
1649664/? [00:00<00:00, 3989386.71it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
5120/? [00:00<00:00, 139107.35it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw

Processing...
Done!
938
10
/usr/local/lib/python3.7/dist-packages/torchvision/datasets/mnist.py:502: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:143.)
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
'''

 

Adam optimizer, with the learning rate set to 1e-4

optimizer = optim.Adam(model.parameters(), lr=1e-4)

 

Model training, with epochs set to 10

model.train()
epochs = 10 ### change
total_loss = 0
total_acc = 0
train_loss = []
train_accuracy = []
i = 0
for epoch in range(epochs):
    for data, target in train_loader:
        data, target = Variable(data), Variable(target)
        data = data.to(device)        
       
        target = target.to(device)
         
        optimizer.zero_grad()
        output = model(data)
       
        loss = F.nll_loss(output, target)
        loss.backward()    # compute gradients
       
        total_loss += loss.item()   # detach from the graph before accumulating
       
        train_loss.append(total_loss/(i+1))   # running-average loss
        optimizer.step()   # update parameters
       
        prediction = output.data.max(1)[1]   # index of the max log-probability
        accuracy = prediction.eq(target.data).sum().item()/batch_size*100
       
        total_acc += accuracy
       
        train_accuracy.append(total_acc/(i+1))   # running-average accuracy
       
        if i % 10 == 0:
            print('Epoch: {}\t Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch+1, i, loss, accuracy))
        i += 1
    print('Epoch: {} finished'.format(epoch+1))
'''
Epoch: 10	 Train Step: 8450	Loss: 0.015	Accuracy: 100.000
Epoch: 10	 Train Step: 8460	Loss: 0.015	Accuracy: 100.000
Epoch: 10	 Train Step: 8470	Loss: 0.052	Accuracy: 98.438
Epoch: 10	 Train Step: 8480	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 8490	Loss: 0.012	Accuracy: 100.000
Epoch: 10	 Train Step: 8500	Loss: 0.032	Accuracy: 98.438
Epoch: 10	 Train Step: 8510	Loss: 0.014	Accuracy: 100.000
Epoch: 10	 Train Step: 8520	Loss: 0.037	Accuracy: 98.438
Epoch: 10	 Train Step: 8530	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 8540	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 8550	Loss: 0.060	Accuracy: 98.438
Epoch: 10	 Train Step: 8560	Loss: 0.004	Accuracy: 100.000
Epoch: 10	 Train Step: 8570	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 8580	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8590	Loss: 0.075	Accuracy: 96.875
Epoch: 10	 Train Step: 8600	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 8610	Loss: 0.035	Accuracy: 98.438
Epoch: 10	 Train Step: 8620	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 8630	Loss: 0.059	Accuracy: 98.438
Epoch: 10	 Train Step: 8640	Loss: 0.026	Accuracy: 98.438
Epoch: 10	 Train Step: 8650	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 8660	Loss: 0.017	Accuracy: 100.000
Epoch: 10	 Train Step: 8670	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 8680	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8690	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 8700	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 8710	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8720	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8730	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 8740	Loss: 0.049	Accuracy: 98.438
Epoch: 10	 Train Step: 8750	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8760	Loss: 0.028	Accuracy: 98.438
Epoch: 10	 Train Step: 8770	Loss: 0.031	Accuracy: 98.438
Epoch: 10	 Train Step: 8780	Loss: 0.008	Accuracy: 100.000
Epoch: 10	 Train Step: 8790	Loss: 0.059	Accuracy: 98.438
Epoch: 10	 Train Step: 8800	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 8810	Loss: 0.025	Accuracy: 98.438
Epoch: 10	 Train Step: 8820	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8830	Loss: 0.034	Accuracy: 96.875
Epoch: 10	 Train Step: 8840	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 8850	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 8860	Loss: 0.009	Accuracy: 100.000
Epoch: 10	 Train Step: 8870	Loss: 0.020	Accuracy: 98.438
Epoch: 10	 Train Step: 8880	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 8890	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 8900	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 8910	Loss: 0.013	Accuracy: 98.438
Epoch: 10	 Train Step: 8920	Loss: 0.043	Accuracy: 98.438
Epoch: 10	 Train Step: 8930	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 8940	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 8950	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 8960	Loss: 0.018	Accuracy: 98.438
Epoch: 10	 Train Step: 8970	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 8980	Loss: 0.033	Accuracy: 98.438
Epoch: 10	 Train Step: 8990	Loss: 0.022	Accuracy: 100.000
Epoch: 10	 Train Step: 9000	Loss: 0.008	Accuracy: 100.000
Epoch: 10	 Train Step: 9010	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 9020	Loss: 0.000	Accuracy: 100.000
Epoch: 10	 Train Step: 9030	Loss: 0.013	Accuracy: 100.000
Epoch: 10	 Train Step: 9040	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 9050	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 9060	Loss: 0.030	Accuracy: 98.438
Epoch: 10	 Train Step: 9070	Loss: 0.013	Accuracy: 100.000
Epoch: 10	 Train Step: 9080	Loss: 0.009	Accuracy: 100.000
Epoch: 10	 Train Step: 9090	Loss: 0.018	Accuracy: 98.438
Epoch: 10	 Train Step: 9100	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 9110	Loss: 0.007	Accuracy: 100.000
Epoch: 10	 Train Step: 9120	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 9130	Loss: 0.008	Accuracy: 100.000
Epoch: 10	 Train Step: 9140	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 9150	Loss: 0.042	Accuracy: 98.438
Epoch: 10	 Train Step: 9160	Loss: 0.004	Accuracy: 100.000
Epoch: 10	 Train Step: 9170	Loss: 0.001	Accuracy: 100.000
Epoch: 10	 Train Step: 9180	Loss: 0.055	Accuracy: 98.438
Epoch: 10	 Train Step: 9190	Loss: 0.016	Accuracy: 98.438
Epoch: 10	 Train Step: 9200	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 9210	Loss: 0.007	Accuracy: 100.000
Epoch: 10	 Train Step: 9220	Loss: 0.000	Accuracy: 100.000
Epoch: 10	 Train Step: 9230	Loss: 0.007	Accuracy: 100.000
Epoch: 10	 Train Step: 9240	Loss: 0.004	Accuracy: 100.000
Epoch: 10	 Train Step: 9250	Loss: 0.101	Accuracy: 96.875
Epoch: 10	 Train Step: 9260	Loss: 0.017	Accuracy: 100.000
Epoch: 10	 Train Step: 9270	Loss: 0.007	Accuracy: 100.000
Epoch: 10	 Train Step: 9280	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 9290	Loss: 0.002	Accuracy: 100.000
Epoch: 10	 Train Step: 9300	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 9310	Loss: 0.012	Accuracy: 100.000
Epoch: 10	 Train Step: 9320	Loss: 0.009	Accuracy: 100.000
Epoch: 10	 Train Step: 9330	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 9340	Loss: 0.004	Accuracy: 100.000
Epoch: 10	 Train Step: 9350	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 9360	Loss: 0.030	Accuracy: 98.438
Epoch: 10	 Train Step: 9370	Loss: 0.008	Accuracy: 100.000
Epoch: 10 finished
'''

The training accuracy is noticeably higher than the MLP's.

 

Plotting results

plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.show()
#plt.savefig('./train_loss_result.png')

plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.show()
#plt.savefig('./train_accuracy_result.png')

Figure: training loss over steps
Figure: training accuracy over steps

 

Evaluation results

with torch.no_grad():
    model.eval()
    correct = 0
   
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        prediction = output.data.max(1)[1]
        correct += prediction.eq(target.data).sum()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# Test set: Accuracy: 99.11%

Likewise, the CNN's test performance is higher than the MLP's.

 


This time, we code up stacking multiple fully-connected layers.

 

First, import the libraries with the code below.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import numpy as np

 

Also check the current PyTorch version with the code below.

torch.__version__

 

Next, declare the device to run the computation on.

Set the device to the GPU as shown below (falling back to CPU if CUDA is unavailable).

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(2891)
num_gpu = 1
if torch.cuda.device_count() > 1:
    num_gpu = torch.cuda.device_count()
print("Let's use", num_gpu, "GPUs!") # 1
print('device', device) # cuda

Next, implement a simple MLP (multi-layer perceptron) model.

 

The input is the MNIST dataset.

 

Each element of the MNIST dataset has shape (28, 28),

so each element must be flattened before it can pass through the MLP.

 

That is, reshape is used to change (28, 28) --> (1, 784), as in the small sketch below.
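A minimal sketch of that flattening step (dummy_batch is a throwaway example, assuming torch is imported as above):

dummy_batch = torch.randn(4, 1, 28, 28)   # four MNIST-sized images
flat = dummy_batch.view(-1, 784)          # the same reshape used inside forward()
print(flat.shape)                         # torch.Size([4, 784])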

 

class MnistMLP(nn.Module):
    def __init__(self, num_class, drop_prob):
        super(MnistMLP, self).__init__()
        # input is 28x28
        # need for flatten ==> 784
        self.dropout = nn.Dropout(p=drop_prob)
        self.linear1 = nn.Linear(784, 512)
        self.linear2 = nn.Linear(512, 256)
        self.linear3 = nn.Linear(256, 10)

        self.reduce_layer = nn.Linear(10, num_class)
        self.logsoftmax = nn.LogSoftmax(dim=1)
       
    def forward(self, x):
       
        x = x.float()
        mlp1 = F.relu(self.linear1(x.view(-1, 784)))
        mlp1 = self.dropout(mlp1)
        mlp2 = F.relu(self.linear2(mlp1))
        mlp2 = self.dropout(mlp2)
        mlp3 = F.relu(self.linear3(mlp2))
        mlp3 = self.dropout(mlp3)
  
        output = self.reduce_layer(mlp3)

    
        return self.logsoftmax(output)

 

Then declare the model and move it to the GPU as in the code below.

model = MnistMLP(10, 0.3)
model.to(device)

'''
MnistMLP(
  (dropout): Dropout(p=0.3, inplace=False)
  (linear1): Linear(in_features=784, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=256, bias=True)
  (linear3): Linear(in_features=256, out_features=10, bias=True)
  (reduce_layer): Linear(in_features=10, out_features=10, bias=True)
  (logsoftmax): LogSoftmax(dim=1)
)
'''

Since MNIST has 10 classes, 10 is passed as the first argument, and dropout is applied with 30% probability.

 

The shape of each layer of the model can be checked as follows,

#model shape
for p in model.parameters():
    print(p.size())
'''
torch.Size([512, 784])
torch.Size([512])
torch.Size([256, 512])
torch.Size([256])
torch.Size([10, 256])
torch.Size([10])
torch.Size([10, 10])
torch.Size([10])
'''

 

and the total number of trainable parameters can be checked as below.

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

model_hp = count_parameters(model)
print("model's trainable parameters", model_hp)
'''
model's trainable parameters 535928
'''
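The 535,928 total can be verified by hand from the four Linear layers (a quick check):

# linear1: 784*512 + 512 = 401,920
# linear2: 512*256 + 256 = 131,328
# linear3: 256*10  + 10  =   2,570
# reduce : 10*10   + 10  =     110
print(401920 + 131328 + 2570 + 110)   # 535928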

 

Now that the model is declared, the data needs to be loaded.

The code below downloads the MNIST dataset and builds the train and test loaders.

batch_size = 128
train_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),batch_size=batch_size, shuffle=True)
print(len(train_loader)) # 469 batches (ceil(60000 / 128))
test_loader = torch.utils.data.DataLoader(datasets.MNIST('data', train=False, transform=transforms.ToTensor()),batch_size=1000)
print(len(test_loader)) # 10, 10 * 1000 = 10000
'''
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
9913344/? [04:54<00:00, 33670.03it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
29696/? [00:01<00:00, 26891.25it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
1649664/? [00:00<00:00, 3911534.90it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
5120/? [00:00<00:00, 159181.34it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw

Processing...
Done!
469
10
/usr/local/lib/python3.7/dist-packages/torchvision/datasets/mnist.py:502: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:143.)
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
'''

 

The optimizer is declared as below; the widely used Adam is chosen with a learning rate of 1e-4.

optimizer = optim.Adam(model.parameters(), lr=1e-4)

 

Run training (only up to 10 epochs).

model.train()
epochs = 10 ### change
total_loss = 0
total_acc = 0
train_loss = []
train_accuracy = []
i = 0
for epoch in range(epochs):
    for data, target in train_loader:
        data, target = Variable(data), Variable(target)
        data = data.to(device)        
       
        target = target.to(device)
         
        optimizer.zero_grad()
        output = model(data)
       
        loss = F.nll_loss(output, target)
        loss.backward()    # compute gradients
       
        total_loss += loss.item()   # detach from the graph before accumulating
       
        train_loss.append(total_loss/(i+1))   # running-average loss
        optimizer.step()   # update parameters
       
        prediction = output.data.max(1)[1]   # index of the max log-probability
        accuracy = prediction.eq(target.data).sum().item()/batch_size*100
       
        total_acc += accuracy
       
        train_accuracy.append(total_acc/(i+1))   # running-average accuracy
       
        if i % 10 == 0:
            print('Epoch: {}\t Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch+1, i, loss, accuracy))
        i += 1
    print('Epoch: {} finished'.format(epoch+1))
'''
Epoch: 9	 Train Step: 4200	Loss: 0.652	Accuracy: 78.906
Epoch: 9	 Train Step: 4210	Loss: 0.422	Accuracy: 85.156
Epoch: 9	 Train Step: 4220	Loss: 0.496	Accuracy: 61.719
Epoch: 9 finished
Epoch: 10	 Train Step: 4230	Loss: 0.432	Accuracy: 84.375
Epoch: 10	 Train Step: 4240	Loss: 0.435	Accuracy: 89.062
Epoch: 10	 Train Step: 4250	Loss: 0.370	Accuracy: 86.719
Epoch: 10	 Train Step: 4260	Loss: 0.468	Accuracy: 83.594
Epoch: 10	 Train Step: 4270	Loss: 0.479	Accuracy: 85.156
Epoch: 10	 Train Step: 4280	Loss: 0.422	Accuracy: 85.156
Epoch: 10	 Train Step: 4290	Loss: 0.538	Accuracy: 78.906
Epoch: 10	 Train Step: 4300	Loss: 0.493	Accuracy: 87.500
Epoch: 10	 Train Step: 4310	Loss: 0.531	Accuracy: 82.031
Epoch: 10	 Train Step: 4320	Loss: 0.524	Accuracy: 82.031
Epoch: 10	 Train Step: 4330	Loss: 0.520	Accuracy: 83.594
Epoch: 10	 Train Step: 4340	Loss: 0.557	Accuracy: 82.812
Epoch: 10	 Train Step: 4350	Loss: 0.597	Accuracy: 80.469
Epoch: 10	 Train Step: 4360	Loss: 0.272	Accuracy: 90.625
Epoch: 10	 Train Step: 4370	Loss: 0.402	Accuracy: 85.938
Epoch: 10	 Train Step: 4380	Loss: 0.552	Accuracy: 78.906
Epoch: 10	 Train Step: 4390	Loss: 0.450	Accuracy: 85.156
Epoch: 10	 Train Step: 4400	Loss: 0.505	Accuracy: 85.156
Epoch: 10	 Train Step: 4410	Loss: 0.498	Accuracy: 79.688
Epoch: 10	 Train Step: 4420	Loss: 0.550	Accuracy: 77.344
Epoch: 10	 Train Step: 4430	Loss: 0.515	Accuracy: 84.375
Epoch: 10	 Train Step: 4440	Loss: 0.556	Accuracy: 78.125
Epoch: 10	 Train Step: 4450	Loss: 0.363	Accuracy: 88.281
Epoch: 10	 Train Step: 4460	Loss: 0.376	Accuracy: 88.281
Epoch: 10	 Train Step: 4470	Loss: 0.409	Accuracy: 86.719
Epoch: 10	 Train Step: 4480	Loss: 0.494	Accuracy: 84.375
Epoch: 10	 Train Step: 4490	Loss: 0.550	Accuracy: 82.031
Epoch: 10	 Train Step: 4500	Loss: 0.349	Accuracy: 90.625
Epoch: 10	 Train Step: 4510	Loss: 0.465	Accuracy: 82.812
Epoch: 10	 Train Step: 4520	Loss: 0.577	Accuracy: 78.906
Epoch: 10	 Train Step: 4530	Loss: 0.412	Accuracy: 85.938
Epoch: 10	 Train Step: 4540	Loss: 0.557	Accuracy: 81.250
Epoch: 10	 Train Step: 4550	Loss: 0.481	Accuracy: 83.594
Epoch: 10	 Train Step: 4560	Loss: 0.373	Accuracy: 86.719
Epoch: 10	 Train Step: 4570	Loss: 0.445	Accuracy: 84.375
Epoch: 10	 Train Step: 4580	Loss: 0.543	Accuracy: 77.344
Epoch: 10	 Train Step: 4590	Loss: 0.358	Accuracy: 88.281
Epoch: 10	 Train Step: 4600	Loss: 0.408	Accuracy: 87.500
Epoch: 10	 Train Step: 4610	Loss: 0.523	Accuracy: 82.812
Epoch: 10	 Train Step: 4620	Loss: 0.418	Accuracy: 86.719
Epoch: 10	 Train Step: 4630	Loss: 0.423	Accuracy: 85.938
Epoch: 10	 Train Step: 4640	Loss: 0.512	Accuracy: 79.688
Epoch: 10	 Train Step: 4650	Loss: 0.625	Accuracy: 77.344
Epoch: 10	 Train Step: 4660	Loss: 0.379	Accuracy: 86.719
Epoch: 10	 Train Step: 4670	Loss: 0.440	Accuracy: 82.812
Epoch: 10	 Train Step: 4680	Loss: 0.499	Accuracy: 81.250
Epoch: 10 finished
'''

matplotlib is used to visualize the training loss and accuracy.

plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.show()
#plt.savefig('./train_loss_result.png')

plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.show()
#plt.savefig('./train_accuracy_result.png')

Figure: training loss over steps
Figure: training accuracy over steps

To evaluate the model's real performance, evaluation is run as below on test data that was not used during training.

with torch.no_grad():
    model.eval()
    correct = 0
   
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        prediction = output.data.max(1)[1]
        correct += prediction.eq(target.data).sum()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
#Test set: Accuracy: 96.04%

Even with just a simple 3-layer MLP, about 96% accuracy was obtained.
