
This time, let's code up running an RNN on MNIST.

 

Experimental setups

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import numpy as np
torch.__version__

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(2891)
num_gpu = 1
if torch.cuda.device_count() > 1:
    num_gpu = torch.cuda.device_count()
print("Let's use", num_gpu, "GPUs!")
print('our device', device)

'''
Let's use 1 GPUs!
our device cuda
'''

 

RNN model design

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.gru = nn.GRU(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, num_classes)
        
        
    def forward(self, x, rnn):
        
        if rnn == 'lstm':
            # LSTM needs an initial hidden state and an initial cell state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.lstm(x, (h0, c0))
        else:
            # GRU only needs an initial hidden state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.gru(x, h0)
        
        # classify from the output of the last time step
        out = self.fc(out[:, -1, :])
        return out

The model is implemented so that the desired RNN variant (LSTM or GRU) can be selected when the model is called.
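For example, the same model instance can be routed through either branch at call time (a minimal sketch; the input here is dummy data with the MNIST sequence shape and the instance is a hypothetical throwaway one for illustration):

# minimal usage sketch: choose the recurrent branch per forward call
x = torch.randn(4, 28, 28).to(device)        # dummy batch of 4 sequences, 28 steps x 28 features
tmp_model = RNN(28, 128, 2, 10).to(device)   # hypothetical instance, only for this example

logits_lstm = tmp_model(x, 'lstm')           # routes through self.lstm
logits_gru  = tmp_model(x, 'gru')            # routes through self.gru
print(logits_lstm.shape, logits_gru.shape)   # torch.Size([4, 10]) torch.Size([4, 10])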

 

Parameter & Model shape & Hyper-parameter check 

sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 10
learning_rate = 0.01

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
#model shape
for p in model.parameters():
    print(p.size())
'''
torch.Size([512, 28])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([512, 128])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([384, 28])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([384, 128])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([10, 128])
torch.Size([10])
'''
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

model_hp = count_parameters(model)
print("model's trainable parameters", model_hp)
# model's trainable parameters 374026

 

Dataset load and train/test loader declaration

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)
print(len(train_loader)) # 600
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=1000)
print(len(test_loader)) # 10
'''
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
9913344/? [04:53<00:00, 33768.69it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
29696/? [00:00<00:00, 433940.88it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
1649664/? [00:51<00:00, 32232.92it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
5120/? [00:00<00:00, 108088.65it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw

Processing...
Done!
600
10
/usr/local/lib/python3.7/dist-packages/torchvision/datasets/mnist.py:502: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at  /pytorch/torch/csrc/utils/tensor_numpy.cpp:143.)
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
'''
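Each 28x28 MNIST image is fed to the RNN as a sequence of 28 time steps with 28 features per step, i.e. one image row per step. A quick sketch of what the reshape used in the training loop below produces:

images, labels = next(iter(train_loader))
print(images.shape)                                     # torch.Size([100, 1, 28, 28])
seq = images.reshape(-1, sequence_length, input_size)   # drop the channel dim; rows become time steps
print(seq.shape)                                        # torch.Size([100, 28, 28])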

 

Loss and optimizer declaration

Previously we used F.nll_loss; this time we declare CrossEntropyLoss and use that instead.

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
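The two are interchangeable: nn.CrossEntropyLoss applies log_softmax to the raw logits and then computes the negative log-likelihood, so it matches F.nll_loss on log_softmax outputs. A quick sketch of the equivalence:

logits = torch.randn(5, num_classes)
targets = torch.randint(0, num_classes, (5,))

ce  = nn.CrossEntropyLoss()(logits, targets)
nll = F.nll_loss(F.log_softmax(logits, dim=1), targets)
print(torch.allclose(ce, nll))   # True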

 

Training

model.train()
total_loss = 0
total_acc = 0
train_loss = []
train_accuracy = []
i = 0
for epoch in range(num_epochs):
    for data, target in train_loader:
        # flatten (batch, 1, 28, 28) images into (batch, 28, 28) sequences
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = model(data, 'lstm')
        loss = criterion(output, target)

        loss.backward()    # compute gradients
        optimizer.step()   # update parameters

        total_loss += loss.item()               # .item() so the graph is not kept around
        train_loss.append(total_loss / (i + 1))

        prediction = output.data.max(1)[1]      # index of the max logit = predicted class
        accuracy = prediction.eq(target.data).sum().item() / batch_size * 100

        total_acc += accuracy
        train_accuracy.append(total_acc / (i + 1))

        if i % 10 == 0:
            print('Epoch: {}\t Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch+1, i, loss.item(), accuracy))
        i += 1
    print('Epoch: {} finished'.format(epoch+1))
'''
Epoch: 9 finished
Epoch: 10	 Train Step: 5400	Loss: 0.031	Accuracy: 99.000
Epoch: 10	 Train Step: 5410	Loss: 0.012	Accuracy: 100.000
Epoch: 10	 Train Step: 5420	Loss: 0.045	Accuracy: 99.000
Epoch: 10	 Train Step: 5430	Loss: 0.107	Accuracy: 98.000
Epoch: 10	 Train Step: 5440	Loss: 0.006	Accuracy: 100.000
Epoch: 10	 Train Step: 5450	Loss: 0.031	Accuracy: 99.000
Epoch: 10	 Train Step: 5460	Loss: 0.038	Accuracy: 98.000
Epoch: 10	 Train Step: 5470	Loss: 0.003	Accuracy: 100.000
Epoch: 10	 Train Step: 5480	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5490	Loss: 0.024	Accuracy: 100.000
Epoch: 10	 Train Step: 5500	Loss: 0.109	Accuracy: 97.000
Epoch: 10	 Train Step: 5510	Loss: 0.085	Accuracy: 96.000
Epoch: 10	 Train Step: 5520	Loss: 0.016	Accuracy: 100.000
Epoch: 10	 Train Step: 5530	Loss: 0.065	Accuracy: 99.000
Epoch: 10	 Train Step: 5540	Loss: 0.088	Accuracy: 98.000
Epoch: 10	 Train Step: 5550	Loss: 0.042	Accuracy: 99.000
Epoch: 10	 Train Step: 5560	Loss: 0.049	Accuracy: 98.000
Epoch: 10	 Train Step: 5570	Loss: 0.032	Accuracy: 99.000
Epoch: 10	 Train Step: 5580	Loss: 0.095	Accuracy: 97.000
Epoch: 10	 Train Step: 5590	Loss: 0.038	Accuracy: 98.000
Epoch: 10	 Train Step: 5600	Loss: 0.106	Accuracy: 98.000
Epoch: 10	 Train Step: 5610	Loss: 0.026	Accuracy: 99.000
Epoch: 10	 Train Step: 5620	Loss: 0.021	Accuracy: 100.000
Epoch: 10	 Train Step: 5630	Loss: 0.069	Accuracy: 98.000
Epoch: 10	 Train Step: 5640	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5650	Loss: 0.011	Accuracy: 100.000
Epoch: 10	 Train Step: 5660	Loss: 0.154	Accuracy: 97.000
Epoch: 10	 Train Step: 5670	Loss: 0.005	Accuracy: 100.000
Epoch: 10	 Train Step: 5680	Loss: 0.075	Accuracy: 97.000
Epoch: 10	 Train Step: 5690	Loss: 0.107	Accuracy: 97.000
Epoch: 10	 Train Step: 5700	Loss: 0.050	Accuracy: 98.000
Epoch: 10	 Train Step: 5710	Loss: 0.043	Accuracy: 98.000
Epoch: 10	 Train Step: 5720	Loss: 0.064	Accuracy: 99.000
Epoch: 10	 Train Step: 5730	Loss: 0.039	Accuracy: 98.000
Epoch: 10	 Train Step: 5740	Loss: 0.014	Accuracy: 100.000
Epoch: 10	 Train Step: 5750	Loss: 0.042	Accuracy: 99.000
Epoch: 10	 Train Step: 5760	Loss: 0.137	Accuracy: 96.000
Epoch: 10	 Train Step: 5770	Loss: 0.101	Accuracy: 97.000
Epoch: 10	 Train Step: 5780	Loss: 0.054	Accuracy: 97.000
Epoch: 10	 Train Step: 5790	Loss: 0.084	Accuracy: 97.000
Epoch: 10	 Train Step: 5800	Loss: 0.054	Accuracy: 98.000
Epoch: 10	 Train Step: 5810	Loss: 0.062	Accuracy: 97.000
Epoch: 10	 Train Step: 5820	Loss: 0.076	Accuracy: 98.000
Epoch: 10	 Train Step: 5830	Loss: 0.094	Accuracy: 97.000
Epoch: 10	 Train Step: 5840	Loss: 0.147	Accuracy: 96.000
Epoch: 10	 Train Step: 5850	Loss: 0.048	Accuracy: 99.000
Epoch: 10	 Train Step: 5860	Loss: 0.010	Accuracy: 100.000
Epoch: 10	 Train Step: 5870	Loss: 0.034	Accuracy: 98.000
Epoch: 10	 Train Step: 5880	Loss: 0.048	Accuracy: 99.000
Epoch: 10	 Train Step: 5890	Loss: 0.025	Accuracy: 99.000
Epoch: 10	 Train Step: 5900	Loss: 0.110	Accuracy: 97.000
Epoch: 10	 Train Step: 5910	Loss: 0.092	Accuracy: 98.000
Epoch: 10	 Train Step: 5920	Loss: 0.149	Accuracy: 96.000
Epoch: 10	 Train Step: 5930	Loss: 0.014	Accuracy: 100.000
Epoch: 10	 Train Step: 5940	Loss: 0.056	Accuracy: 98.000
Epoch: 10	 Train Step: 5950	Loss: 0.058	Accuracy: 97.000
Epoch: 10	 Train Step: 5960	Loss: 0.064	Accuracy: 98.000
Epoch: 10	 Train Step: 5970	Loss: 0.050	Accuracy: 98.000
Epoch: 10	 Train Step: 5980	Loss: 0.040	Accuracy: 99.000
Epoch: 10	 Train Step: 5990	Loss: 0.015	Accuracy: 100.000
Epoch: 10 finished
'''

The RNN was trained using the LSTM branch.

 

 

Plotting the results

plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.show()
#plt.savefig('./train_loss_result.png')

plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.show()
#plt.savefig('./train_accuracy_result.png')

 

Figure: training loss over training steps
Figure: training accuracy over training steps
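If you also want axis labels and saved image files, a labeled variant of the two plots might look like this (a sketch; savefig is called before show so the saved figure is not blank):

plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.xlabel('train step')
plt.ylabel('running average loss')
plt.savefig('./train_loss_result.png')
plt.show()

plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.xlabel('train step')
plt.ylabel('running average accuracy (%)')
plt.savefig('./train_accuracy_result.png')
plt.show()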

 

Evaluation results

with torch.no_grad():
    model.eval()
    correct = 0
   
    for data, target in test_loader:
        
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)        
        output = model(data, 'lstm')

        prediction = output.data.max(1)[1]
        correct += prediction.eq(target.data).sum()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# Test set: Accuracy: 97.63%

 

In fact, an RNN is not the best-suited model for MNIST data.

That is because the RNN design is meant for sequential data, whereas here each image is only artificially treated as a sequence of 28 rows.

 

Nevertheless, the result still surpasses the performance of the MLP.
