This time, we code up running an RNN on MNIST.
Experimental setup
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import os
import matplotlib.pyplot as plt
import numpy as np
torch.__version__
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(2891)
num_gpu = 1
if torch.cuda.device_count() > 1:
    num_gpu = torch.cuda.device_count()
print("Let's use", num_gpu, "GPUs!")
print('our device', device)
'''
Let's use 1 GPUs!
our device cuda
'''
RNN model design
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.gru = nn.GRU(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, num_classes)

    def forward(self, x, rnn):
        if rnn == 'lstm':
            # LSTM needs both a hidden state and a cell state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.lstm(x, (h0, c0))
        else:
            # GRU only needs a hidden state
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
            out, _ = self.gru(x, h0)
        # classify from the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out
The model is implemented so that the desired RNN variant (LSTM or GRU) can be selected when calling it.
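As a quick check of this selection mechanism, a minimal sketch (the dummy batch and the dimensions 28/128/2/10 below are illustrative values only) could look like this:
# run one dummy batch through both branches and confirm the output shapes match
dummy = torch.randn(4, 28, 28).to(device)   # (batch, sequence_length, input_size)
tmp_model = RNN(28, 128, 2, 10).to(device)
print(tmp_model(dummy, 'lstm').shape)        # torch.Size([4, 10])
print(tmp_model(dummy, 'gru').shape)         # torch.Size([4, 10])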
Parameter & Model shape & Hyper-parameter check
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 10
learning_rate = 0.01
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
# model shape
for p in model.parameters():
    print(p.size())
'''
torch.Size([512, 28])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([512, 128])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([384, 28])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([384, 128])
torch.Size([384, 128])
torch.Size([384])
torch.Size([384])
torch.Size([10, 128])
torch.Size([10])
'''
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

model_params = count_parameters(model)
print("model's trainable parameters", model_params)
# model's trainable parameters 374026
Dataset load and train/test loader declaration
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)
print(len(train_loader))  # 600 batches of 100

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=1000)
print(len(test_loader))  # 10 batches of 1000
'''
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz
9913344/? [04:53<00:00, 33768.69it/s]
Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
29696/? [00:00<00:00, 433940.88it/s]
Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz
1649664/? [00:51<00:00, 32232.92it/s]
Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
5120/? [00:00<00:00, 108088.65it/s]
Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!
600
10
/usr/local/lib/python3.7/dist-packages/torchvision/datasets/mnist.py:502: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /pytorch/torch/csrc/utils/tensor_numpy.cpp:143.)
return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
'''
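Each batch comes out of the loader as (batch, 1, 28, 28); the training loop below reshapes it to (batch, 28, 28) so that every image is fed to the RNN as a sequence of 28 rows of 28 pixels. A minimal sketch of that shape transformation:
images, labels = next(iter(train_loader))
print(images.shape)                                           # torch.Size([100, 1, 28, 28])
print(images.reshape(-1, sequence_length, input_size).shape)  # torch.Size([100, 28, 28])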
Loss and optimizer declaration
Previously we used F.nll_loss; this time we declare and use CrossEntropyLoss instead.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
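nn.CrossEntropyLoss applies log_softmax internally and then computes the negative log-likelihood, so it expects raw logits. A small sketch (with random logits and targets, for illustration only) of its equivalence to the F.nll_loss used before:
logits = torch.randn(4, num_classes)
targets = torch.randint(0, num_classes, (4,))
print(criterion(logits, targets))
print(F.nll_loss(F.log_softmax(logits, dim=1), targets))  # same value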
Training
model.train()
total_loss = 0
total_acc = 0
train_loss = []
train_accuracy = []
i = 0
for epoch in range(num_epochs):
    for data, target in train_loader:
        # each 28x28 image is fed as a sequence of 28 rows of 28 pixels
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data, 'lstm')
        loss = criterion(output, target)
        loss.backward()  # calc gradients
        total_loss += loss.item()  # accumulate the scalar, not the graph-attached tensor
        train_loss.append(total_loss / (i + 1))  # running-average loss; i + 1 avoids division by zero on the first step
        optimizer.step()  # update parameters
        prediction = output.data.max(1)[1]  # index of the largest logit = predicted class
        accuracy = prediction.eq(target.data).sum().item() / batch_size * 100
        total_acc += accuracy
        train_accuracy.append(total_acc / (i + 1))  # running-average accuracy
        if i % 10 == 0:
            print('Epoch: {}\t Train Step: {}\tLoss: {:.3f}\tAccuracy: {:.3f}'.format(epoch+1, i, loss.item(), accuracy))
        i += 1
    print('Epoch: {} finished'.format(epoch+1))
'''
Epoch: 9 finished
Epoch: 10 Train Step: 5400 Loss: 0.031 Accuracy: 99.000
Epoch: 10 Train Step: 5410 Loss: 0.012 Accuracy: 100.000
Epoch: 10 Train Step: 5420 Loss: 0.045 Accuracy: 99.000
Epoch: 10 Train Step: 5430 Loss: 0.107 Accuracy: 98.000
Epoch: 10 Train Step: 5440 Loss: 0.006 Accuracy: 100.000
Epoch: 10 Train Step: 5450 Loss: 0.031 Accuracy: 99.000
Epoch: 10 Train Step: 5460 Loss: 0.038 Accuracy: 98.000
Epoch: 10 Train Step: 5470 Loss: 0.003 Accuracy: 100.000
Epoch: 10 Train Step: 5480 Loss: 0.010 Accuracy: 100.000
Epoch: 10 Train Step: 5490 Loss: 0.024 Accuracy: 100.000
Epoch: 10 Train Step: 5500 Loss: 0.109 Accuracy: 97.000
Epoch: 10 Train Step: 5510 Loss: 0.085 Accuracy: 96.000
Epoch: 10 Train Step: 5520 Loss: 0.016 Accuracy: 100.000
Epoch: 10 Train Step: 5530 Loss: 0.065 Accuracy: 99.000
Epoch: 10 Train Step: 5540 Loss: 0.088 Accuracy: 98.000
Epoch: 10 Train Step: 5550 Loss: 0.042 Accuracy: 99.000
Epoch: 10 Train Step: 5560 Loss: 0.049 Accuracy: 98.000
Epoch: 10 Train Step: 5570 Loss: 0.032 Accuracy: 99.000
Epoch: 10 Train Step: 5580 Loss: 0.095 Accuracy: 97.000
Epoch: 10 Train Step: 5590 Loss: 0.038 Accuracy: 98.000
Epoch: 10 Train Step: 5600 Loss: 0.106 Accuracy: 98.000
Epoch: 10 Train Step: 5610 Loss: 0.026 Accuracy: 99.000
Epoch: 10 Train Step: 5620 Loss: 0.021 Accuracy: 100.000
Epoch: 10 Train Step: 5630 Loss: 0.069 Accuracy: 98.000
Epoch: 10 Train Step: 5640 Loss: 0.010 Accuracy: 100.000
Epoch: 10 Train Step: 5650 Loss: 0.011 Accuracy: 100.000
Epoch: 10 Train Step: 5660 Loss: 0.154 Accuracy: 97.000
Epoch: 10 Train Step: 5670 Loss: 0.005 Accuracy: 100.000
Epoch: 10 Train Step: 5680 Loss: 0.075 Accuracy: 97.000
Epoch: 10 Train Step: 5690 Loss: 0.107 Accuracy: 97.000
Epoch: 10 Train Step: 5700 Loss: 0.050 Accuracy: 98.000
Epoch: 10 Train Step: 5710 Loss: 0.043 Accuracy: 98.000
Epoch: 10 Train Step: 5720 Loss: 0.064 Accuracy: 99.000
Epoch: 10 Train Step: 5730 Loss: 0.039 Accuracy: 98.000
Epoch: 10 Train Step: 5740 Loss: 0.014 Accuracy: 100.000
Epoch: 10 Train Step: 5750 Loss: 0.042 Accuracy: 99.000
Epoch: 10 Train Step: 5760 Loss: 0.137 Accuracy: 96.000
Epoch: 10 Train Step: 5770 Loss: 0.101 Accuracy: 97.000
Epoch: 10 Train Step: 5780 Loss: 0.054 Accuracy: 97.000
Epoch: 10 Train Step: 5790 Loss: 0.084 Accuracy: 97.000
Epoch: 10 Train Step: 5800 Loss: 0.054 Accuracy: 98.000
Epoch: 10 Train Step: 5810 Loss: 0.062 Accuracy: 97.000
Epoch: 10 Train Step: 5820 Loss: 0.076 Accuracy: 98.000
Epoch: 10 Train Step: 5830 Loss: 0.094 Accuracy: 97.000
Epoch: 10 Train Step: 5840 Loss: 0.147 Accuracy: 96.000
Epoch: 10 Train Step: 5850 Loss: 0.048 Accuracy: 99.000
Epoch: 10 Train Step: 5860 Loss: 0.010 Accuracy: 100.000
Epoch: 10 Train Step: 5870 Loss: 0.034 Accuracy: 98.000
Epoch: 10 Train Step: 5880 Loss: 0.048 Accuracy: 99.000
Epoch: 10 Train Step: 5890 Loss: 0.025 Accuracy: 99.000
Epoch: 10 Train Step: 5900 Loss: 0.110 Accuracy: 97.000
Epoch: 10 Train Step: 5910 Loss: 0.092 Accuracy: 98.000
Epoch: 10 Train Step: 5920 Loss: 0.149 Accuracy: 96.000
Epoch: 10 Train Step: 5930 Loss: 0.014 Accuracy: 100.000
Epoch: 10 Train Step: 5940 Loss: 0.056 Accuracy: 98.000
Epoch: 10 Train Step: 5950 Loss: 0.058 Accuracy: 97.000
Epoch: 10 Train Step: 5960 Loss: 0.064 Accuracy: 98.000
Epoch: 10 Train Step: 5970 Loss: 0.050 Accuracy: 98.000
Epoch: 10 Train Step: 5980 Loss: 0.040 Accuracy: 99.000
Epoch: 10 Train Step: 5990 Loss: 0.015 Accuracy: 100.000
Epoch: 10 finished
'''
The RNN here was run with the LSTM branch.
Plotting results
plt.figure()
plt.plot(np.arange(len(train_loss)), train_loss)
plt.show()
#plt.savefig('./train_loss_result.png')
plt.figure()
plt.plot(np.arange(len(train_accuracy)), train_accuracy)
plt.show()
#plt.savefig('./train_accuracy_result.png')
Evaluation results
with torch.no_grad():
    model.eval()
    correct = 0
    for data, target in test_loader:
        data = data.reshape(-1, sequence_length, input_size).to(device)
        target = target.to(device)
        output = model(data, 'lstm')
        prediction = output.data.max(1)[1]
        correct += prediction.eq(target.data).sum()
    print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# Test set: Accuracy: 97.63%
In fact, an RNN is not the best-suited model for MNIST, since the RNN concept is designed for genuinely sequential data; here the images are merely treated as sequences of rows. Nevertheless, the result surpassed the MLP's performance.