11. 손글씨 도형 CNN모델 구현

Notice

Recent Posts

Recent Comments

Link

깃허브

« 2026/06 »
일	월	화	수	목	금	토
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30

Tags more

Archives

Today

Total

관리 메뉴

수달이네 기술 블로그

11. 손글씨 도형 CNN모델 구현 본문

AI공부/딥러닝

11. 손글씨 도형 CNN모델 구현

슬픈 수달이 2026. 2. 8. 20:07

손글씨로 그려진 도형 셋을 분류하는 CNN모델을 만들것이다.

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt

사용할 모듈 리스트이다.

# Preprocessing pipeline applied to every image when it is read from the dataset.
transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),               # force a fixed 28x28 input size
    transforms.Grayscale(num_output_channels=1),    # colour is irrelevant for shapes
    transforms.RandomInvert(p=1),                   # p=1: every image is inverted
    transforms.ToTensor(),                          # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # (x - 0.5) / 0.5
])

transforms.Resize((28, 28)): 모델에 넣는 이미지는 항상 같은 크기여야 하므로 28x28로 크기를 변환한다.
transforms.Grayscale(1): 흑백 이미지로 변환한다. 도형만 분류할 것이기 때문에 컬러가 필요 없다.
transforms.ToTensor(): 이미지를 텐서로 바꾸고 픽셀 값을 [0, 1] 범위로 스케일링한다.
transforms.RandomInvert(1): 이미지를 반전시켜줘, 1로 하면 전부 반전 시키라는 뜻.
- 도형은 하얀 배경에 검은 글씨이므로 색을 반전시켜 코스트를 줄인다.(255: 흰색, 0 검은색, 대부분 흰색이므로 0으로 변환)
transforms.Normalize((0.5,), (0.5,)): 첫번째 인자는 평균(0.5), 두번째 인자는 표준편차(0.5)이다. 즉 (x - 0.5) / 0.5 를 적용해 [0, 1] 범위의 값을 [-1, 1] 범위로 정규화한다.

# Root folders of the two splits; each contains one sub-folder per class.
train_path = './shape/train'
test_path = './shape/test'

# ImageFolder labels every image by its sub-folder and runs `transform` on access.
train_dataset = torchvision.datasets.ImageFolder(train_path, transform)
test_dataset = torchvision.datasets.ImageFolder(test_path, transform)

# Number of samples in each split.
len(train_dataset), len(test_dataset)

# (240, 60)

ImageFolder(): 폴더를 가져와서 해당 폴더 내의 이미지들을 라벨링시켜줌.
- 위와 같이 되어있을때, train을 가져와 cir은 0, tri는 1, x는 2로 라벨링됨.
transform = transform: 이미지를 넣을때마다 위 transform객체를 거쳐서 결과물이 들어간다.

train_dataset[0]  # first (image tensor, label) pair
# (tensor([[[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
#           [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],
#           [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
#            -1.0000, -1.0000, -1.0000, -0.8196, -0.4824, -0.4431, -0.4353,
#            -0.4353, -0.4431, -0.4353, -0.4353, -0.4431, -0.9294, -1.0000,
#            -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000],

텐서에 들어간 값을 확인할 수 있으며

# Class names of both splits, in label-index order (the list position is the label).
train_dataset.classes, test_dataset.classes

# (['cir', 'tri', 'x'], ['cir', 'tri', 'x'])

라벨링된 클래스들을 확인할 수도 있다.

# Label index -> class name, used for plot titles.
class_map = dict(enumerate(('cir', 'tri', 'x')))

매핑해준다.

데이터로더

# Mini-batch iterator over the training set; the order is reshuffled on every pass.
loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Pull a single batch to sanity-check the tensor shapes.
imgs, labels = next(iter(loader))
print(imgs.shape)  # torch.Size([16, 1, 28, 28])

데이터셋을 배치사이즈크기에 맞는 작은 묶음으로 제공한다.
dataset = train_dataset: train_dataset을 데이터셋에 넣어줌
batch_size = 배치사이즈를 정함

결과적으로 imgs 16개, labels 16개가 한 배치로 나온다. (batch_size=16)

데이터 확인

# Preview the sampled training batch with its class names.
# The grid is sized from the batch itself: a fixed 8x8 grid only suits a
# 64-image batch and would leave blank, tick-marked axes for smaller ones.
n_cols = 4
n_rows = -(-len(imgs) // n_cols)  # ceiling division
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(2 * n_cols, 2 * n_rows), squeeze=False
)

# Hide ticks/frames everywhere, including any unused trailing cells.
for ax in axes.flatten():
    ax.axis('off')

for ax, img, label in zip(axes.flatten(), imgs, labels):
    ax.imshow(img.reshape(28, 28), cmap='gray')
    ax.set_title(class_map[label.item()])

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

장치를 세팅해준다.

CNN모델만들기

class ConvNeuralNetwork(nn.Module):
    """Small CNN mapping single-channel 28x28 images to 3 class logits.

    Two stages of (conv3x3 -> ReLU -> conv3x3 -> ReLU -> 2x2 max-pool ->
    dropout) widen the channels 1 -> 28 -> 56 while the pooling halves the
    spatial size 28 -> 14 -> 7. The flattened 56*7*7 features feed a single
    linear layer that produces the logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Layers are appended in order so the Sequential indices (0..11), and
        # therefore the state_dict keys, stay exactly as listed.
        layers = []
        for in_ch, out_ch in ((1, 28), (28, 56)):
            layers += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding='same'),
                nn.ReLU(),
                # a second convolution at the same width looks at the
                # features again before downsampling
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding='same'),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),  # downsample by 2
                nn.Dropout(0.25),             # regularisation against overfitting
            ]
        # Despite the attribute name, this stack is the feature extractor.
        self.classifier = nn.Sequential(*layers)
        # 56*7*7 flattened features -> 3 classes
        self.Linear = nn.Linear(56 * 7 * 7, 3)

    def forward(self, x):
        """Return the class logits for a batch of images `x`."""
        features = self.classifier(x)   # feature extraction
        flat = self.flatten(features)   # (batch, 56*7*7)
        return self.Linear(flat)        # classification head

# Build the network, then move its parameters to the selected device.
model = ConvNeuralNetwork()
model = model.to(device)
model

# ConvNeuralNetwork(
#   (flatten): Flatten(start_dim=1, end_dim=-1)
#   (classifier): Sequential(
#     (0): Conv2d(1, 28, kernel_size=(3, 3), stride=(1, 1), padding=same)
#     (1): ReLU()
#     (2): Conv2d(28, 28, kernel_size=(3, 3), stride=(1, 1), padding=same)
#     (3): ReLU()
#     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (5): Dropout(p=0.25, inplace=False)
#     (6): Conv2d(28, 56, kernel_size=(3, 3), stride=(1, 1), padding=same)
#     (7): ReLU()
#     (8): Conv2d(56, 56, kernel_size=(3, 3), stride=(1, 1), padding=same)
#     (9): ReLU()
#     (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#     (11): Dropout(p=0.25, inplace=False)
#   )
#   (Linear): Linear(in_features=2744, out_features=3, bias=True)
# )

학습환경

# Multi-class cross-entropy as the loss, optimised with Adam.
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

loss function: CrossEntropy

optimizer: Adam

def train_loop(loader, model, loss_fn, optimizer):
    """Train `model` for one pass over `loader`.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        model: Network returning one row of class scores per input.
        loss_fn: Callable ``loss_fn(scores, labels)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        ``(avg_loss, avg_acc)``: the mean of the per-batch losses and the mean
        of the per-batch accuracies (in percent).

    Raises:
        ZeroDivisionError: If ``loader`` contains no batches.
    """
    # Send each batch to wherever the model's parameters live, so the function
    # does not depend on a module-level `device` variable.
    device = next(model.parameters()).device
    model.train()  # make sure Dropout is active while training

    sum_losses = 0.0
    sum_accs = 0.0
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()
        sum_losses += loss.item()

        # argmax over the raw scores equals argmax over softmax probabilities,
        # so no explicit softmax is needed to pick the predicted class.
        y_pred_index = torch.argmax(y_pred, dim=1)
        correct = (y_batch == y_pred_index).sum().item()
        sum_accs += correct / len(y_batch) * 100

    avg_loss = sum_losses / len(loader)
    avg_acc = sum_accs / len(loader)
    return avg_loss, avg_acc

dropout: 입력값이 있을때 다음으로 연결되는 노드들에 연결되는 노드들을 껐다 키는 것
- 이미지로 따지면 이미지의 픽셀당 연결되는 것들을 껐다 키는 것.
- Dropout2d(0.25): 개별 값이 아니라 피처맵(채널) 단위로 껐다 켠다. (위 모델에서는 값 단위로 끄는 nn.Dropout(0.25)를 사용했다.)

위처럼 train_loop를 구성한다.

# Number of full passes over the training set.
epochs = 50

for i in range(epochs):
    print(f'Epoch {i+1}/{epochs}')
    avg_loss, avg_acc = train_loop(loader, model, loss, optimizer)
    # A real newline ('\n', not the escaped '\\n') leaves a blank line between epochs.
    print(f'Loss: {avg_loss:.4f}, Accuracy: {avg_acc:.2f}%\n')

에폭 50으로 학습을 진행하면

Epoch 50/50 Loss: 0.0098, Accuracy: 100.00%

테스트 로더

# Batches drawn from the held-out test set.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=True
)

# One batch is enough for the preview; fetching twice would only discard the
# first shuffled batch.
imgs, labels = next(iter(test_loader))

# A 4 x 8 grid holds one full batch of 32 images.
fig, axes = plt.subplots(4, 8, figsize=(16, 8))

for ax, img, label in zip(axes.flatten(), imgs, labels):
    ax.imshow(img.reshape(28, 28), cmap='gray')
    ax.set_title(class_map[label.item()])
    ax.axis('off')

보통은 지금까지 한것처럼 껍데기 모델을 만들어서 사용하는 경우는 거의 없다. 대부분 전이학습함

가중치 저장 후 불러오기

가중치 저장

# Save only the learned parameters (the state_dict), not the model object.
# '.pth' is the extension conventionally used for PyTorch weight files.
torch.save(obj=model.state_dict(), f='model_weight.pth')

껍데기 모델

# Fresh, untrained "shell" model with randomly initialised weights.
model2 = ConvNeuralNetwork().to(device)

# NOTE(review): `test` is not defined in this excerpt; from the unpacking it
# presumably returns (predictions, labels, images, accuracy) — confirm.
y_pred_list2, y_true_list2, img_list2, test_acc2 = test(model2, test_loader)
print(f'Test Accuracy (before loading weights): {test_acc2:.2f}%')

# Test Accuracy (before loading weights): 33.26%

학습된 가중치가 없으므로 무작위로 찍는 수준(3개 클래스 기준 약 33%)의 정확도가 나오는 모습

# Load the saved parameters into the untrained model, then evaluate it again.
model2.load_state_dict(torch.load('model_weight.pth'))  # restore the weights
y_pred_list2, y_true_list2, img_list2, test_acc2 = test(model2, test_loader)
print(f'Test Accuracy (after loading weights): {test_acc2:.2f}%')

# Test Accuracy (after loading weights): 89.96%

가중치를 불러옴으로써 다시 잘 맞추는 모습

모델 전체 저장후 불러오기

# Attempt to load a fully pickled model with torch.load's default settings.
# The post shows this failing with "Weights only load failed."
# NOTE(review): 'model_full.pth' is never written in this excerpt — it
# presumably comes from an earlier torch.save(model, 'model_full.pth'); confirm.
model3 = torch.load('model_full.pth')  # load the whole model object
y_pred_list3, y_true_list3, img_list3, test_acc3 = test(model3, test_loader)
print(f'Test Accuracy (after loading full model): {test_acc3:.2f}%')

# Weights only load failed.

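torch.save(model, 'model_full.pth')로 모델 전체를 저장해 두었더라도, 최신 PyTorch(2.6 이상)에서는 torch.load의 기본값이 weights_only=True이기 때문에 기본 설정으로는 모델 전체를 불러올 수 없다.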

# weights_only=False lets torch.load unpickle the whole model object.
# Unpickling can execute arbitrary code, so only use this on trusted files.
model3 = torch.load('model_full.pth', weights_only=False)  # load the whole model object
y_pred_list3, y_true_list3, img_list3, test_acc3 = test(model3, test_loader)
print(f'Test Accuracy (after loading full model): {test_acc3:.2f}%')

# Test Accuracy (after loading full model): 89.96%

weights_only=False 옵션을 주면 모델 전체가 불러와진다. 단, 이 방식은 pickle을 그대로 역직렬화하므로 신뢰할 수 있는 파일에만 사용해야 한다.

'AI공부 > 딥러닝' 카테고리의 다른 글

13. Ailen vs Predator 데이터셋 2(전이학습 적용) (0)	2026.02.13
12. Ailen vs Predator 데이터셋(전이학습으로 해결하기) (0)	2026.02.12
10. CNN 구현 (0)	2026.02.06
9. CNN(Convolutional Neural Network) (0)	2026.02.05
8. Multi-class weather dataset(날씨 이미지 데이터셋)2 + 모델 학습 (0)	2026.02.04

'AI공부/딥러닝' Related Articles

수달이네 기술 블로그

11. 손글씨 도형 CNN모델 구현 본문

11. 손글씨 도형 CNN모델 구현

CNN모델만들기

학습환경

테스트 로더

가중치 저장 후 불러오기

모델 전체 저장후 불러오기

'AI공부 > 딥러닝' 카테고리의 다른 글

티스토리툴바