본문 바로가기

Deep Learning

Deep Learning : ImageDataGenerator 를 이용해서 데이터를 증강하는 방법

ImageDataGenerator 클래스란?

이미지를 학습시킬 때 학습 데이터의 양이 적을 경우, 기존 학습 데이터를 조금씩 변형(회전, 확대, 뒤집기 등)시켜서 학습 데이터의 양을 늘리는 데이터 증강(Data Augmentation) 방식 중 하나이다.

  • rescale = 1./255 : 값을 0과 1 사이로 변경
  • rotation_range = 30 : 무작위 회전각도 30도 이내
  • shear_range = 0.2 : 층밀리기(shear) 강도 — 반시계 방향 전단 각도(단위: 도) 0.2 이내
  • zoom_range = 0.2 : 무작위 줌 범위 20%
  • horizontal_flip = True : 무작위로 좌우를 뒤집는다.

ImageDataGenerator - 이미지 변형

# Feed the in-memory arrays through the augmenting generator in batches.
train_generator = train_datagen.flow(
    x=x_train,
    y=y_train,
    batch_size=32,  # samples per generated batch
    shuffle=True,   # shuffle the sample order
)

train_datagen 이라는 틀에 flow 함수를 사용해서 실제 데이터를 파라미터로 넣어주면, 변형된 이미지를 배치 단위로 만들어 주는 제너레이터가 생성된다.

  • batch_size : 배치사이즈
  • shuffle : 데이터를 무작위로 섞을지 여부

그 밖에 (디렉토리에서 이미지를 읽는 flow_from_directory 함수의 파라미터)

  • target_size : 이미지 사이즈
  • color_mode : 이미지 채널 수 ex) 'rgb'
  • class_mode : Y 값(레이블) 변환 방법 ex) 'categorical'

 


실습

# 개와 고양이 분류 실습 문제
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
 
 
# 파일 받아서 압축 풀기
 

In : 

!wget --no-check-certificate \
    "https://block-edu-test.s3.ap-northeast-2.amazonaws.com/kagglecatsanddogs_5340.zip" \
    -O "/tmp/cats-and-dogs.zip"

# Unpack the downloaded archive into /tmp. The context manager closes the
# zip handle even when extraction raises part-way through.
local_zip = '/tmp/cats-and-dogs.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

 

Out :

--2022-06-15 07:07:49--  https://block-edu-test.s3.ap-northeast-2.amazonaws.com/kagglecatsanddogs_5340.zip
Resolving block-edu-test.s3.ap-northeast-2.amazonaws.com (block-edu-test.s3.ap-northeast-2.amazonaws.com)... 52.219.146.54
Connecting to block-edu-test.s3.ap-northeast-2.amazonaws.com (block-edu-test.s3.ap-northeast-2.amazonaws.com)|52.219.146.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824887076 (787M) [application/zip]
Saving to: ‘/tmp/cats-and-dogs.zip’

/tmp/cats-and-dogs. 100%[===================>] 786.67M  20.4MB/s    in 41s     

2022-06-15 07:08:32 (19.0 MB/s) - ‘/tmp/cats-and-dogs.zip’ saved [824887076/824887076]

 

 

In :

# Report how many entries each extracted class folder contains (cats, then dogs).
for pet_dir in ('/tmp/PetImages/Cat/', '/tmp/PetImages/Dog/'):
    print(len(os.listdir(pet_dir)))

# Expected Output:
# 12501
# 12501

 

 

Out : 

12501 12501

 

 

 

# os.mkdir 이용하여, 사진 저장할 다음 디렉토리 만들기 
# '/tmp/cats-v-dogs'
# '/tmp/cats-v-dogs/training'
# '/tmp/cats-v-dogs/testing'
# '/tmp/cats-v-dogs/training/cats'
# '/tmp/cats-v-dogs/training/dogs'
# '/tmp/cats-v-dogs/testing/cats'
# '/tmp/cats-v-dogs/testing/dogs'

In : 


# Create the training/testing folder tree for both classes.
# os.makedirs(..., exist_ok=True) creates missing parent folders and accepts
# folders that already exist, so a re-run or a partially created tree still
# ends with every directory present. The previous single try/except around a
# chain of os.mkdir calls stopped at the first folder that already existed
# and silently skipped the rest. A genuine failure (for example a regular
# file occupying one of these paths) is now raised instead of being hidden.
for dataset_dir in (
    '/tmp/cats-v-dogs/training/cats',
    '/tmp/cats-v-dogs/training/dogs',
    '/tmp/cats-v-dogs/testing/cats',
    '/tmp/cats-v-dogs/testing/dogs',
):
    os.makedirs(dataset_dir, exist_ok=True)
    
    
    
# List the cat image file names, then shuffle them by drawing a random sample
# that is as long as the list itself.
filename_cat = os.listdir('/tmp/PetImages/Cat')
suffled_list = random.sample(filename_cat, k=len(filename_cat))

# Show the shuffled names followed by how many there are.
print(suffled_list, len(suffled_list), sep='\n')

In : 

 

Out :

['10737.jpg', '6432.jpg', '889.jpg', '2762.jpg', '7935.jpg', '4215.jpg', '7875.jpg', '10481.jpg', '7271.jpg', '11369.jpg', '49.jpg', '12471.jpg', '4665.jpg', '8589.jpg', '8481.jpg', '9512.jpg', '10335.jpg', '1154.jpg', '545.jpg', '11785.jpg', '1004.jpg', '7167.jpg', '6156.jpg', '3191.jpg', '11572.jpg', '5180.jpg', '3095.jpg', '7650.jpg', '8765.jpg', '4039.jpg', '8366.jpg', '9835.jpg', '9137.jpg', '11664.jpg', '9061.jpg', '1455.jpg', '7674.jpg', '12328.jpg', '7359.jpg', '6829.jpg', '8836.jpg', '2081.jpg', '6186.jpg', '11586.jpg', '2734.jpg', 등등...............

 

In :

# Number of shuffled names that make up the first 90% of the list; the
# notebook displays this value, which is used as the training/testing boundary.
int( len(suffled_list) * 0.9 )

 

 

Out : 

11250

 

 

In : 

# Split the shuffled names 90% / 10%. The boundary is computed from the list
# length instead of being hard-coded, so the split stays correct when the
# number of files changes.
split_index = int(len(suffled_list) * 0.9)

training = suffled_list[:split_index]

testing = suffled_list[split_index:]

print(training)
print(len(training))

 

 

Out : 

['10737.jpg', '6432.jpg', '889.jpg', '2762.jpg', '7935.jpg', '4215.jpg', '7875.jpg', '10481.jpg', '7271.jpg', '11369.jpg', '49.jpg', '12471.jpg', '4665.jpg', '8589.jpg', '8481.jpg', '9512.jpg', '10335.jpg', '1154.jpg', '545.jpg', '11785.jpg', '1004.jpg', '7167.jpg', '6156.jpg', '3191.jpg', '11572.jpg', '5180.jpg', '3095.jpg', '7650.jpg', '8765.jpg', '4039.jpg', '8366.jpg', '9835.jpg', '9137.jpg', '11664.jpg', '9061.jpg', '1455.jpg', '7674.jpg', '12328.jpg', '7359.jpg', '6829.jpg', '8836.jpg', '2081.jpg', '6186.jpg', '11586.jpg', '2734.jpg', '327.jpg', '2056.jpg', '9095.jpg', '12015.jpg', '1042.jpg', '2849.jpg', '11981.jpg', '12200.jpg' 등등......

 

In : 

# Show the held-out file names followed by how many there are.
print(testing, len(testing), sep='\n')

 

 

Out  :

['7852.jpg', '1660.jpg', '12020.jpg', '3508.jpg', '9972.jpg', '4394.jpg', '2738.jpg', '3093.jpg', '3666.jpg', '8368.jpg', '6522.jpg' 등등...............

 

# 다음 함수를 완성하시오.

# 먼저 파일들을 잘 섞은 후,
# 트레이닝은 90%, 테스트는 10%로 파일을 나눕니다.
# 그리고 파일의 사이즈가 0이 아니면, 
# 그 파일을 트레이닝 또는 테스트 디렉토리에 실제로 저장을 합니다.

# 아래 함수들을 참고하여, 위의 절차대로 프로그래밍 하세요.
#
# os.listdir(DIRECTORY) 디렉토리에 있는 파일 리스팅
# os.path.getsize(PATH) 파일 사이즈 확인
# copyfile(source, destination) 원하는 디렉토리로 파일 복사
# random.sample(list, len(list)) 리스트를 섞는다.(파일명 섞기)
 
In : 
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
  """Randomly split the files in SOURCE between TRAINING and TESTING.

  The entries of SOURCE are shuffled; the first ``SPLIT_SIZE`` fraction of
  the shuffled names is assigned to TRAINING and the remainder to TESTING.
  Each assigned entry is copied to its destination only if it is a regular
  file with a size greater than zero, so empty files and sub-directories
  are skipped.

  Args:
    SOURCE: Directory whose entries are split (with or without a trailing
      path separator).
    TRAINING: Existing directory that receives the training portion.
    TESTING: Existing directory that receives the testing portion.
    SPLIT_SIZE: Fraction of the shuffled entries assigned to TRAINING,
      e.g. 0.9 for a 90% / 10% split.
  """
  filename_list = os.listdir(SOURCE)
  shuffled_list = random.sample(filename_list, len(filename_list))
  index = int(len(shuffled_list) * SPLIT_SIZE)

  assignments = (
      (shuffled_list[:index], TRAINING),
      (shuffled_list[index:], TESTING),
  )
  for filenames, target_dir in assignments:
    for filename in filenames:
      # os.path.join inserts the separator only when it is missing, so the
      # directories may be given with or without a trailing slash.
      source_path = os.path.join(SOURCE, filename)
      if os.path.isfile(source_path) and os.path.getsize(source_path) > 0:
        copyfile(source_path, os.path.join(target_dir, filename))

# Source folder and training/testing destination folders for each class.
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

split_size = .9

# Split the cats first, then the dogs, with the same training fraction.
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split_data(source_dir, training_dir, testing_dir, split_size)

# Report how many files ended up in each destination folder.
for populated_dir in (
    '/tmp/cats-v-dogs/training/cats/',
    '/tmp/cats-v-dogs/training/dogs/',
    '/tmp/cats-v-dogs/testing/cats/',
    '/tmp/cats-v-dogs/testing/dogs/',
):
    print(len(os.listdir(populated_dir)))

# Running this should print something like:
# 11250
# 11250
# 1250
# 1250

 

Out : 11249 11249 1251 1251

 

In : 

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential

 

 

# 분류 모델 작성하기. 적어도 3개의 컨볼루션 사용하기.
 
def build_model() :
  """Build and return the uncompiled CNN used for cat/dog classification.

  The network takes 300x300 images with 3 channels and stacks three
  convolution + max-pooling stages (16, 32 and 64 filters, 3x3 kernels,
  ReLU), followed by a flatten layer, a 512-unit ReLU dense layer and a
  single sigmoid output unit.
  """
  layers = [
      # Convolutional feature extractor.
      Conv2D(16, (3, 3), activation='relu', input_shape=(300, 300, 3)),
      MaxPooling2D(pool_size=(2, 2), strides=2),
      Conv2D(32, (3, 3), activation='relu'),
      MaxPooling2D(pool_size=2, strides=2),
      Conv2D(64, (3, 3), activation='relu'),
      MaxPooling2D(pool_size=2, strides=2),
      # Classifier head.
      Flatten(),
      Dense(units=512, activation='relu'),
      Dense(units=1, activation='sigmoid'),
  ]
  return Sequential(layers)
  
model = build_model()

# Compile for binary classification with accuracy reported during training.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training images: pixel values are rescaled by 1/255 and read from the
# class sub-folders of the training directory.
TRAINING_DIR = "/tmp/cats-v-dogs/training/"
train_datagen = ImageDataGenerator(rescale=1/255.0)
# The generator batch size must be 10.
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(300, 300),
    class_mode='binary',
    batch_size=10,
)

# Validation images: same rescaling, read from the testing directory.
VALIDATION_DIR = "/tmp/cats-v-dogs/testing/"
validation_datagen = ImageDataGenerator(rescale=1/255.0)
# The generator batch size must be 10.
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(300, 300),
    class_mode='binary',
    batch_size=10,
)
# Expected Output:
# Found 22498 images belonging to 2 classes.
# Found 2500 images belonging to 2 classes.

 

Out : Found 22497 images belonging to 2 classes. Found 2501 images belonging to 2 classes.

 

In :

# Train for 15 epochs on the training generator, passing the validation
# generator as validation data; the returned object holds the metric history.
history = model.fit(
    x=train_generator,
    epochs=15,
    verbose=1,
    validation_data=validation_generator,
)
# LOSS 와 ACCURACY 에 대한 차트를 그린다. 그냥 실행하시오!
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch metric lists recorded by model.fit
# for the training data and the validation data
#-----------------------------------------------------------
acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # One x-axis position per recorded epoch

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The series name must be passed as label=...; given positionally, plot()
# parses the string as data for an additional series instead of a legend entry.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()  # start a new figure so the loss curves get their own chart

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")


plt.title('Training and validation loss')
plt.legend()

 

 
 
# 이미지를 업로드 하여, 위에서 만든 모델을 테스트 해보자.
 
import numpy as np
from google.colab import files
from keras.preprocessing import image

# Ask the user to upload one or more images through the Colab file picker.
uploaded = files.upload()

for fn in uploaded:

  # predicting images
  path = '/content/' + fn
  # Resize to the 300x300 input size the model was built with.
  img = image.load_img(path, target_size=(300, 300))
  x = image.img_to_array(img)
  # Apply the same 1/255 rescaling that the training and validation
  # generators applied, so the model sees inputs in the range it was trained on.
  x = x / 255.0
  # Add the leading batch dimension expected by model.predict.
  x = np.expand_dims(x, axis=0)

  classes = model.predict(x, batch_size=10)
  print(classes[0])
  # With class_mode='binary', flow_from_directory numbers the class folders
  # in alphanumeric order (cats, then dogs), so an output above 0.5 means dog.
  if classes[0]>0.5:
    print(fn + " is a dog")
  else:
    print(fn + " is a cat")