What is the ImageDataGenerator class?
When there is only a small amount of training data for an image model, one way to increase it is to train on slightly modified copies of the existing images (data augmentation). The most common constructor arguments are listed below, with a short sketch after the list showing how they fit together.
- rescale = 1./255 : rescale pixel values into the 0-1 range
- rotation_range = 30 : random rotation of up to 30 degrees
- shear_range = 0.2 : shear intensity of 20%
- zoom_range = 0.2 : random zoom of up to 20%
- horizontal_flip = True : randomly flip images horizontally
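A minimal sketch of how these options can be combined into a single generator (the values are just the examples from the list, not required settings):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,        # scale pixel values into the 0-1 range
    rotation_range=30,     # random rotations of up to 30 degrees
    shear_range=0.2,       # shear by up to 20%
    zoom_range=0.2,        # zoom in/out by up to 20%
    horizontal_flip=True   # flip horizontally at random
)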
ImageDataGenerator - transforming the images
train_generator = train_datagen.flow(
    x=x_train, y=y_train,
    batch_size=32,
    shuffle=True
)
Calling the flow() method on the train_datagen object and passing it the actual data (x and y) returns a generator that yields the transformed images.
- batch_size : batch size
- shuffle : whether to shuffle the data
Other options (see the sketch after this list):
- target_size : image size
- color_mode : number of image channels, e.g. 'rgb'
- class_mode : how the labels (Y) are encoded, e.g. 'categorical'
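target_size, color_mode and class_mode are arguments of flow_from_directory, which reads images straight from a folder that has one sub-folder per class. A minimal sketch, assuming a hypothetical folder '/tmp/some-dataset/' (the path and values here are only illustrative):

generator = train_datagen.flow_from_directory(
    '/tmp/some-dataset/',      # hypothetical path with one sub-folder per class
    target_size=(300, 300),    # resize every image to 300x300
    color_mode='rgb',          # 3-channel images
    class_mode='categorical',  # labels encoded as one-hot vectors
    batch_size=32
)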
Practice
In :
!wget --no-check-certificate \
"https://block-edu-test.s3.ap-northeast-2.amazonaws.com/kagglecatsanddogs_5340.zip" \
-O "/tmp/cats-and-dogs.zip"
import zipfile

local_zip = '/tmp/cats-and-dogs.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()
Out :
--2022-06-15 07:07:49-- https://block-edu-test.s3.ap-northeast-2.amazonaws.com/kagglecatsanddogs_5340.zip
Resolving block-edu-test.s3.ap-northeast-2.amazonaws.com (block-edu-test.s3.ap-northeast-2.amazonaws.com)... 52.219.146.54
Connecting to block-edu-test.s3.ap-northeast-2.amazonaws.com (block-edu-test.s3.ap-northeast-2.amazonaws.com)|52.219.146.54|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824887076 (787M) [application/zip]
Saving to: ‘/tmp/cats-and-dogs.zip’
/tmp/cats-and-dogs. 100%[===================>] 786.67M 20.4MB/s in 41s
2022-06-15 07:08:32 (19.0 MB/s) - ‘/tmp/cats-and-dogs.zip’ saved [824887076/824887076]
In :
import os

print(len(os.listdir('/tmp/PetImages/Cat/')))
print(len(os.listdir('/tmp/PetImages/Dog/')))
# Expected Output:
# 12501
# 12501
Out :
12501
12501
# Use os.mkdir to create the following directories for storing the images:
# '/tmp/cats-v-dogs'
# '/tmp/cats-v-dogs/training'
# '/tmp/cats-v-dogs/testing'
# '/tmp/cats-v-dogs/training/cats'
# '/tmp/cats-v-dogs/training/dogs'
# '/tmp/cats-v-dogs/testing/cats'
# '/tmp/cats-v-dogs/testing/dogs'
In :
try:
    os.mkdir('/tmp/cats-v-dogs')
    os.mkdir('/tmp/cats-v-dogs/training')
    os.mkdir('/tmp/cats-v-dogs/testing')
    os.mkdir('/tmp/cats-v-dogs/training/cats')
    os.mkdir('/tmp/cats-v-dogs/training/dogs')
    os.mkdir('/tmp/cats-v-dogs/testing/cats')
    os.mkdir('/tmp/cats-v-dogs/testing/dogs')
except OSError:
    pass
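# An equivalent, slightly more compact alternative (a sketch, not required):
# os.makedirs with exist_ok=True creates parent folders and ignores existing ones.
for sub in ['training/cats', 'training/dogs', 'testing/cats', 'testing/dogs']:
    os.makedirs(os.path.join('/tmp/cats-v-dogs', sub), exist_ok=True)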
In :
import random

filename_cat = os.listdir('/tmp/PetImages/Cat')
shuffled_list = random.sample(filename_cat, len(filename_cat))
print(shuffled_list)
print(len(shuffled_list))
Out :
['10737.jpg', '6432.jpg', '889.jpg', '2762.jpg', '7935.jpg', '4215.jpg', '7875.jpg', '10481.jpg', '7271.jpg', '11369.jpg', '49.jpg', '12471.jpg', '4665.jpg', '8589.jpg', '8481.jpg', '9512.jpg', '10335.jpg', '1154.jpg', '545.jpg', '11785.jpg', '1004.jpg', '7167.jpg', '6156.jpg', '3191.jpg', '11572.jpg', '5180.jpg', '3095.jpg', '7650.jpg', '8765.jpg', '4039.jpg', '8366.jpg', '9835.jpg', '9137.jpg', '11664.jpg', '9061.jpg', '1455.jpg', '7674.jpg', '12328.jpg', '7359.jpg', '6829.jpg', '8836.jpg', '2081.jpg', '6186.jpg', '11586.jpg', '2734.jpg', ... (and so on)
In :
int(len(shuffled_list) * 0.9)
Out :
11250
In :
training = shuffled_list[:11250]
testing = shuffled_list[11250:]
print(training)
print(len(training))
Out :
['10737.jpg', '6432.jpg', '889.jpg', '2762.jpg', '7935.jpg', '4215.jpg', '7875.jpg', '10481.jpg', '7271.jpg', '11369.jpg', '49.jpg', '12471.jpg', '4665.jpg', '8589.jpg', '8481.jpg', '9512.jpg', '10335.jpg', '1154.jpg', '545.jpg', '11785.jpg', '1004.jpg', '7167.jpg', '6156.jpg', '3191.jpg', '11572.jpg', '5180.jpg', '3095.jpg', '7650.jpg', '8765.jpg', '4039.jpg', '8366.jpg', '9835.jpg', '9137.jpg', '11664.jpg', '9061.jpg', '1455.jpg', '7674.jpg', '12328.jpg', '7359.jpg', '6829.jpg', '8836.jpg', '2081.jpg', '6186.jpg', '11586.jpg', '2734.jpg', '327.jpg', '2056.jpg', '9095.jpg', '12015.jpg', '1042.jpg', '2849.jpg', '11981.jpg', '12200.jpg' ... (and so on)
In :
print(testing)
print(len(testing))
Out :
['7852.jpg', '1660.jpg', '12020.jpg', '3508.jpg', '9972.jpg', '4394.jpg', '2738.jpg', '3093.jpg', '3666.jpg', '8368.jpg', '6522.jpg' ... (and so on)
In :
from shutil import copyfile

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    filename_list = os.listdir(SOURCE)
    shuffled_list = random.sample(filename_list, len(filename_list))
    index = int(len(shuffled_list) * SPLIT_SIZE)
    training = shuffled_list[:index]
    testing = shuffled_list[index:]
    # Copy only non-empty files (the dataset contains a few corrupt zero-byte images).
    for filename in training:
        if os.path.getsize(SOURCE + filename) > 0:
            copyfile(SOURCE + filename, TRAINING + filename)
    for filename in testing:
        if os.path.getsize(SOURCE + filename) > 0:
            copyfile(SOURCE + filename, TESTING + filename)
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"
split_size = .9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)
print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))
print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))
print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))
print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))
# Running this should print something like:
# 11250
# 11250
# 1250
# 1250
# (The actual counts may differ slightly: each class has 12501 files rather than
#  12500, and the getsize check above skips any zero-byte files.)
Out :
11249
11249
1251
1251
In :
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

def build_model():
    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(300, 300, 3)))
    model.add(MaxPooling2D((2, 2), 2))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), 2))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), 2))
    model.add(Flatten())
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=1, activation='sigmoid'))
    return model

model = build_model()
model.compile(optimizer=RMSprop(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
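# Optional sanity check: with 'valid' padding (the Keras default), model.summary()
# should show the feature maps shrinking roughly like this:
# Conv2D(16)   -> (298, 298, 16)
# MaxPooling2D -> (149, 149, 16)
# Conv2D(32)   -> (147, 147, 32)
# MaxPooling2D -> (73, 73, 32)
# Conv2D(64)   -> (71, 71, 64)
# MaxPooling2D -> (35, 35, 64)
# Flatten      -> 78400, then Dense(512) and Dense(1)
model.summary()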
from tensorflow.keras.preprocessing.image import ImageDataGenerator
TRAINING_DIR = "/tmp/cats-v-dogs/training/"
train_datagen = ImageDataGenerator(rescale= 1/255.0)
# Be sure to set the generator's batch size to 10.
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, target_size=(300,300), \
class_mode='binary', batch_size=10)
VALIDATION_DIR = "/tmp/cats-v-dogs/testing/"
validation_datagen = ImageDataGenerator(rescale= 1/255.0)
# Be sure to set the generator's batch size to 10.
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, \
target_size=(300,300), class_mode='binary', batch_size=10)
# Expected Output:
# Found 22498 images belonging to 2 classes.
# Found 2500 images belonging to 2 classes.
Out :
Found 22497 images belonging to 2 classes.
Found 2501 images belonging to 2 classes.
In :
history = model.fit(train_generator,
epochs=15,
verbose=1,
validation_data=validation_generator)
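# Optional check (a sketch): with no steps_per_epoch given, fit() uses len(generator),
# i.e. ceil(number_of_images / batch_size) batches per epoch.
import math
print(len(train_generator), math.ceil(22497 / 10))       # both should be 2250
print(len(validation_generator), math.ceil(2501 / 10))   # both should be 251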
# Plot the loss and accuracy charts. Just run this cell!
%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
#-----------------------------------------------------------
# Retrieve a list of list results on training and test data
# sets for each training epoch
#-----------------------------------------------------------
acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']
epochs=range(len(acc)) # Get number of epochs
#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.legend()
import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():
    # predicting images
    path = '/content/' + fn
    img = image.load_img(path, target_size=(300, 300))  # match the model's input size
    x = image.img_to_array(img)
    x = x / 255.0  # apply the same rescaling as the training generator
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    print(classes[0])
    if classes[0] > 0.5:
        print(fn + " is a dog")
    else:
        print(fn + " is a cat")
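The "dog if > 0.5" rule relies on how flow_from_directory assigned the labels: class folders are mapped to indices in alphabetical order, so 'cats' should be 0 and 'dogs' 1. A quick way to confirm this, using the generator defined above:

print(train_generator.class_indices)  # expected: {'cats': 0, 'dogs': 1}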