import os, shutil
original_dataset_dir=r'dogs-vs-cats/train'
base_dir=r'cats_and_dogs_small'
if not os.path.isdir(base_dir): os.mkdir(base_dir)
train_dir=os.path.join(base_dir,'train_dir')
if not os.path.isdir(train_dir): os.mkdir(train_dir) # create the training folder
validation_dir=os.path.join(base_dir,'validation_dir')
if not os.path.isdir(validation_dir): os.mkdir(validation_dir) # create the validation folder
test_dir=os.path.join(base_dir,'test_dir')
if not os.path.isdir(test_dir): os.mkdir(test_dir) # create the test folder
train_cat_dir=os.path.join(train_dir,'train_cat_dir')
if not os.path.isdir(train_cat_dir): os.mkdir(train_cat_dir) # create the cat training folder
train_dog_dir=os.path.join(train_dir,'train_dog_dir')
if not os.path.isdir(train_dog_dir): os.mkdir(train_dog_dir) # create the dog training folder
validation_cat_dir=os.path.join(validation_dir,'validation_cat_dir')
if not os.path.isdir(validation_cat_dir): os.mkdir(validation_cat_dir) # create the cat validation folder
validation_dog_dir=os.path.join(validation_dir,'validation_dog_dir')
if not os.path.isdir(validation_dog_dir): os.mkdir(validation_dog_dir) # create the dog validation folder
test_cat_dir=os.path.join(test_dir,'test_cat_dir')
if not os.path.isdir(test_cat_dir): os.mkdir(test_cat_dir) # create the cat test folder
test_dog_dir=os.path.join(test_dir,'test_dog_dir')
if not os.path.isdir(test_dog_dir): os.mkdir(test_dog_dir) # create the dog test folder
# Copy the first 1000 cat images into the train_cat_dir training directory
fnames=['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(train_cat_dir,fname)
    shutil.copyfile(src,dst)
# Copy the next 500 cat images into the validation_cat_dir validation directory
fnames=['cat.{}.jpg'.format(i) for i in range(1000,1500)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(validation_cat_dir,fname)
    shutil.copyfile(src,dst)
# Copy the next 500 cat images into the test_cat_dir test directory
fnames=['cat.{}.jpg'.format(i) for i in range(1500,2000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(test_cat_dir,fname)
    shutil.copyfile(src,dst)
# Copy the first 1000 dog images into the train_dog_dir training directory
fnames=['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(train_dog_dir,fname)
    shutil.copyfile(src,dst)
# Copy the next 500 dog images into the validation_dog_dir validation directory
fnames=['dog.{}.jpg'.format(i) for i in range(1000,1500)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(validation_dog_dir,fname)
    shutil.copyfile(src,dst)
# Copy the next 500 dog images into the test_dog_dir test directory
fnames=['dog.{}.jpg'.format(i) for i in range(1500,2000)]
for fname in fnames:
    src=os.path.join(original_dataset_dir,fname)
    dst=os.path.join(test_dog_dir,fname)
    shutil.copyfile(src,dst)
print('total training cat images:',len(os.listdir(train_cat_dir)))
print('total validation cat images:',len(os.listdir(validation_cat_dir)))
print('total test cat images:',len(os.listdir(test_cat_dir)))
print('total training dog images:',len(os.listdir(train_dog_dir)))
print('total validation dog images:',len(os.listdir(validation_dog_dir)))
print('total test dog images:',len(os.listdir(test_dog_dir)))
# Build the convolutional neural network
from keras import layers
from keras import models
model=models.Sequential()
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(150,150,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128,(3,3),activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(512,activation='relu'))
model.add(layers.Dense(1,activation='sigmoid')) # single sigmoid unit for the binary cat-vs-dog output
model.summary()
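# How the shapes in model.summary() come about (3x3 'valid' convs subtract 2 from
# each spatial dimension; 2x2 max pools halve it, rounding down):
# 150x150x3 -> conv -> 148x148x32 -> pool -> 74x74x32 -> conv -> 72x72x64 -> pool -> 36x36x64
# -> conv -> 34x34x128 -> pool -> 17x17x128 -> flatten -> 17*17*128 = 36992 values into the Dense layers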
from keras import optimizers
model.compile(loss='binary_crossentropy',optimizer=optimizers.RMSprop(lr=1e-4),metrics=['acc'])
# Data preprocessing
# 1. Read the image files  2. Decode the JPEG content into RGB pixel grids  3. Convert the RGB pixels into floating-point tensors  4. Rescale the pixel values from [0,255] to [0,1]
# keras.preprocessing.image's ImageDataGenerator sets up a Python generator that automatically turns image files into batches of tensors
from keras.preprocessing.image import ImageDataGenerator
train_datagen=ImageDataGenerator(rescale=1./255)
test_datagen=ImageDataGenerator(rescale=1./255)
train_generator=train_datagen.flow_from_directory(train_dir,target_size=(150,150),batch_size=20,class_mode='binary')
validation_generator=test_datagen.flow_from_directory(validation_dir,target_size=(150,150),batch_size=20,class_mode='binary')
for data_batch,labels_batch in validation_generator:
    print('data batch shape:',data_batch.shape)
    print('labels batch shape:',labels_batch.shape)
    break # the generator loops forever, so take just one batch
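# As an aside, the four preprocessing steps listed above can also be done by hand.
# A minimal sketch using PIL and NumPy (the sample filename is just one of the
# files copied earlier); this is exactly the work ImageDataGenerator automates:
from PIL import Image
import numpy as np
sample_path=os.path.join(train_cat_dir,'cat.0.jpg') # 1. pick a file to read
sample_img=Image.open(sample_path).convert('RGB').resize((150,150)) # 2. decode the JPEG into RGB pixels
sample_x=np.array(sample_img,dtype='float32') # 3. convert to a (150,150,3) floating-point tensor
sample_x/=255.0 # 4. rescale [0,255] to [0,1]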
history=model.fit_generator(train_generator,
                            steps_per_epoch=10, # 10 batches of 20 images per epoch; steps_per_epoch=100 would cover all 2000 training images
                            epochs=50,
                            validation_data=validation_generator,
                            validation_steps=50) # 50 batches of 20 images = the full 1000 validation images
model.save('cats_and_dogs_small_1.h5')
import matplotlib.pyplot as plt
acc=history.history['acc']
val_acc=history.history['val_acc']
loss=history.history['loss']
val_loss=history.history['val_loss']
epochs=range(1,len(acc)+1)
plt.plot(epochs,acc,'bo',label='Training acc')
plt.plot(epochs,val_acc,'b',label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs,loss,'bo',label='Training loss')
plt.plot(epochs,val_loss,'b',label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Because the number of training images is small, the model overfits easily. Ways to combat overfitting:
1. Use data augmentation so the effective training sample space becomes larger
2. Use weight regularization (in Keras this is configured per layer, e.g. with kernel_regularizer; see the sketch after this list)
3. Use dropout to randomly deactivate units so the network cannot over-rely on any of them
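# Method 2 above is not demonstrated elsewhere in this example, so here is a
# minimal sketch of per-layer weight regularization in Keras (the 0.001 L2
# factor is only an illustrative value):
from keras import regularizers
regularized=layers.Dense(512,activation='relu',
                         kernel_regularizer=regularizers.l2(0.001)) # adds 0.001*sum(w**2) to the loss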
Data augmentation is demonstrated below.
# keras.preprocessing.image's ImageDataGenerator can also generate augmented images
datagen=ImageDataGenerator(rotation_range=40,      # random rotation up to 40 degrees
                           width_shift_range=0.2,  # random horizontal shift up to 20% of the width
                           height_shift_range=0.2, # random vertical shift up to 20% of the height
                           shear_range=0.2,        # random shearing transformations
                           zoom_range=0.2,         # random zoom up to 20%
                           horizontal_flip=True,   # random horizontal flips
                           fill_mode='nearest')    # fill newly created pixels with the nearest values
from keras.preprocessing import image
fnames=[os.path.join(train_cat_dir,fname) for fname in os.listdir(train_cat_dir)]
# Read one sample image (fnames[3], the fourth file) with image.load_img()
img=image.load_img(fnames[3],target_size=(150,150))
img # displays the image when run in a notebook cell
# Convert the image to an array
x=image.img_to_array(img)  # the image becomes a (150,150,3) array
x=x.reshape((1,)+x.shape)  # add a batch dimension: (1,150,150,3)
i=0
for batch in datagen.flow(x,batch_size=1): # yields an endless stream of randomly augmented copies
    plt.figure(i)
    imgplot=plt.imshow(image.array_to_img(batch[0]))
    i+=1
    if i%3==0: # the generator loops forever, so stop after three augmented images
        break
plt.show()

We have now seen how to enlarge a limited sample space with keras.preprocessing.image's ImageDataGenerator:
data augmentation makes the effective training sample space larger, and
dropout randomly deactivates units so the network cannot over-rely on any of them.
Below we apply these two methods together to tackle the overfitting problem.
model=models.Sequential()
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(150,150,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128,(3,3),activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5)) # dropout layer: randomly zeroes 50% of the activations during training
model.add(layers.Dense(512,activation='relu'))
model.add(layers.Dense(1,activation='sigmoid'))
from keras import optimizers
model.compile(loss='binary_crossentropy',optimizer=optimizers.RMSprop(lr=1e-4),metrics=['acc'])
# Configure the data augmentation for the training images
train_datagen=ImageDataGenerator(rescale=1./255,
                                 rotation_range=40,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 shear_range=0.2,
                                 zoom_range=0.2,
                                 horizontal_flip=True)
test_datagen=ImageDataGenerator(rescale=1./255) # validation data is only rescaled, never augmented
# Build the generators using the augmentation settings above
train_generator=train_datagen.flow_from_directory(train_dir,
                                                  target_size=(150,150),
                                                  batch_size=32,
                                                  class_mode='binary')
validation_generator=test_datagen.flow_from_directory(validation_dir,
                                                      target_size=(150,150),
                                                      batch_size=32,
                                                      class_mode='binary')
history=model.fit_generator(train_generator,
                            steps_per_epoch=10, # with batch_size=32, about 63 steps would cover all 2000 training images
                            epochs=50,
                            validation_data=validation_generator,
                            validation_steps=50)
model.save('cats_and_dogs_small_2.h5')
import matplotlib.pyplot as plt
acc=history.history['acc']
val_acc=history.history['val_acc']
loss=history.history['loss']
val_loss=history.history['val_loss']
epochs=range(1,len(acc)+1)
plt.plot(epochs,acc,'bo',label='Training acc')
plt.plot(epochs,val_acc,'b',label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs,loss,'bo',label='Training loss')
plt.plot(epochs,val_loss,'b',label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
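# To reuse the saved model later, it can be reloaded with Keras' load_model
# (a quick usage sketch):
from keras.models import load_model
restored=load_model('cats_and_dogs_small_2.h5') # restores architecture, weights, and optimizer state
restored.summary()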