Cats vs Dogs model

The following code is meant to be run on Google Colaboratory; it may not work in a local environment.

Initial setup

# Install the pinned cuDNN package libcudnn8 version 8.4.1.50-1+cuda11.6.
# --allow-change-held-packages lets apt change the package even if it is marked as held.
# NOTE(review): presumably pinned to match the TensorFlow/CUDA build of the Colab runtime - confirm.
!apt install --allow-change-held-packages libcudnn8=8.4.1.50-1+cuda11.6

import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import matplotlib.pyplot as plt

get the data

# Download the cats-and-dogs archive and save it as /tmp/cats-and-dogs.zip.
# NOTE(review): --no-check-certificate disables TLS certificate verification for this download.
!wget --no-check-certificate \
"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip" \
-O "/tmp/cats-and-dogs.zip"

# Unpack the downloaded archive into /tmp.
local_zip = '/tmp/cats-and-dogs.zip'
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

# Root folder of the extracted images; each class has its own sub-folder.
source_path = '/tmp/PetImages'

source_path_dogs, source_path_cats = (
    os.path.join(source_path, class_folder) for class_folder in ('Dog', 'Cat')
)
	
# Delete every regular file under /tmp/PetImages/ whose name does not match "*.jpg"
# (find's -name test is case-sensitive).
!find /tmp/PetImages/ -type f ! -name "*.jpg" -exec rm {} +

Now the images are stored within the /tmp/PetImages directory. There is a subdirectory for each class, so one for dogs and one for cats.

making directories

# Target layout: <root_dir>/{training,validation}/{cats,dogs}
root_dir = '/tmp/cats-v-dogs'

path = os.path.join(root_dir, "training")
path2 = os.path.join(root_dir, "validation")

path_c = os.path.join(path, "cats")
path_p = os.path.join(path, "dogs")
path2_c = os.path.join(path2, "cats")
path2_p = os.path.join(path2, "dogs")

# os.makedirs also creates the missing parents (root_dir, training, validation);
# exist_ok=True keeps the cell re-runnable when the folders already exist.
for class_dir in (path_c, path_p, path2_c, path2_p):
    os.makedirs(class_dir, exist_ok=True)

Now we have made training and validation directories for both cats and dogs.

To check that the directories have been created:

# Walk the tree below root_dir and print the full path of every sub-directory found.
for parent_dir, child_dirs, _ in os.walk(root_dir):
    for child_name in child_dirs:
        print(os.path.join(parent_dir, child_name))

Expected Output (directory order might vary):

/tmp/cats-v-dogs/training
/tmp/cats-v-dogs/validation
/tmp/cats-v-dogs/training/cats
/tmp/cats-v-dogs/training/dogs
/tmp/cats-v-dogs/validation/cats
/tmp/cats-v-dogs/validation/dogs

Splitting the data

This function splits the data randomly, so that the training set is a random sample of the files and the validation set is made up of the remaining files. For this example we will split the data 9:1. Before a file is copied into the target directories its size is checked; files with zero length are omitted from the copying process.

def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
    """
    Randomly splits the files of SOURCE_DIR into a training and a validation set.

    The file names are shuffled, zero-length files are dropped (a message is
    printed for each one), and the remaining files are copied: the first
    int(count * SPLIT_SIZE) go to TRAINING_DIR and the rest to VALIDATION_DIR.
    Entries that are not regular files (e.g. sub-directories) are skipped.

    Args:
      SOURCE_DIR (string): directory path containing the images
      TRAINING_DIR (string): directory path to be used for training
      VALIDATION_DIR (string): directory path to be used for validation
      SPLIT_SIZE (float): proportion of the dataset to be used for training

    Returns:
      None
    """
    file_names = os.listdir(SOURCE_DIR)
    # random.sample over the full length returns a shuffled copy of the listing.
    shuffled = random.sample(file_names, len(file_names))

    usable = []
    for name in shuffled:
        # os.path.join works whether or not SOURCE_DIR ends with a separator.
        source_file = os.path.join(SOURCE_DIR, name)
        if not os.path.isfile(source_file):
            continue
        if os.path.getsize(source_file) == 0:
            print(f"{name} is zero length, so ignoring.")
            continue
        usable.append(name)

    # The split point is computed after filtering, so empty files do not
    # count towards either share.
    split_at = int(len(usable) * SPLIT_SIZE)

    for name in usable[:split_at]:
        copyfile(os.path.join(SOURCE_DIR, name), os.path.join(TRAINING_DIR, name))
    for name in usable[split_at:]:
        copyfile(os.path.join(SOURCE_DIR, name), os.path.join(VALIDATION_DIR, name))

Generators

Now we will feed our images to the image generators

def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
    """
    Builds the directory-backed image generators for training and validation.

    Both generators use an ImageDataGenerator with rescale=1/255 and read
    their directory in batches of 20 with binary class mode and a target
    size of 150x150.

    Args:
      TRAINING_DIR (string): directory path containing the training images
      VALIDATION_DIR (string): directory path containing the testing/validation images

    Returns:
      train_generator, validation_generator - tuple containing the generators
    """
    # Options shared by both flows.
    flow_options = dict(batch_size=20, class_mode='binary', target_size=(150, 150))

    train_datagen = ImageDataGenerator(rescale=1.0/255.)
    train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                        **flow_options)

    validation_datagen = ImageDataGenerator(rescale=1.0/255.)
    validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                  **flow_options)

    return train_generator, validation_generator
# Directories that hold the split data and feed the generators.
TRAINING_DIR = os.path.join(root_dir, "training")
VALIDATION_DIR = os.path.join(root_dir, "validation")

# Fill the class folders with a 9:1 train/validation split of each class.
# os.path.join(p, "") appends a trailing separator, which matters when a
# directory and a file name are concatenated directly.
for class_name, class_source in (("cats", source_path_cats), ("dogs", source_path_dogs)):
    split_data(
        os.path.join(class_source, ""),
        os.path.join(TRAINING_DIR, class_name, ""),
        os.path.join(VALIDATION_DIR, class_name, ""),
        0.9,
    )

train_generator, validation_generator = train_val_generators(TRAINING_DIR, VALIDATION_DIR)

Expected output

Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.

Model

def create_model():
    """
    Builds and compiles the convolutional network used for cats-vs-dogs classification.

    The network takes 150x150 inputs with 3 channels and stacks three
    Conv2D (3x3, relu) + MaxPool2D (2x2) stages with 16, 32 and 64 filters,
    followed by Flatten, a 512-unit relu Dense layer and a single sigmoid
    output unit. It is compiled with the "adam" optimizer,
    binary cross-entropy loss and the accuracy metric.

    Returns:
      model - the compiled tf.keras.Sequential model
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        # One sigmoid unit matches the binary labels and binary_crossentropy loss.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer="adam",
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

Training

# Build the network and fit it on the training generator for 15 epochs,
# evaluating on the validation generator; the object returned by fit is
# kept in `history`.
model = create_model()
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=15,
    verbose=1,
)