Hey everyone,
We know that computing resources can be hard to come by, especially for beginners, so we have written a new starter notebook that lets you train a MaskRCNN model directly on Colab.
This dataset and notebook correspond to the Food Recognition Challenge being held on AICrowd.
In this notebook, we will first do an analysis of the Food Recognition Dataset and then use MaskRCNN for training on the dataset.
The Challenge¶
- Given images of food, we are asked to provide instance segmentation of the food items in each image.
- The training data is provided in the COCO format, making it simple to load with the COCO data processors already available in popular libraries.
- The test set provided in the public dataset is similar to the validation set, but without annotations.
- The test set used after submission is much larger and contains private images on which every submission is evaluated.
- Participants have to submit their trained model along with trained weights. Immediately after submission, the AIcrowd grader picks up the submitted model and runs inference on the private test set using cloud GPUs.
- This requires users to structure their repositories and follow the provided paradigm for submission.
- The AIcrowd AutoGrader picks up the Dockerfile provided with the repository, builds it, and then mounts the tests folder in the container. Once inference is made, the final results are checked against the ground truth.
For more submission-related information, please check the AIcrowd challenge page and the starter kit.
The Notebook¶
- Installation of MaskRCNN
- Using the Matterport MaskRCNN library and making local inference with it
- Local evaluation using Matterport MaskRCNN
A bonus section on other resources to read is also added!
Dataset Download¶
Note: By downloading this data you are agreeing to the competition rules specified here
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-public-datasets/myfoodrepo/round-2/train.tar.gz
!wget -q https://s3.eu-central-1.wasabisys.com/aicrowd-public-datasets/myfoodrepo/round-2/val.tar.gz
!mkdir data
!mkdir data/val
!mkdir data/train
!tar -xf train.tar.gz -C data/train
!tar -xf val.tar.gz -C data/val
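Before going further, it is worth sanity-checking the download. A minimal sketch (assuming the archives unpack into an images folder plus an annotations.json, which is the layout the data loader below expects):
import json, glob, os

# Count images and annotations in each split to confirm extraction worked
for split in ["train", "val"]:
    image_files = glob.glob(os.path.join("data", split, "images", "*.jpg"))
    with open(os.path.join("data", split, "annotations.json")) as f:
        coco_json = json.load(f)
    print(split, ":", len(image_files), "images,",
          len(coco_json["annotations"]), "annotations,",
          len(coco_json["categories"]), "categories")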
Installation¶
# List the directories present in the extracted data
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
for dirname, _, filenames in os.walk('data/'):
    print(dirname)
import warnings
warnings.filterwarnings("ignore")
!pip install -q -U numpy==1.16.1
import os
import sys
import random
import math
import numpy as np
import cv2
import matplotlib.pyplot as plt
import json
from imgaug import augmenters as iaa
from tqdm import tqdm
import pandas as pd
import glob
!pip install -q tensorflow-gpu==1.13.1
import tensorflow as tf
tf.__version__
DATA_DIR = 'data'
# Directory to save logs and trained model
ROOT_DIR = 'working'
!git clone https://github.com/matterport/Mask_RCNN.git
os.chdir('Mask_RCNN')
!pip install -q -r requirements.txt
!python setup.py -q install
# Import Mask RCNN
sys.path.append(os.getcwd())  # To find local version of the library (we are inside Mask_RCNN after the chdir above)
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
!pip uninstall pycocotools -y
!pip install -q git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI
from mrcnn import utils
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils
MaskRCNN¶
To train MaskRCNN, we have to define two things: a FoodChallengeDataset that implements the Dataset class of MaskRCNN, and a FoodChallengeConfig that implements the Config class.
The FoodChallengeDataset defines the functions that allow us to load the data.
The FoodChallengeConfig provides information like NUM_CLASSES, BACKBONE, etc.
class FoodChallengeDataset(utils.Dataset):
    def load_dataset(self, dataset_dir, load_small=False, return_coco=True):
        """ Loads dataset released for the AICrowd Food Challenge
            Params:
                - dataset_dir : root directory of the dataset (can point to the train/val folder)
                - load_small : Boolean value which signals if the annotations for all the images need to be loaded into the memory,
                               or if only a small subset of the same should be loaded into memory
        """
        self.load_small = load_small
        if self.load_small:
            annotation_path = os.path.join(dataset_dir, "annotation-small.json")
        else:
            annotation_path = os.path.join(dataset_dir, "annotations.json")

        image_dir = os.path.join(dataset_dir, "images")
        print("Annotation Path ", annotation_path)
        print("Image Dir ", image_dir)
        assert os.path.exists(annotation_path) and os.path.exists(image_dir)

        self.coco = COCO(annotation_path)
        self.image_dir = image_dir

        # Load all 61 food classes
        classIds = self.coco.getCatIds()

        # Load all images
        image_ids = list(self.coco.imgs.keys())

        # Register classes
        for _class_id in classIds:
            self.add_class("crowdai-food-challenge", _class_id, self.coco.loadCats(_class_id)[0]["name"])

        # Register images
        for _img_id in image_ids:
            assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])))
            self.add_image(
                "crowdai-food-challenge", image_id=_img_id,
                path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']),
                width=self.coco.imgs[_img_id]["width"],
                height=self.coco.imgs[_img_id]["height"],
                annotations=self.coco.loadAnns(self.coco.getAnnIds(
                    imgIds=[_img_id],
                    catIds=classIds,
                    iscrowd=None)))

        if return_coco:
            return self.coco
    def load_mask(self, image_id):
        """ Loads instance masks for a given image
            This function converts masks from the COCO format to a
            bitmap of shape [height, width, instances]
            Params:
                - image_id : reference id for a given image
            Returns:
                masks : A bool array of shape [height, width, instances] with
                    one mask per instance
                class_ids : a 1D array (shape [instances]) of class ids of the corresponding instance masks
        """
        image_info = self.image_info[image_id]
        assert image_info["source"] == "crowdai-food-challenge"

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "crowdai-food-challenge.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel in area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue

                # Ignore the notion of "is_crowd" as specified in the COCO format,
                # as we do not have that annotation in the current version of the dataset
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(FoodChallengeDataset, self).load_mask(image_id)
    def image_reference(self, image_id):
        """Return a reference for a particular image
            Ideally this function is supposed to return a URL,
            but in this case we will simply return the image_id
        """
        return "crowdai-food-challenge::{}".format(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons or uncompressed RLE to RLE.
        :return: RLE (run-length encoding) of the mask
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m
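The annToRLE / annToMask pair above is the heart of the mask loading: polygon annotations are rasterized to COCO run-length encodings (RLE), multi-part objects are merged, and the result is decoded into a binary bitmap. A tiny self-contained illustration with a made-up polygon (not from the dataset):
from pycocotools import mask as maskUtils

# A hypothetical object made of one polygon: flattened [x1, y1, x2, y2, ...] pairs
poly = [[1.0, 1.0, 6.0, 1.0, 6.0, 6.0]]
rles = maskUtils.frPyObjects(poly, 8, 8)   # rasterize on an 8x8 canvas
rle = maskUtils.merge(rles)                # merge multi-part objects into one RLE
m = maskUtils.decode(rle)                  # (8, 8) binary numpy array
print(m)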
class FoodChallengeConfig(Config):
    """Configuration for training on data in MS COCO format.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "crowdai-food-challenge"

    # A GPU with 12GB memory can fit four 256x256 images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 4

    # Number of GPUs to train on (default is 1)
    GPU_COUNT = 1

    BACKBONE = 'resnet50'

    # Number of classes (including background)
    NUM_CLASSES = 62  # 1 Background + 61 classes

    STEPS_PER_EPOCH = 150
    VALIDATION_STEPS = 50
    LEARNING_RATE = 0.001
    IMAGE_MAX_DIM = 256
    IMAGE_MIN_DIM = 256
config = FoodChallengeConfig()
config.display()
You can change other values in the FoodChallengeConfig as well and try out different combinations for best results!
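For example, here is a hypothetical variant (values illustrative, not tuned) that trades memory and speed for capacity by using a deeper backbone and larger images:
class BiggerFoodChallengeConfig(FoodChallengeConfig):
    # Illustrative overrides; adjust to your GPU budget
    BACKBONE = 'resnet101'    # deeper backbone, needs more memory
    IMAGES_PER_GPU = 2
    IMAGE_MAX_DIM = 512
    IMAGE_MIN_DIM = 512
    STEPS_PER_EPOCH = 500

# config = BiggerFoodChallengeConfig()  # swap in instead of FoodChallengeConfig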
!mkdir pretrained
PRETRAINED_MODEL_PATH = os.path.join("pretrained", "mask_rcnn_coco.h5")
LOGS_DIRECTORY = os.path.join(ROOT_DIR, "logs")
if not os.path.exists(PRETRAINED_MODEL_PATH):
    utils.download_trained_weights(PRETRAINED_MODEL_PATH)
# Confirm that Keras sees the GPU and uses the TensorFlow backend
from keras import backend as K
K.tensorflow_backend._get_available_gpus()
import keras.backend
K = keras.backend.backend()
if K == 'tensorflow':
    keras.backend.common.image_dim_ordering()
model = modellib.MaskRCNN(mode="training", config=config, model_dir=LOGS_DIRECTORY)
model_path = PRETRAINED_MODEL_PATH
# Load the pretrained COCO weights, excluding the head layers:
# they depend on the number of classes, which differs between COCO
# and this dataset, so they are trained from scratch.
model.load_weights(model_path, by_name=True, exclude=[
    "mrcnn_class_logits", "mrcnn_bbox_fc",
    "mrcnn_bbox", "mrcnn_mask"])
dataset_train = FoodChallengeDataset()
dataset_train.load_dataset('/content/data/train', load_small=False)
dataset_train.prepare()
dataset_val = FoodChallengeDataset()
val_coco = dataset_val.load_dataset(dataset_dir='/content/data/val', load_small=False, return_coco=True)
dataset_val.prepare()
class_names = dataset_train.class_names
# If you don't have the correct classes here, there must be some error in your DatasetConfig
assert len(class_names)==62, "Please check DatasetConfig"
class_names
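Before training, it helps to eyeball a few ground-truth masks to confirm the dataset loads correctly. A quick sketch using the visualizer that ships with the Matterport library:
# Inspect a random training sample and its ground-truth masks
image_id = random.choice(dataset_train.image_ids)
image = dataset_train.load_image(image_id)
mask, class_ids = dataset_train.load_mask(image_id)
visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)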
Let's start training!¶
print("Training network")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=15,
            layers='heads')
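If you still have GPU budget after the heads converge, a common follow-up (a sketch, not part of the original notebook) is to fine-tune all layers at a lower learning rate. Note that the epochs argument is cumulative, so this continues from epoch 15:
# Optional: fine-tune the whole network at a reduced learning rate
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=20,
            layers='all')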
model_path = model.find_last()
model_path
class InferenceConfig(FoodChallengeConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 62  # 1 Background + 61 classes
    IMAGE_MAX_DIM = 256
    IMAGE_MIN_DIM = 256
    NAME = "food"
    DETECTION_MIN_CONFIDENCE = 0
inference_config = InferenceConfig()
inference_config.display()
# Recreate the model in inference mode
model = modellib.MaskRCNN(mode='inference',
                          config=inference_config,
                          model_dir=ROOT_DIR)
# Load trained weights (fill in path to trained weights here)
assert model_path != "", "Provide path to trained weights"
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)
# Show a few examples of ground truth vs. predictions on the validation dataset
dataset = dataset_val
fig = plt.figure(figsize=(10, 30))

for i in range(4):
    image_id = random.choice(dataset.image_ids)
    original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    print(original_image.shape)
    plt.subplot(6, 2, 2*i + 1)
    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                                dataset.class_names, ax=fig.axes[-1])
    plt.subplot(6, 2, 2*i + 2)
    results = model.detect([original_image])  # , verbose=1)
    r = results[0]
    visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'],
                                dataset.class_names, r['scores'], ax=fig.axes[-1])
import json
with open('/content/data/val/annotations.json') as json_file:
    data = json.load(json_file)

# Build a mapping from the model's contiguous class indices back to the
# original COCO category ids used by the grader
d = {}
for x in data["categories"]:
    d[x["name"]] = x["id"]
id_category = [0]
for x in dataset.class_names[1:]:
    id_category.append(d[x])
#id_category
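This indirection matters because MaskRCNN assigns its own contiguous class indices in load order, while the grader expects the original category ids from annotations.json. A quick illustrative check (actual values depend on your annotation file):
# Model class index 1 vs. the original COCO category id it maps back to
print(dataset.class_names[1], "-> category id", id_category[1])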
import tqdm
import skimage
files = glob.glob(os.path.join('/content/data/val/test_images/images', "*.jpg"))
_final_object = []
for file in tqdm.tqdm(files):
    images = [skimage.io.imread(file)]
    #if(len(images)!= inference_config.IMAGES_PER_GPU):
    #    images = images + [images[-1]]*(inference_config.BATCH_SIZE - len(images))
    predictions = model.detect(images, verbose=0)
    #print(file)
    for _pred_idx, r in enumerate(predictions):
        # Image ids in the test set are encoded in the file names
        image_id = int(file.split("/")[-1].replace(".jpg", ""))
        for _idx, class_id in enumerate(r["class_ids"]):
            if class_id > 0:
                mask = r["masks"].astype(np.uint8)[:, :, _idx]
                bbox = np.around(r["rois"][_idx], 1)
                bbox = [float(x) for x in bbox]

                _result = {}
                _result["image_id"] = image_id
                _result["category_id"] = id_category[class_id]
                _result["score"] = float(r["scores"][_idx])
                _mask = maskUtils.encode(np.asfortranarray(mask))
                _mask["counts"] = _mask["counts"].decode("UTF-8")
                _result["segmentation"] = _mask
                # rois are [y1, x1, y2, x2]; COCO expects [x, y, width, height]
                _result["bbox"] = [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]]
                _final_object.append(_result)
print("Writing JSON...")
with open('/content/output.json', "w") as fp:
    fp.write(json.dumps(_final_object))
submission_file = json.loads(open("/content/output.json").read())
len(submission_file)
type(submission_file)
import random
import json
import numpy as np
import argparse
import base64
import glob
import os
from PIL import Image
from pycocotools.coco import COCO
GROUND_TRUTH_ANNOTATION_PATH = "/content/data/val/annotations.json"
ground_truth_annotations = COCO(GROUND_TRUTH_ANNOTATION_PATH)
submission_file = json.loads(open("/content/output.json").read())
results = ground_truth_annotations.loadRes(submission_file)
cocoEval = COCOeval(ground_truth_annotations, results, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
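After summarize() runs, the twelve standard COCO metrics it prints are also available programmatically in cocoEval.stats (a numpy array), which is handy for logging results across experiments:
# cocoEval.stats holds the 12 metrics printed by summarize(), in order
print("AP @ IoU=0.50:0.95 :", cocoEval.stats[0])
print("AP @ IoU=0.50      :", cocoEval.stats[1])
print("AR @ IoU=0.50:0.95 :", cocoEval.stats[8])  # max 100 detections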
This, in addition to the existing Mask RCNN baseline repo, should allow you to plug and play models for easy submission and experimentation.
As an alternative to Colab, Paperspace is a great option. Their Gradient community notebooks give you free cloud GPUs and CPUs and also provide persistent storage, letting you save models and resume training after the session expires.
Regards
AICrowd Team