192 lines
7.4 KiB
Python
Executable File
192 lines
7.4 KiB
Python
Executable File
"""
|
|
Mask R-CNN
|
|
Configurations and data loading code for the synthetic Shapes dataset.
|
|
This is a duplicate of the code in the noteobook train_shapes.ipynb for easy
|
|
import into other notebooks, such as inspect_model.ipynb.
|
|
|
|
Copyright (c) 2017 Matterport, Inc.
|
|
Licensed under the MIT License (see LICENSE for details)
|
|
Written by Waleed Abdulla
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import math
|
|
import random
|
|
import numpy as np
|
|
import cv2
|
|
|
|
# Root directory of the project
|
|
ROOT_DIR = os.path.abspath("../../")
|
|
|
|
# Import Mask RCNN
|
|
sys.path.append(ROOT_DIR) # To find local version of the library
|
|
from mrcnn.config import Config
|
|
from mrcnn import utils
|
|
|
|
|
|
class ShapesConfig(Config):
|
|
"""Configuration for training on the toy shapes dataset.
|
|
Derives from the base Config class and overrides values specific
|
|
to the toy shapes dataset.
|
|
"""
|
|
# Give the configuration a recognizable name
|
|
NAME = "shapes"
|
|
|
|
# Train on 1 GPU and 8 images per GPU. We can put multiple images on each
|
|
# GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
|
|
GPU_COUNT = 1
|
|
IMAGES_PER_GPU = 8
|
|
|
|
# Number of classes (including background)
|
|
NUM_CLASSES = 1 + 3 # background + 3 shapes
|
|
|
|
# Use small images for faster training. Set the limits of the small side
|
|
# the large side, and that determines the image shape.
|
|
IMAGE_MIN_DIM = 128
|
|
IMAGE_MAX_DIM = 128
|
|
|
|
# Use smaller anchors because our image and objects are small
|
|
RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels
|
|
|
|
# Reduce training ROIs per image because the images are small and have
|
|
# few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
|
|
TRAIN_ROIS_PER_IMAGE = 32
|
|
|
|
# Use a small epoch since the data is simple
|
|
STEPS_PER_EPOCH = 100
|
|
|
|
# use small validation steps since the epoch is small
|
|
VALIDATION_STEPS = 5
|
|
|
|
|
|
class ShapesDataset(utils.Dataset):
|
|
"""Generates the shapes synthetic dataset. The dataset consists of simple
|
|
shapes (triangles, squares, circles) placed randomly on a blank surface.
|
|
The images are generated on the fly. No file access required.
|
|
"""
|
|
|
|
def load_shapes(self, count, height, width):
|
|
"""Generate the requested number of synthetic images.
|
|
count: number of images to generate.
|
|
height, width: the size of the generated images.
|
|
"""
|
|
# Add classes
|
|
self.add_class("shapes", 1, "square")
|
|
self.add_class("shapes", 2, "circle")
|
|
self.add_class("shapes", 3, "triangle")
|
|
|
|
# Add images
|
|
# Generate random specifications of images (i.e. color and
|
|
# list of shapes sizes and locations). This is more compact than
|
|
# actual images. Images are generated on the fly in load_image().
|
|
for i in range(count):
|
|
bg_color, shapes = self.random_image(height, width)
|
|
self.add_image("shapes", image_id=i, path=None,
|
|
width=width, height=height,
|
|
bg_color=bg_color, shapes=shapes)
|
|
|
|
def load_image(self, image_id):
|
|
"""Generate an image from the specs of the given image ID.
|
|
Typically this function loads the image from a file, but
|
|
in this case it generates the image on the fly from the
|
|
specs in image_info.
|
|
"""
|
|
info = self.image_info[image_id]
|
|
bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
|
|
image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
|
|
image = image * bg_color.astype(np.uint8)
|
|
for shape, color, dims in info['shapes']:
|
|
image = self.draw_shape(image, shape, dims, color)
|
|
return image
|
|
|
|
def image_reference(self, image_id):
|
|
"""Return the shapes data of the image."""
|
|
info = self.image_info[image_id]
|
|
if info["source"] == "shapes":
|
|
return info["shapes"]
|
|
else:
|
|
super(self.__class__).image_reference(self, image_id)
|
|
|
|
def load_mask(self, image_id):
|
|
"""Generate instance masks for shapes of the given image ID.
|
|
"""
|
|
info = self.image_info[image_id]
|
|
shapes = info['shapes']
|
|
count = len(shapes)
|
|
mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
|
|
for i, (shape, _, dims) in enumerate(info['shapes']):
|
|
mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
|
|
shape, dims, 1)
|
|
# Handle occlusions
|
|
occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
|
|
for i in range(count - 2, -1, -1):
|
|
mask[:, :, i] = mask[:, :, i] * occlusion
|
|
occlusion = np.logical_and(
|
|
occlusion, np.logical_not(mask[:, :, i]))
|
|
# Map class names to class IDs.
|
|
class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
|
|
return mask, class_ids.astype(np.int32)
|
|
|
|
def draw_shape(self, image, shape, dims, color):
|
|
"""Draws a shape from the given specs."""
|
|
# Get the center x, y and the size s
|
|
x, y, s = dims
|
|
if shape == 'square':
|
|
image = cv2.rectangle(image, (x - s, y - s),
|
|
(x + s, y + s), color, -1)
|
|
elif shape == "circle":
|
|
image = cv2.circle(image, (x, y), s, color, -1)
|
|
elif shape == "triangle":
|
|
points = np.array([[(x, y - s),
|
|
(x - s / math.sin(math.radians(60)), y + s),
|
|
(x + s / math.sin(math.radians(60)), y + s),
|
|
]], dtype=np.int32)
|
|
image = cv2.fillPoly(image, points, color)
|
|
return image
|
|
|
|
def random_shape(self, height, width):
|
|
"""Generates specifications of a random shape that lies within
|
|
the given height and width boundaries.
|
|
Returns a tuple of three valus:
|
|
* The shape name (square, circle, ...)
|
|
* Shape color: a tuple of 3 values, RGB.
|
|
* Shape dimensions: A tuple of values that define the shape size
|
|
and location. Differs per shape type.
|
|
"""
|
|
# Shape
|
|
shape = random.choice(["square", "circle", "triangle"])
|
|
# Color
|
|
color = tuple([random.randint(0, 255) for _ in range(3)])
|
|
# Center x, y
|
|
buffer = 20
|
|
y = random.randint(buffer, height - buffer - 1)
|
|
x = random.randint(buffer, width - buffer - 1)
|
|
# Size
|
|
s = random.randint(buffer, height // 4)
|
|
return shape, color, (x, y, s)
|
|
|
|
def random_image(self, height, width):
|
|
"""Creates random specifications of an image with multiple shapes.
|
|
Returns the background color of the image and a list of shape
|
|
specifications that can be used to draw the image.
|
|
"""
|
|
# Pick random background color
|
|
bg_color = np.array([random.randint(0, 255) for _ in range(3)])
|
|
# Generate a few random shapes and record their
|
|
# bounding boxes
|
|
shapes = []
|
|
boxes = []
|
|
N = random.randint(1, 4)
|
|
for _ in range(N):
|
|
shape, color, dims = self.random_shape(height, width)
|
|
shapes.append((shape, color, dims))
|
|
x, y, s = dims
|
|
boxes.append([y - s, x - s, y + s, x + s])
|
|
# Apply non-max suppression wit 0.3 threshold to avoid
|
|
# shapes covering each other
|
|
keep_ixs = utils.non_max_suppression(
|
|
np.array(boxes), np.arange(N), 0.3)
|
|
shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
|
|
return bg_color, shapes
|