Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline for paperdetector #231

Draft
wants to merge 17 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ docs/theme/fonts/*
docs/theme/img/*

install.sh

## Conda env ##
env/
2 changes: 1 addition & 1 deletion paz/backend/image/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def crop_image(image, crop_box):

# Arguments
image: Numpy array.
crop_box: List of four ints.
crop_box: List of four ints. (x_min, y_min, x_max, y_max)

# Returns
Numpy array.
Expand Down
59 changes: 59 additions & 0 deletions paz/backend/image/opencv_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,65 @@
BILINEAR = cv2.INTER_LINEAR


def pad_image(image, size, color=(0, 0, 0)):
    """Pad an image with a constant color so it matches ``size``.

    The original image is centered and the surrounding border is filled
    with ``color``. *Normally only black (zero-padding) is needed.*

    # Arguments
        image: Numpy array.
        size: List of two ints ``(width, height)``. **Must be greater than
            or equal to the original image size in both dimensions.**
        color: Sequence of three ints for the RGB border color.

    # Returns
        Numpy array.

    # Raises
        ValueError: If ``image`` is not a numpy array, or ``size`` is
            smaller than the image in either dimension.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError(
            'Received image is not of type numpy array', type(image))
    if size[0] < image.shape[1] or size[1] < image.shape[0]:
        raise ValueError(
            'size for padding needs to be bigger than original image size')

    delta_w = size[0] - image.shape[1]
    delta_h = size[1] - image.shape[0]
    # Split padding as evenly as possible between opposite sides; the odd
    # extra pixel (if any) goes to bottom/right.
    top = delta_h // 2
    bottom = delta_h - top
    left = delta_w // 2
    right = delta_w - left
    return cv2.copyMakeBorder(
        image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def resize_with_padding(image, size, color=(0, 0, 0), method=BILINEAR):
    """Resize an image while keeping its aspect ratio, padding if needed.

    The image is scaled so it fits inside ``size`` without distortion,
    then padded with ``color`` to exactly ``size``. Adapted from:
    https://jdhao.github.io/2017/11/06/resize-image-to-square-with-padding/#using-opencv

    # Arguments
        image: Numpy array.
        size: List of two ints ``(width, height)``.
        color: Sequence of three ints for the RGB padding color.
        method: Flag indicating interpolation method i.e.
            ``paz.backend.image.CUBIC``.

    # Returns
        Numpy array of the requested ``size``.

    # Raises
        ValueError: If ``image`` is not a numpy array.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError(
            'Received image is not of type numpy array', type(image))
    # image.shape is in (height, width) order (numpy convention).
    old_height, old_width = image.shape[:2]
    target_width, target_height = size
    # Scale by the tighter dimension so the whole image fits inside size.
    ratio = min(target_width / old_width, target_height / old_height)
    new_size = (int(ratio * old_width), int(ratio * old_height))
    # Bug fix: forward the requested interpolation method (it was ignored).
    image = resize_image(image, new_size, method)
    return pad_image(image, size=size, color=color)


def resize_image(image, size, method=BILINEAR):
"""Resize image.

Expand Down
49 changes: 23 additions & 26 deletions paz/backend/keypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_cube_points3D(width, height, depth):
point_5, point_6, point_7, point_8])


def normalize_keypoints2D(points2D, height, width, norm_range=(-1, 1)):
    """Transform points2D in image coordinates to normalized coordinates.

    [U, V] -> [norm_range[0], norm_range[1]]. UV have maximum values of
    [W, H] respectively.

    # Arguments
        points2D: Numpy array of shape (num_keypoints, 2).
        height: Int. Height of the image.
        width: Int. Width of the image.
        norm_range: Tuple of two floats. ``(-1, 1)`` means keypoints are
            normalized to the range [-1, 1].

    # Returns
        Numpy array of shape (num_keypoints, 2).
    """
    length = norm_range[1] - norm_range[0]
    normalized = np.empty(shape=points2D.shape)
    # x maps against width, y against height.
    normalized[:, 0] = (points2D[:, 0] / width) * length + norm_range[0]
    normalized[:, 1] = (points2D[:, 1] / height) * length + norm_range[0]
    return normalized


def denormalize_keypoints2D(points2D, height, width, norm_range=(-1, 1)):
    """Transform normalized points2D to image UV coordinates.

    [norm_range[0], norm_range[1]] -> [U, V]. UV have maximum values of
    [W, H] respectively. Inverse of ``normalize_keypoints2D``.

    # Arguments
        points2D: Numpy array of shape (num_keypoints, 2).
        height: Int. Height of the image.
        width: Int. Width of the image.
        norm_range: Tuple of two floats. ``(-1, 1)`` means keypoints are
            assumed to be in the range [-1, 1].

    # Returns
        Numpy array of shape (num_keypoints, 2).
    """
    length = norm_range[1] - norm_range[0]
    denormalized = np.empty(shape=points2D.shape)
    # u maps back to width, v back to height.
    denormalized[:, 0] = ((points2D[:, 0] - norm_range[0]) * width) / length
    denormalized[:, 1] = ((points2D[:, 1] - norm_range[0]) * height) / length
    return denormalized


def cascade_classifier(path):
Expand Down
1 change: 1 addition & 0 deletions paz/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from .segmentation import UNET_VGG19
from .segmentation import UNET_RESNET50
from .pose_estimation import HigherHRNet
from .detection.paper_detection import PaperDetection, CornerRefiner
3 changes: 3 additions & 0 deletions paz/models/detection/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .ssd300 import SSD300
from .ssd512 import SSD512
from .haar_cascade import HaarCascadeDetector
from .paper_detection import PaperDetection
from .paper_detection import CornerRefiner

154 changes: 154 additions & 0 deletions paz/models/detection/paper_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
'''
This module provides the paper detection models
'''

# System imports
from pathlib import Path
import zipfile

# 3rd party imports
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.utils import get_file


# local imports

# end file header
__author__ = 'Adrian Lubitz'

CACHE_SUBDIR = Path('paz', 'models')

DETECTION_MODEL_URL = 'https://github.com/adrianlubitz/open_source_models/releases/download/v0.0.0/corner_detection.zip'
DETECTION_MODEL_PATH = Path(CACHE_SUBDIR, 'corner_detection')
REFINER_MODEL_URL = 'https://github.com/adrianlubitz/open_source_models/releases/download/v0.0.0/corner_refiner.zip'
REFINER_MODEL_PATH = Path(CACHE_SUBDIR, 'corner_refiner')


def PaperDetection():
    """Download (if not cached) and load the pretrained paper detection model.

    # Returns
        A Keras model loaded from the cached archive.
    """
    path = get_file(origin=DETECTION_MODEL_URL, extract=True, cache_subdir=CACHE_SUBDIR)
    # get_file returns the path of the downloaded zip; stripping the suffix
    # presumably yields the directory the archive was extracted to — confirm.
    model = load_model(Path(path).with_suffix(''))
    return model


def CornerRefiner():
    """Download (if not cached) and load the pretrained corner refiner model.

    # Returns
        A Keras model loaded from the cached archive.
    """
    path = get_file(origin=REFINER_MODEL_URL, extract=True, cache_subdir=CACHE_SUBDIR)
    # get_file returns the path of the downloaded zip; stripping the suffix
    # presumably yields the directory the archive was extracted to — confirm.
    model = load_model(Path(path).with_suffix(''))
    return model

def build_model(model_name, **kwargs):
    """Build the model corresponding to ``model_name``.

    # Arguments
        model_name: Name of the model; one of ``'corner_detection'`` or
            ``'corner_refiner'`` (case-insensitive).
        kwargs: Forwarded to the underlying builder function.

    # Returns
        A compiled Keras model.

    # Raises
        ValueError: If ``model_name`` is not a known model name.
    """
    name = model_name.lower()
    if name == 'corner_detection':
        return build_get_corners(**kwargs)
    if name == 'corner_refiner':
        return build_refine_corner(**kwargs)
    # Fail loudly instead of silently returning None on a typo.
    raise ValueError(
        "Unknown model_name: {!r}. Expected 'corner_detection' or "
        "'corner_refiner'.".format(model_name))


def build_get_corners():  # TODO: support arguments for shape and maybe others
    """Rebuild the corner detection model from
    https://khurramjaved.com/RecursiveCNN.pdf

    Maps a 32x32 RGB input to 8 sigmoid outputs — presumably the four
    corner (x, y) coordinates normalized to [0, 1], per the referenced
    paper; confirm against the training pipeline.

    # Returns
        A compiled Keras Sequential model named ``'corner_detection'``.
    """
    # TODO: a more modern approach may be SeparableConv2D:
    # https://keras.io/examples/vision/keypoint_detection/
    # Pass the name at construction instead of poking the private
    # ``model._name`` attribute afterwards.
    model = tf.keras.Sequential(name='corner_detection')
    model.add(tf.keras.Input(shape=(32, 32, 3)))
    # Conv 1
    model.add(layers.Conv2D(kernel_size=(5, 5), activation='relu', filters=20,
                            padding='same'))  # TODO: maybe use 3D filter instead of 2D?
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 2
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 3
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=60, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=60, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 4
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=80, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 5
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=100, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 6
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=100, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    # Fully connected head
    # TODO: paper stated 0.8 dropout rate
    model.add(layers.Dense(500, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(500, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Sigmoid output: coordinate regression bounded to [0, 1].
    model.add(layers.Dense(8, activation='sigmoid'))
    # Metrics from https://keras.io/api/metrics/regression_metrics/
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer='adam',
                  metrics=['mse', 'mae'])
    return model


def build_refine_corner():  # TODO: support arguments for shape and maybe others
    """Rebuild the corner refiner model from
    https://khurramjaved.com/RecursiveCNN.pdf

    Maps a 32x32 RGB crop to 2 sigmoid outputs — presumably a single
    refined (x, y) corner position normalized to [0, 1], per the
    referenced paper; confirm against the training pipeline.

    # Returns
        A compiled Keras Sequential model named ``'corner_refiner'``.
    """
    # TODO: a more modern approach may be SeparableConv2D:
    # https://keras.io/examples/vision/keypoint_detection/
    # Pass the name at construction instead of poking the private
    # ``model._name`` attribute afterwards.
    model = tf.keras.Sequential(name='corner_refiner')
    model.add(tf.keras.Input(shape=(32, 32, 3)))
    # Conv 1
    model.add(layers.Conv2D(kernel_size=(5, 5), activation='relu', filters=10,
                            padding='same'))  # TODO: maybe use 3D filter instead of 2D?
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 2
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=10, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 3
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=20, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 4
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=30, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 5
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    # Fully connected head
    # TODO: paper stated 0.8 dropout rate
    model.add(layers.Dense(300, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Sigmoid output: coordinate regression bounded to [0, 1].
    model.add(layers.Dense(2, activation='sigmoid'))
    # Metrics from https://keras.io/api/metrics/regression_metrics/
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer='adam',
                  metrics=['mse', 'mae'])
    return model
1 change: 1 addition & 0 deletions paz/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .detection import DetectFaceKeypointNet2D32
from .detection import SSD512HandDetection
from .detection import SSD512MinimalHandPose
from .detection import DetectPaper

from .keypoints import KeypointNetSharedAugmentation
from .keypoints import KeypointNetInference
Expand Down
Loading