Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipeline for paperdetector #231

Draft
wants to merge 17 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ docs/theme/fonts/*
docs/theme/img/*

install.sh

## Conda env ##
env/
2 changes: 1 addition & 1 deletion paz/backend/image/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def crop_image(image, crop_box):

# Arguments
image: Numpy array.
crop_box: List of four ints.
crop_box: List of four ints. (x_min, y_min, x_max, y_max)

# Returns
Numpy array.
Expand Down
59 changes: 59 additions & 0 deletions paz/backend/image/opencv_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,65 @@
BILINEAR = cv2.INTER_LINEAR


def pad_image(image, size, color=(0, 0, 0)):
    """Pad an image with a constant color so it matches ``size``.

    The original image is centered and the surrounding border is filled
    with ``color``. *Normally only black (zero-padding) is needed.*

    # Arguments
        image: Numpy array.
        size: List of two ints ``(width, height)``. **Must be greater than
            or equal to the original image size in both dimensions.**
        color: Sequence of three ints for the RGB border color.

    # Returns
        Numpy array.

    # Raises
        ValueError: If ``image`` is not a numpy array, or ``size`` is
            smaller than the image in either dimension.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError(
            'Received image is not of type numpy array', type(image))
    if size[0] < image.shape[1] or size[1] < image.shape[0]:
        raise ValueError(
            'size for padding needs to be bigger than original image size')

    delta_w = size[0] - image.shape[1]
    delta_h = size[1] - image.shape[0]
    # Split padding as evenly as possible between opposite sides; the odd
    # extra pixel (if any) goes to bottom/right.
    top = delta_h // 2
    bottom = delta_h - top
    left = delta_w // 2
    right = delta_w - left
    return cv2.copyMakeBorder(
        image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def resize_with_padding(image, size, color=(0, 0, 0), method=BILINEAR):
    """Resize an image while keeping its aspect ratio, padding if needed.

    The image is scaled so it fits inside ``size`` without distortion,
    then padded with ``color`` to exactly ``size``. Adapted from:
    https://jdhao.github.io/2017/11/06/resize-image-to-square-with-padding/#using-opencv

    # Arguments
        image: Numpy array.
        size: List of two ints ``(width, height)``.
        color: Sequence of three ints for the RGB padding color.
        method: Flag indicating interpolation method i.e.
            ``paz.backend.image.CUBIC``.

    # Returns
        Numpy array of the requested ``size``.

    # Raises
        ValueError: If ``image`` is not a numpy array.
    """
    if not isinstance(image, np.ndarray):
        raise ValueError(
            'Received image is not of type numpy array', type(image))
    # image.shape is in (height, width) order (numpy convention).
    old_height, old_width = image.shape[:2]
    target_width, target_height = size
    # Scale by the tighter dimension so the whole image fits inside size.
    ratio = min(target_width / old_width, target_height / old_height)
    new_size = (int(ratio * old_width), int(ratio * old_height))
    # Bug fix: forward the requested interpolation method (it was ignored).
    image = resize_image(image, new_size, method)
    return pad_image(image, size=size, color=color)


def resize_image(image, size, method=BILINEAR):
"""Resize image.

Expand Down
49 changes: 23 additions & 26 deletions paz/backend/keypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def build_cube_points3D(width, height, depth):
point_5, point_6, point_7, point_8])


def normalize_keypoints2D(points2D, height, width, norm_range=(-1, 1)):
    """Transform points2D in image coordinates to normalized coordinates.

    [U, V] -> [norm_range[0], norm_range[1]]. UV have maximum values of
    [W, H] respectively.

    # Arguments
        points2D: Numpy array of shape (num_keypoints, 2).
        height: Int. Height of the image.
        width: Int. Width of the image.
        norm_range: Tuple of two floats. ``(-1, 1)`` means keypoints are
            normalized to the range [-1, 1].

    # Returns
        Numpy array of shape (num_keypoints, 2).
    """
    length = norm_range[1] - norm_range[0]
    normalized = np.empty(shape=points2D.shape)
    # x maps against width, y against height.
    normalized[:, 0] = (points2D[:, 0] / width) * length + norm_range[0]
    normalized[:, 1] = (points2D[:, 1] / height) * length + norm_range[0]
    return normalized


def denormalize_keypoints2D(points2D, height, width, norm_range=(-1, 1)):
    """Transform normalized points2D to image UV coordinates.

    [norm_range[0], norm_range[1]] -> [U, V]. UV have maximum values of
    [W, H] respectively. Inverse of ``normalize_keypoints2D``.

    # Arguments
        points2D: Numpy array of shape (num_keypoints, 2).
        height: Int. Height of the image.
        width: Int. Width of the image.
        norm_range: Tuple of two floats. ``(-1, 1)`` means keypoints are
            assumed to be in the range [-1, 1].

    # Returns
        Numpy array of shape (num_keypoints, 2).
    """
    length = norm_range[1] - norm_range[0]
    denormalized = np.empty(shape=points2D.shape)
    # u maps back to width, v back to height.
    denormalized[:, 0] = ((points2D[:, 0] - norm_range[0]) * width) / length
    denormalized[:, 1] = ((points2D[:, 1] - norm_range[0]) * height) / length
    return denormalized


def cascade_classifier(path):
Expand Down
1 change: 1 addition & 0 deletions paz/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from .segmentation import UNET_VGG19
from .segmentation import UNET_RESNET50
from .pose_estimation import HigherHRNet
from .detection.paper_detection import PaperDetection, CornerRefiner
3 changes: 3 additions & 0 deletions paz/models/detection/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from .ssd300 import SSD300
from .ssd512 import SSD512
from .haar_cascade import HaarCascadeDetector
from .paper_detection import PaperDetection
from .paper_detection import CornerRefiner

154 changes: 154 additions & 0 deletions paz/models/detection/paper_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
'''
This module provides the paper detection models
'''

# System imports
from pathlib import Path
import zipfile

# 3rd party imports
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.utils import get_file


# local imports

# end file header
__author__ = 'Adrian Lubitz'

CACHE_SUBDIR = Path('paz', 'models')

DETECTION_MODEL_URL = 'https://github.com/adrianlubitz/open_source_models/releases/download/v0.0.0/corner_detection.zip'
DETECTION_MODEL_PATH = Path(CACHE_SUBDIR, 'corner_detection')
REFINER_MODEL_URL = 'https://github.com/adrianlubitz/open_source_models/releases/download/v0.0.0/corner_refiner.zip'
REFINER_MODEL_PATH = Path(CACHE_SUBDIR, 'corner_refiner')


def PaperDetection():
    """Download (if not cached) and load the pretrained paper detection model.

    # Returns
        A Keras model loaded from the cached archive.
    """
    path = get_file(origin=DETECTION_MODEL_URL, extract=True, cache_subdir=CACHE_SUBDIR)
    # get_file returns the path of the downloaded zip; stripping the suffix
    # presumably yields the directory the archive was extracted to — confirm.
    model = load_model(Path(path).with_suffix(''))
    return model


def CornerRefiner():
    """Download (if not cached) and load the pretrained corner refiner model.

    # Returns
        A Keras model loaded from the cached archive.
    """
    path = get_file(origin=REFINER_MODEL_URL, extract=True, cache_subdir=CACHE_SUBDIR)
    # get_file returns the path of the downloaded zip; stripping the suffix
    # presumably yields the directory the archive was extracted to — confirm.
    model = load_model(Path(path).with_suffix(''))
    return model

def build_model(model_name, **kwargs):
    """Build the model corresponding to ``model_name``.

    # Arguments
        model_name: Name of the model; one of ``'corner_detection'`` or
            ``'corner_refiner'`` (case-insensitive).
        kwargs: Forwarded to the underlying builder function.

    # Returns
        A compiled Keras model.

    # Raises
        ValueError: If ``model_name`` is not a known model name.
    """
    name = model_name.lower()
    if name == 'corner_detection':
        return build_get_corners(**kwargs)
    if name == 'corner_refiner':
        return build_refine_corner(**kwargs)
    # Fail loudly instead of silently returning None on a typo.
    raise ValueError(
        "Unknown model_name: {!r}. Expected 'corner_detection' or "
        "'corner_refiner'.".format(model_name))


def build_get_corners():  # TODO: support arguments for shape and maybe others
    """Rebuild the corner detection model from
    https://khurramjaved.com/RecursiveCNN.pdf

    Maps a 32x32 RGB input to 8 sigmoid outputs — presumably the four
    corner (x, y) coordinates normalized to [0, 1], per the referenced
    paper; confirm against the training pipeline.

    # Returns
        A compiled Keras Sequential model named ``'corner_detection'``.
    """
    # TODO: a more modern approach may be SeparableConv2D:
    # https://keras.io/examples/vision/keypoint_detection/
    # Pass the name at construction instead of poking the private
    # ``model._name`` attribute afterwards.
    model = tf.keras.Sequential(name='corner_detection')
    model.add(tf.keras.Input(shape=(32, 32, 3)))
    # Conv 1
    model.add(layers.Conv2D(kernel_size=(5, 5), activation='relu', filters=20,
                            padding='same'))  # TODO: maybe use 3D filter instead of 2D?
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 2
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 3
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=60, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=60, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 4
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=80, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 5
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=100, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 6
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=100, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    # Fully connected head
    # TODO: paper stated 0.8 dropout rate
    model.add(layers.Dense(500, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(500, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Sigmoid output: coordinate regression bounded to [0, 1].
    model.add(layers.Dense(8, activation='sigmoid'))
    # Metrics from https://keras.io/api/metrics/regression_metrics/
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer='adam',
                  metrics=['mse', 'mae'])
    return model


def build_refine_corner():  # TODO: support arguments for shape and maybe others
    """Rebuild the corner refiner model from
    https://khurramjaved.com/RecursiveCNN.pdf

    Maps a 32x32 RGB crop to 2 sigmoid outputs — presumably a single
    refined (x, y) corner position normalized to [0, 1], per the
    referenced paper; confirm against the training pipeline.

    # Returns
        A compiled Keras Sequential model named ``'corner_refiner'``.
    """
    # TODO: a more modern approach may be SeparableConv2D:
    # https://keras.io/examples/vision/keypoint_detection/
    # Pass the name at construction instead of poking the private
    # ``model._name`` attribute afterwards.
    model = tf.keras.Sequential(name='corner_refiner')
    model.add(tf.keras.Input(shape=(32, 32, 3)))
    # Conv 1
    model.add(layers.Conv2D(kernel_size=(5, 5), activation='relu', filters=10,
                            padding='same'))  # TODO: maybe use 3D filter instead of 2D?
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 2
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=10, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 3
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=20, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 4
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=30, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPool2D(pool_size=(2, 2)))
    # Conv 5
    model.add(layers.Conv2D(kernel_size=(5, 5),
                            activation='relu', filters=40, padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    # Fully connected head
    # TODO: paper stated 0.8 dropout rate
    model.add(layers.Dense(300, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Sigmoid output: coordinate regression bounded to [0, 1].
    model.add(layers.Dense(2, activation='sigmoid'))
    # Metrics from https://keras.io/api/metrics/regression_metrics/
    model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer='adam',
                  metrics=['mse', 'mae'])
    return model
1 change: 1 addition & 0 deletions paz/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .detection import DetectFaceKeypointNet2D32
from .detection import SSD512HandDetection
from .detection import SSD512MinimalHandPose
from .detection import DetectPaper

from .keypoints import KeypointNetSharedAugmentation
from .keypoints import KeypointNetInference
Expand Down
Loading