add visualization of keypoint localization

pengsida · pengsida · commit e3d0b68bdb42 · 2019-05-06T21:52:04.000+08:00
diff --git a/README.md b/README.md
@@ -95,6 +95,10 @@ If setup correctly, the output will look like
 
 ![cat](./assets/cat.png)
 
+### Visualization of the voting procedure
+
+We add a Jupyter notebook, [visualization.ipynb](./visualization.ipynb), that walks through the keypoint detection pipeline of PVNet to make our paper easier to understand. Thanks to Kudlur, M for the suggestion.
+
 ## Training and testing
 
 ### Training on the LINEMOD
diff --git a/lib/ransac_voting_gpu_layer/ransac_voting_gpu.py b/lib/ransac_voting_gpu_layer/ransac_voting_gpu.py
@@ -980,6 +980,60 @@ def ransac_motion_voting(mask, vertex):
 
     return torch.cat(pts,0)
 
+def generate_hypothesis(mask, vertex, round_hyp_num, inlier_thresh=0.999, confidence=0.99, max_iter=20,
+                           min_num=5, max_num=30000):
+    '''
+    :param mask:      [b,h,w]
+    :param vertex:    [b,h,w,vn,2]
+    :param round_hyp_num:
+    :param inlier_thresh:
+    :return: [b,vn,2]
+    '''
+    b, h, w, vn, _ = vertex.shape
+    batch_hyp_pts = []
+    batch_hyp_counts = []
+    for bi in range(b):
+        hyp_num = 0
+        cur_mask = (mask[bi]).byte()
+        foreground_num = torch.sum(cur_mask)
+
+        # if too few points, just skip it
+        if foreground_num < min_num:
+            win_pts = torch.zeros([1, vn, 2], dtype=torch.float32, device=mask.device)
+            batch_win_pts.append(win_pts)  # [1,vn,2]
+            continue
+
+        # if too many inliers, we randomly down sample it
+        if foreground_num > max_num:
+            selection = torch.zeros(cur_mask.shape, dtype=torch.float32, device=mask.device).uniform_(0, 1)
+            selected_mask = (selection < (max_num / foreground_num.float()))
+            cur_mask *= selected_mask
+
+        coords = torch.nonzero(cur_mask).float()  # [tn,2]
+        coords = coords[:, [1, 0]]
+        direct = vertex[bi].masked_select(torch.unsqueeze(torch.unsqueeze(cur_mask, 2), 3))  # [tn,vn,2]
+        direct = direct.view([coords.shape[0], vn, 2])
+        tn = coords.shape[0]
+        idxs = torch.zeros([round_hyp_num, vn, 2], dtype=torch.int32, device=mask.device).random_(0, direct.shape[0])
+        all_win_ratio = torch.zeros([vn], dtype=torch.float32, device=mask.device)
+        all_win_pts = torch.zeros([vn, 2], dtype=torch.float32, device=mask.device)
+
+        # generate hypothesis
+        cur_hyp_pts = ransac_voting.generate_hypothesis(direct, coords, idxs)  # [hn,vn,2]
+
+        # voting for hypothesis
+        cur_inlier = torch.zeros([round_hyp_num, vn, tn], dtype=torch.uint8, device=mask.device)
+        ransac_voting.voting_for_hypothesis(direct, coords, cur_hyp_pts, cur_inlier, inlier_thresh)  # [hn,vn,tn]
+
+        # find max
+        cur_inlier_counts = torch.sum(cur_inlier, 2)                   # [hn,vn]
+
+        batch_hyp_pts.append(cur_hyp_pts)
+        batch_hyp_counts.append(cur_inlier_counts)
+
+    return torch.stack(batch_hyp_pts), torch.stack(batch_hyp_counts)
+
+
 
 if __name__=="__main__":
     from lib.datasets.linemod_dataset import LineModDatasetRealAug,VotingType
diff --git a/lib/utils/draw_utils.py b/lib/utils/draw_utils.py
@@ -183,25 +183,26 @@ def visualize_voting_ellipse(rgb,mean,var,target,save=False, save_fn=None):
     :return:
     '''
     b,vn,_=mean.shape
+    yellow=np.array([1.0,0.0,0.0])
+    red=np.asarray([1.0,1.0,0.0])
+    num=5
     for bi in range(b):
-        _, ax = plt.subplots(1)
-
         for vi in range(vn):
+            _, ax = plt.subplots(1, figsize=(10, 8))
             cov=var[bi,vi]
             w,v=np.linalg.eig(cov)
-            w*=50
-            elp=patches.Ellipse(mean[bi,vi],w[0],w[1],np.arctan2(v[1,0],v[0,0])/np.pi*180,fill=False)
-            ax.add_patch(elp)
-
-        ax.plot(target[bi,:,0],target[bi,:,1],'*')
-        ax.scatter(mean[bi,:,0],mean[bi,:,1],c=np.arange(vn))
-        ax.imshow(rgb[bi])
-        if save:
-            plt.savefig(save_fn.format(bi))
-        else:
-            plt.show()
-        plt.close()
+            for k in range(num):
+                size=w*k*3
+                elp = patches.Ellipse(mean[bi, vi], size[0], size[1], np.arctan2(v[1, 0], v[0, 0]) / np.pi * 180, fill=False, color=yellow/num*(num-k)+red/num*k)
+                ax.add_patch(elp)
 
+            ax.scatter(mean[bi,vi,0],mean[bi,vi,1], marker='*', c=[yellow], s=8)
+            ax.imshow(rgb[bi])
+            if save:
+                plt.savefig(save_fn.format(bi))
+            else:
+                plt.show()
+            plt.close()
 
 
 
diff --git a/tools/demo.py b/tools/demo.py
@@ -3,7 +3,6 @@
 sys.path.append('.')
 sys.path.append('..')
 from lib.networks.model_repository import *
-from lib.utils.arg_utils import args
 from lib.utils.net_utils import smooth_l1_loss, load_model, compute_precision_recall
 import torch
 from lib.ransac_voting_gpu_layer.ransac_voting_gpu import ransac_voting_layer_v3
@@ -22,9 +21,9 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-with open(args.cfg_file, 'r') as f:
+with open('configs/linemod_train.json', 'r') as f:
     train_cfg = json.load(f)
-train_cfg['model_name'] = '{}_{}'.format(args.linemod_cls, train_cfg['model_name'])
+train_cfg['model_name'] = '{}_{}'.format('cat', train_cfg['model_name'])
 
 vote_num = 9
 
@@ -104,18 +103,74 @@ def read_data():
     return data, points_3d, bb8_3d
 
 
+def visualize_mask(mask):  # display mask[0], the first mask of the batch, with matplotlib
+    plt.imshow(mask[0])
+    plt.show()
+
+
+def visualize_vertex(vertex, vertex_weights):
+    vertex = vertex * vertex_weights
+    for i in range(9):
+        _, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 8))
+        ax1.imshow(vertex[0, 2*i])
+        ax2.imshow(vertex[0, 2*i+1])
+        plt.show()
+
+
+def visualize_hypothesis(image, seg_pred, vertex_pred, corner_target):
+    from lib.ransac_voting_gpu_layer.ransac_voting_gpu import generate_hypothesis
+
+    vertex_pred = vertex_pred.permute(0, 2, 3, 1)
+    b, h, w, vn_2 = vertex_pred.shape
+    vertex_pred = vertex_pred.view(b, h, w, vn_2 // 2, 2)
+    mask = torch.argmax(seg_pred, 1)
+    hyp, hyp_counts = generate_hypothesis(mask, vertex_pred, 1024, inlier_thresh=0.99)
+
+    image = imagenet_to_uint8(image.detach().cpu().numpy())
+    hyp = hyp.detach().cpu().numpy()
+    hyp_counts = hyp_counts.detach().cpu().numpy()
+
+    from lib.utils.draw_utils import visualize_hypothesis
+    visualize_hypothesis(image, hyp, hyp_counts, corner_target)
+
+
+def visualize_voting_ellipse(image, seg_pred, vertex_pred, corner_target):
+    from lib.ransac_voting_gpu_layer.ransac_voting_gpu import estimate_voting_distribution_with_mean
+
+    vertex_pred = vertex_pred.permute(0, 2, 3, 1)
+    b, h, w, vn_2 = vertex_pred.shape
+    vertex_pred = vertex_pred.view(b, h, w, vn_2//2, 2)
+    mask = torch.argmax(seg_pred, 1)
+    mean = ransac_voting_layer_v3(mask, vertex_pred, 512, inlier_thresh=0.99)
+    mean, var = estimate_voting_distribution_with_mean(mask, vertex_pred, mean)
+
+    image = imagenet_to_uint8(image.detach().cpu().numpy())
+    mean = mean.detach().cpu().numpy()
+    var = var.detach().cpu().numpy()
+    corner_target = corner_target.detach().cpu().numpy()
+
+    from lib.utils.draw_utils import visualize_voting_ellipse
+    visualize_voting_ellipse(image, mean, var, corner_target)
+
+
+
 def demo():
     net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
     net = NetWrapper(net).cuda()
     net = DataParallel(net)
 
     optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
     model_dir = os.path.join(cfg.MODEL_DIR, "cat_demo")
-    load_model(net.module.net, optimizer, model_dir, args.load_epoch)
+    load_model(net.module.net, optimizer, model_dir, -1)
     data, points_3d, bb8_3d = read_data()
     image, mask, vertex, vertex_weights, pose, corner_target = [d.unsqueeze(0).cuda() for d in data]
     seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(image, mask, vertex, vertex_weights)
 
+    # visualize_mask(mask)
+    # visualize_vertex(vertex, vertex_weights)
+    # visualize_hypothesis(image, seg_pred, vertex_pred, corner_target)
+    # visualize_voting_ellipse(image, seg_pred, vertex_pred, corner_target)
+
     eval_net = DataParallel(EvalWrapper().cuda())
     corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
     camera_matrix = np.array([[572.4114, 0., 325.2611],
diff --git a/visualization.ipynb b/visualization.ipynb