[Fix] Debug converters #143

Merged
merged 9 commits on Apr 1, 2022
Changes from all commits
20 changes: 16 additions & 4 deletions docs/preprocess_dataset.md
@@ -677,23 +677,25 @@ h36m_p1=dict(

For data preparation of [Human3.6M](http://vision.imar.ro/human3.6m/description.php) for HMR and SPIN training, we use the [MoShed](https://mosh.is.tue.mpg.de/) data provided in [HMR](https://github.com/akanazawa/hmr) for training. However, due to license limitations, we are not allowed to redistribute the data. Even if you do not have access to these parameters, you can still generate the preprocessed h36m npz file without mosh parameters using our [converter](https://github.com/open-mmlab/mmhuman3d/tree/main/mmhuman3d/data/data_converters/h36m.py).

To do so, modify the `h36m_p1` config in [DATASET_CONFIG](https://github.com/open-mmlab/mmhuman3d/blob/main/tools/convert_datasets.py):
You will need to extract images from raw videos for training. Do note that preprocessing can take a long time if image extraction is required. To do so, modify the `h36m_p1` config in [DATASET_CONFIG](https://github.com/open-mmlab/mmhuman3d/blob/main/tools/convert_datasets.py):

Config without mosh:
```python
h36m_p1=dict(
type='H36mConverter',
modes=['train', 'valid'],
protocol=1,
extract_img=True, # this is to specify you want to extract images from videos
prefix='h36m'),
```

Config:
Config with mosh:
```python
h36m_p1=dict(
type='H36mConverter',
modes=['train', 'valid'],
protocol=1,
extract_img=True, # this is to specify you want to extract images from videos
mosh_dir='data/datasets/h36m_mosh', # supply the directory to the mosh if available
prefix='h36m'),
```
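
For a concrete sense of why `extract_img=True` makes preprocessing slow, here is a minimal, illustrative frame-extraction loop. It is only a sketch of the general approach; the converters' actual output directories and file naming may differ, and the `extract_frames` helper and its paths are placeholders, not part of mmhuman3d:

```python
import os

import cv2


def extract_frames(video_path: str, out_dir: str) -> None:
    """Dump every frame of one video as a numbered JPEG (illustrative only)."""
    os.makedirs(out_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    frame = 0
    while True:
        success, image = cap.read()
        if not success:
            break
        frame += 1
        # zero-padded names such as 000001.jpg keep frames sorted on disk
        cv2.imwrite(os.path.join(out_dir, '%06d.jpg' % frame), image)
    cap.release()
```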
@@ -872,9 +874,18 @@ mmhuman3d
doi={10.1109/3dv.2017.00064},
}
```

</details>

You will need to extract images from raw videos for training. Do note that preprocessing can take a long time if image extraction is required. To do so, modify the `mpi_inf_3dhp` config in [DATASET_CONFIG](https://github.com/open-mmlab/mmhuman3d/blob/main/tools/convert_datasets.py):

Config:
```python
mpi_inf_3dhp=dict(
type='MpiInf3dhpConverter',
modes=['train', 'test'],
extract_img=True), # this is to specify you want to extract images from videos
```
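
If you only need a single dataset, the same config dict can also be fed to the converter registry directly instead of going through `tools/convert_datasets.py`. This is a sketch under a few assumptions: the registry import path is taken from the converter modules touched in this PR, `DATA_CONVERTERS` exposes mmcv's usual `build(cfg)`, and the dataset/output paths below are placeholders:

```python
from mmhuman3d.data.data_converters.builder import DATA_CONVERTERS

mpi_inf_3dhp = dict(
    type='MpiInf3dhpConverter',
    modes=['train', 'test'],
    extract_img=True)

converter = DATA_CONVERTERS.build(mpi_inf_3dhp)
# convert_by_mode(dataset_path, out_path, mode) is the per-mode entry point
# used by the conversion tool.
converter.convert_by_mode(
    dataset_path='data/datasets/mpi_inf_3dhp',
    out_path='data/preprocessed_datasets',
    mode='train')
```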

For [MPI-INF-3DHP](http://gvv.mpi-inf.mpg.de/3dhp-dataset/), download and extract them under `$MMHUMAN3D/data/datasets`, and make them look like this:

```text
@@ -921,6 +932,7 @@ mmhuman3d
```



### MPII

<!-- [DATASET] -->
@@ -940,7 +952,7 @@ mmhuman3d

</details>

For [MPII](http://human-pose.mpi-inf.mpg.de/) data, please download from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/).
For [MPII](http://human-pose.mpi-inf.mpg.de/) data, please download images from [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/) and annotations from [here](https://github.com/princeton-vl/pose-hg-train/tree/master/data/mpii/annot?rgh-link-date=2020-07-05T04%3A14%3A02Z).
Extract them under `$MMHUMAN3D/data/datasets`, and make them look like this:

```text
107 changes: 102 additions & 5 deletions mmhuman3d/data/data_converters/agora.py
@@ -1,6 +1,7 @@
import os
from typing import List, Tuple

import cv2
import numpy as np
import pickle5 as pickle
from tqdm import tqdm
@@ -40,6 +41,101 @@ def __init__(self, modes: List = [], fit: str = 'smpl',
Use either (1280, 720) or (3840, 2160)')
self.res = res

def get_global_orient(self,
imgPath,
df,
i,
pNum,
globalOrient=None,
meanPose=False):
"""Modified from https://github.com/pixelite1201/agora_evaluation/blob/
master/agora_evaluation/projection.py specific to AGORA.

Args:
imgPath: image path
df: annotation dataframe
i: frame index
pNum: person index
globalOrient: original global orientation
meanPose: set True to use the mean pose from VPoser

Returns:
globalOrient: rotated global orientation
"""
if 'hdri' in imgPath:
camYaw = 0
camPitch = 0

elif 'cam00' in imgPath:
camYaw = 135
camPitch = 30
elif 'cam01' in imgPath:
camYaw = -135
camPitch = 30
elif 'cam02' in imgPath:
camYaw = -45
camPitch = 30
elif 'cam03' in imgPath:
camYaw = 45
camPitch = 30
elif 'ag2' in imgPath:
camYaw = 0
camPitch = 15
else:
camYaw = df.iloc[i]['camYaw']
camPitch = 0

if meanPose:
yawSMPL = 0
else:
yawSMPL = df.iloc[i]['Yaw'][pNum]

# scans have a 90deg rotation, but for mean pose from vposer there is
# no such rotation
if meanPose:
rotMat, _ = cv2.Rodrigues(
np.array([[0, (yawSMPL) / 180 * np.pi, 0]], dtype=float))
else:
rotMat, _ = cv2.Rodrigues(
np.array([[0, ((yawSMPL - 90) / 180) * np.pi, 0]],
dtype=float))

camera_rotationMatrix, _ = cv2.Rodrigues(
np.array([0, ((-camYaw) / 180) * np.pi, 0]).reshape(3, 1))
camera_rotationMatrix2, _ = cv2.Rodrigues(
np.array([camPitch / 180 * np.pi, 0, 0]).reshape(3, 1))

# flip pose
R_mod = cv2.Rodrigues(np.array([np.pi, 0, 0]))[0]
R_root = cv2.Rodrigues(globalOrient.reshape(-1))[0]
new_root = R_root.dot(R_mod)
globalOrient = cv2.Rodrigues(new_root)[0].reshape(3)

# apply camera matrices
globalOrient = self.rotate_global_orient(rotMat, globalOrient)
globalOrient = self.rotate_global_orient(camera_rotationMatrix,
globalOrient)
globalOrient = self.rotate_global_orient(camera_rotationMatrix2,
globalOrient)

return globalOrient

@staticmethod
def rotate_global_orient(rotMat, global_orient):
"""Transform global orientation given rotation matrix.

Args:
rotMat: rotation matrix
global_orient: original global orientation

Returns:
new_global_orient: transformed global orientation
"""
new_global_orient = cv2.Rodrigues(
np.dot(rotMat,
cv2.Rodrigues(global_orient.reshape(-1))[0]))[0].T[0]
return new_global_orient

def convert_by_mode(self, dataset_path: str, out_path: str,
mode: str) -> dict:
"""
@@ -132,8 +228,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
if self.fit == 'smplx':
# obtain smplx data
body_model['body_pose'].append(ann['body_pose'])
body_model['global_orient'].append(
ann['global_orient'])
global_orient = ann['global_orient']
body_model['betas'].append(
ann['betas'].reshape(-1)[:10])
body_model['transl'].append(ann['transl'])
@@ -150,14 +245,16 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
# obtain smpl data
body_model['body_pose'].append(
ann['body_pose'].cpu().detach().numpy())
body_model['global_orient'].append(
ann['root_pose'].cpu().detach().numpy())
global_orient = ann['root_pose'].cpu().detach().numpy()
body_model['betas'].append(
ann['betas'].cpu().detach().numpy().reshape(
-1)[:10])
body_model['transl'].append(
ann['translation'].cpu().detach().numpy())

global_orient = self.get_global_orient(
img_path, df, idx, pidx, global_orient.reshape(-1))

# add confidence column
keypoints2d = np.hstack(
[keypoints2d, np.ones((num_keypoints, 1))])
@@ -177,7 +274,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
keypoints3d_.append(keypoints3d)
bbox_xywh_.append(bbox_xywh)
image_path_.append(img_path)

body_model['global_orient'].append(global_orient)
meta['gender'].append(gender)
meta['age'].append(age)
meta['kid'].append(kid)
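
To make the new `get_global_orient` logic easier to follow outside the diff, here is a standalone sketch of the axis-angle composition it relies on: the same convert-to-matrix, left-multiply, convert-back pattern as `rotate_global_orient`. The numeric values are arbitrary placeholders, and the sketch deliberately omits the per-scan yaw and the root-flip steps of the full method:

```python
import cv2
import numpy as np


def rotate_global_orient(rot_mat, global_orient):
    """Apply a rotation matrix to an axis-angle global orientation."""
    return cv2.Rodrigues(
        np.dot(rot_mat, cv2.Rodrigues(global_orient.reshape(-1))[0]))[0].T[0]


global_orient = np.array([np.pi / 2, 0.0, 0.0])  # placeholder root pose (axis-angle)
cam_yaw_deg, cam_pitch_deg = 45, 30              # e.g. the 'cam03' branch above

yaw_mat, _ = cv2.Rodrigues(np.array([0.0, -cam_yaw_deg / 180 * np.pi, 0.0]))
pitch_mat, _ = cv2.Rodrigues(np.array([cam_pitch_deg / 180 * np.pi, 0.0, 0.0]))

rotated = rotate_global_orient(yaw_mat, global_orient)
rotated = rotate_global_orient(pitch_mat, rotated)
print(rotated)  # root pose after the camera yaw and pitch rotations
```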
4 changes: 2 additions & 2 deletions mmhuman3d/data/data_converters/mpi_inf_3dhp.py
@@ -128,8 +128,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
break
frame += 1
# image name
imgname = os.path.join(
imgs_path, 'frame_%06d.jpg' % frame)
imgname = os.path.join(imgs_path,
'%06d.jpg' % frame)
# save image
cv2.imwrite(imgname, image)

21 changes: 18 additions & 3 deletions mmhuman3d/data/data_converters/surreal.py
@@ -7,8 +7,15 @@
from tqdm import tqdm

from mmhuman3d.core.cameras.camera_parameters import CameraParameter
from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
from mmhuman3d.core.conventions.keypoints_mapping import (
convert_kps,
get_flip_pairs,
)
from mmhuman3d.data.data_structures.human_data import HumanData
from mmhuman3d.data.datasets.pipelines.transforms import (
_flip_keypoints,
_flip_smpl_pose,
)
from .base_converter import BaseModeConverter
from .builder import DATA_CONVERTERS

@@ -139,6 +146,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
data_path = os.path.join(dataset_path,
'{}/run{}'.format(mode, self.run))

flip_pairs = get_flip_pairs('smpl')

# go through all the .pkl files
for seq_name in tqdm(os.listdir(data_path)):
seq_path = os.path.join(data_path, seq_name)
@@ -205,6 +214,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
bbox_xywh = self._xyxy2xywh(bbox_xyxy)

# Left-right flipping is required to correct the original
# keypoints and poses obtained from raw annotations
bpose = pose[:, idx]
bpose = _flip_smpl_pose(bpose)
keypoints3d = _flip_keypoints(keypoints3d, flip_pairs)

# add confidence column
keypoints2d = np.hstack([keypoints2d, np.ones((24, 1))])
keypoints3d = np.hstack([keypoints3d, np.ones([24, 1])])
@@ -218,8 +233,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
keypoints2d_.append(keypoints2d)
keypoints3d_.append(keypoints3d)
bbox_xywh_.append(bbox_xywh)
smpl['body_pose'].append(pose[3:, idx].reshape((23, 3)))
smpl['global_orient'].append(pose[:3, idx])
smpl['body_pose'].append(bpose[3:, ].reshape((23, 3)))
smpl['global_orient'].append(bpose[:3, ])
smpl['betas'].append(beta[:, idx])
cam_param_.append(parameter_dict)

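
The SURREAL fix above relies on mmhuman3d's own `_flip_smpl_pose` and `_flip_keypoints` helpers together with `get_flip_pairs('smpl')`. As a rough, generic illustration of what left-right flipping an SMPL pose involves — not the library's exact implementation, and the joint pairs below assume the standard 24-joint SMPL ordering:

```python
import numpy as np

# left/right joint index pairs under the standard SMPL joint ordering (assumed)
SMPL_FLIP_PAIRS = [(1, 2), (4, 5), (7, 8), (10, 11), (13, 14),
                   (16, 17), (18, 19), (20, 21), (22, 23)]


def flip_smpl_pose_demo(pose72: np.ndarray) -> np.ndarray:
    """Mirror a (72,) axis-angle SMPL pose left/right (illustrative only)."""
    pose = pose72.reshape(24, 3).copy()
    for left, right in SMPL_FLIP_PAIRS:
        pose[[left, right]] = pose[[right, left]]
    # mirroring about the x-axis negates the y and z axis-angle components
    pose[:, 1:] *= -1
    return pose.reshape(-1)
```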
2 changes: 1 addition & 1 deletion tools/convert_datasets.py
@@ -52,7 +52,7 @@
h36m_spin=dict(
type='H36mSpinConverter',
modes=['train'],
mosh_dir='data/datasets/h36m_extras/mosh_data',
mosh_dir='data/datasets/h36m_mosh',
prefix='h36m'),
vibe=dict(
type='VibeConverter',